import html.parser
import urllib.request

from bs4 import BeautifulSoup


def _get_html(url):
    """Fetch *url* and return the response body decoded to ``str``.

    The charset declared in the response's Content-Type header is used
    when present; otherwise (or when Python does not know the declared
    charset) the body is decoded as UTF-8.

    Raises:
        urllib.error.URLError: if the request fails (raised by ``urlopen``).
        UnicodeDecodeError: if the body is not valid in the chosen charset.
    """
    with urllib.request.urlopen(url) as response:
        raw = response.read()
        # ``headers`` is an email.message.Message; returns None when the
        # server did not declare a charset.
        charset = response.headers.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset)
    except LookupError:
        # Declared charset is not a codec Python knows; fall back to the
        # previous hard-coded behaviour.
        return raw.decode("utf-8")


def extract_content(html):
    """Return the child nodes of the first ``<div class="lyricbox">``.

    Args:
        html: HTML markup to parse.

    Returns:
        The ``contents`` list of the first matching ``div``.

    Raises:
        IndexError: if the document contains no ``div`` with class
            ``lyricbox``.
    """
    soup = BeautifulSoup(html, "html.parser")
    lyricbox = soup.find("div", class_="lyricbox")
    if lyricbox is None:
        # Keep IndexError (what ``find_all(...)[0]`` used to raise) so
        # existing callers' handlers still work, but say what is missing.
        raise IndexError('no <div class="lyricbox"> element found')
    return lyricbox.contents


def clean_content(contents):
    """Flatten an iterable of parsed nodes into plain text.

    Nodes equal to ``'\\n'`` and ``script`` elements are dropped, ``br``
    elements become newlines, and every other node contributes its
    ``.text`` (or ``str(node)`` when it has no ``.text`` attribute).

    Args:
        contents: iterable of nodes, e.g. the result of
            ``extract_content``.

    Returns:
        The concatenated text as a single string.
    """
    parts = []
    for node in contents:
        if node == '\n':
            continue
        # Nodes without a ``name`` attribute are treated as unnamed
        # rather than raising AttributeError.
        tag_name = getattr(node, "name", None)
        if tag_name == 'script':
            continue
        if tag_name == 'br':
            parts.append("\n")
            continue
        try:
            text = node.text
        except AttributeError:
            text = str(node)
        parts.append(text)
    return "".join(parts)