import urllib.request
import html.parser
from bs4 import BeautifulSoup
def _get_html(url):
with urllib.request.urlopen(url) as response:
html = response.read()
return html.decode('utf-8')
def extract_content(html):
    """Return the child nodes of the first ``<div class="lyricbox">``.

    Args:
        html: HTML markup to parse with BeautifulSoup's ``html.parser``
            backend.

    Returns:
        The ``contents`` of the first matching ``div``.

    Raises:
        IndexError: If the markup contains no ``div`` with class
            ``lyricbox``.
    """
    document = BeautifulSoup(html, "html.parser")
    lyric_boxes = document.find_all("div", class_='lyricbox')
    # Only the first match is used; indexing raises IndexError when the
    # search found nothing.
    first_box = lyric_boxes[0]
    return first_box.contents
def clean_content(contents):
    """Flatten a sequence of parsed nodes into plain text.

    Each item in ``contents`` is handled as follows:

    * an item equal to ``'\\n'`` is dropped;
    * an item whose ``name`` is ``'script'`` is dropped;
    * an item whose ``name`` is ``'br'`` becomes a newline;
    * any other item contributes its ``text`` attribute, or ``str(item)``
      when it has no ``text`` attribute.

    Items without a ``name`` attribute (for example plain ``str`` objects)
    are treated as ordinary text rather than raising ``AttributeError``.

    Args:
        contents: Iterable of nodes, e.g. the ``contents`` of a
            BeautifulSoup tag.

    Returns:
        The concatenated text as a single ``str`` (empty if ``contents``
        is empty).
    """
    pieces = []
    for node in contents:
        if node == '\n':
            continue

        # Plain strings carry no ``name``; treat them like unnamed text.
        tag_name = getattr(node, 'name', None)
        if tag_name == 'script':
            continue
        if tag_name == 'br':
            pieces.append("\n")
            continue

        try:
            piece = node.text
        except AttributeError:
            piece = str(node)
        pieces.append(piece)

    # Join once at the end instead of growing a string with ``+=``.
    return "".join(pieces)