Fix #799: Removed broken/instable lyrics feature

merge-requests/757/head
Eliot Berriot 2019-04-23 10:31:29 +02:00
rodzic a414461f49
commit 0b94227782
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: DD6965E2476E5C27
16 zmienionych plików z 34 dodań i 384 usunięć

Wyświetl plik

@ -39,22 +39,6 @@ class ImportJobAdmin(admin.ModelAdmin):
list_filter = ["status"]
@admin.register(models.Work)
class WorkAdmin(admin.ModelAdmin):
list_display = ["title", "mbid", "language", "nature"]
list_select_related = True
search_fields = ["title"]
list_filter = ["language", "nature"]
@admin.register(models.Lyrics)
class LyricsAdmin(admin.ModelAdmin):
list_display = ["url", "id", "url"]
list_select_related = True
search_fields = ["url", "work__title"]
list_filter = ["work__language"]
@admin.register(models.Upload)
class UploadAdmin(admin.ModelAdmin):
list_display = [

Wyświetl plik

@ -164,27 +164,6 @@ class UploadVersionFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
model = "music.UploadVersion"
@registry.register
class WorkFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
mbid = factory.Faker("uuid4")
language = "eng"
nature = "song"
title = factory.Faker("sentence", nb_words=3)
class Meta:
model = "music.Work"
@registry.register
class LyricsFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
work = factory.SubFactory(WorkFactory)
url = factory.Faker("url")
content = factory.Faker("paragraphs", nb=4)
class Meta:
model = "music.Lyrics"
@registry.register
class TagFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
name = factory.SelfAttribute("slug")

Wyświetl plik

@ -47,4 +47,4 @@ class Mapping(object):
)
registry = {"Artist": Importer, "Track": Importer, "Album": Importer, "Work": Importer}
registry = {"Artist": Importer, "Track": Importer, "Album": Importer}

Wyświetl plik

@ -1,31 +0,0 @@
import urllib.request
from bs4 import BeautifulSoup
def _get_html(url):
with urllib.request.urlopen(url) as response:
html = response.read()
return html.decode("utf-8")
def extract_content(html):
soup = BeautifulSoup(html, "html.parser")
return soup.find_all("div", class_="lyricbox")[0].contents
def clean_content(contents):
final_content = ""
for e in contents:
if e == "\n":
continue
if e.name == "script":
continue
if e.name == "br":
final_content += "\n"
continue
try:
final_content += e.text
except AttributeError:
final_content += str(e)
return final_content

Wyświetl plik

@ -0,0 +1,31 @@
# Generated by Django 2.1.7 on 2019-04-23 08:20
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('music', '0038_attributed_to'),
]
operations = [
migrations.RemoveField(
model_name='lyrics',
name='work',
),
migrations.RemoveField(
model_name='work',
name='from_activity',
),
migrations.RemoveField(
model_name='track',
name='work',
),
migrations.DeleteModel(
name='Lyrics',
),
migrations.DeleteModel(
name='Work',
),
]

Wyświetl plik

@ -6,7 +6,6 @@ import tempfile
import urllib.parse
import uuid
import markdown
import pendulum
import pydub
from django.conf import settings
@ -379,77 +378,6 @@ def import_album(v):
return a
def link_recordings(instance, cleaned_data, raw_data):
tracks = [r["target"] for r in raw_data["recording-relation-list"]]
Track.objects.filter(mbid__in=tracks).update(work=instance)
def import_lyrics(instance, cleaned_data, raw_data):
try:
url = [
url_data
for url_data in raw_data["url-relation-list"]
if url_data["type"] == "lyrics"
][0]["target"]
except (IndexError, KeyError):
return
l, _ = Lyrics.objects.get_or_create(work=instance, url=url)
return l
class Work(APIModelMixin):
language = models.CharField(max_length=20)
nature = models.CharField(max_length=50)
title = models.CharField(max_length=255)
api = musicbrainz.api.works
api_includes = ["url-rels", "recording-rels"]
musicbrainz_model = "work"
federation_namespace = "works"
musicbrainz_mapping = {
"mbid": {"musicbrainz_field_name": "id"},
"title": {"musicbrainz_field_name": "title"},
"language": {"musicbrainz_field_name": "language"},
"nature": {"musicbrainz_field_name": "type", "converter": lambda v: v.lower()},
}
import_hooks = [import_lyrics, link_recordings]
def fetch_lyrics(self):
lyric = self.lyrics.first()
if lyric:
return lyric
data = self.api.get(self.mbid, includes=["url-rels"])["work"]
lyric = import_lyrics(self, {}, data)
return lyric
def get_federation_id(self):
if self.fid:
return self.fid
return None
class Lyrics(models.Model):
uuid = models.UUIDField(unique=True, db_index=True, default=uuid.uuid4)
work = models.ForeignKey(
Work, related_name="lyrics", null=True, blank=True, on_delete=models.CASCADE
)
url = models.URLField(unique=True)
content = models.TextField(null=True, blank=True)
@property
def content_rendered(self):
return markdown.markdown(
self.content,
safe_mode=True,
enable_attributes=False,
extensions=["markdown.extensions.nl2br"],
)
class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
def for_nested_serialization(self):
return self.select_related().select_related("album__artist", "artist")
@ -499,9 +427,6 @@ class Track(APIModelMixin):
album = models.ForeignKey(
Album, related_name="tracks", null=True, blank=True, on_delete=models.CASCADE
)
work = models.ForeignKey(
Work, related_name="tracks", null=True, blank=True, on_delete=models.CASCADE
)
license = models.ForeignKey(
License,
null=True,
@ -523,7 +448,7 @@ class Track(APIModelMixin):
federation_namespace = "tracks"
musicbrainz_model = "recording"
api = musicbrainz.api.recordings
api_includes = ["artist-credits", "releases", "media", "tags", "work-rels"]
api_includes = ["artist-credits", "releases", "media", "tags"]
musicbrainz_mapping = {
"mbid": {"musicbrainz_field_name": "id"},
"title": {"musicbrainz_field_name": "title"},
@ -552,20 +477,6 @@ class Track(APIModelMixin):
self.artist = self.album.artist
super().save(**kwargs)
def get_work(self):
if self.work:
return self.work
data = self.api.get(self.mbid, includes=["work-rels"])
try:
work_data = data["recording"]["work-relation-list"][0]["work"]
except (IndexError, KeyError):
return
work, _ = Work.get_or_create_from_api(mbid=work_data["id"])
return work
def get_lyrics_url(self):
return reverse("api:v1:tracks-lyrics", kwargs={"pk": self.pk})
@property
def full_name(self):
try:

Wyświetl plik

@ -190,7 +190,6 @@ class TrackUploadSerializer(serializers.ModelSerializer):
class TrackSerializer(serializers.ModelSerializer):
artist = ArtistSimpleSerializer(read_only=True)
album = TrackAlbumSerializer(read_only=True)
lyrics = serializers.SerializerMethodField()
uploads = serializers.SerializerMethodField()
listen_url = serializers.SerializerMethodField()
@ -206,7 +205,6 @@ class TrackSerializer(serializers.ModelSerializer):
"creation_date",
"position",
"disc_number",
"lyrics",
"uploads",
"listen_url",
"copyright",
@ -214,9 +212,6 @@ class TrackSerializer(serializers.ModelSerializer):
"is_local",
)
def get_lyrics(self, obj):
return obj.get_lyrics_url()
def get_listen_url(self, obj):
return obj.listen_url
@ -377,12 +372,6 @@ class SimpleAlbumSerializer(serializers.ModelSerializer):
fields = ("id", "mbid", "title", "release_date", "cover")
class LyricsSerializer(serializers.ModelSerializer):
class Meta:
model = models.Lyrics
fields = ("id", "work", "content", "content_rendered")
class TrackActivitySerializer(activity_serializers.ModelSerializer):
type = serializers.SerializerMethodField()
name = serializers.CharField(source="title")

Wyświetl plik

@ -17,7 +17,6 @@ from funkwhale_api.federation import library as lb
from funkwhale_api.taskapp import celery
from . import licenses
from . import lyrics as lyrics_utils
from . import models
from . import metadata
from . import signals
@ -70,16 +69,6 @@ def get_cover_from_fs(dir_path):
return {"mimetype": m, "content": c.read()}
@celery.app.task(name="Lyrics.fetch_content")
@celery.require_instance(models.Lyrics, "lyrics")
def fetch_content(lyrics):
html = lyrics_utils._get_html(lyrics.url)
content = lyrics_utils.extract_content(html)
cleaned_content = lyrics_utils.clean_content(content)
lyrics.content = cleaned_content
lyrics.save(update_fields=["content"])
@celery.app.task(name="music.start_library_scan")
@celery.require_instance(
models.LibraryScan.objects.select_related().filter(status="pending"), "library_scan"

Wyświetl plik

@ -219,31 +219,6 @@ class TrackViewSet(
)
return queryset
@action(methods=["get"], detail=True)
@transaction.non_atomic_requests
def lyrics(self, request, *args, **kwargs):
try:
track = models.Track.objects.get(pk=kwargs["pk"])
except models.Track.DoesNotExist:
return Response(status=404)
work = track.work
if not work:
work = track.get_work()
if not work:
return Response({"error": "unavailable work "}, status=404)
lyrics = work.fetch_lyrics()
try:
if not lyrics.content:
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
except AttributeError:
return Response({"error": "unavailable lyrics"}, status=404)
serializer = serializers.LyricsSerializer(lyrics)
return Response(serializer.data)
libraries = action(methods=["get"], detail=True)(
get_libraries(filter_uploads=lambda o, uploads: uploads.filter(track=o))
)

Wyświetl plik

@ -40,10 +40,6 @@ class API(object):
_api.get_recording_by_id, max_age=settings.MUSICBRAINZ_CACHE_DURATION
)
class works(object):
search = memo(_api.search_works, max_age=settings.MUSICBRAINZ_CACHE_DURATION)
get = memo(_api.get_work_by_id, max_age=settings.MUSICBRAINZ_CACHE_DURATION)
class releases(object):
search = memo(_api.search_releases, max_age=settings.MUSICBRAINZ_CACHE_DURATION)
get = memo(_api.get_release_by_id, max_age=settings.MUSICBRAINZ_CACHE_DURATION)

Wyświetl plik

@ -36,7 +36,6 @@ persisting-theory>=0.2,<0.3
django-versatileimagefield>=1.9,<1.10
django-filter>=2.0,<2.1
django-rest-auth>=0.9,<0.10
beautifulsoup4>=4.6,<4.7
Markdown>=2.6,<2.7
ipython>=6,<7
mutagen>=1.42,<1.43

Wyświetl plik

@ -1,69 +0,0 @@
from django.urls import reverse
from funkwhale_api.music import lyrics as lyrics_utils
from funkwhale_api.music import models, tasks
def test_lyrics_tasks(lyricswiki_content, mocker, factories):
mocker.patch(
"funkwhale_api.music.lyrics._get_html", return_value=lyricswiki_content
)
lyrics = factories["music.Lyrics"](
url="http://lyrics.wikia.com/System_Of_A_Down:Chop_Suey!"
)
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
assert "Grab a brush and put on a little makeup" in lyrics.content
def test_clean_content():
c = """<div class="lyricbox">Hello<br /><script>alert('hello');</script>Is it me you're looking for?<br /></div>"""
d = lyrics_utils.extract_content(c)
d = lyrics_utils.clean_content(d)
expected = """Hello
Is it me you're looking for?
"""
assert d == expected
def test_markdown_rendering(factories):
content = """Hello
Is it me you're looking for?"""
lyrics = factories["music.Lyrics"](content=content)
expected = "<p>Hello<br />\nIs it me you're looking for?</p>"
assert expected == lyrics.content_rendered
def test_works_import_lyrics_if_any(
lyricswiki_content, works, tracks, mocker, factories, logged_in_client
):
mocker.patch(
"funkwhale_api.musicbrainz.api.works.get",
return_value=works["get"]["chop_suey"],
)
mocker.patch(
"funkwhale_api.musicbrainz.api.recordings.get",
return_value=tracks["get"]["chop_suey"],
)
mocker.patch(
"funkwhale_api.music.lyrics._get_html", return_value=lyricswiki_content
)
track = factories["music.Track"](
work=None, mbid="07ca77cf-f513-4e9c-b190-d7e24bbad448"
)
url = reverse("api:v1:tracks-lyrics", kwargs={"pk": track.pk})
response = logged_in_client.get(url)
assert response.status_code == 200
track.refresh_from_db()
lyrics = models.Lyrics.objects.latest("id")
work = models.Work.objects.latest("id")
assert track.work == work
assert lyrics.work == work

Wyświetl plik

@ -198,7 +198,6 @@ def test_track_serializer(factories, to_api_date):
"disc_number": track.disc_number,
"uploads": [serializers.TrackUploadSerializer(upload).data],
"creation_date": to_api_date(track.creation_date),
"lyrics": track.get_lyrics_url(),
"listen_url": track.listen_url,
"license": upload.track.license.code,
"copyright": upload.track.copyright,

Wyświetl plik

@ -1,61 +0,0 @@
from funkwhale_api.music import models
def test_can_import_work(factories, mocker, works):
mocker.patch(
"funkwhale_api.musicbrainz.api.works.get",
return_value=works["get"]["chop_suey"],
)
recording = factories["music.Track"](mbid="07ca77cf-f513-4e9c-b190-d7e24bbad448")
mbid = "e2ecabc4-1b9d-30b2-8f30-3596ec423dc5"
work = models.Work.create_from_api(id=mbid)
assert work.title == "Chop Suey!"
assert work.nature == "song"
assert work.language == "eng"
assert work.mbid == mbid
# a imported work should also be linked to corresponding recordings
recording.refresh_from_db()
assert recording.work == work
def test_can_get_work_from_recording(factories, mocker, works, tracks):
mocker.patch(
"funkwhale_api.musicbrainz.api.works.get",
return_value=works["get"]["chop_suey"],
)
mocker.patch(
"funkwhale_api.musicbrainz.api.recordings.get",
return_value=tracks["get"]["chop_suey"],
)
recording = factories["music.Track"](
work=None, mbid="07ca77cf-f513-4e9c-b190-d7e24bbad448"
)
mbid = "e2ecabc4-1b9d-30b2-8f30-3596ec423dc5"
assert recording.work is None
work = recording.get_work()
assert work.title == "Chop Suey!"
assert work.nature == "song"
assert work.language == "eng"
assert work.mbid == mbid
recording.refresh_from_db()
assert recording.work == work
def test_works_import_lyrics_if_any(db, mocker, works):
mocker.patch(
"funkwhale_api.musicbrainz.api.works.get",
return_value=works["get"]["chop_suey"],
)
mbid = "e2ecabc4-1b9d-30b2-8f30-3596ec423dc5"
work = models.Work.create_from_api(id=mbid)
lyrics = models.Lyrics.objects.latest("id")
assert lyrics.work == work
assert lyrics.url == "http://lyrics.wikia.com/System_Of_A_Down:Chop_Suey!"

Wyświetl plik

@ -0,0 +1 @@
Removed broken/instable lyrics feature (#799)

Wyświetl plik

@ -76,24 +76,6 @@
</tbody>
</table>
</section>
<section class="ui vertical stripe center aligned segment">
<h2>
<translate translate-context="Content/Track/Title">Lyrics</translate>
</h2>
<div v-if="isLoadingLyrics" class="ui vertical segment">
<div :class="['ui', 'centered', 'active', 'inline', 'loader']"></div>
</div>
<div v-if="lyrics" v-html="lyrics.content_rendered"></div>
<template v-if="!isLoadingLyrics & !lyrics">
<p>
<translate translate-context="Content/Track/Paragraph">No lyrics available for this track.</translate>
</p>
<a class="ui button" target="_blank" :href="lyricsSearchUrl">
<i class="search icon"></i>
<translate translate-context="Content/Track/Link/Verb">Search on lyrics.wikia.com</translate>
</a>
</template>
</section>
<section class="ui vertical stripe segment">
<h2>
<translate translate-context="Content/*/Title/Noun">User libraries</translate>
@ -123,13 +105,10 @@ export default {
return {
time,
id: this.track.id,
isLoadingLyrics: true,
lyrics: null,
licenseData: null
}
},
created() {
this.fetchLyrics()
if (this.track && this.track.license) {
this.fetchLicenseData(this.track.license)
}
@ -142,22 +121,6 @@ export default {
self.licenseData = response.data
})
},
fetchLyrics() {
var self = this
this.isLoadingLyrics = true
let url = FETCH_URL + this.id + "/lyrics/"
logger.default.debug('Fetching lyrics for track "' + this.id + '"')
axios.get(url).then(
response => {
self.lyrics = response.data
self.isLoadingLyrics = false
},
response => {
console.error("No lyrics available")
self.isLoadingLyrics = false
}
)
}
},
computed: {
labels() {
@ -170,11 +133,6 @@ export default {
return this.track.uploads[0]
}
},
lyricsSearchUrl() {
let base = "http://lyrics.wikia.com/wiki/Special:Search?query="
let query = this.track.artist.name + ":" + this.track.title
return base + encodeURI(query)
},
license() {
if (!this.track || !this.track.license) {
return null