Upgraded celery to 4.1, added endpoint logic for fingerprinting audio files

merge-requests/154/head
Eliot Berriot 2017-12-26 21:12:37 +01:00
rodzic 4834b9e450
commit 5d2dbbc828
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: DD6965E2476E5C27
25 zmienionych plików z 345 dodań i 71 usunięć

Wyświetl plik

@ -6,8 +6,8 @@ ENV PYTHONUNBUFFERED 1
COPY ./requirements.apt /requirements.apt
RUN apt-get update -qq && grep "^[^#;]" requirements.apt | xargs apt-get install -y
RUN curl -L https://github.com/acoustid/chromaprint/releases/download/v1.4.2/chromaprint-fpcalc-1.4.2-linux-x86_64.tar.gz | tar -xz -C /usr/local/bin --strip 1
RUN fcalc yolofkjdssdhf
COPY ./requirements/base.txt /requirements/base.txt
RUN pip install -r /requirements/base.txt
COPY ./requirements/production.txt /requirements/production.txt

Wyświetl plik

@ -47,7 +47,6 @@ THIRD_PARTY_APPS = (
'corsheaders',
'rest_framework',
'rest_framework.authtoken',
'djcelery',
'taggit',
'cachalot',
'rest_auth',
@ -68,6 +67,7 @@ LOCAL_APPS = (
'funkwhale_api.playlists',
'funkwhale_api.providers.audiofile',
'funkwhale_api.providers.youtube',
'funkwhale_api.providers.acoustid',
)
# See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
@ -266,14 +266,14 @@ CACHES["default"]["OPTIONS"] = {
########## CELERY
INSTALLED_APPS += ('funkwhale_api.taskapp.celery.CeleryConfig',)
BROKER_URL = env(
CELERY_BROKER_URL = env(
"CELERY_BROKER_URL", default=env('CACHE_URL', default=CACHE_DEFAULT))
########## END CELERY
# Location of root django.contrib.admin URL, use {% url 'admin:index' %}
ADMIN_URL = r'^admin/'
# Your common stuff: Below this line define 3rd party library settings
CELERY_DEFAULT_RATE_LIMIT = 1
CELERYD_TASK_TIME_LIMIT = 300
CELERY_TASK_DEFAULT_RATE_LIMIT = 1
CELERY_TASK_TIME_LIMIT = 300
import datetime
JWT_AUTH = {
'JWT_ALLOW_REFRESH': True,

Wyświetl plik

@ -54,7 +54,7 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner'
########## CELERY
# In development, all tasks will be executed locally by blocking until the task returns
CELERY_ALWAYS_EAGER = False
CELERY_TASK_ALWAYS_EAGER = False
########## END CELERY
# Your local stuff: Below this line define 3rd party library settings

Wyświetl plik

@ -23,7 +23,7 @@ CACHES = {
}
}
BROKER_URL = 'memory://'
CELERY_BROKER_URL = 'memory://'
# TESTING
# ------------------------------------------------------------------------------
@ -31,7 +31,7 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner'
########## CELERY
# In development, all tasks will be executed locally by blocking until the task returns
CELERY_ALWAYS_EAGER = True
CELERY_TASK_ALWAYS_EAGER = True
########## END CELERY
# Your local stuff: Below this line define 3rd party library settings

Wyświetl plik

@ -7,6 +7,7 @@ ENV PYTHONDONTWRITEBYTECODE 1
COPY ./requirements.apt /requirements.apt
COPY ./install_os_dependencies.sh /install_os_dependencies.sh
RUN bash install_os_dependencies.sh install
RUN curl -L https://github.com/acoustid/chromaprint/releases/download/v1.4.2/chromaprint-fpcalc-1.4.2-linux-x86_64.tar.gz | tar -xz -C /usr/local/bin --strip 1
RUN mkdir /requirements
COPY ./requirements/base.txt /requirements/base.txt

Wyświetl plik

@ -0,0 +1,18 @@
# Generated by Django 2.0 on 2017-12-26 16:39
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the optional ``acoustid_track_id`` UUID column to ``TrackFile``.

    # Applied on top of the migration binding track files to import jobs.
    dependencies = [
        ('music', '0015_bind_track_file_to_import_job'),
    ]

    operations = [
        migrations.AddField(
            model_name='trackfile',
            name='acoustid_track_id',
            # Nullable and blank: the value is optional.
            field=models.UUIDField(blank=True, null=True),
        ),
    ]

Wyświetl plik

@ -15,11 +15,9 @@ from django.utils import timezone
from taggit.managers import TaggableManager
from versatileimagefield.fields import VersatileImageField
from funkwhale_api.taskapp import celery
from funkwhale_api import downloader
from funkwhale_api import musicbrainz
from . import importers
from . import lyrics as lyrics_utils
class APIModelMixin(models.Model):
@ -255,14 +253,6 @@ class Lyrics(models.Model):
url = models.URLField(unique=True)
content = models.TextField(null=True, blank=True)
@celery.app.task(name='Lyrics.fetch_content', filter=celery.task_method)
def fetch_content(self):
html = lyrics_utils._get_html(self.url)
content = lyrics_utils.extract_content(html)
cleaned_content = lyrics_utils.clean_content(content)
self.content = cleaned_content
self.save()
@property
def content_rendered(self):
return markdown.markdown(
@ -362,6 +352,7 @@ class TrackFile(models.Model):
audio_file = models.FileField(upload_to='tracks/%Y/%m/%d', max_length=255)
source = models.URLField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True)
def download_file(self):
# import the track file, since there is not any
@ -429,26 +420,3 @@ class ImportJob(models.Model):
class Meta:
ordering = ('id', )
@celery.app.task(name='ImportJob.run', filter=celery.task_method)
def run(self, replace=False):
try:
track, created = Track.get_or_create_from_api(mbid=self.mbid)
track_file = None
if replace:
track_file = track.files.first()
elif track.files.count() > 0:
return
track_file = track_file or TrackFile(
track=track, source=self.source)
track_file.download_file()
track_file.save()
self.status = 'finished'
self.track_file = track_file
self.save()
return track.pk
except Exception as exc:
if not settings.DEBUG:
raise ImportJob.run.retry(args=[self], exc=exc, countdown=30, max_retries=3)
raise

Wyświetl plik

@ -0,0 +1,56 @@
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from django.conf import settings
from . import models
from . import lyrics as lyrics_utils
@celery.app.task(name='acoustid.set_on_track_file')
@celery.require_instance(models.TrackFile, 'track_file')
def set_acoustid_on_track_file(track_file):
    """Look up the best AcoustID match for a track file and store its id.

    The task is invoked with ``track_file_id=<pk>``; the decorator resolves
    it to the ``TrackFile`` instance received here.

    Returns the stored AcoustID, or ``None`` when the client found no match
    (in which case the track file is left untouched).
    """
    match = get_acoustid_client().get_best_match(track_file.audio_file.path)
    if not match:
        return None

    acoustid_id = match['id']
    track_file.acoustid_track_id = acoustid_id
    # Only touch the column we changed.
    track_file.save(update_fields=['acoustid_track_id'])
    return acoustid_id
@celery.app.task(name='ImportJob.run', bind=True)
@celery.require_instance(models.ImportJob, 'import_job')
def import_job_run(self, import_job, replace=False):
    """Download the audio for an import job and attach it to its track.

    The task is invoked with ``import_job_id=<pk>``; the decorator resolves
    it to the ``ImportJob`` instance received here. ``self`` is the bound
    Celery task.

    Args:
        import_job: the job to process.
        replace: when true, re-download into the track's first existing
            file instead of skipping tracks that already have one.

    Returns:
        The primary key of the imported track, or ``None`` when the track
        already has a file and ``replace`` is false.

    Raises:
        Any exception from the import when ``settings.DEBUG`` is true;
        otherwise the failure is handed to Celery's retry mechanism
        (30 second countdown, at most 3 retries).
    """
    try:
        track, _ = models.Track.get_or_create_from_api(mbid=import_job.mbid)
        track_file = None
        if replace:
            track_file = track.files.first()
        elif track.files.count() > 0:
            # Nothing to do: a file already exists and we must not replace it.
            return None
        track_file = track_file or models.TrackFile(
            track=track, source=import_job.source)
        track_file.download_file()
        track_file.save()
        import_job.status = 'finished'
        import_job.track_file = track_file
        import_job.save()
        return track.pk
    except Exception as exc:
        if not settings.DEBUG:
            # Retry through the bound task so the original call arguments
            # (``import_job_id=...``) are reused. Passing ``args=[self]``
            # would re-queue the task object itself as the only argument
            # and lose the ``import_job_id`` keyword the decorator needs.
            raise self.retry(exc=exc, countdown=30, max_retries=3)
        raise
@celery.app.task(name='Lyrics.fetch_content')
@celery.require_instance(models.Lyrics, 'lyrics')
def fetch_content(lyrics):
    """Fetch, extract and clean the lyrics page, then store it as content.

    The task is invoked with ``lyrics_id=<pk>``; the decorator resolves it
    to the ``Lyrics`` instance received here.
    """
    raw_html = lyrics_utils._get_html(lyrics.url)
    extracted = lyrics_utils.extract_content(raw_html)
    lyrics.content = lyrics_utils.clean_content(extracted)
    # Persist only the column that changed.
    lyrics.save(update_fields=['content'])

Wyświetl plik

@ -22,6 +22,7 @@ from . import models
from . import serializers
from . import importers
from . import filters
from . import tasks
from . import utils
@ -129,7 +130,8 @@ class TrackViewSet(TagViewSetMixin, SearchMixin, viewsets.ReadOnlyModelViewSet):
lyrics = work.fetch_lyrics()
try:
if not lyrics.content:
lyrics.fetch_content()
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
except AttributeError:
return Response({'error': 'unavailable lyrics'}, status=404)
serializer = serializers.LyricsSerializer(lyrics)
@ -244,7 +246,7 @@ class SubmitViewSet(viewsets.ViewSet):
pass
batch = models.ImportBatch.objects.create(submitted_by=request.user)
job = models.ImportJob.objects.create(mbid=request.POST['mbid'], batch=batch, source=request.POST['import_url'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return Response(serializer.data)
@ -272,7 +274,7 @@ class SubmitViewSet(viewsets.ViewSet):
models.TrackFile.objects.get(track__mbid=row['mbid'])
except models.TrackFile.DoesNotExist:
job = models.ImportJob.objects.create(mbid=row['mbid'], batch=batch, source=row['source'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return serializer.data, batch

Wyświetl plik

@ -0,0 +1,27 @@
import acoustid
from dynamic_preferences.registries import global_preferences_registry
class Client(object):
    """Small wrapper around ``acoustid.match`` bound to a single API key."""

    # Rows scoring below this value are not considered a match.
    MIN_SCORE_FOR_MATCH = 0.8

    def __init__(self, api_key):
        self.api_key = api_key

    def match(self, file_path):
        """Return the raw (``parse=False``) lookup response for ``file_path``."""
        return acoustid.match(self.api_key, file_path, parse=False)

    def get_best_match(self, file_path):
        """Return the highest-scoring result row for ``file_path``.

        Only rows whose ``score`` is at least ``MIN_SCORE_FOR_MATCH`` are
        considered; rows without a score are ignored. When several rows
        share the top score, the first one in the response wins.

        Returns ``None`` when the response has no ``results`` key or when
        no row reaches the threshold.
        """
        response = self.match(file_path=file_path)
        try:
            rows = response['results']
        except KeyError:
            return None

        candidates = [
            row for row in rows
            if row.get('score', 0) >= self.MIN_SCORE_FOR_MATCH
        ]
        if not candidates:
            return None
        # Do not rely on the response being sorted by score.
        return max(candidates, key=lambda row: row['score'])
def get_acoustid_client():
    """Build a ``Client`` from the API key stored in the global preferences."""
    preferences = global_preferences_registry.manager()
    api_key = preferences['providers_acoustid__api_key']
    return Client(api_key=api_key)

Wyświetl plik

@ -0,0 +1,13 @@
from dynamic_preferences.types import StringPreference, Section
from dynamic_preferences.registries import global_preferences_registry
acoustid = Section('providers_acoustid')
# Global string preference holding the AcoustID API key. It is registered
# under the ``providers_acoustid`` section and is empty by default.
@global_preferences_registry.register
class APIKey(StringPreference):
    section = acoustid
    name = 'api_key'
    # Empty string means no key has been configured.
    default = ''
    verbose_name = 'Acoustid API key'
    help_text = 'The API key used to query AcoustID. Get one at https://acoustid.org/new-application.'

Wyświetl plik

@ -1,20 +1,20 @@
import acoustid
import os
import datetime
from django.core.files import File
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import models, metadata
@celery.app.task(name='audiofile.from_path')
def from_path(path):
def import_metadata_without_musicbrainz(path):
data = metadata.Metadata(path)
artist = models.Artist.objects.get_or_create(
name__iexact=data.get('artist'),
defaults={
'name': data.get('artist'),
'mbid': data.get('musicbrainz_artistid', None),
},
)[0]
@ -39,11 +39,33 @@ def from_path(path):
'mbid': data.get('musicbrainz_recordingid', None),
},
)[0]
return track
def import_metadata_with_musicbrainz(path):
    # Placeholder: not implemented, does nothing and returns None.
    pass
@celery.app.task(name='audiofile.from_path')
def from_path(path):
acoustid_track_id = None
try:
client = get_acoustid_client()
result = client.get_best_match(path)
acoustid_track_id = result['id']
except acoustid.WebServiceError:
track = import_metadata_without_musicbrainz(path)
except (TypeError, KeyError):
track = import_metadata_without_musicbrainz(path)
else:
track, created = models.Track.get_or_create_from_api(
mbid=result['recordings'][0]['id']
)
if track.files.count() > 0:
raise ValueError('File already exists for track {}'.format(track.pk))
track_file = models.TrackFile(track=track)
track_file = models.TrackFile(
track=track, acoustid_track_id=acoustid_track_id)
track_file.audio_file.save(
os.path.basename(path),
File(open(path, 'rb'))

Wyświetl plik

@ -1,10 +1,12 @@
from __future__ import absolute_import
import os
import functools
from celery import Celery
from django.apps import AppConfig
from django.conf import settings
from celery.contrib.methods import task_method
if not settings.configured:
# set the default Django settings module for the 'celery' program.
@ -21,12 +23,20 @@ class CeleryConfig(AppConfig):
def ready(self):
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS, force=True)
@app.task(bind=True)
def debug_task(self):
    """Print the bound task's request; a debugging aid."""
    print('Request: {0!r}'.format(self.request))  # pragma: no cover
def require_instance(model_or_qs, parameter_name):
    """Decorator factory turning a ``<parameter_name>_id`` kwarg into an instance.

    The decorated callable must be invoked with the keyword argument
    ``<parameter_name>_id``. That value is looked up by primary key and the
    resulting object is passed on as ``<parameter_name>``.

    Args:
        model_or_qs: either an object exposing ``get(pk=...)`` directly
            (e.g. a queryset) or one exposing it via ``.objects`` (e.g. a
            model class).
        parameter_name: name of the keyword argument the wrapped function
            expects the instance under.

    Raises (at call time):
        KeyError: when the ``<parameter_name>_id`` keyword is missing.
        Whatever the underlying ``get`` raises when the lookup fails.
    """
    id_kwarg = '_'.join([parameter_name, 'id'])

    def decorator(function):
        @functools.wraps(function)
        def inner(*args, **kwargs):
            pk = kwargs.pop(id_kwarg)
            # Pick the lookup explicitly instead of wrapping the call in
            # ``except AttributeError``: that also swallowed AttributeErrors
            # raised *inside* ``get`` and replaced them with a misleading
            # "no attribute 'objects'" error.
            getter = getattr(model_or_qs, 'get', None)
            if getter is None:
                getter = model_or_qs.objects.get
            kwargs[parameter_name] = getter(pk=pk)
            return function(*args, **kwargs)
        return inner
    return decorator

Wyświetl plik

@ -79,4 +79,3 @@ case "$1" in
help) usage_message;;
*) wrong_command $1;;
esac

Wyświetl plik

@ -7,3 +7,4 @@ libpq-dev
postgresql-client
libav-tools
python3-dev
curl

Wyświetl plik

@ -24,7 +24,7 @@ django-redis>=4.5,<4.6
redis>=2.10,<2.11
celery>=3.1,<3.2
celery>=4.1,<4.2
# Your custom requirements go here
@ -33,7 +33,6 @@ musicbrainzngs==0.6
youtube_dl>=2017.12.14
djangorestframework>=3.7,<3.8
djangorestframework-jwt>=1.11,<1.12
django-celery>=3.2,<3.3
django-mptt>=0.9,<0.10
google-api-python-client>=1.6,<1.7
arrow>=0.12,<0.13
@ -57,3 +56,4 @@ git+https://github.com/EliotBerriot/PyMemoize.git@django
git+https://github.com/EliotBerriot/django-cachalot.git@django-2
django-dynamic-preferences>=1.5,<1.6
pyacoustid>=1.1.5,<1.2

Wyświetl plik

@ -1,6 +1,10 @@
import tempfile
import shutil
import pytest
from django.core.cache import cache as django_cache
from dynamic_preferences.registries import global_preferences_registry
from funkwhale_api.taskapp import celery
@pytest.fixture(scope="session", autouse=True)
@ -11,12 +15,23 @@ def factories_autodiscover():
factories.registry.autodiscover(app_names)
@pytest.fixture(autouse=True)
def cache():
    """Yield the Django cache and clear it after the test (autouse)."""
    yield django_cache
    # Teardown: runs once the test using the fixture has finished.
    django_cache.clear()
@pytest.fixture
def factories(db):
    """Yield the project's factory registry; requests the ``db`` fixture."""
    # Imported inside the fixture rather than at module import time.
    from funkwhale_api import factories
    yield factories.registry
@pytest.fixture
def preferences(db):
    """Yield the global preferences manager; requests the ``db`` fixture."""
    yield global_preferences_registry.manager()
@pytest.fixture
def tmpdir():
d = tempfile.mkdtemp()

Wyświetl plik

@ -34,11 +34,11 @@ def test_can_submit_youtube_url_for_track_import(mocker, superuser_client):
assert track.album.title == 'Marsupial Madness'
def test_import_creates_an_import_with_correct_data(superuser_client, settings):
def test_import_creates_an_import_with_correct_data(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
video_id = 'tPEE9ZwTmy0'
url = reverse('api:v1:submit-single')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url,
{'import_url': 'https://www.youtube.com/watch?v={0}'.format(video_id),
@ -54,7 +54,8 @@ def test_import_creates_an_import_with_correct_data(superuser_client, settings):
assert job.source == 'https://www.youtube.com/watch?v={0}'.format(video_id)
def test_can_import_whole_album(mocker, superuser_client, settings):
def test_can_import_whole_album(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['soad'])
@ -82,7 +83,6 @@ def test_can_import_whole_album(mocker, superuser_client, settings):
]
}
url = reverse('api:v1:submit-album')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url, json.dumps(payload), content_type="application/json")
@ -109,7 +109,8 @@ def test_can_import_whole_album(mocker, superuser_client, settings):
assert job.source == row['source']
def test_can_import_whole_artist(mocker, superuser_client, settings):
def test_can_import_whole_artist(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['soad'])
@ -142,7 +143,6 @@ def test_can_import_whole_artist(mocker, superuser_client, settings):
]
}
url = reverse('api:v1:submit-artist')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url, json.dumps(payload), content_type="application/json")

Wyświetl plik

@ -4,6 +4,7 @@ from django.urls import reverse
from funkwhale_api.music import models
from funkwhale_api.musicbrainz import api
from funkwhale_api.music import serializers
from funkwhale_api.music import tasks
from funkwhale_api.music import lyrics as lyrics_utils
from .mocking import lyricswiki
@ -18,7 +19,8 @@ def test_works_import_lyrics_if_any(mocker, factories):
lyrics = factories['music.Lyrics'](
url='http://lyrics.wikia.com/System_Of_A_Down:Chop_Suey!')
lyrics.fetch_content()
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
self.assertIn(
'Grab a brush and put on a little makeup',
lyrics.content,

Wyświetl plik

@ -2,6 +2,7 @@ import pytest
from funkwhale_api.music import models
from funkwhale_api.music import importers
from funkwhale_api.music import tasks
def test_can_store_release_group_id_on_album(factories):
@ -44,6 +45,6 @@ def test_import_job_is_bound_to_track_file(factories, mocker):
job = factories['music.ImportJob'](mbid=track.mbid)
mocker.patch('funkwhale_api.music.models.TrackFile.download_file')
job.run()
tasks.import_job_run(import_job_id=job.pk)
job.refresh_from_db()
assert job.track_file.track == track

Wyświetl plik

@ -0,0 +1,42 @@
from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import tasks
def test_set_acoustid_on_track_file(factories, mocker):
    """The task stores the matched AcoustID on the track file and returns it."""
    expected_id = 'e475bf79-c1ce-4441-bed7-1e33f226c0a2'
    track_file = factories['music.TrackFile'](acoustid_track_id=None)
    best_row = {
        'id': expected_id,
        'recordings': [{
            'artists': [{
                'id': '9c6bddde-6228-4d9f-ad0d-03f6fcb19e13',
                'name': 'Binärpilot',
            }],
            'duration': 268,
            'id': 'f269d497-1cc0-4ae4-a0c4-157ec7d73fcb',
            'title': 'Bend',
        }],
        'score': 0.860825,
    }
    match_mock = mocker.patch(
        'acoustid.match',
        return_value={'results': [best_row], 'status': 'ok'})

    returned = tasks.set_acoustid_on_track_file(track_file_id=track_file.pk)
    track_file.refresh_from_db()

    assert str(track_file.acoustid_track_id) == expected_id
    assert returned == expected_id
    # The first positional argument is the API key, expected to be empty here.
    match_mock.assert_called_once_with(
        '', track_file.audio_file.path, parse=False)
def test_set_acoustid_on_track_file_required_high_score(factories, mocker):
    """A result scoring below the match threshold must not be stored."""
    track_file = factories['music.TrackFile'](acoustid_track_id=None)
    payload = {
        'results': [{'score': 0.79}],
        'status': 'ok'
    }
    mocker.patch('acoustid.match', return_value=payload)

    returned = tasks.set_acoustid_on_track_file(track_file_id=track_file.pk)
    track_file.refresh_from_db()

    # No match means nothing is returned and nothing is persisted.
    assert returned is None
    assert track_file.acoustid_track_id is None

Wyświetl plik

@ -0,0 +1,34 @@
from funkwhale_api.providers.acoustid import get_acoustid_client
def test_client_is_configured_with_correct_api_key(preferences):
    """The client picks up the API key stored in the global preferences."""
    expected_key = 'hello world'
    preferences['providers_acoustid__api_key'] = expected_key

    assert get_acoustid_client().api_key == expected_key
def test_client_returns_raw_results(db, mocker, preferences):
    """``Client.match`` hands back the ``acoustid.match`` response untouched."""
    preferences['providers_acoustid__api_key'] = 'test'
    recording = {
        'artists': [{
            'id': '9c6bddde-6228-4d9f-ad0d-03f6fcb19e13',
            'name': 'Binärpilot',
        }],
        'duration': 268,
        'id': 'f269d497-1cc0-4ae4-a0c4-157ec7d73fcb',
        'title': 'Bend',
    }
    raw_response = {
        'results': [{
            'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
            'recordings': [recording],
            'score': 0.860825,
        }],
        'status': 'ok',
    }
    match_mock = mocker.patch('acoustid.match', return_value=raw_response)

    answer = get_acoustid_client().match('/tmp/noopfile.mp3')

    assert answer == raw_response
    match_mock.assert_called_once_with(
        'test', '/tmp/noopfile.mp3', parse=False)

Wyświetl plik

@ -1,6 +1,8 @@
import os
import acoustid
import datetime
import os
from .music import data as api_data
from funkwhale_api.providers.audiofile import tasks
DATA_DIR = os.path.join(
@ -9,7 +11,36 @@ DATA_DIR = os.path.join(
)
def test_can_import_single_audio_file(db, mocker):
def test_import_file_with_acoustid(db, mocker, preferences):
    """Importing a file with an AcoustID match binds it to the matched recording."""
    recording_id = api_data.tracks['get']['8bitadventures']['recording']['id']
    best_row = {
        'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
        'recordings': [{'id': recording_id}],
        'score': 0.86,
    }
    audio_path = os.path.join(DATA_DIR, 'dummy_file.ogg')
    match_mock = mocker.patch(
        'acoustid.match', return_value={'results': [best_row]})

    # Canned MusicBrainz answers so no network call is made.
    musicbrainz_stubs = (
        ('funkwhale_api.musicbrainz.api.artists.get',
         api_data.artists['get']['adhesive_wombat']),
        ('funkwhale_api.musicbrainz.api.releases.get',
         api_data.albums['get']['marsupial']),
        ('funkwhale_api.musicbrainz.api.recordings.get',
         api_data.tracks['get']['8bitadventures']),
    )
    for target, canned in musicbrainz_stubs:
        mocker.patch(target, return_value=canned)

    imported = tasks.from_path(audio_path)

    assert imported.acoustid_track_id == best_row['id']
    assert imported.track.mbid == best_row['recordings'][0]['id']
    match_mock.assert_called_once_with('', audio_path, parse=False)
def test_can_import_single_audio_file_without_acoustid(db, mocker):
mocker.patch('acoustid.match', side_effect=acoustid.WebServiceError('test'))
metadata = {
'artist': ['Test artist'],
'album': ['Test album'],
@ -20,7 +51,6 @@ def test_can_import_single_audio_file(db, mocker):
'musicbrainz_trackid': ['bd21ac48-46d8-4e78-925f-d9cc2a294656'],
'musicbrainz_artistid': ['013c8e5b-d72a-4cd3-8dee-6c64d6125823'],
}
m1 = mocker.patch('mutagen.File', return_value=metadata)
m2 = mocker.patch(
'funkwhale_api.music.metadata.Metadata.get_file_type',

Wyświetl plik

@ -0,0 +1,33 @@
import pytest
from funkwhale_api.taskapp import celery
class Dummy:
    """Holder for a no-op callable that tests can patch and inspect."""

    @staticmethod
    def noop(instance):
        # Intentionally empty: only the call itself matters to the tests.
        pass
def test_require_instance_decorator(factories, mocker):
    """``<name>_id`` is resolved to the instance before the function runs."""
    user = factories['users.User']()
    user_model = user.__class__

    @celery.require_instance(user_model, 'user')
    def decorated(user):
        Dummy.noop(user)

    noop_mock = mocker.patch.object(Dummy, 'noop')
    decorated(user_id=user.pk)

    noop_mock.assert_called_once_with(user)
def test_require_instance_decorator_accepts_qs(factories, mocker):
    """A queryset can be given instead of a model; its filters apply."""
    inactive_user = factories['users.User'](is_active=False)
    user_model = inactive_user.__class__
    active_users = user_model.objects.filter(is_active=True)

    @celery.require_instance(active_users, 'user')
    def decorated(user):
        pass

    # The user is inactive, so the filtered queryset cannot find it.
    with pytest.raises(user_model.DoesNotExist):
        decorated(user_id=inactive_user.pk)

Wyświetl plik

@ -30,7 +30,7 @@ services:
links:
- postgres
- redis
command: python manage.py celery worker
command: celery -A funkwhale_api.taskapp worker
environment:
- "DJANGO_ALLOWED_HOSTS=localhost"
- "DJANGO_SETTINGS_MODULE=config.settings.local"