diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py index 3fe61e652..d1be9a4e1 100644 --- a/api/funkwhale_api/music/metadata.py +++ b/api/funkwhale_api/music/metadata.py @@ -1,34 +1,130 @@ import mutagen +import arrow NODEFAULT = object() -class Metadata(object): - ALIASES = { - 'release': 'musicbrainz_albumid', - 'artist': 'musicbrainz_artistid', - 'recording': 'musicbrainz_trackid', + +class TagNotFound(KeyError): + pass + + +def get_id3_tag(f, k): + # First we try to grab the standard key + try: + return f.tags[k].text[0] + except KeyError: + pass + # then we fallback on parsing non standard tags + all_tags = f.tags.getall('TXXX') + try: + matches = [ + t + for t in all_tags + if t.desc.lower() == k.lower() + ] + return matches[0].text[0] + except (KeyError, IndexError): + raise TagNotFound(k) + + +def get_mp3_recording_id(f, k): + try: + return [ + t + for t in f.tags.getall('UFID') + if 'musicbrainz.org' in t.owner + ][0].data.decode('utf-8') + except IndexError: + raise TagNotFound(k) + +CONF = { + 'OggVorbis': { + 'getter': lambda f, k: f[k][0], + 'fields': { + 'track_number': { + 'field': 'TRACKNUMBER', + 'to_application': int + }, + 'title': { + 'field': 'title' + }, + 'artist': { + 'field': 'artist' + }, + 'album': { + 'field': 'album' + }, + 'date': { + 'field': 'date', + 'to_application': lambda v: arrow.get(v).date() + }, + 'musicbrainz_albumid': { + 'field': 'musicbrainz_albumid' + }, + 'musicbrainz_artistid': { + 'field': 'musicbrainz_artistid' + }, + 'musicbrainz_recordingid': { + 'field': 'musicbrainz_trackid' + }, + } + }, + 'MP3': { + 'getter': get_id3_tag, + 'fields': { + 'track_number': { + 'field': 'TPOS', + 'to_application': lambda v: int(v.split('/')[0]) + }, + 'title': { + 'field': 'TIT2' + }, + 'artist': { + 'field': 'TPE1' + }, + 'album': { + 'field': 'TALB' + }, + 'date': { + 'field': 'TDRC', + 'to_application': lambda v: arrow.get(str(v)).date() + }, + 'musicbrainz_albumid': { + 'field': 'MusicBrainz Album Id' + }, + 'musicbrainz_artistid': { + 'field': 'MusicBrainz Artist Id' + }, + 'musicbrainz_recordingid': { + 'field': 'UFID', + 'getter': get_mp3_recording_id, + }, + } } +} + + +class Metadata(object): def __init__(self, path): self._file = mutagen.File(path) + self._conf = CONF[self.get_file_type(self._file)] - def get(self, key, default=NODEFAULT, single=True): + def get_file_type(self, f): + return f.__class__.__name__ + + def get(self, key, default=NODEFAULT): + field_conf = self._conf['fields'][key] + real_key = field_conf['field'] try: - v = self._file[key] + getter = field_conf.get('getter', self._conf['getter']) + v = getter(self._file, real_key) except KeyError: if default == NODEFAULT: - raise + raise TagNotFound(real_key) return default - # Some tags are returned as lists of string - if single: - return v[0] + converter = field_conf.get('to_application') + if converter: + v = converter(v) return v - - def __getattr__(self, key): - try: - alias = self.ALIASES[key] - except KeyError: - raise ValueError('Invalid alias {}'.format(key)) - - return self.get(alias, single=True) diff --git a/api/funkwhale_api/music/tests/test.mp3 b/api/funkwhale_api/music/tests/test.mp3 new file mode 100644 index 000000000..35a6e5fce Binary files /dev/null and b/api/funkwhale_api/music/tests/test.mp3 differ diff --git a/api/funkwhale_api/music/tests/test_metadata.py b/api/funkwhale_api/music/tests/test_metadata.py index 7832baedb..9b8c76653 100644 --- a/api/funkwhale_api/music/tests/test_metadata.py +++ b/api/funkwhale_api/music/tests/test_metadata.py @@ -1,5 +1,6 @@ import unittest import os +import datetime from test_plus.test import TestCase from funkwhale_api.music import metadata @@ -8,20 +9,72 @@ DATA_DIR = os.path.dirname(os.path.abspath(__file__)) class TestMetadata(TestCase): - def test_can_get_metadata_from_file(self, *mocks): + def test_can_get_metadata_from_ogg_file(self, *mocks): path = os.path.join(DATA_DIR, 'test.ogg') data = metadata.Metadata(path) + self.assertEqual( + data.get('title'), + 'Peer Gynt Suite no. 1, op. 46: I. Morning' + ) + self.assertEqual( + data.get('artist'), + 'Edvard Grieg' + ) + self.assertEqual( + data.get('album'), + 'Peer Gynt Suite no. 1, op. 46' + ) + self.assertEqual( + data.get('date'), + datetime.date(2012, 8, 15), + ) + self.assertEqual( + data.get('track_number'), + 1 + ) + self.assertEqual( data.get('musicbrainz_albumid'), 'a766da8b-8336-47aa-a3ee-371cc41ccc75') self.assertEqual( - data.get('musicbrainz_trackid'), + data.get('musicbrainz_recordingid'), 'bd21ac48-46d8-4e78-925f-d9cc2a294656') self.assertEqual( data.get('musicbrainz_artistid'), '013c8e5b-d72a-4cd3-8dee-6c64d6125823') - self.assertEqual(data.release, data.get('musicbrainz_albumid')) - self.assertEqual(data.artist, data.get('musicbrainz_artistid')) - self.assertEqual(data.recording, data.get('musicbrainz_trackid')) + def test_can_get_metadata_from_id3_mp3_file(self, *mocks): + path = os.path.join(DATA_DIR, 'test.mp3') + data = metadata.Metadata(path) + + self.assertEqual( + data.get('title'), + 'Bend' + ) + self.assertEqual( + data.get('artist'), + 'Binärpilot' + ) + self.assertEqual( + data.get('album'), + 'You Can\'t Stop Da Funk' + ) + self.assertEqual( + data.get('date'), + datetime.date(2006, 2, 7), + ) + self.assertEqual( + data.get('track_number'), + 1 + ) + + self.assertEqual( + data.get('musicbrainz_albumid'), + 'ce40cdb1-a562-4fd8-a269-9269f98d4124') + self.assertEqual( + data.get('musicbrainz_recordingid'), + 'f269d497-1cc0-4ae4-a0c4-157ec7d73fcb') + self.assertEqual( + data.get('musicbrainz_artistid'), + '9c6bddde-6228-4d9f-ad0d-03f6fcb19e13') diff --git a/api/funkwhale_api/providers/audiofile/importer.py b/api/funkwhale_api/providers/audiofile/importer.py index d95c120e1..9e1b0fb3f 100644 --- a/api/funkwhale_api/providers/audiofile/importer.py +++ b/api/funkwhale_api/providers/audiofile/importer.py @@ -9,41 +9,34 @@ from funkwhale_api.music import models, metadata @celery.app.task(name='audiofile.from_path') def from_path(path): data = metadata.Metadata(path) - artist = models.Artist.objects.get_or_create( name__iexact=data.get('artist'), - defaults={'name': data.get('artist')}, + defaults={ + 'name': data.get('artist'), + 'mbid': data.get('musicbrainz_artistid', None), + + }, )[0] - release_date = None - try: - year, month, day = data.get('date', None).split('-') - release_date = datetime.date( - int(year), int(month), int(day) - ) - except (ValueError, TypeError): - pass - + release_date = data.get('date', default=None) album = models.Album.objects.get_or_create( title__iexact=data.get('album'), artist=artist, defaults={ 'title': data.get('album'), 'release_date': release_date, + 'mbid': data.get('musicbrainz_albumid', None), }, )[0] - position = None - try: - position = int(data.get('tracknumber', None)) - except ValueError: - pass + position = data.get('track_number', default=None) track = models.Track.objects.get_or_create( title__iexact=data.get('title'), album=album, defaults={ 'title': data.get('title'), 'position': position, + 'mbid': data.get('musicbrainz_recordingid', None), }, )[0] diff --git a/api/funkwhale_api/providers/audiofile/tests/test_disk_import.py b/api/funkwhale_api/providers/audiofile/tests/test_disk_import.py index 26532a8c9..4a91a36eb 100644 --- a/api/funkwhale_api/providers/audiofile/tests/test_disk_import.py +++ b/api/funkwhale_api/providers/audiofile/tests/test_disk_import.py @@ -14,21 +14,37 @@ class TestAudioFile(TestCase): 'artist': ['Test artist'], 'album': ['Test album'], 'title': ['Test track'], - 'tracknumber': ['4'], - 'date': ['2012-08-15'] + 'TRACKNUMBER': ['4'], + 'date': ['2012-08-15'], + 'musicbrainz_albumid': ['a766da8b-8336-47aa-a3ee-371cc41ccc75'], + 'musicbrainz_trackid': ['bd21ac48-46d8-4e78-925f-d9cc2a294656'], + 'musicbrainz_artistid': ['013c8e5b-d72a-4cd3-8dee-6c64d6125823'], } - with unittest.mock.patch('mutagen.File', return_value=metadata): + m1 = unittest.mock.patch('mutagen.File', return_value=metadata) + m2 = unittest.mock.patch( + 'funkwhale_api.music.metadata.Metadata.get_file_type', + return_value='OggVorbis', + ) + with m1, m2: track_file = importer.from_path( os.path.join(DATA_DIR, 'dummy_file.ogg')) self.assertEqual( track_file.track.title, metadata['title'][0]) + self.assertEqual( + track_file.track.mbid, metadata['musicbrainz_trackid'][0]) self.assertEqual( track_file.track.position, 4) self.assertEqual( track_file.track.album.title, metadata['album'][0]) + self.assertEqual( + track_file.track.album.mbid, + metadata['musicbrainz_albumid'][0]) self.assertEqual( track_file.track.album.release_date, datetime.date(2012, 8, 15)) self.assertEqual( track_file.track.artist.name, metadata['artist'][0]) + self.assertEqual( + track_file.track.artist.mbid, + metadata['musicbrainz_artistid'][0]) diff --git a/docs/importing-music.rst b/docs/importing-music.rst index 29f33ffc7..5fdff7e93 100644 --- a/docs/importing-music.rst +++ b/docs/importing-music.rst @@ -14,14 +14,18 @@ least an ``artist``, ``album`` and ``title`` tag, you can import those tracks as docker-compose run --rm api python manage.py import_files "/music/**/*.ogg" --recursive --noinput +For the best results, we recommand tagging your music collection through +`Picard `_ in order to have the best quality metadata. + .. note:: This command is idempotent, meaning you can run it multiple times on the same files and already imported files will simply be skipped. -.. warning:: +.. note:: + + At the moment, only OGG/Vorbis and MP3 files with ID3 tags are supported - At the moment, only ogg files are supported. MP3 support will be implemented soon. Getting demo tracks ^^^^^^^^^^^^^^^^^^^