diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py index f0ea67b1a..77f85aef5 100644 --- a/api/funkwhale_api/music/metadata.py +++ b/api/funkwhale_api/music/metadata.py @@ -2,6 +2,7 @@ import base64 import datetime import logging import pendulum +import re import mutagen._util import mutagen.oggtheora @@ -144,6 +145,7 @@ CONF = { "mbid": {"field": "musicbrainz_trackid"}, "license": {}, "copyright": {}, + "genre": {}, }, }, "OggVorbis": { @@ -162,6 +164,7 @@ CONF = { "mbid": {"field": "musicbrainz_trackid"}, "license": {}, "copyright": {}, + "genre": {}, "pictures": { "field": "metadata_block_picture", "to_application": clean_ogg_pictures, @@ -184,6 +187,7 @@ CONF = { "mbid": {"field": "MusicBrainz Track Id"}, "license": {}, "copyright": {}, + "genre": {}, }, }, "MP3": { @@ -199,6 +203,7 @@ CONF = { "date": {"field": "TDRC"}, "musicbrainz_albumid": {"field": "MusicBrainz Album Id"}, "musicbrainz_artistid": {"field": "MusicBrainz Artist Id"}, + "genre": {"field": "TCON"}, "musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"}, "mbid": {"field": "UFID", "getter": get_mp3_recording_id}, "pictures": {}, @@ -220,6 +225,7 @@ CONF = { "musicbrainz_albumid": {}, "musicbrainz_artistid": {}, "musicbrainz_albumartistid": {}, + "genre": {}, "mbid": {"field": "musicbrainz_trackid"}, "test": {}, "pictures": {}, @@ -485,6 +491,61 @@ class PermissiveDateField(serializers.CharField): return None +TAG_REGEX = re.compile(r"^((\w+)([\d_]*))$") + + +def extract_tags_from_genre(string): + tags = [] + delimiter = "@@@@@" + for d in [" - ", ",", ";", "/"]: + # Replace common tags separators by a custom delimiter + string = string.replace(d, delimiter) + + # loop on the parts (splitting on our custom delimiter) + for tag in string.split(delimiter): + tag = tag.strip() + for d in ["-"]: + # preparation for replacement so that Pop-Rock becomes Pop Rock, then PopRock + # (step 1, step 2 happens below) + tag = tag.replace(d, " ") + if not tag: + continue + final_tag = "" + if not TAG_REGEX.match(tag.replace(" ", "")): + # the string contains some non words chars ($, €, etc.), right now + # we simply skip such tags + continue + # concatenate the parts and uppercase them so that 'pop rock' becomes 'PopRock' + if len(tag.split(" ")) == 1: + # we append the tag "as is", because it doesn't contain any space + tags.append(tag) + continue + for part in tag.split(" "): + # the tag contains space, there's work to do to have consistent case + # 'pop rock' -> 'PopRock' + # (step 2) + if not part: + continue + final_tag += part[0].upper() + part[1:] + if final_tag: + tags.append(final_tag) + return tags + + +class TagsField(serializers.CharField): + def get_value(self, data): + return data + + def to_internal_value(self, data): + try: + value = data.get("genre") or "" + except TagNotFound: + return [] + value = super().to_internal_value(str(value)) + + return extract_tags_from_genre(value) + + class MBIDField(serializers.UUIDField): def __init__(self, *args, **kwargs): kwargs.setdefault("allow_null", True) @@ -533,6 +594,7 @@ class TrackMetadataSerializer(serializers.Serializer): copyright = serializers.CharField(allow_blank=True, allow_null=True, required=False) license = serializers.CharField(allow_blank=True, allow_null=True, required=False) mbid = MBIDField() + tags = TagsField(allow_blank=True, allow_null=True, required=False) album = AlbumField() artists = ArtistField() @@ -544,6 +606,7 @@ class TrackMetadataSerializer(serializers.Serializer): "position", "disc_number", "mbid", + "tags", ] def validate(self, validated_data): @@ -553,7 +616,7 @@ class TrackMetadataSerializer(serializers.Serializer): v = validated_data[field] except KeyError: continue - if v in ["", None]: + if v in ["", None, []]: validated_data.pop(field) return validated_data diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index ff3cde440..7372f82c5 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -14,6 +14,7 @@ from requests.exceptions import RequestException from funkwhale_api.common import channels, preferences from funkwhale_api.federation import routes from funkwhale_api.federation import library as lb +from funkwhale_api.tags import models as tags_models from funkwhale_api.taskapp import celery from . import licenses @@ -541,10 +542,12 @@ def _get_track(data, attributed_to=None): if data.get("fdate"): defaults["creation_date"] = data.get("fdate") - track = get_best_candidate_or_create( + track, created = get_best_candidate_or_create( models.Track, query, defaults=defaults, sort_fields=["mbid", "fid"] - )[0] + ) + if created: + tags_models.add_tags(track, *data.get("tags", [])) return track diff --git a/api/tests/music/sample.flac b/api/tests/music/sample.flac index fe3ec6e4a..a8aafa392 100644 Binary files a/api/tests/music/sample.flac and b/api/tests/music/sample.flac differ diff --git a/api/tests/music/test.mp3 b/api/tests/music/test.mp3 index 8e7c0adbb..5f8dc2c72 100644 Binary files a/api/tests/music/test.mp3 and b/api/tests/music/test.mp3 differ diff --git a/api/tests/music/test.ogg b/api/tests/music/test.ogg index 5ca0a7f55..9975cd9fe 100644 Binary files a/api/tests/music/test.ogg and b/api/tests/music/test.ogg differ diff --git a/api/tests/music/test.opus b/api/tests/music/test.opus index ac39fd327..92634ce50 100644 Binary files a/api/tests/music/test.opus and b/api/tests/music/test.opus differ diff --git a/api/tests/music/test_metadata.py b/api/tests/music/test_metadata.py index 539fa49a2..121fef4b2 100644 --- a/api/tests/music/test_metadata.py +++ b/api/tests/music/test_metadata.py @@ -57,6 +57,7 @@ def test_can_get_metadata_all(): "musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f", "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "Someone", + "genre": "Classical", } assert data.all() == expected @@ -249,6 +250,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("f269d497-1cc0-4ae4-a0c4-157ec7d73fcb"), "license": "https://creativecommons.org/licenses/by-nc-nd/2.5/", "copyright": "Someone", + "tags": ["Funk"], }, ), ( @@ -281,6 +283,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"), "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "Someone", + "tags": ["Classical"], }, ), ( @@ -313,6 +316,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"), "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "Someone", + "tags": ["Classical"], }, ), ( @@ -336,6 +340,7 @@ def test_metadata_fallback_ogg_theora(mocker): } ], }, + "tags": ["Rock"], "position": 1, "disc_number": 1, "mbid": uuid.UUID("124d0150-8627-46bc-bc14-789a3bc960c8"), @@ -371,6 +376,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("30f3f33e-8d0c-4e69-8539-cbd701d18f28"), "license": "http://creativecommons.org/licenses/by-nc-sa/3.0/us/", "copyright": "2008 nin", + "tags": ["Industrial"], }, ), ], @@ -607,3 +613,43 @@ def test_artist_field_featuring(): value = field.get_value(data) assert field.to_internal_value(value) == expected + + +@pytest.mark.parametrize( + "genre, expected_tags", + [ + ("Pop", ["Pop"]), + ("pop", ["pop"]), + ("Pop-Rock", ["PopRock"]), + ("Pop - Rock", ["Pop", "Rock"]), + ("Soundtrack - Cute Anime", ["Soundtrack", "CuteAnime"]), + ("Pop, Rock", ["Pop", "Rock"]), + ("Chanson française", ["ChansonFrançaise"]), + ("Unhandled❤️", []), + ("tag with non-breaking spaces", []), + ], +) +def test_acquire_tags_from_genre(genre, expected_tags): + data = { + "title": "Track Title", + "artist": "Track Artist", + "album": "Track Album", + "genre": genre, + } + expected = { + "title": "Track Title", + "artists": [{"name": "Track Artist", "mbid": None}], + "album": { + "title": "Track Album", + "mbid": None, + "release_date": None, + "artists": [], + }, + "cover_data": None, + } + if expected_tags: + expected["tags"] = expected_tags + + serializer = metadata.TrackMetadataSerializer(data=metadata.FakeMetadata(data)) + assert serializer.is_valid(raise_exception=True) is True + assert serializer.validated_data == expected diff --git a/api/tests/music/test_models.py b/api/tests/music/test_models.py index 5aa29b3cc..344273673 100644 --- a/api/tests/music/test_models.py +++ b/api/tests/music/test_models.py @@ -448,7 +448,7 @@ def test_get_audio_data(factories): result = upload.get_audio_data() - assert result == {"duration": 1, "bitrate": 112000, "size": 14858} + assert result == {"duration": 1, "bitrate": 112000, "size": 15918} def test_library_queryset_with_follows(factories): diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py index 78f4622ba..08beaa94e 100644 --- a/api/tests/music/test_tasks.py +++ b/api/tests/music/test_tasks.py @@ -18,6 +18,7 @@ DATA_DIR = os.path.dirname(os.path.abspath(__file__)) def test_can_create_track_from_file_metadata_no_mbid(db, mocker): + add_tags = mocker.patch("funkwhale_api.tags.models.add_tags") metadata = { "title": "Test track", "artists": [{"name": "Test artist"}], @@ -26,6 +27,7 @@ def test_can_create_track_from_file_metadata_no_mbid(db, mocker): "disc_number": 2, "license": "Hello world: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "2018 Someone", + "tags": ["Punk", "Rock"], } match_license = mocker.spy(licenses, "match") @@ -44,6 +46,7 @@ def test_can_create_track_from_file_metadata_no_mbid(db, mocker): assert track.artist.mbid is None assert track.artist.attributed_to is None match_license.assert_called_once_with(metadata["license"], metadata["copyright"]) + add_tags.assert_called_once_with(track, *metadata["tags"]) def test_can_create_track_from_file_metadata_attributed_to(factories, mocker): diff --git a/api/tests/music/with_other_picture.mp3 b/api/tests/music/with_other_picture.mp3 index 3118f067e..e83b610b0 100644 Binary files a/api/tests/music/with_other_picture.mp3 and b/api/tests/music/with_other_picture.mp3 differ