From 7bc8109732a426316c806b2b240213e93567c62f Mon Sep 17 00:00:00 2001 From: Eliot Berriot Date: Fri, 12 Jul 2019 15:06:39 +0200 Subject: [PATCH] See #432: tags acquisition (from audio files) --- api/funkwhale_api/music/metadata.py | 65 ++++++++++++++++++++++++- api/funkwhale_api/music/tasks.py | 7 ++- api/tests/music/sample.flac | Bin 91522 -> 91522 bytes api/tests/music/test.mp3 | Bin 297745 -> 297745 bytes api/tests/music/test.ogg | Bin 14858 -> 15918 bytes api/tests/music/test.opus | Bin 14583 -> 15643 bytes api/tests/music/test_metadata.py | 46 +++++++++++++++++ api/tests/music/test_models.py | 2 +- api/tests/music/test_tasks.py | 3 ++ api/tests/music/with_other_picture.mp3 | Bin 116211 -> 116339 bytes 10 files changed, 119 insertions(+), 4 deletions(-) diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py index f0ea67b1a..77f85aef5 100644 --- a/api/funkwhale_api/music/metadata.py +++ b/api/funkwhale_api/music/metadata.py @@ -2,6 +2,7 @@ import base64 import datetime import logging import pendulum +import re import mutagen._util import mutagen.oggtheora @@ -144,6 +145,7 @@ CONF = { "mbid": {"field": "musicbrainz_trackid"}, "license": {}, "copyright": {}, + "genre": {}, }, }, "OggVorbis": { @@ -162,6 +164,7 @@ CONF = { "mbid": {"field": "musicbrainz_trackid"}, "license": {}, "copyright": {}, + "genre": {}, "pictures": { "field": "metadata_block_picture", "to_application": clean_ogg_pictures, @@ -184,6 +187,7 @@ CONF = { "mbid": {"field": "MusicBrainz Track Id"}, "license": {}, "copyright": {}, + "genre": {}, }, }, "MP3": { @@ -199,6 +203,7 @@ CONF = { "date": {"field": "TDRC"}, "musicbrainz_albumid": {"field": "MusicBrainz Album Id"}, "musicbrainz_artistid": {"field": "MusicBrainz Artist Id"}, + "genre": {"field": "TCON"}, "musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"}, "mbid": {"field": "UFID", "getter": get_mp3_recording_id}, "pictures": {}, @@ -220,6 +225,7 @@ CONF = { "musicbrainz_albumid": {}, "musicbrainz_artistid": {}, "musicbrainz_albumartistid": {}, + "genre": {}, "mbid": {"field": "musicbrainz_trackid"}, "test": {}, "pictures": {}, @@ -485,6 +491,61 @@ class PermissiveDateField(serializers.CharField): return None +TAG_REGEX = re.compile(r"^((\w+)([\d_]*))$") + + +def extract_tags_from_genre(string): + tags = [] + delimiter = "@@@@@" + for d in [" - ", ",", ";", "/"]: + # Replace common tags separators by a custom delimiter + string = string.replace(d, delimiter) + + # loop on the parts (splitting on our custom delimiter) + for tag in string.split(delimiter): + tag = tag.strip() + for d in ["-"]: + # preparation for replacement so that Pop-Rock becomes Pop Rock, then PopRock + # (step 1, step 2 happens below) + tag = tag.replace(d, " ") + if not tag: + continue + final_tag = "" + if not TAG_REGEX.match(tag.replace(" ", "")): + # the string contains some non words chars ($, €, etc.), right now + # we simply skip such tags + continue + # concatenate the parts and uppercase them so that 'pop rock' becomes 'PopRock' + if len(tag.split(" ")) == 1: + # we append the tag "as is", because it doesn't contain any space + tags.append(tag) + continue + for part in tag.split(" "): + # the tag contains space, there's work to do to have consistent case + # 'pop rock' -> 'PopRock' + # (step 2) + if not part: + continue + final_tag += part[0].upper() + part[1:] + if final_tag: + tags.append(final_tag) + return tags + + +class TagsField(serializers.CharField): + def get_value(self, data): + return data + + def to_internal_value(self, data): + try: + value = data.get("genre") or "" + except TagNotFound: + return [] + value = super().to_internal_value(str(value)) + + return extract_tags_from_genre(value) + + class MBIDField(serializers.UUIDField): def __init__(self, *args, **kwargs): kwargs.setdefault("allow_null", True) @@ -533,6 +594,7 @@ class TrackMetadataSerializer(serializers.Serializer): copyright = serializers.CharField(allow_blank=True, allow_null=True, required=False) license = serializers.CharField(allow_blank=True, allow_null=True, required=False) mbid = MBIDField() + tags = TagsField(allow_blank=True, allow_null=True, required=False) album = AlbumField() artists = ArtistField() @@ -544,6 +606,7 @@ class TrackMetadataSerializer(serializers.Serializer): "position", "disc_number", "mbid", + "tags", ] def validate(self, validated_data): @@ -553,7 +616,7 @@ class TrackMetadataSerializer(serializers.Serializer): v = validated_data[field] except KeyError: continue - if v in ["", None]: + if v in ["", None, []]: validated_data.pop(field) return validated_data diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index ff3cde440..7372f82c5 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -14,6 +14,7 @@ from requests.exceptions import RequestException from funkwhale_api.common import channels, preferences from funkwhale_api.federation import routes from funkwhale_api.federation import library as lb +from funkwhale_api.tags import models as tags_models from funkwhale_api.taskapp import celery from . import licenses @@ -541,10 +542,12 @@ def _get_track(data, attributed_to=None): if data.get("fdate"): defaults["creation_date"] = data.get("fdate") - track = get_best_candidate_or_create( + track, created = get_best_candidate_or_create( models.Track, query, defaults=defaults, sort_fields=["mbid", "fid"] - )[0] + ) + if created: + tags_models.add_tags(track, *data.get("tags", [])) return track diff --git a/api/tests/music/sample.flac b/api/tests/music/sample.flac index fe3ec6e4a270da0ff0a3f1ca677da4990ddb0ea5..a8aafa39239a199663c6673c678064826ecf965b 100644 GIT binary patch delta 493 zcmYk2&1xGl6osvs%&LEB0ts%Rb?L4&W~RZ84N-`!szHfH<4PK8ca>>ulYz;kBypF8 zrY}%)7g_Wv3Wd;J3*DGJKwctb)d#3#=+8nRopbKJpYG{JaC#AZT3P&Zt5T_m?A@rb zR&O-xQ5daGHn%6AtKbC`4!rwM?%F`2_!gH$^chk3Ss1pQsF+|xGt=)foE2}z2XN`F zFW#{Z(OlwA7)JC((v4rj(2wEF+k|;kU0Sey%zI2>QeFe+b*&WB87i>n1ToJm;Ef9Nn~Wca>?}v0#xapZP`+x?=+&pt)sW!8-i(@jXXH zl;PT7nU}*FFZcFqGRuo&_z-M7;zlssZnvM><1o2a6sOwIM%Ze$)|wIgLe#ju&)WYP z>I9m^T`q{>iNxW&6}FgE4D)g@K0Z0J-~L-Q!?R#+VGe!v{x}WJrvbWt?fF;dKX%cT KfA(#FroRCN6@!xi delta 508 zcmY+AyGjE=6oxa|-NHK>Vn_s8u=B=lFwuYm*^H}>?j`O{&{!p!6@?fjM2nR11rknS zWup%fd;$@%u&`7tt$YFtXQG0_3=IGI|8veatp}y`ptR>AS3{vt$SR*7q!#B>>3Ln# zlb!6wv`~p_=c#2d>N;49av`YgpHzZ6{%42l@TM#5j55o_nM`JuUtzb39wpRgnAL+? zL-m4Y?W8`+3!Z72hHnzL;4rU10;X_`(}-Mqa8&(hR%jO(sP z*Qmqi|8G3NOFr>vo^j?z4en4DX3#Qxh7)}gqMem1*zhEe#|1AjIDxzNv&A}ptI0)Z zBD;su^I~=wsDAb66xuQdEXSgM{(}|sCN5X^&W=G1j{`zcNQbMSE71|Lh#>p{f=4;g uxgo&hWjVo1@G2!hQPh3`{XPHUMd`jM=wlbj-KP?ImoDE*D1LccLHAz*?tpaw diff --git a/api/tests/music/test.mp3 b/api/tests/music/test.mp3 index 8e7c0adbbc1a36f51bd32cf4be12c8e3326f1007..5f8dc2c727d63873ed7cada78379030b9cadb0d7 100644 GIT binary patch delta 82 zcmbO@O=#jYp$T)vL!ABn7#J9M85kJ<|8rv~WyoX52J$zqSj@;5#1O*Z$q)=AgMmU5 gbtE?zG4&KSbCtAnl`sM^6A&|R=PF@g6K7-v0H9_R>Hq)$ delta 75 zcmbO@O=#jYp$T)jJp+7#85kHCfoS8F#f*GK3?&Si48=gQ7$`hBkx_DU9aB%?WQ9VW YX0DQUt`bHdW&&d7?OY`+Y~qan0TO={e*gdg diff --git a/api/tests/music/test.ogg b/api/tests/music/test.ogg index 5ca0a7f55bec448220af35a368ec557b2f42fae6..9975cd9fe447824358da6dc9a89780c61ade8aee 100644 GIT binary patch delta 262 zcmeAwSywY5fX`)jLxJRfD7ZQ?N`p~xVzbZW{VbxBKeH%LJbz^JB1Xr_GE8^sc^McO zd>oxzeQbS8i}MRo^Z0=RE{-9twnheqM!E(Tx`w9QK%rpgAkTmhTc5;|JV78g$koTy zG1%4FKh!TIDAG0}oChcv;veGZ*^i~r7~}$XSHB=vTj!j_;^NHY#2m2cAdO%nY)wEe a0+|ls8ZvHHA%~xfvK3f}Mjr143+l5=-(XpJ#Mb z<_C(oIEJ{|8W|WG=^9w*8k+I~g?t>HTzzbPON;XhQYXtXi%k|_x?{)#lnC(;arANV z40aB-H3TUH^Fx9hoxOu?O+X4ja(ONXc+P=T|5 iKxB}oyGMv^aDHxTeqQQiK34gOFO(S>H(PUlHUR+dVL2TD diff --git a/api/tests/music/test.opus b/api/tests/music/test.opus index ac39fd3270e27f5cadaf0927472ad17926dfcff6..92634ce507bbe0bc0f0d36b7ea6abe34965cafe9 100644 GIT binary patch delta 419 zcmexfIJ;_sE8m?nT3a~(L&4>V{*o+;3=9ktt34+-u!v5+$)Y&%$dSp@8677JGTo`? z1xooiI=TAT`j!^w7o_I#0|i_hLtJf*3=EBQ4J>pGO}T+W!OlUR0U@?Ni6wc0KyHw$ zkE>&_tFwQoUr11-ZA3T^P%y+l#L>saGuS!U))2%8G5tb)om_)#4MFyRkQ3Zp{eoO=opTb4i!+lGbHK)fbb`&W eH32yZWITjxIGNExW3qsSeZVKuN3j!v#Vw!Wps`30$Y zf1UWi;2iux}6oBOX zLVcZFgKP~adol-4KF{1Sc@DGj#P{Np^H^*}LB>1#2Sf&Wx_gA!2IuFd=I5nOUd>`6 V05%?E62##q6VEAbmgD?u3;>k5O4tAZ diff --git a/api/tests/music/test_metadata.py b/api/tests/music/test_metadata.py index 539fa49a2..121fef4b2 100644 --- a/api/tests/music/test_metadata.py +++ b/api/tests/music/test_metadata.py @@ -57,6 +57,7 @@ def test_can_get_metadata_all(): "musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f", "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "Someone", + "genre": "Classical", } assert data.all() == expected @@ -249,6 +250,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("f269d497-1cc0-4ae4-a0c4-157ec7d73fcb"), "license": "https://creativecommons.org/licenses/by-nc-nd/2.5/", "copyright": "Someone", + "tags": ["Funk"], }, ), ( @@ -281,6 +283,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"), "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "Someone", + "tags": ["Classical"], }, ), ( @@ -313,6 +316,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"), "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "Someone", + "tags": ["Classical"], }, ), ( @@ -336,6 +340,7 @@ def test_metadata_fallback_ogg_theora(mocker): } ], }, + "tags": ["Rock"], "position": 1, "disc_number": 1, "mbid": uuid.UUID("124d0150-8627-46bc-bc14-789a3bc960c8"), @@ -371,6 +376,7 @@ def test_metadata_fallback_ogg_theora(mocker): "mbid": uuid.UUID("30f3f33e-8d0c-4e69-8539-cbd701d18f28"), "license": "http://creativecommons.org/licenses/by-nc-sa/3.0/us/", "copyright": "2008 nin", + "tags": ["Industrial"], }, ), ], @@ -607,3 +613,43 @@ def test_artist_field_featuring(): value = field.get_value(data) assert field.to_internal_value(value) == expected + + +@pytest.mark.parametrize( + "genre, expected_tags", + [ + ("Pop", ["Pop"]), + ("pop", ["pop"]), + ("Pop-Rock", ["PopRock"]), + ("Pop - Rock", ["Pop", "Rock"]), + ("Soundtrack - Cute Anime", ["Soundtrack", "CuteAnime"]), + ("Pop, Rock", ["Pop", "Rock"]), + ("Chanson française", ["ChansonFrançaise"]), + ("Unhandled❤️", []), + ("tag with non-breaking spaces", []), + ], +) +def test_acquire_tags_from_genre(genre, expected_tags): + data = { + "title": "Track Title", + "artist": "Track Artist", + "album": "Track Album", + "genre": genre, + } + expected = { + "title": "Track Title", + "artists": [{"name": "Track Artist", "mbid": None}], + "album": { + "title": "Track Album", + "mbid": None, + "release_date": None, + "artists": [], + }, + "cover_data": None, + } + if expected_tags: + expected["tags"] = expected_tags + + serializer = metadata.TrackMetadataSerializer(data=metadata.FakeMetadata(data)) + assert serializer.is_valid(raise_exception=True) is True + assert serializer.validated_data == expected diff --git a/api/tests/music/test_models.py b/api/tests/music/test_models.py index 5aa29b3cc..344273673 100644 --- a/api/tests/music/test_models.py +++ b/api/tests/music/test_models.py @@ -448,7 +448,7 @@ def test_get_audio_data(factories): result = upload.get_audio_data() - assert result == {"duration": 1, "bitrate": 112000, "size": 14858} + assert result == {"duration": 1, "bitrate": 112000, "size": 15918} def test_library_queryset_with_follows(factories): diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py index 78f4622ba..08beaa94e 100644 --- a/api/tests/music/test_tasks.py +++ b/api/tests/music/test_tasks.py @@ -18,6 +18,7 @@ DATA_DIR = os.path.dirname(os.path.abspath(__file__)) def test_can_create_track_from_file_metadata_no_mbid(db, mocker): + add_tags = mocker.patch("funkwhale_api.tags.models.add_tags") metadata = { "title": "Test track", "artists": [{"name": "Test artist"}], @@ -26,6 +27,7 @@ def test_can_create_track_from_file_metadata_no_mbid(db, mocker): "disc_number": 2, "license": "Hello world: http://creativecommons.org/licenses/by-sa/4.0/", "copyright": "2018 Someone", + "tags": ["Punk", "Rock"], } match_license = mocker.spy(licenses, "match") @@ -44,6 +46,7 @@ def test_can_create_track_from_file_metadata_no_mbid(db, mocker): assert track.artist.mbid is None assert track.artist.attributed_to is None match_license.assert_called_once_with(metadata["license"], metadata["copyright"]) + add_tags.assert_called_once_with(track, *metadata["tags"]) def test_can_create_track_from_file_metadata_attributed_to(factories, mocker): diff --git a/api/tests/music/with_other_picture.mp3 b/api/tests/music/with_other_picture.mp3 index 3118f067e37064f91f48f4265fac9745236169e0..e83b610b09126c26384fa48428584c8fe255f5bf 100644 GIT binary patch delta 1888 zcmbuAO-~b16oyYBNHlS!8%@xOi9utKcKSiPY^Q}(fi}z#%W~)kX@m}zVnh>X;ldc* z`#(&SjsL*E!G<+^@HuzNfQUdMleu&6x#ynqp7*?$9(*1@_%a?=%jr=Ox%TFsSM_XW zH$@Uhhb3uCPg<qYC0vJQ(I&iVLIhiE_$&!DKnB3h3>6X zmSVGwF)2AUJGDBWTLzCy<_(GqNZ1w$$+)a&qupn~(OoS)(4+N=2EMR(*F?Y2rO{}_ zQ9P;1lB}>kIK}J3kLy!(WRjG@Xd2~1rtO1|^v`{JYxo_Oc^k9v5!1g*cE`Yqn z+E+){t;CLc{JZN;^FtHTqd7H0rfT5aW=HOijeCF+A`Cr)VFv_0 zwy3x>wLLLMid5!No6G2G{!0=?ryMEbUzB;!&B>xHQ*22p#L@sMYRd4$#S~1c1!T1a zz9z1HaA`Sj;i(8)tL$xvMgL_7DIx{pw3)>*s#lF+^sEiiY#Jtj>W;~)y;wg9S^Fzw z_zzqZ*5;(Lxy7jWNAFROEPViLmWI4IiM%%CwrSk9aO)xk3!e23Yh@X(NtUuZdv?gCLuF~X_8{qr1K@r z65BQyI+*j!G)vAvEDUEIR(W<6qs7JOg)K($@e&lV*bv@XL`MIALeh@>C!$XmB04QC>bJO1eIpzF zk9;hcCMr;_ZEE%nBT88$oizB;u%Rr?z*~};%1T+ua%P5=o>8V^md?i%9^c8+pcIp5 zZfuhug%eCEa;=g#Pvo4*s;yL=PVwf!sv7pekTacYWAVY`W7ns{nViv2v0il~@$J2g zh>D+q!Nh##BV&b@OpALu5T#1d^x$V@R^-nk!TXfhKfGsU`a z1edz^54i9TND?1r@Z zL#WnLiA3R`L!iD{)p7FcYc`F?X^z67{|e-5Ak%Gz!sc!Z%6m}mr-kA3c{(-P z+;~wWhrsCvXA5MTps+pkp}bM;n`UWalVa0Bq2C{Xya95MPE?vGSlp}7mGqKNz*36B z(P#wHb&$gTa32A|u8da3FBuKy#7pu5QOSWnEW3jc?eaW9VXxB$$}P~_@j;yPvC&g9 z<|qt;&RJuZAL)j>Md%hW&83;!m6@PV(YcfsuiSxNngK51<736MGsBDKCNxXG2?_Rh zUhnlm*Oi4=1hk#;;Rh)|d@ zMsTF0gNd>%O{pzRPM5pS7H^do`~yWOW(pY*%o{evOiLGYYJ1p&kM>;22>qMU^NWt> zc~xFKVMkI*g>6b*%%rC=m!`y~rEM)D;wfT*mvVkjaAqsAT!jB0usI8k`y6W--%gem z(+VaW%4ozjj-=z2*s?8wx#OBxJH%=*M|17L@h^_xU$ije;;^C-HxjOAn^@S&!j?