From 0ee0db7ea5d0bd8c24a5d0cbcccdba67844b898b Mon Sep 17 00:00:00 2001 From: Agate Date: Wed, 6 May 2020 00:23:42 +0200 Subject: [PATCH 1/5] Fix #1107: Fix HTML not including instance name in some situations --- api/funkwhale_api/common/middleware.py | 10 ++++++++++ api/tests/common/test_middleware.py | 11 +++++++---- changes/changelog.d/1107.enhancement | 1 + front/src/App.vue | 4 ++-- 4 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 changes/changelog.d/1107.enhancement diff --git a/api/funkwhale_api/common/middleware.py b/api/funkwhale_api/common/middleware.py index 1143255e3..de06fd1d4 100644 --- a/api/funkwhale_api/common/middleware.py +++ b/api/funkwhale_api/common/middleware.py @@ -39,6 +39,9 @@ def serve_spa(request): settings.FUNKWHALE_SPA_REWRITE_MANIFEST_URL or federation_utils.full_url(urls.reverse("api:v1:instance:spa-manifest")) ) + title = preferences.get("instance__name") + if title: + head = replace_title(head, title) head = replace_manifest_url(head, new_url) if not preferences.get("common__api_authentication_required"): @@ -82,6 +85,7 @@ def serve_spa(request): MANIFEST_LINK_REGEX = re.compile(r"<link [^>]*rel=(?:'|\")?manifest(?:'|\")?[^>]*>") +TITLE_REGEX = re.compile(r"<title>.*") def replace_manifest_url(head, new_url): @@ -90,6 +94,12 @@ def replace_manifest_url(head, new_url): return head +def replace_title(head, new_title): + replacement = "{}".format(html.escape(new_title)) + head = TITLE_REGEX.sub(replacement, head) + return head + + def get_spa_html(spa_url): return get_spa_file(spa_url, "index.html") diff --git a/api/tests/common/test_middleware.py b/api/tests/common/test_middleware.py index 40ff279cb..8f04ba318 100644 --- a/api/tests/common/test_middleware.py +++ b/api/tests/common/test_middleware.py @@ -1,6 +1,6 @@ +import html import time import pytest - from django.http import HttpResponse from django.urls import reverse @@ -55,10 +55,12 @@ def test_should_fallback(path, expected, mocker): def test_serve_spa_from_cache(mocker, settings, preferences, no_api_auth): - + preferences["instance__name"] = 'Best Funkwhale "pod"' request = mocker.Mock(path="/") get_spa_html = mocker.patch.object( - middleware, "get_spa_html", return_value="" + middleware, + "get_spa_html", + return_value="Funkwhale", ) mocker.patch.object( middleware, @@ -84,7 +86,8 @@ def test_serve_spa_from_cache(mocker, settings, preferences, no_api_auth): assert response.status_code == 200 expected = [ - "", + "" + "{}".format(html.escape(preferences["instance__name"])), '', '', '', diff --git a/changes/changelog.d/1107.enhancement b/changes/changelog.d/1107.enhancement new file mode 100644 index 000000000..2b4ff8863 --- /dev/null +++ b/changes/changelog.d/1107.enhancement @@ -0,0 +1 @@ +Fix HTML not including instance name in some situations (#1107) diff --git a/front/src/App.vue b/front/src/App.vue index f85d8e920..0cfbb5bdc 100644 --- a/front/src/App.vue +++ b/front/src/App.vue @@ -66,6 +66,7 @@ export default { instanceUrl: null, showShortcutsModal: false, showSetInstanceModal: false, + initialTitle: document.title, } }, async created () { @@ -147,7 +148,6 @@ export default { }, mounted () { let self = this - // slight hack to allow use to have internal links in <translate> tags // while preserving router behaviour document.documentElement.addEventListener('click', function (event) { @@ -281,7 +281,7 @@ export default { if (this.$store.state.ui.pageTitle) { parts.push(this.$store.state.ui.pageTitle) } - parts.push(this.$store.state.instance.settings.instance.name.value || 'Funkwhale') + parts.push(this.initialTitle || 'Funkwhale') document.title = parts.join(' – ') }, From be251ac37eb8a3e9bcaa757733724f4b3f863ee5 Mon Sep 17 00:00:00 2001 From: Agate <me@agate.blue> Date: Wed, 6 May 2020 00:26:23 +0200 Subject: [PATCH 2/5] Fixed espacing issue in instance name in footer --- front/src/components/Footer.vue | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/front/src/components/Footer.vue b/front/src/components/Footer.vue index 3b2442831..6e9f76167 100644 --- a/front/src/components/Footer.vue +++ b/front/src/components/Footer.vue @@ -4,10 +4,10 @@ <div class="ui stackable equal height stackable grid"> <section class="four wide column"> <h4 v-if="podName" class="ui header ellipsis"> - <translate translate-context="Footer/About/Title" :translate-params="{instanceName: podName}" >About %{instanceName}</translate> + <span v-translate="{instanceName: podName}" translate-context="Footer/About/Title">About %{instanceName}</span> </h4> <h4 v-else class="ui header ellipsis"> - <translate translate-context="Footer/About/Title" :translate-params="{instanceUrl: instanceHostname}" >About %{instanceUrl}</translate> + <span v-translate="{instanceUrl: instanceHostname}" translate-context="Footer/About/Title">About %{instanceUrl}</span> </h4> <div class="ui link list"> <router-link class="item" to="/about"> From fda56d582b3707d41c8c33972abba4ff0f215410 Mon Sep 17 00:00:00 2001 From: Creak <romain.failliot@foolstep.com> Date: Wed, 6 May 2020 00:30:31 +0200 Subject: [PATCH 3/5] Simplify all-in-one docker installation Use docker-compose only to deploy the service on the server. --- docs/installation/docker.rst | 135 +++++++++++++++-------------------- 1 file changed, 57 insertions(+), 78 deletions(-) diff --git a/docs/installation/docker.rst b/docs/installation/docker.rst index 7bb912b81..08c531480 100644 --- a/docs/installation/docker.rst +++ b/docs/installation/docker.rst @@ -23,7 +23,21 @@ Mono-container installation This installation method was contributed by @thetarkus, at https://github.com/thetarkus/docker-funkwhale -First, ensure you have `Docker <https://docs.docker.com/engine/installation/>`_ installed. +These are the installation steps: + +1. Install docker +2. Create ``funkwhale`` user +3. Create ``.env`` file +4. Create ``docker-compose.yml`` file +5. Start Funkwhale service + +Install docker +~~~~~~~~~~~~~~ + +Ensure you have `Docker <https://docs.docker.com/engine/installation/>`_ and `docker-compose <https://docs.docker.com/compose/install/>`_ installed. + +Create ``funkwhale`` user +~~~~~~~~~~~~~~~~~~~~~~~~~ Create the user and the directory: @@ -38,11 +52,10 @@ Log in as the newly created user from now on: sudo -u funkwhale -H bash -Export the `version you want <https://hub.docker.com/r/funkwhale/all-in-one/tags>`_ to deploy (e.g., ``0.19.1``): +Create ``.env`` file +~~~~~~~~~~~~~~~~~~~~ -.. parsed-literal:: - - export FUNKWHALE_VERSION="|version|" +Export the `version you want <https://hub.docker.com/r/funkwhale/all-in-one/tags>`_ to deploy (e.g., ``0.21``): Create an env file to store a few important configuration options: @@ -50,7 +63,7 @@ Create an env file to store a few important configuration options: touch .env chmod 600 .env # reduce permissions on the .env file since it contains sensitive data - cat > .env <<EOD + cat > .env << EOF # Replace 'your.funkwhale.example' with your actual domain FUNKWHALE_HOSTNAME=your.funkwhale.example # Protocol may also be: http @@ -65,29 +78,44 @@ Create an env file to store a few important configuration options: DJANGO_SECRET_KEY=$(openssl rand -hex 45) # Remove this if you expose the container directly on ports 80/443 NESTED_PROXY=1 - EOD + EOF +Create ``docker-compose.yml`` file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Then start the container: +.. code-block:: yaml -.. code-block:: shell - - docker run \ - --name=funkwhale \ - --restart=unless-stopped \ - --env-file=/srv/funkwhale/.env \ - -v /srv/funkwhale/data:/data \ - -v /path/to/your/music/dir:/music:ro \ - -e PUID=$UID \ - -e PGID=$GID \ - -p 5000:80 \ - -d \ - funkwhale/all-in-one:$FUNKWHALE_VERSION + version: "3" + services: + funkwhale: + container_name: funkwhale + restart: unless-stopped + # change version number here when you want to do an upgrade + image: funkwhale/all-in-one:|version| + env_file: .env + environment: + # adapt to the pid/gid that own /srv/funkwhale/data + - PUID=1000 + - PGID=1000 + volumes: + - /srv/funkwhale/data:/data + - /path/to/your/music/dir:/music:ro + ports: + - "5000:80" .. note:: - - ``-e PUID`` and ``-e PGID`` are optional but useful to prevent permission issues with docker volumes - - ``-v /path/to/your/music/dir`` should point to a path on your host were is located music you want to import in your Funkwhale instance. You can safely remove the volume if you don't want to import music that way. + - ``PUID`` and ``PGID`` are optional but useful to prevent permission issues with docker volumes + - ``/path/to/your/music/dir`` should point to a path on your host where music you would like to import is located. You can safely remove the volume if you don't want to import music that way. + +Start Funkwhale service +~~~~~~~~~~~~~~~~~~~~~~~ + +Start the container: + +.. code-block:: shell + + docker-compose up -d Your container should start in the background, and your instance be available at ``yourip:5000`` shortly. @@ -95,66 +123,17 @@ You will need an admin account to login and manage your account, create one usin Useful commands: +- You can start and stop your instance using ``docker-compose start`` and ``docker-compose stop``, respectively - You can examine the logs by running ``docker logs -f --tail=50 funkwhale`` -- You can start and stop your instance using ``docker start funkwhale`` and ``docker stop funkwhale``, respectively - To have a better idea of the resource usage of your instance (CPU, memory), run ``docker stats funkwhale`` +Now, you just need to configure your :ref:`reverse-proxy <reverse-proxy-setup>`. Don't worry, it's quite easy. + .. note:: - The container will not pick up changes made in .env file automatically. - In order to load new configuration, run: + To upgrade your service, change the version number in ``docker-compose.yml`` and re-run ``docker-compose up -d``. - .. parsed-literal:: - - export FUNKWHALE_VERSION="|version|" - - .. code-block:: shell - - # stop and remove the existing container - docker stop funkwhale - docker rm funkwhale - # relaunch a new container - docker run \ - --name=funkwhale \ - --restart=unless-stopped \ - --env-file=/srv/funkwhale/.env \ - -v /srv/funkwhale/data:/data \ - -v /path/to/your/music/dir:/music:ro \ - -e PUID=$UID \ - -e PGID=$GID \ - -p 5000:80 \ - -d \ - funkwhale/all-in-one:$FUNKWHALE_VERSION - - - You can use the following docker-compose file to make the management process easier: - - .. code-block:: yaml - - version: "3" - - services: - funkwhale: - container_name: funkwhale - restart: unless-stopped - # add the version number in your .env file, or hardcode it - image: funkwhale/all-in-one:${FUNKWHALE_VERSION} - env_file: .env - environment: - # adapt to the pid/gid that own /srv/funkwhale/data - - PUID=1000 - - PGID=1000 - volumes: - - /srv/funkwhale/data:/data - - /path/to/your/music/dir:/music:ro - ports: - - "5000:80" - - Then start the container: - - .. code-block:: shell - - docker-compose up -d + Don't forget you might have manual changes to do when upgrading to a newer version. .. _docker-multi-container: @@ -163,7 +142,7 @@ Multi-container installation First, ensure you have `Docker <https://docs.docker.com/engine/installation/>`_ and `docker-compose <https://docs.docker.com/compose/install/>`_ installed. -Export the `version you want <https://hub.docker.com/r/funkwhale/all-in-one/tags>`_ to deploy (e.g., ``0.19.1``): +Export the `version you want <https://hub.docker.com/r/funkwhale/all-in-one/tags>`_ to deploy (e.g., ``0.21``): .. parsed-literal:: From 0f9a2ae2ef2607499f5164208d742ae2ca8f8a40 Mon Sep 17 00:00:00 2001 From: Agate <me@agate.blue> Date: Wed, 6 May 2020 00:42:47 +0200 Subject: [PATCH 4/5] Fix #1091: page not refreshing when switching between My Library and Explore sections --- changes/changelog.d/1091.bugfix | 1 + front/src/components/library/Library.vue | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changes/changelog.d/1091.bugfix diff --git a/changes/changelog.d/1091.bugfix b/changes/changelog.d/1091.bugfix new file mode 100644 index 000000000..d51cd00df --- /dev/null +++ b/changes/changelog.d/1091.bugfix @@ -0,0 +1 @@ +Fixed page not refreshing when switching between My Library and Explore sections (#1091) diff --git a/front/src/components/library/Library.vue b/front/src/components/library/Library.vue index 2b22a4ca6..c9b162be0 100644 --- a/front/src/components/library/Library.vue +++ b/front/src/components/library/Library.vue @@ -1,6 +1,6 @@ <template> <div class="main library pusher"> - <router-view></router-view> + <router-view :key="$router.currentRoute.name"></router-view> </div> </template> From 752c993e8e910858a1dca0c935432d64c6e7d16e Mon Sep 17 00:00:00 2001 From: Agate <me@agate.blue> Date: Thu, 7 May 2020 09:55:29 +0200 Subject: [PATCH 5/5] Importer updates: watch directories, handle metadata updates --- api/config/settings/common.py | 3 + api/funkwhale_api/common/utils.py | 17 + .../music/management/commands/fix_uploads.py | 138 +++-- .../music/management/commands/import_files.py | 472 ++++++++++++++++-- .../migrations/0052_auto_20200505_0810.py | 23 + api/funkwhale_api/music/models.py | 22 +- api/funkwhale_api/music/tasks.py | 68 +++ api/requirements/base.txt | 1 + api/tests/common/test_utils.py | 9 + api/tests/music/test_commands.py | 26 +- api/tests/music/test_models.py | 12 + api/tests/music/test_tasks.py | 37 ++ api/tests/test_import_audio_file.py | 193 +++++++ changes/changelog.d/1093.bugfix | 2 +- changes/changelog.d/721.feature | 1 + docs/admin/importing-music.rst | 101 ++-- 16 files changed, 1005 insertions(+), 120 deletions(-) create mode 100644 api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py create mode 100644 changes/changelog.d/721.feature diff --git a/api/config/settings/common.py b/api/config/settings/common.py index 675d3e8ce..9ba1ecd09 100644 --- a/api/config/settings/common.py +++ b/api/config/settings/common.py @@ -1309,3 +1309,6 @@ IGNORE_FORWARDED_HOST_AND_PROTO = env.bool( """ Use :attr:`FUNKWHALE_HOSTNAME` and :attr:`FUNKWHALE_PROTOCOL ` instead of request header. """ + +HASHING_ALGORITHM = "sha256" +HASHING_CHUNK_SIZE = 1024 * 100 diff --git a/api/funkwhale_api/common/utils.py b/api/funkwhale_api/common/utils.py index 0a38ef05f..2a91524d5 100644 --- a/api/funkwhale_api/common/utils.py +++ b/api/funkwhale_api/common/utils.py @@ -1,4 +1,5 @@ import datetime +import hashlib from django.core.files.base import ContentFile from django.http import request @@ -458,3 +459,19 @@ def monkey_patch_request_build_absolute_uri(): request.HttpRequest.scheme = property(scheme) request.HttpRequest.get_host = get_host + + +def get_file_hash(file, algo=None, chunk_size=None, full_read=False): + algo = algo or settings.HASHING_ALGORITHM + chunk_size = chunk_size or settings.HASHING_CHUNK_SIZE + handler = getattr(hashlib, algo) + hash = handler() + file.seek(0) + if full_read: + for byte_block in iter(lambda: file.read(chunk_size), b""): + hash.update(byte_block) + else: + # sometimes, it's useful to only hash the beginning of the file, e.g + # to avoid a lot of I/O when crawling large libraries + hash.update(file.read(chunk_size)) + return "{}:{}".format(algo, hash.hexdigest()) diff --git a/api/funkwhale_api/music/management/commands/fix_uploads.py b/api/funkwhale_api/music/management/commands/fix_uploads.py index 582a837c4..02f3d2232 100644 --- a/api/funkwhale_api/music/management/commands/fix_uploads.py +++ b/api/funkwhale_api/music/management/commands/fix_uploads.py @@ -2,6 +2,7 @@ from django.core.management.base import BaseCommand from django.db import transaction from django.db.models import Q +from funkwhale_api.common import utils as common_utils from funkwhale_api.music import models, utils @@ -17,9 +18,9 @@ class Command(BaseCommand): help="Do not execute anything", ) parser.add_argument( - "--mimetypes", + "--mimetype", action="store_true", - dest="mimetypes", + dest="mimetype", default=True, help="Check and fix mimetypes", ) @@ -37,16 +38,33 @@ class Command(BaseCommand): default=False, help="Check and fix file size, can be really slow because it needs to access files", ) + parser.add_argument( + "--checksum", + action="store_true", + dest="checksum", + default=False, + help="Check and fix file size, can be really slow because it needs to access files", + ) + parser.add_argument( + "--batch-size", + "-s", + dest="batch_size", + default=1000, + type=int, + help="Size of each updated batch", + ) def handle(self, *args, **options): if options["dry_run"]: self.stdout.write("Dry-run on, will not commit anything") - if options["mimetypes"]: + if options["mimetype"]: self.fix_mimetypes(**options) if options["data"]: self.fix_file_data(**options) if options["size"]: self.fix_file_size(**options) + if options["checksum"]: + self.fix_file_checksum(**options) @transaction.atomic def fix_mimetypes(self, dry_run, **kwargs): @@ -54,11 +72,12 @@ class Command(BaseCommand): matching = models.Upload.objects.filter( Q(source__startswith="file://") | Q(source__startswith="upload://") ).exclude(mimetype__startswith="audio/") + total = matching.count() self.stdout.write( - "[mimetypes] {} entries found with bad or no mimetype".format( - matching.count() - ) + "[mimetypes] {} entries found with bad or no mimetype".format(total) ) + if not total: + return for extension, mimetype in utils.EXTENSION_TO_MIMETYPE.items(): qs = matching.filter(source__endswith=".{}".format(extension)) self.stdout.write( @@ -81,24 +100,36 @@ class Command(BaseCommand): ) if dry_run: return - for i, upload in enumerate(matching.only("audio_file")): - self.stdout.write( - "[bitrate/length] {}/{} fixing file #{}".format(i + 1, total, upload.pk) - ) - try: - audio_file = upload.get_audio_file() - if audio_file: + chunks = common_utils.chunk_queryset( + matching.only("id", "audio_file", "source"), kwargs["batch_size"] + ) + handled = 0 + for chunk in chunks: + updated = [] + for upload in chunk: + handled += 1 + self.stdout.write( + "[bitrate/length] {}/{} fixing file #{}".format( + handled, total, upload.pk + ) + ) + + try: + audio_file = upload.get_audio_file() data = utils.get_audio_file_data(audio_file) upload.bitrate = data["bitrate"] upload.duration = data["length"] - upload.save(update_fields=["duration", "bitrate"]) + except Exception as e: + self.stderr.write( + "[bitrate/length] error with file #{}: {}".format( + upload.pk, str(e) + ) + ) else: - self.stderr.write("[bitrate/length] no file found") - except Exception as e: - self.stderr.write( - "[bitrate/length] error with file #{}: {}".format(upload.pk, str(e)) - ) + updated.append(upload) + + models.Upload.objects.bulk_update(updated, ["bitrate", "duration"]) def fix_file_size(self, dry_run, **kwargs): self.stdout.write("Fixing missing size...") @@ -107,15 +138,64 @@ class Command(BaseCommand): self.stdout.write("[size] {} entries found with missing values".format(total)) if dry_run: return - for i, upload in enumerate(matching.only("size")): - self.stdout.write( - "[size] {}/{} fixing file #{}".format(i + 1, total, upload.pk) - ) - try: - upload.size = upload.get_file_size() - upload.save(update_fields=["size"]) - except Exception as e: - self.stderr.write( - "[size] error with file #{}: {}".format(upload.pk, str(e)) + chunks = common_utils.chunk_queryset( + matching.only("id", "audio_file", "source"), kwargs["batch_size"] + ) + handled = 0 + for chunk in chunks: + updated = [] + for upload in chunk: + handled += 1 + + self.stdout.write( + "[size] {}/{} fixing file #{}".format(handled, total, upload.pk) ) + + try: + upload.size = upload.get_file_size() + except Exception as e: + self.stderr.write( + "[size] error with file #{}: {}".format(upload.pk, str(e)) + ) + else: + updated.append(upload) + + models.Upload.objects.bulk_update(updated, ["size"]) + + def fix_file_checksum(self, dry_run, **kwargs): + self.stdout.write("Fixing missing checksums...") + matching = models.Upload.objects.filter( + Q(checksum=None) + & (Q(audio_file__isnull=False) | Q(source__startswith="file://")) + ) + total = matching.count() + self.stdout.write( + "[checksum] {} entries found with missing values".format(total) + ) + if dry_run: + return + chunks = common_utils.chunk_queryset( + matching.only("id", "audio_file", "source"), kwargs["batch_size"] + ) + handled = 0 + for chunk in chunks: + updated = [] + for upload in chunk: + handled += 1 + self.stdout.write( + "[checksum] {}/{} fixing file #{}".format(handled, total, upload.pk) + ) + + try: + upload.checksum = common_utils.get_file_hash( + upload.get_audio_file() + ) + except Exception as e: + self.stderr.write( + "[checksum] error with file #{}: {}".format(upload.pk, str(e)) + ) + else: + updated.append(upload) + + models.Upload.objects.bulk_update(updated, ["checksum"]) diff --git a/api/funkwhale_api/music/management/commands/import_files.py b/api/funkwhale_api/music/management/commands/import_files.py index cbffca63d..fab980510 100644 --- a/api/funkwhale_api/music/management/commands/import_files.py +++ b/api/funkwhale_api/music/management/commands/import_files.py @@ -1,17 +1,29 @@ +import collections +import datetime import itertools import os -import urllib.parse +import queue +import threading import time +import urllib.parse + +import watchdog.events +import watchdog.observers from django.conf import settings from django.core.files import File +from django.core.management import call_command from django.core.management.base import BaseCommand, CommandError +from django.db.models import Q from django.utils import timezone +from rest_framework import serializers + +from funkwhale_api.common import utils as common_utils from funkwhale_api.music import models, tasks, utils -def crawl_dir(dir, extensions, recursive=True): +def crawl_dir(dir, extensions, recursive=True, ignored=[]): if os.path.isfile(dir): yield dir return @@ -20,9 +32,12 @@ def crawl_dir(dir, extensions, recursive=True): if entry.is_file(): for e in extensions: if entry.name.lower().endswith(".{}".format(e.lower())): - yield entry.path + if entry.path not in ignored: + yield entry.path elif recursive and entry.is_dir(): - yield from crawl_dir(entry, extensions, recursive=recursive) + yield from crawl_dir( + entry, extensions, recursive=recursive, ignored=ignored + ) def batch(iterable, n=1): @@ -116,6 +131,17 @@ class Command(BaseCommand): "of overhead on your server and on servers you are federating with." ), ) + parser.add_argument( + "--watch", + action="store_true", + dest="watch", + default=False, + help=( + "Start the command in watch mode. Instead of running a full import, " + "and exit, watch the given path and import new files, remove deleted " + "files, and update metadata corresponding to updated files." + ), + ) parser.add_argument("-e", "--extension", nargs="+") parser.add_argument( @@ -128,6 +154,15 @@ class Command(BaseCommand): "This causes some overhead, so it's disabled by default." ), ) + parser.add_argument( + "--prune", + action="store_true", + dest="prune", + default=False, + help=( + "Once the import is completed, prune tracks, ablums and artists that aren't linked to any upload." + ), + ) parser.add_argument( "--reference", @@ -157,6 +192,8 @@ class Command(BaseCommand): ) def handle(self, *args, **options): + # handle relative directories + options["path"] = [os.path.abspath(path) for path in options["path"]] self.is_confirmed = False try: library = models.Library.objects.select_related("actor__user").get( @@ -182,22 +219,12 @@ class Command(BaseCommand): ) if p and not import_path.startswith(p): raise CommandError( - "Importing in-place only works if importing" + "Importing in-place only works if importing " "from {} (MUSIC_DIRECTORY_PATH), as this directory" "needs to be accessible by the webserver." "Culprit: {}".format(p, import_path) ) - extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS - crawler = itertools.chain( - *[ - crawl_dir(p, extensions=extensions, recursive=options["recursive"]) - for p in options["path"] - ] - ) - errors = [] - total = 0 - start_time = time.time() reference = options["reference"] or "cli-{}".format(timezone.now().isoformat()) import_url = "{}://{}/library/{}/upload?{}" @@ -212,8 +239,62 @@ class Command(BaseCommand): reference, import_url ) ) + extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS + if options["watch"]: + if len(options["path"]) > 1: + raise CommandError("Watch only work with a single directory") + + return self.setup_watcher( + extensions=extensions, + path=options["path"][0], + reference=reference, + library=library, + in_place=options["in_place"], + prune=options["prune"], + recursive=options["recursive"], + replace=options["replace"], + dispatch_outbox=options["outbox"], + broadcast=options["broadcast"], + ) + + update = True + checked_paths = set() + if options["in_place"] and update: + self.stdout.write("Checking existing files for updates…") + message = ( + "Are you sure you want to do this?\n\n" + "Type 'yes' to continue, or 'no' to skip checking for updates in " + "already imported files: " + ) + if options["interactive"] and input("".join(message)) != "yes": + pass + else: + checked_paths = check_updates( + stdout=self.stdout, + paths=options["path"], + extensions=extensions, + library=library, + batch_size=options["batch_size"], + ) + self.stdout.write("Existing files checked, moving on to next step!") + + crawler = itertools.chain( + *[ + crawl_dir( + p, + extensions=extensions, + recursive=options["recursive"], + ignored=checked_paths, + ) + for p in options["path"] + ] + ) + errors = [] + total = 0 + start_time = time.time() batch_start = None batch_duration = None + self.stdout.write("Starting import of new files…") for i, entries in enumerate(batch(crawler, options["batch_size"])): total += len(entries) batch_start = time.time() @@ -225,7 +306,7 @@ class Command(BaseCommand): if entries: self.stdout.write( "Handling batch {} ({} items){}".format( - i + 1, options["batch_size"], time_stats, + i + 1, len(entries), time_stats, ) ) batch_errors = self.handle_batch( @@ -240,9 +321,9 @@ class Command(BaseCommand): batch_duration = time.time() - batch_start - message = "Successfully imported {} tracks in {}s" + message = "Successfully imported {} new tracks in {}s" if options["async_"]: - message = "Successfully launched import for {} tracks in {}s" + message = "Successfully launched import for {} new tracks in {}s" self.stdout.write( message.format(total - len(errors), int(time.time() - start_time)) @@ -259,6 +340,12 @@ class Command(BaseCommand): ) ) + if options["prune"]: + self.stdout.write( + "Pruning dangling tracks, albums and artists from library…" + ) + prune() + def handle_batch(self, library, paths, batch, reference, options): matching = [] for m in paths: @@ -362,15 +449,15 @@ class Command(BaseCommand): message.format(batch=batch, path=path, i=i + 1, total=len(paths)) ) try: - self.create_upload( - path, - reference, - library, - async_, - options["replace"], - options["in_place"], - options["outbox"], - options["broadcast"], + create_upload( + path=path, + reference=reference, + library=library, + async_=async_, + replace=options["replace"], + in_place=options["in_place"], + dispatch_outbox=options["outbox"], + broadcast=options["broadcast"], ) except Exception as e: if options["exit_on_failure"]: @@ -382,34 +469,311 @@ class Command(BaseCommand): errors.append((path, "{} {}".format(e.__class__.__name__, e))) return errors - def create_upload( - self, - path, - reference, - library, - async_, - replace, - in_place, - dispatch_outbox, - broadcast, - ): - import_handler = tasks.process_upload.delay if async_ else tasks.process_upload - upload = models.Upload(library=library, import_reference=reference) - upload.source = "file://" + path - upload.import_metadata = { - "funkwhale": { - "config": { - "replace": replace, - "dispatch_outbox": dispatch_outbox, - "broadcast": broadcast, - } + def setup_watcher(self, path, extensions, recursive, **kwargs): + watchdog_queue = queue.Queue() + # Set up a worker thread to process database load + worker = threading.Thread( + target=process_load_queue(self.stdout, **kwargs), args=(watchdog_queue,), + ) + worker.setDaemon(True) + worker.start() + + # setup watchdog to monitor directory for trigger files + patterns = ["*.{}".format(e) for e in extensions] + event_handler = Watcher( + stdout=self.stdout, queue=watchdog_queue, patterns=patterns, + ) + observer = watchdog.observers.Observer() + observer.schedule(event_handler, path, recursive=recursive) + observer.start() + + try: + while True: + self.stdout.write( + "Watching for changes at {}…".format(path), ending="\r" + ) + time.sleep(10) + if kwargs["prune"] and GLOBAL["need_pruning"]: + self.stdout.write("Some files were deleted, pruning library…") + prune() + GLOBAL["need_pruning"] = False + except KeyboardInterrupt: + self.stdout.write("Exiting…") + observer.stop() + + observer.join() + + +GLOBAL = {"need_pruning": False} + + +def prune(): + call_command( + "prune_library", + dry_run=False, + prune_artists=True, + prune_albums=True, + prune_tracks=True, + ) + + +def create_upload( + path, reference, library, async_, replace, in_place, dispatch_outbox, broadcast, +): + import_handler = tasks.process_upload.delay if async_ else tasks.process_upload + upload = models.Upload(library=library, import_reference=reference) + upload.source = "file://" + path + upload.import_metadata = { + "funkwhale": { + "config": { + "replace": replace, + "dispatch_outbox": dispatch_outbox, + "broadcast": broadcast, } } - if not in_place: - name = os.path.basename(path) - with open(path, "rb") as f: - upload.audio_file.save(name, File(f), save=False) + } + if not in_place: + name = os.path.basename(path) + with open(path, "rb") as f: + upload.audio_file.save(name, File(f), save=False) - upload.save() + upload.save() - import_handler(upload_id=upload.pk) + import_handler(upload_id=upload.pk) + + +def process_load_queue(stdout, **kwargs): + def inner(q): + # we batch events, to avoid calling same methods multiple times if a file is modified + # a lot in a really short time + flush_delay = 2 + batched_events = collections.OrderedDict() + while True: + while True: + if not q.empty(): + event = q.get() + batched_events[event["path"]] = event + else: + break + for path, event in batched_events.copy().items(): + if time.time() - event["time"] <= flush_delay: + continue + now = datetime.datetime.utcnow() + stdout.write( + "{} -- Processing {}:{}...\n".format( + now.strftime("%Y/%m/%d %H:%M:%S"), event["type"], event["path"] + ) + ) + del batched_events[path] + handle_event(event, stdout=stdout, **kwargs) + time.sleep(1) + + return inner + + +class Watcher(watchdog.events.PatternMatchingEventHandler): + def __init__(self, stdout, queue, patterns): + self.stdout = stdout + self.queue = queue + super().__init__(patterns=patterns) + + def enqueue(self, event): + e = { + "is_directory": event.is_directory, + "type": event.event_type, + "path": event.src_path, + "src_path": event.src_path, + "dest_path": getattr(event, "dest_path", None), + "time": time.time(), + } + self.queue.put(e) + + def on_moved(self, event): + self.enqueue(event) + + def on_created(self, event): + self.enqueue(event) + + def on_deleted(self, event): + self.enqueue(event) + + def on_modified(self, event): + self.enqueue(event) + + +def handle_event(event, stdout, **kwargs): + handlers = { + "modified": handle_modified, + "created": handle_created, + "moved": handle_moved, + "deleted": handle_deleted, + } + handlers[event["type"]](event=event, stdout=stdout, **kwargs) + + +def handle_modified(event, stdout, library, in_place, **kwargs): + existing_candidates = library.uploads.filter(import_status="finished") + with open(event["path"], "rb") as f: + checksum = common_utils.get_file_hash(f) + + existing = existing_candidates.filter(checksum=checksum).first() + if existing: + # found an existing file with same checksum, nothing to do + stdout.write(" File already imported and metadata is up-to-date") + return + + to_update = None + if in_place: + source = "file://{}".format(event["path"]) + to_update = ( + existing_candidates.in_place() + .filter(source=source) + .select_related( + "track__attributed_to", "track__artist", "track__album__artist", + ) + .first() + ) + if to_update: + if ( + to_update.track.attributed_to + and to_update.track.attributed_to != library.actor + ): + stdout.write( + " Cannot update track metadata, track belongs to someone else".format( + to_update.pk + ) + ) + return + else: + stdout.write( + " Updating existing file #{} with new metadata…".format( + to_update.pk + ) + ) + audio_metadata = to_update.get_metadata() + try: + tasks.update_track_metadata(audio_metadata, to_update.track) + except serializers.ValidationError as e: + stdout.write(" Invalid metadata: {}".format(e)) + else: + to_update.checksum = checksum + to_update.save(update_fields=["checksum"]) + return + + stdout.write(" Launching import for new file") + create_upload( + path=event["path"], + reference=kwargs["reference"], + library=library, + async_=False, + replace=kwargs["replace"], + in_place=in_place, + dispatch_outbox=kwargs["dispatch_outbox"], + broadcast=kwargs["broadcast"], + ) + + +def handle_created(event, stdout, **kwargs): + """ + Created is essentially an alias for modified, because for instance when copying a file in the watched directory, + a created event will be fired on the initial touch, then many modified event (as the file is written). + """ + return handle_modified(event, stdout, **kwargs) + + +def handle_moved(event, stdout, library, in_place, **kwargs): + if not in_place: + return + + old_source = "file://{}".format(event["src_path"]) + new_source = "file://{}".format(event["dest_path"]) + existing_candidates = library.uploads.filter(import_status="finished") + existing_candidates = existing_candidates.in_place().filter(source=old_source) + existing = existing_candidates.first() + if existing: + stdout.write(" Updating path of existing file #{}".format(existing.pk)) + existing.source = new_source + existing.save(update_fields=["source"]) + + +def handle_deleted(event, stdout, library, in_place, **kwargs): + if not in_place: + return + source = "file://{}".format(event["path"]) + existing_candidates = library.uploads.filter(import_status="finished") + existing_candidates = existing_candidates.in_place().filter(source=source) + if existing_candidates.count(): + stdout.write(" Removing file from DB") + existing_candidates.delete() + GLOBAL["need_pruning"] = True + + +def check_updates(stdout, library, extensions, paths, batch_size): + existing = ( + library.uploads.in_place() + .filter(import_status="finished") + .exclude(checksum=None) + .select_related("library", "track") + ) + queries = [] + checked_paths = set() + for path in paths: + for ext in extensions: + queries.append( + Q(source__startswith="file://{}".format(path)) + & Q(source__endswith=".{}".format(ext)) + ) + query, remainder = queries[0], queries[1:] + for q in remainder: + query = q | query + existing = existing.filter(query) + total = existing.count() + stdout.write("Found {} files to check in database!".format(total)) + uploads = existing.order_by("source") + for i, rows in enumerate(batch(uploads.iterator(), batch_size)): + stdout.write("Handling batch {} ({} items)".format(i + 1, len(rows),)) + + for upload in rows: + + check_upload(stdout, upload) + checked_paths.add(upload.source.replace("file://", "", 1)) + + return checked_paths + + +def check_upload(stdout, upload): + try: + audio_file = upload.get_audio_file() + except FileNotFoundError: + stdout.write( + " Removing file #{} missing from disk at {}".format( + upload.pk, upload.source + ) + ) + return upload.delete() + + checksum = common_utils.get_file_hash(audio_file) + if upload.checksum != checksum: + stdout.write( + " File #{} at {} was modified, updating metadata…".format( + upload.pk, upload.source + ) + ) + if upload.library.actor_id != upload.track.attributed_to_id: + stdout.write( + " Cannot update track metadata, track belongs to someone else".format( + upload.pk + ) + ) + else: + track = models.Track.objects.select_related("artist", "album__artist").get( + pk=upload.track_id + ) + try: + tasks.update_track_metadata(upload.get_metadata(), track) + except serializers.ValidationError as e: + stdout.write(" Invalid metadata: {}".format(e)) + return + else: + upload.checksum = checksum + upload.save(update_fields=["checksum"]) diff --git a/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py b/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py new file mode 100644 index 000000000..2a0526643 --- /dev/null +++ b/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py @@ -0,0 +1,23 @@ +# Generated by Django 3.0.4 on 2020-05-05 08:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('music', '0051_auto_20200319_1249'), + ] + + operations = [ + migrations.AddField( + model_name='upload', + name='checksum', + field=models.CharField(blank=True, db_index=True, max_length=100, null=True), + ), + migrations.AlterField( + model_name='uploadversion', + name='mimetype', + field=models.CharField(choices=[('audio/mp3', 'mp3'), ('audio/mpeg3', 'mp3'), ('audio/x-mp3', 'mp3'), ('audio/mpeg', 'mp3'), ('video/ogg', 'ogg'), ('audio/ogg', 'ogg'), ('audio/opus', 'opus'), ('audio/x-m4a', 'aac'), ('audio/x-m4a', 'm4a'), ('audio/x-flac', 'flac'), ('audio/flac', 'flac')], max_length=50), + ), + ] diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py index ecb616b41..43371d5d8 100644 --- a/api/funkwhale_api/music/models.py +++ b/api/funkwhale_api/music/models.py @@ -655,6 +655,14 @@ class Track(APIModelMixin): class UploadQuerySet(common_models.NullsLastQuerySet): + def in_place(self, include=True): + query = models.Q(source__startswith="file://") & ( + models.Q(audio_file="") | models.Q(audio_file=None) + ) + if not include: + query = ~query + return self.filter(query) + def playable_by(self, actor, include=True): libraries = Library.objects.viewable_by(actor) @@ -754,6 +762,9 @@ class Upload(models.Model): ) downloads_count = models.PositiveIntegerField(default=0) + # stores checksums such as `sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855` + checksum = models.CharField(max_length=100, db_index=True, null=True, blank=True) + objects = UploadQuerySet.as_manager() @property @@ -833,7 +844,7 @@ class Upload(models.Model): def get_audio_file(self): if self.audio_file: return self.audio_file.open() - if self.source.startswith("file://"): + if self.source and self.source.startswith("file://"): return open(self.source.replace("file://", "", 1), "rb") def get_audio_data(self): @@ -866,6 +877,15 @@ class Upload(models.Model): self.mimetype = mimetypes.guess_type(self.source)[0] if not self.size and self.audio_file: self.size = self.audio_file.size + if not self.checksum: + try: + audio_file = self.get_audio_file() + except FileNotFoundError: + pass + else: + if audio_file: + self.checksum = common_utils.get_file_hash(audio_file) + if not self.pk and not self.fid and self.library.actor.get_user(): self.fid = self.get_federation_id() return super().save(**kwargs) diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index 89ee1a88f..722a9eefb 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -851,3 +851,71 @@ def update_library_entity(obj, data): obj.save(update_fields=list(data.keys())) return obj + + +UPDATE_CONFIG = { + "track": { + "position": {}, + "title": {}, + "mbid": {}, + "disc_number": {}, + "copyright": {}, + "license": { + "getter": lambda data, field: licenses.match( + data.get("license"), data.get("copyright") + ) + }, + }, + "album": {"title": {}, "mbid": {}, "release_date": {}}, + "artist": {"name": {}, "mbid": {}}, + "album_artist": {"name": {}, "mbid": {}}, +} + + +@transaction.atomic +def update_track_metadata(audio_metadata, track): + # XXX: implement this to support updating metadata when an imported files + # is updated by an outside tool (e.g beets). + serializer = metadata.TrackMetadataSerializer(data=audio_metadata) + serializer.is_valid(raise_exception=True) + new_data = serializer.validated_data + + to_update = [ + ("track", track, lambda data: data), + ("album", track.album, lambda data: data["album"]), + ("artist", track.artist, lambda data: data["artists"][0]), + ( + "album_artist", + track.album.artist if track.album else None, + lambda data: data["album"]["artists"][0], + ), + ] + for id, obj, data_getter in to_update: + if not obj: + continue + obj_updated_fields = [] + try: + obj_data = data_getter(new_data) + except IndexError: + continue + for field, config in UPDATE_CONFIG[id].items(): + getter = config.get( + "getter", lambda data, field: data[config.get("field", field)] + ) + try: + new_value = getter(obj_data, field) + except KeyError: + continue + old_value = getattr(obj, field) + if new_value == old_value: + continue + obj_updated_fields.append(field) + setattr(obj, field, new_value) + + if obj_updated_fields: + obj.save(update_fields=obj_updated_fields) + + if track.album and "album" in new_data and new_data["album"].get("cover_data"): + common_utils.attach_file( + track.album, "attachment_cover", new_data["album"].get("cover_data") + ) diff --git a/api/requirements/base.txt b/api/requirements/base.txt index ecb3d9b5e..6e31f857e 100644 --- a/api/requirements/base.txt +++ b/api/requirements/base.txt @@ -83,3 +83,4 @@ service_identity==18.1.0 markdown>=3.2,<4 bleach>=3,<4 feedparser==6.0.0b3 +watchdog==0.10.2 diff --git a/api/tests/common/test_utils.py b/api/tests/common/test_utils.py index ffe9eae3b..303c8cbca 100644 --- a/api/tests/common/test_utils.py +++ b/api/tests/common/test_utils.py @@ -258,3 +258,12 @@ def test_monkey_patch_request_build_absolute_uri( request = fake_request.get("/", **meta) assert request.build_absolute_uri(path) == expected + + +def test_get_file_hash(tmpfile, settings): + settings.HASHING_ALGORITHM = "sha256" + content = b"hello" + tmpfile.write(content) + # echo -n "hello" | sha256sum + expected = "sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" + assert utils.get_file_hash(tmpfile) == expected diff --git a/api/tests/music/test_commands.py b/api/tests/music/test_commands.py index a08f1b10b..329dcffb5 100644 --- a/api/tests/music/test_commands.py +++ b/api/tests/music/test_commands.py @@ -1,6 +1,7 @@ import os import pytest +from funkwhale_api.common import utils as common_utils from funkwhale_api.music.management.commands import check_inplace_files from funkwhale_api.music.management.commands import fix_uploads from funkwhale_api.music.management.commands import prune_library @@ -18,7 +19,7 @@ def test_fix_uploads_bitrate_length(factories, mocker): return_value={"bitrate": 42, "length": 43}, ) - c.fix_file_data(dry_run=False) + c.fix_file_data(dry_run=False, batch_size=100) upload1.refresh_from_db() upload2.refresh_from_db() @@ -41,7 +42,7 @@ def test_fix_uploads_size(factories, mocker): mocker.patch("funkwhale_api.music.models.Upload.get_file_size", return_value=2) - c.fix_file_size(dry_run=False) + c.fix_file_size(dry_run=False, batch_size=100) upload1.refresh_from_db() upload2.refresh_from_db() @@ -69,7 +70,7 @@ def test_fix_uploads_mimetype(factories, mocker): mimetype="audio/something", ) c = fix_uploads.Command() - c.fix_mimetypes(dry_run=False) + c.fix_mimetypes(dry_run=False, batch_size=100) upload1.refresh_from_db() upload2.refresh_from_db() @@ -78,6 +79,25 @@ def test_fix_uploads_mimetype(factories, mocker): assert upload2.mimetype == "audio/something" +def test_fix_uploads_checksum(factories, mocker): + upload1 = factories["music.Upload"]() + upload2 = factories["music.Upload"]() + upload1.__class__.objects.filter(pk=upload1.pk).update(checksum="test") + upload2.__class__.objects.filter(pk=upload2.pk).update(checksum=None) + c = fix_uploads.Command() + + c.fix_file_checksum(dry_run=False, batch_size=100) + + upload1.refresh_from_db() + upload2.refresh_from_db() + + # not updated + assert upload1.checksum == "test" + + # updated + assert upload2.checksum == common_utils.get_file_hash(upload2.audio_file) + + def test_prune_library_dry_run(factories): prunable = factories["music.Track"]() not_prunable = factories["music.Track"]() diff --git a/api/tests/music/test_models.py b/api/tests/music/test_models.py index 74d5e9604..d8e3bb444 100644 --- a/api/tests/music/test_models.py +++ b/api/tests/music/test_models.py @@ -5,6 +5,7 @@ import pytest from django.utils import timezone from django.urls import reverse +from funkwhale_api.common import utils as common_utils from funkwhale_api.music import importers, models, tasks from funkwhale_api.federation import utils as federation_utils @@ -164,6 +165,17 @@ def test_audio_track_mime_type(extention, mimetype, factories): assert upload.mimetype == mimetype +@pytest.mark.parametrize("name", ["test.ogg", "test.mp3"]) +def test_audio_track_checksum(name, factories): + + path = os.path.join(DATA_DIR, name) + upload = factories["music.Upload"](audio_file__from_path=path, mimetype=None) + + with open(path, "rb") as f: + expected = common_utils.get_file_hash(f) + assert upload.checksum == expected + + def test_upload_file_name(factories): name = "test.mp3" path = os.path.join(DATA_DIR, name) diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py index 4fbe6b513..96ba451aa 100644 --- a/api/tests/music/test_tasks.py +++ b/api/tests/music/test_tasks.py @@ -1329,3 +1329,40 @@ def test_can_import_track_with_same_position_in_same_discs_skipped(factories, mo new_upload.refresh_from_db() assert new_upload.import_status == "skipped" + + +def test_update_track_metadata(factories): + track = factories["music.Track"]() + data = { + "title": "Peer Gynt Suite no. 1, op. 46: I. Morning", + "artist": "Edvard Grieg", + "album_artist": "Edvard Grieg; Musopen Symphony Orchestra", + "album": "Peer Gynt Suite no. 1, op. 46", + "date": "2012-08-15", + "position": "4", + "disc_number": "2", + "musicbrainz_albumid": "a766da8b-8336-47aa-a3ee-371cc41ccc75", + "mbid": "bd21ac48-46d8-4e78-925f-d9cc2a294656", + "musicbrainz_artistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823", + "musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f", + "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/", + "copyright": "Someone", + "comment": "hello there", + } + tasks.update_track_metadata(metadata.FakeMetadata(data), track) + + track.refresh_from_db() + + assert track.title == data["title"] + assert track.position == int(data["position"]) + assert track.disc_number == int(data["disc_number"]) + assert track.license.code == "cc-by-sa-4.0" + assert track.copyright == data["copyright"] + assert str(track.mbid) == data["mbid"] + assert track.album.title == data["album"] + assert track.album.release_date == datetime.date(2012, 8, 15) + assert str(track.album.mbid) == data["musicbrainz_albumid"] + assert track.artist.name == data["artist"] + assert str(track.artist.mbid) == data["musicbrainz_artistid"] + assert track.album.artist.name == "Edvard Grieg" + assert str(track.album.artist.mbid) == "013c8e5b-d72a-4cd3-8dee-6c64d6125823" diff --git a/api/tests/test_import_audio_file.py b/api/tests/test_import_audio_file.py index c7d5a976c..04c06a8f4 100644 --- a/api/tests/test_import_audio_file.py +++ b/api/tests/test_import_audio_file.py @@ -4,6 +4,8 @@ import pytest from django.core.management import call_command from django.core.management.base import CommandError +from funkwhale_api.common import utils as common_utils +from funkwhale_api.music.management.commands import import_files DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "files") @@ -159,3 +161,194 @@ def test_import_files_in_place(factories, mocker, settings): def test_storage_rename_utf_8_files(factories): upload = factories["music.Upload"](audio_file__filename="été.ogg") assert upload.audio_file.name.endswith("ete.ogg") + + +@pytest.mark.parametrize("name", ["modified", "moved", "created", "deleted"]) +def test_handle_event(name, mocker): + handler = mocker.patch.object(import_files, "handle_{}".format(name)) + + event = {"type": name} + stdout = mocker.Mock() + kwargs = {"hello": "world"} + import_files.handle_event(event, stdout, **kwargs) + + handler.assert_called_once_with(event=event, stdout=stdout, **kwargs) + + +def test_handle_created(mocker): + handle_modified = mocker.patch.object(import_files, "handle_modified") + + event = mocker.Mock() + stdout = mocker.Mock() + kwargs = {"hello": "world"} + import_files.handle_created(event, stdout, **kwargs) + + handle_modified.assert_called_once_with(event, stdout, **kwargs) + + +def test_handle_deleted(factories, mocker): + stdout = mocker.Mock() + event = { + "path": "/path.mp3", + } + library = factories["music.Library"]() + deleted = factories["music.Upload"]( + library=library, + source="file://{}".format(event["path"]), + import_status="finished", + audio_file=None, + ) + kept = [ + factories["music.Upload"]( + library=library, + source="file://{}".format(event["path"]), + import_status="finished", + ), + factories["music.Upload"]( + source="file://{}".format(event["path"]), + import_status="finished", + audio_file=None, + ), + ] + + import_files.handle_deleted( + event=event, stdout=stdout, library=library, in_place=True + ) + + with pytest.raises(deleted.DoesNotExist): + deleted.refresh_from_db() + + for upload in kept: + upload.refresh_from_db() + + +def test_handle_moved(factories, mocker): + stdout = mocker.Mock() + event = { + "src_path": "/path.mp3", + "dest_path": "/new_path.mp3", + } + library = factories["music.Library"]() + updated = factories["music.Upload"]( + library=library, + source="file://{}".format(event["src_path"]), + import_status="finished", + audio_file=None, + ) + untouched = [ + factories["music.Upload"]( + library=library, + source="file://{}".format(event["src_path"]), + import_status="finished", + ), + factories["music.Upload"]( + source="file://{}".format(event["src_path"]), + import_status="finished", + audio_file=None, + ), + ] + + import_files.handle_moved( + event=event, stdout=stdout, library=library, in_place=True + ) + + updated.refresh_from_db() + assert updated.source == "file://{}".format(event["dest_path"]) + for upload in untouched: + source = upload.source + upload.refresh_from_db() + assert source == upload.source + + +def test_handle_modified_creates_upload(tmpfile, factories, mocker): + stdout = mocker.Mock() + event = { + "path": tmpfile.name, + } + process_upload = mocker.patch("funkwhale_api.music.tasks.process_upload") + library = factories["music.Library"]() + import_files.handle_modified( + event=event, + stdout=stdout, + library=library, + in_place=True, + reference="hello", + replace=False, + dispatch_outbox=False, + broadcast=False, + ) + upload = library.uploads.latest("id") + assert upload.source == "file://{}".format(event["path"]) + + process_upload.assert_called_once_with(upload_id=upload.pk) + + +def test_handle_modified_skips_existing_checksum(tmpfile, factories, mocker): + stdout = mocker.Mock() + event = { + "path": tmpfile.name, + } + tmpfile.write(b"hello") + + library = factories["music.Library"]() + factories["music.Upload"]( + checksum=common_utils.get_file_hash(tmpfile), + library=library, + import_status="finished", + ) + import_files.handle_modified( + event=event, stdout=stdout, library=library, in_place=True, + ) + assert library.uploads.count() == 1 + + +def test_handle_modified_update_existing_path_if_found(tmpfile, factories, mocker): + stdout = mocker.Mock() + event = { + "path": tmpfile.name, + } + update_track_metadata = mocker.patch( + "funkwhale_api.music.tasks.update_track_metadata" + ) + get_metadata = mocker.patch("funkwhale_api.music.models.Upload.get_metadata") + library = factories["music.Library"]() + track = factories["music.Track"](attributed_to=library.actor) + upload = factories["music.Upload"]( + source="file://{}".format(event["path"]), + track=track, + checksum="old", + library=library, + import_status="finished", + audio_file=None, + ) + import_files.handle_modified( + event=event, stdout=stdout, library=library, in_place=True, + ) + update_track_metadata.assert_called_once_with( + get_metadata.return_value, upload.track, + ) + + +def test_handle_modified_update_existing_path_if_found_and_attributed_to( + tmpfile, factories, mocker +): + stdout = mocker.Mock() + event = { + "path": tmpfile.name, + } + update_track_metadata = mocker.patch( + "funkwhale_api.music.tasks.update_track_metadata" + ) + library = factories["music.Library"]() + factories["music.Upload"]( + source="file://{}".format(event["path"]), + checksum="old", + library=library, + track__attributed_to=factories["federation.Actor"](), + import_status="finished", + audio_file=None, + ) + import_files.handle_modified( + event=event, stdout=stdout, library=library, in_place=True, + ) + update_track_metadata.assert_not_called() diff --git a/changes/changelog.d/1093.bugfix b/changes/changelog.d/1093.bugfix index 1d9a15e59..0295841af 100644 --- a/changes/changelog.d/1093.bugfix +++ b/changes/changelog.d/1093.bugfix @@ -1 +1 @@ -Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetypes`` to set proper mimetypes on existing uploads. +Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetype`` to set proper mimetypes on existing uploads. diff --git a/changes/changelog.d/721.feature b/changes/changelog.d/721.feature new file mode 100644 index 000000000..332f992eb --- /dev/null +++ b/changes/changelog.d/721.feature @@ -0,0 +1 @@ +Support a --watch mode with ``import_files`` to automatically add, update and remove files when filesystem is updated (#721) diff --git a/docs/admin/importing-music.rst b/docs/admin/importing-music.rst index bc0a642cb..bff23845b 100644 --- a/docs/admin/importing-music.rst +++ b/docs/admin/importing-music.rst @@ -1,15 +1,21 @@ -Importing music -================ +Importing music from the server +=============================== -From music directory on the server ----------------------------------- - -You can import music files in Funkwhale assuming they are located on the server -and readable by the Funkwhale application. Your music files should contain at +Funkwhale can import music files that are located on the server assuming +they readable by the Funkwhale application. Your music files should contain at least an ``artist``, ``album`` and ``title`` tags, but we recommend you tag it extensively using a proper tool, such as Beets or Musicbrainz Picard. -You can import those tracks as follows, assuming they are located in +Funkwhale supports two different import modes: + +- copy (the default): files are copied into Funkwhale's internal storage. This means importing a 1GB library will result in the same amount of space being used by Funkwhale. +- :ref:`in-place <in-place-import>` (when the ``--in-place`` is provided): files are referenced in Funkwhale's DB but not copied or touched in anyway. This is useful if you have a huge library, or one that is updated by an external tool such as Beets.. + +.. note:: + + In Funkwhale 1.0, **the default behaviour will change to in-place import** + +Regardless of the mode you're choosing, import works as described below, assuming your files are located in ``/srv/funkwhale/data/music``: .. code-block:: bash @@ -17,6 +23,17 @@ You can import those tracks as follows, assuming they are located in export LIBRARY_ID="<your_libary_id>" python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput +.. note:: + You'll have to create a library in the Web UI before to get your library ID. Simply visit + https://yourdomain/content/libraries/ to create one. + + Library IDs are available in library urls or sharing link. In this example: + https://funkwhale.instance/content/libraries/769a2ae3-eb3d-4aff-9f94-2c4d80d5c2d1, + the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1 + + You can use only the first characters of the ID when calling the command, like that: + ``export LIBRARY_ID="769a2bc3"`` + When you use docker, the ``/srv/funkwhale/data/music`` is mounted from the host to the ``/music`` directory on the container: @@ -32,16 +49,6 @@ When you installed Funkwhale via ansible, you need to call a script instead of P export LIBRARY_ID="<your_libary_id>" /srv/funkwhale/manage import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput -.. note:: - You'll have to create a library in the Web UI before to get your library ID. Simply visit - https://yourdomain/content/libraries/ to create one. - - Library IDs are available in library urls or sharing link. In this example: - https://funkwhale.instance/content/libraries/769a2ae3-eb3d-4aff-9f94-2c4d80d5c2d1, - the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1 - - You can use only the first characters of the ID when calling the command, like that: - ``export LIBRARY_ID="769a2bc3"`` The import command supports several options, and you can check the help to get details:: @@ -63,6 +70,7 @@ get details:: At the moment, only Flac, OGG/Vorbis and MP3 files with ID3 tags are supported + .. _in-place-import: In-place import @@ -88,14 +96,6 @@ configuration options to ensure the webserver can serve them properly: - :ref:`setting-MUSIC_DIRECTORY_PATH` - :ref:`setting-MUSIC_DIRECTORY_SERVE_PATH` -.. warning:: - - While in-place import is faster and less disk-space-hungry, it's also - more fragile: if, for some reason, you move or rename the source files, - Funkwhale will not be able to serve those files anymore. - - Thus, be especially careful when you manipulate the source files. - We recommend you symlink all your music directories into ``/srv/funkwhale/data/music`` and run the `import_files` command from that directory. This will make it possible to use multiple music directories, without any additional configuration @@ -134,6 +134,49 @@ If you want to go with symlinks, ensure each symlinked directory is mounted as a # add your symlinked dirs here - /media/nfsshare:/media/nfsshare:ro +Metadata updates +^^^^^^^^^^^^^^^^ + +When doing an import with in ``in-place`` mode, the importer will also check and update existing entries +found in the database. For instance, if a file was imported, the ID3 Title tag was updated, and you rerun a scan, +Funkwhale will pick up the new title. The following fields can be updated this way: + +- Track mbid +- Track title +- Track position and disc number +- Track license and copyright +- Album cover +- Album title +- Album mbid +- Album release date +- Artist name +- Artist mbid +- Album artist name +- Album artist mbid + + +React to filesystem events with ``--watch`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you have a really big library or one that is updated quite often, running the ``import_files`` command by hand +may not be practical. To help with this use case, the ``import_files`` command supports a ``--watch`` flag that will observes filesystem events +instead of performing a full import. + +File creation, move, update and removal are handled when ``--watch`` is provided: + +- Files created in the watched directory are imported immediatly +- If using ``in-place`` mode, files updates trigger a metadata update on the corresponding entries +- If using ``in-place`` mode, files that are moved and known by Funkwhale will see their path updated in Funkwhale's DB +- If using ``in-place`` mode, files that are removed and known by Funkwhale will be removed from Funkwhale's DB + +Pruning dangling metadata with ``--prune`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Funkwhale is, by design, conservative with music metadata in its database. If you remove a file from Funkwhale's DB, +the corresponding artist, album and track object won't be deleted by default. + +If you want to prune dangling metadata from the database once the ``import_files`` command is over, simply add the ``--prune`` flag. +This also works in with ``--watch``. Album covers ^^^^^^^^^^^^ @@ -159,9 +202,3 @@ under creative commons (courtesy of Jamendo): ./download-tracks.sh music.txt This will download a bunch of zip archives (one per album) under the ``data/music`` directory and unzip their content. - -From other instances --------------------- - -Funkwhale also supports importing music from other instances. Please refer -to :doc:`../federation/index` for more details.