Importer updates: watch directories, handle metadata updates

2020-05-07 09:55:29 +02:00 · 2020-05-07 09:55:29 +02:00 · 6eb049b2d9
commit 6eb049b2d9
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@ -1309,3 +1309,6 @@ IGNORE_FORWARDED_HOST_AND_PROTO = env.bool(
 """
 Use :attr:`FUNKWHALE_HOSTNAME` and :attr:`FUNKWHALE_PROTOCOL ` instead of request header.
 """
+
+HASHING_ALGORITHM = "sha256"
+HASHING_CHUNK_SIZE = 1024 * 100
--- a/api/funkwhale_api/common/utils.py
+++ b/api/funkwhale_api/common/utils.py
@ -1,4 +1,5 @@
 import datetime
+import hashlib

 from django.core.files.base import ContentFile
 from django.http import request
@ -458,3 +459,19 @@ def monkey_patch_request_build_absolute_uri():

    request.HttpRequest.scheme = property(scheme)
    request.HttpRequest.get_host = get_host
+
+
+def get_file_hash(file, algo=None, chunk_size=None, full_read=False):
+    algo = algo or settings.HASHING_ALGORITHM
+    chunk_size = chunk_size or settings.HASHING_CHUNK_SIZE
+    handler = getattr(hashlib, algo)
+    hash = handler()
+    file.seek(0)
+    if full_read:
+        for byte_block in iter(lambda: file.read(chunk_size), b""):
+            hash.update(byte_block)
+    else:
+        # sometimes, it's useful to only hash the beginning of the file, e.g
+        # to avoid a lot of I/O when crawling large libraries
+        hash.update(file.read(chunk_size))
+    return "{}:{}".format(algo, hash.hexdigest())
--- a/api/funkwhale_api/music/management/commands/fix_uploads.py
+++ b/api/funkwhale_api/music/management/commands/fix_uploads.py
@ -2,6 +2,7 @@ from django.core.management.base import BaseCommand
 from django.db import transaction
 from django.db.models import Q

+from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music import models, utils


@ -17,9 +18,9 @@ class Command(BaseCommand):
            help="Do not execute anything",
        )
        parser.add_argument(
-            "--mimetypes",
+            "--mimetype",
            action="store_true",
-            dest="mimetypes",
+            dest="mimetype",
            default=True,
            help="Check and fix mimetypes",
        )
@ -37,16 +38,33 @@ class Command(BaseCommand):
            default=False,
            help="Check and fix file size, can be really slow because it needs to access files",
        )
+        parser.add_argument(
+            "--checksum",
+            action="store_true",
+            dest="checksum",
+            default=False,
+            help="Check and fix file size, can be really slow because it needs to access files",
+        )
+        parser.add_argument(
+            "--batch-size",
+            "-s",
+            dest="batch_size",
+            default=1000,
+            type=int,
+            help="Size of each updated batch",
+        )

    def handle(self, *args, **options):
        if options["dry_run"]:
            self.stdout.write("Dry-run on, will not commit anything")
-        if options["mimetypes"]:
+        if options["mimetype"]:
            self.fix_mimetypes(**options)
        if options["data"]:
            self.fix_file_data(**options)
        if options["size"]:
            self.fix_file_size(**options)
+        if options["checksum"]:
+            self.fix_file_checksum(**options)

    @transaction.atomic
    def fix_mimetypes(self, dry_run, **kwargs):
@ -54,11 +72,12 @@ class Command(BaseCommand):
        matching = models.Upload.objects.filter(
            Q(source__startswith="file://") | Q(source__startswith="upload://")
        ).exclude(mimetype__startswith="audio/")
+        total = matching.count()
        self.stdout.write(
-            "[mimetypes] {} entries found with bad or no mimetype".format(
-                matching.count()
-            )
+            "[mimetypes] {} entries found with bad or no mimetype".format(total)
        )
+        if not total:
+            return
        for extension, mimetype in utils.EXTENSION_TO_MIMETYPE.items():
            qs = matching.filter(source__endswith=".{}".format(extension))
            self.stdout.write(
@ -81,24 +100,36 @@ class Command(BaseCommand):
        )
        if dry_run:
            return
-        for i, upload in enumerate(matching.only("audio_file")):
-            self.stdout.write(
-                "[bitrate/length] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
-            )

-            try:
-                audio_file = upload.get_audio_file()
-                if audio_file:
+        chunks = common_utils.chunk_queryset(
+            matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+        )
+        handled = 0
+        for chunk in chunks:
+            updated = []
+            for upload in chunk:
+                handled += 1
+                self.stdout.write(
+                    "[bitrate/length] {}/{} fixing file #{}".format(
+                        handled, total, upload.pk
+                    )
+                )
+
+                try:
+                    audio_file = upload.get_audio_file()
                    data = utils.get_audio_file_data(audio_file)
                    upload.bitrate = data["bitrate"]
                    upload.duration = data["length"]
-                    upload.save(update_fields=["duration", "bitrate"])
+                except Exception as e:
+                    self.stderr.write(
+                        "[bitrate/length] error with file #{}: {}".format(
+                            upload.pk, str(e)
+                        )
+                    )
                else:
-                    self.stderr.write("[bitrate/length] no file found")
-            except Exception as e:
-                self.stderr.write(
-                    "[bitrate/length] error with file #{}: {}".format(upload.pk, str(e))
-                )
+                    updated.append(upload)
+
+            models.Upload.objects.bulk_update(updated, ["bitrate", "duration"])

    def fix_file_size(self, dry_run, **kwargs):
        self.stdout.write("Fixing missing size...")
@ -107,15 +138,64 @@ class Command(BaseCommand):
        self.stdout.write("[size] {} entries found with missing values".format(total))
        if dry_run:
            return
-        for i, upload in enumerate(matching.only("size")):
-            self.stdout.write(
-                "[size] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
-            )

-            try:
-                upload.size = upload.get_file_size()
-                upload.save(update_fields=["size"])
-            except Exception as e:
-                self.stderr.write(
-                    "[size] error with file #{}: {}".format(upload.pk, str(e))
+        chunks = common_utils.chunk_queryset(
+            matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+        )
+        handled = 0
+        for chunk in chunks:
+            updated = []
+            for upload in chunk:
+                handled += 1
+
+                self.stdout.write(
+                    "[size] {}/{} fixing file #{}".format(handled, total, upload.pk)
                )
+
+                try:
+                    upload.size = upload.get_file_size()
+                except Exception as e:
+                    self.stderr.write(
+                        "[size] error with file #{}: {}".format(upload.pk, str(e))
+                    )
+                else:
+                    updated.append(upload)
+
+            models.Upload.objects.bulk_update(updated, ["size"])
+
+    def fix_file_checksum(self, dry_run, **kwargs):
+        self.stdout.write("Fixing missing checksums...")
+        matching = models.Upload.objects.filter(
+            Q(checksum=None)
+            & (Q(audio_file__isnull=False) | Q(source__startswith="file://"))
+        )
+        total = matching.count()
+        self.stdout.write(
+            "[checksum] {} entries found with missing values".format(total)
+        )
+        if dry_run:
+            return
+        chunks = common_utils.chunk_queryset(
+            matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+        )
+        handled = 0
+        for chunk in chunks:
+            updated = []
+            for upload in chunk:
+                handled += 1
+                self.stdout.write(
+                    "[checksum] {}/{} fixing file #{}".format(handled, total, upload.pk)
+                )
+
+                try:
+                    upload.checksum = common_utils.get_file_hash(
+                        upload.get_audio_file()
+                    )
+                except Exception as e:
+                    self.stderr.write(
+                        "[checksum] error with file #{}: {}".format(upload.pk, str(e))
+                    )
+                else:
+                    updated.append(upload)
+
+            models.Upload.objects.bulk_update(updated, ["checksum"])
--- a/api/funkwhale_api/music/management/commands/import_files.py
+++ b/api/funkwhale_api/music/management/commands/import_files.py
@ -1,17 +1,29 @@
+import collections
+import datetime
 import itertools
 import os
-import urllib.parse
+import queue
+import threading
 import time
+import urllib.parse
+
+import watchdog.events
+import watchdog.observers

 from django.conf import settings
 from django.core.files import File
+from django.core.management import call_command
 from django.core.management.base import BaseCommand, CommandError
+from django.db.models import Q
 from django.utils import timezone

+from rest_framework import serializers
+
+from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music import models, tasks, utils


-def crawl_dir(dir, extensions, recursive=True):
+def crawl_dir(dir, extensions, recursive=True, ignored=[]):
    if os.path.isfile(dir):
        yield dir
        return
@ -20,9 +32,12 @@ def crawl_dir(dir, extensions, recursive=True):
            if entry.is_file():
                for e in extensions:
                    if entry.name.lower().endswith(".{}".format(e.lower())):
-                        yield entry.path
+                        if entry.path not in ignored:
+                            yield entry.path
            elif recursive and entry.is_dir():
-                yield from crawl_dir(entry, extensions, recursive=recursive)
+                yield from crawl_dir(
+                    entry, extensions, recursive=recursive, ignored=ignored
+                )


 def batch(iterable, n=1):
@ -116,6 +131,17 @@ class Command(BaseCommand):
                "of overhead on your server and on servers you are federating with."
            ),
        )
+        parser.add_argument(
+            "--watch",
+            action="store_true",
+            dest="watch",
+            default=False,
+            help=(
+                "Start the command in watch mode. Instead of running a full import, "
+                "and exit, watch the given path and import new files, remove deleted "
+                "files, and update metadata corresponding to updated files."
+            ),
+        )
        parser.add_argument("-e", "--extension", nargs="+")

        parser.add_argument(
@ -128,6 +154,15 @@ class Command(BaseCommand):
                "This causes some overhead, so it's disabled by default."
            ),
        )
+        parser.add_argument(
+            "--prune",
+            action="store_true",
+            dest="prune",
+            default=False,
+            help=(
+                "Once the import is completed, prune tracks, ablums and artists that aren't linked to any upload."
+            ),
+        )

        parser.add_argument(
            "--reference",
@ -157,6 +192,8 @@ class Command(BaseCommand):
        )

    def handle(self, *args, **options):
+        # handle relative directories
+        options["path"] = [os.path.abspath(path) for path in options["path"]]
        self.is_confirmed = False
        try:
            library = models.Library.objects.select_related("actor__user").get(
@ -182,22 +219,12 @@ class Command(BaseCommand):
                    )
                if p and not import_path.startswith(p):
                    raise CommandError(
-                        "Importing in-place only works if importing"
+                        "Importing in-place only works if importing "
                        "from {} (MUSIC_DIRECTORY_PATH), as this directory"
                        "needs to be accessible by the webserver."
                        "Culprit: {}".format(p, import_path)
                    )

-        extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
-        crawler = itertools.chain(
-            *[
-                crawl_dir(p, extensions=extensions, recursive=options["recursive"])
-                for p in options["path"]
-            ]
-        )
-        errors = []
-        total = 0
-        start_time = time.time()
        reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())

        import_url = "{}://{}/library/{}/upload?{}"
@ -212,8 +239,62 @@ class Command(BaseCommand):
                reference, import_url
            )
        )
+        extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
+        if options["watch"]:
+            if len(options["path"]) > 1:
+                raise CommandError("Watch only work with a single directory")
+
+            return self.setup_watcher(
+                extensions=extensions,
+                path=options["path"][0],
+                reference=reference,
+                library=library,
+                in_place=options["in_place"],
+                prune=options["prune"],
+                recursive=options["recursive"],
+                replace=options["replace"],
+                dispatch_outbox=options["outbox"],
+                broadcast=options["broadcast"],
+            )
+
+        update = True
+        checked_paths = set()
+        if options["in_place"] and update:
+            self.stdout.write("Checking existing files for updates…")
+            message = (
+                "Are you sure you want to do this?\n\n"
+                "Type 'yes' to continue, or 'no' to skip checking for updates in "
+                "already imported files: "
+            )
+            if options["interactive"] and input("".join(message)) != "yes":
+                pass
+            else:
+                checked_paths = check_updates(
+                    stdout=self.stdout,
+                    paths=options["path"],
+                    extensions=extensions,
+                    library=library,
+                    batch_size=options["batch_size"],
+                )
+                self.stdout.write("Existing files checked, moving on to next step!")
+
+        crawler = itertools.chain(
+            *[
+                crawl_dir(
+                    p,
+                    extensions=extensions,
+                    recursive=options["recursive"],
+                    ignored=checked_paths,
+                )
+                for p in options["path"]
+            ]
+        )
+        errors = []
+        total = 0
+        start_time = time.time()
        batch_start = None
        batch_duration = None
+        self.stdout.write("Starting import of new files…")
        for i, entries in enumerate(batch(crawler, options["batch_size"])):
            total += len(entries)
            batch_start = time.time()
@ -225,7 +306,7 @@ class Command(BaseCommand):
            if entries:
                self.stdout.write(
                    "Handling batch {} ({} items){}".format(
-                        i + 1, options["batch_size"], time_stats,
+                        i + 1, len(entries), time_stats,
                    )
                )
                batch_errors = self.handle_batch(
@ -240,9 +321,9 @@ class Command(BaseCommand):

            batch_duration = time.time() - batch_start

-        message = "Successfully imported {} tracks in {}s"
+        message = "Successfully imported {} new tracks in {}s"
        if options["async_"]:
-            message = "Successfully launched import for {} tracks in {}s"
+            message = "Successfully launched import for {} new tracks in {}s"

        self.stdout.write(
            message.format(total - len(errors), int(time.time() - start_time))
@ -259,6 +340,12 @@ class Command(BaseCommand):
            )
        )

+        if options["prune"]:
+            self.stdout.write(
+                "Pruning dangling tracks, albums and artists from library…"
+            )
+            prune()
+
    def handle_batch(self, library, paths, batch, reference, options):
        matching = []
        for m in paths:
@ -362,15 +449,15 @@ class Command(BaseCommand):
                    message.format(batch=batch, path=path, i=i + 1, total=len(paths))
                )
            try:
-                self.create_upload(
-                    path,
-                    reference,
-                    library,
-                    async_,
-                    options["replace"],
-                    options["in_place"],
-                    options["outbox"],
-                    options["broadcast"],
+                create_upload(
+                    path=path,
+                    reference=reference,
+                    library=library,
+                    async_=async_,
+                    replace=options["replace"],
+                    in_place=options["in_place"],
+                    dispatch_outbox=options["outbox"],
+                    broadcast=options["broadcast"],
                )
            except Exception as e:
                if options["exit_on_failure"]:
@ -382,34 +469,311 @@ class Command(BaseCommand):
                errors.append((path, "{} {}".format(e.__class__.__name__, e)))
        return errors

-    def create_upload(
-        self,
-        path,
-        reference,
-        library,
-        async_,
-        replace,
-        in_place,
-        dispatch_outbox,
-        broadcast,
-    ):
-        import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
-        upload = models.Upload(library=library, import_reference=reference)
-        upload.source = "file://" + path
-        upload.import_metadata = {
-            "funkwhale": {
-                "config": {
-                    "replace": replace,
-                    "dispatch_outbox": dispatch_outbox,
-                    "broadcast": broadcast,
-                }
+    def setup_watcher(self, path, extensions, recursive, **kwargs):
+        watchdog_queue = queue.Queue()
+        # Set up a worker thread to process database load
+        worker = threading.Thread(
+            target=process_load_queue(self.stdout, **kwargs), args=(watchdog_queue,),
+        )
+        worker.setDaemon(True)
+        worker.start()
+
+        # setup watchdog to monitor directory for trigger files
+        patterns = ["*.{}".format(e) for e in extensions]
+        event_handler = Watcher(
+            stdout=self.stdout, queue=watchdog_queue, patterns=patterns,
+        )
+        observer = watchdog.observers.Observer()
+        observer.schedule(event_handler, path, recursive=recursive)
+        observer.start()
+
+        try:
+            while True:
+                self.stdout.write(
+                    "Watching for changes at {}…".format(path), ending="\r"
+                )
+                time.sleep(10)
+                if kwargs["prune"] and GLOBAL["need_pruning"]:
+                    self.stdout.write("Some files were deleted, pruning library…")
+                    prune()
+                    GLOBAL["need_pruning"] = False
+        except KeyboardInterrupt:
+            self.stdout.write("Exiting…")
+            observer.stop()
+
+        observer.join()
+
+
+GLOBAL = {"need_pruning": False}
+
+
+def prune():
+    call_command(
+        "prune_library",
+        dry_run=False,
+        prune_artists=True,
+        prune_albums=True,
+        prune_tracks=True,
+    )
+
+
+def create_upload(
+    path, reference, library, async_, replace, in_place, dispatch_outbox, broadcast,
+):
+    import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
+    upload = models.Upload(library=library, import_reference=reference)
+    upload.source = "file://" + path
+    upload.import_metadata = {
+        "funkwhale": {
+            "config": {
+                "replace": replace,
+                "dispatch_outbox": dispatch_outbox,
+                "broadcast": broadcast,
            }
        }
-        if not in_place:
-            name = os.path.basename(path)
-            with open(path, "rb") as f:
-                upload.audio_file.save(name, File(f), save=False)
+    }
+    if not in_place:
+        name = os.path.basename(path)
+        with open(path, "rb") as f:
+            upload.audio_file.save(name, File(f), save=False)

-        upload.save()
+    upload.save()

-        import_handler(upload_id=upload.pk)
+    import_handler(upload_id=upload.pk)
+
+
+def process_load_queue(stdout, **kwargs):
+    def inner(q):
+        # we batch events, to avoid calling same methods multiple times if a file is modified
+        # a lot in a really short time
+        flush_delay = 2
+        batched_events = collections.OrderedDict()
+        while True:
+            while True:
+                if not q.empty():
+                    event = q.get()
+                    batched_events[event["path"]] = event
+                else:
+                    break
+            for path, event in batched_events.copy().items():
+                if time.time() - event["time"] <= flush_delay:
+                    continue
+                now = datetime.datetime.utcnow()
+                stdout.write(
+                    "{} -- Processing {}:{}...\n".format(
+                        now.strftime("%Y/%m/%d %H:%M:%S"), event["type"], event["path"]
+                    )
+                )
+                del batched_events[path]
+                handle_event(event, stdout=stdout, **kwargs)
+            time.sleep(1)
+
+    return inner
+
+
+class Watcher(watchdog.events.PatternMatchingEventHandler):
+    def __init__(self, stdout, queue, patterns):
+        self.stdout = stdout
+        self.queue = queue
+        super().__init__(patterns=patterns)
+
+    def enqueue(self, event):
+        e = {
+            "is_directory": event.is_directory,
+            "type": event.event_type,
+            "path": event.src_path,
+            "src_path": event.src_path,
+            "dest_path": getattr(event, "dest_path", None),
+            "time": time.time(),
+        }
+        self.queue.put(e)
+
+    def on_moved(self, event):
+        self.enqueue(event)
+
+    def on_created(self, event):
+        self.enqueue(event)
+
+    def on_deleted(self, event):
+        self.enqueue(event)
+
+    def on_modified(self, event):
+        self.enqueue(event)
+
+
+def handle_event(event, stdout, **kwargs):
+    handlers = {
+        "modified": handle_modified,
+        "created": handle_created,
+        "moved": handle_moved,
+        "deleted": handle_deleted,
+    }
+    handlers[event["type"]](event=event, stdout=stdout, **kwargs)
+
+
+def handle_modified(event, stdout, library, in_place, **kwargs):
+    existing_candidates = library.uploads.filter(import_status="finished")
+    with open(event["path"], "rb") as f:
+        checksum = common_utils.get_file_hash(f)
+
+    existing = existing_candidates.filter(checksum=checksum).first()
+    if existing:
+        # found an existing file with same checksum, nothing to do
+        stdout.write("  File already imported and metadata is up-to-date")
+        return
+
+    to_update = None
+    if in_place:
+        source = "file://{}".format(event["path"])
+        to_update = (
+            existing_candidates.in_place()
+            .filter(source=source)
+            .select_related(
+                "track__attributed_to", "track__artist", "track__album__artist",
+            )
+            .first()
+        )
+        if to_update:
+            if (
+                to_update.track.attributed_to
+                and to_update.track.attributed_to != library.actor
+            ):
+                stdout.write(
+                    "  Cannot update track metadata, track belongs to someone else".format(
+                        to_update.pk
+                    )
+                )
+                return
+            else:
+                stdout.write(
+                    "  Updating existing file #{} with new metadata…".format(
+                        to_update.pk
+                    )
+                )
+                audio_metadata = to_update.get_metadata()
+                try:
+                    tasks.update_track_metadata(audio_metadata, to_update.track)
+                except serializers.ValidationError as e:
+                    stdout.write("  Invalid metadata: {}".format(e))
+                else:
+                    to_update.checksum = checksum
+                    to_update.save(update_fields=["checksum"])
+                return
+
+    stdout.write("  Launching import for new file")
+    create_upload(
+        path=event["path"],
+        reference=kwargs["reference"],
+        library=library,
+        async_=False,
+        replace=kwargs["replace"],
+        in_place=in_place,
+        dispatch_outbox=kwargs["dispatch_outbox"],
+        broadcast=kwargs["broadcast"],
+    )
+
+
+def handle_created(event, stdout, **kwargs):
+    """
+    Created is essentially an alias for modified, because for instance when copying a file in the watched directory,
+    a created event will be fired on the initial touch, then many modified event (as the file is written).
+    """
+    return handle_modified(event, stdout, **kwargs)
+
+
+def handle_moved(event, stdout, library, in_place, **kwargs):
+    if not in_place:
+        return
+
+    old_source = "file://{}".format(event["src_path"])
+    new_source = "file://{}".format(event["dest_path"])
+    existing_candidates = library.uploads.filter(import_status="finished")
+    existing_candidates = existing_candidates.in_place().filter(source=old_source)
+    existing = existing_candidates.first()
+    if existing:
+        stdout.write("  Updating path of existing file #{}".format(existing.pk))
+        existing.source = new_source
+        existing.save(update_fields=["source"])
+
+
+def handle_deleted(event, stdout, library, in_place, **kwargs):
+    if not in_place:
+        return
+    source = "file://{}".format(event["path"])
+    existing_candidates = library.uploads.filter(import_status="finished")
+    existing_candidates = existing_candidates.in_place().filter(source=source)
+    if existing_candidates.count():
+        stdout.write("  Removing file from DB")
+        existing_candidates.delete()
+        GLOBAL["need_pruning"] = True
+
+
+def check_updates(stdout, library, extensions, paths, batch_size):
+    existing = (
+        library.uploads.in_place()
+        .filter(import_status="finished")
+        .exclude(checksum=None)
+        .select_related("library", "track")
+    )
+    queries = []
+    checked_paths = set()
+    for path in paths:
+        for ext in extensions:
+            queries.append(
+                Q(source__startswith="file://{}".format(path))
+                & Q(source__endswith=".{}".format(ext))
+            )
+    query, remainder = queries[0], queries[1:]
+    for q in remainder:
+        query = q | query
+    existing = existing.filter(query)
+    total = existing.count()
+    stdout.write("Found {} files to check in database!".format(total))
+    uploads = existing.order_by("source")
+    for i, rows in enumerate(batch(uploads.iterator(), batch_size)):
+        stdout.write("Handling batch {} ({} items)".format(i + 1, len(rows),))
+
+        for upload in rows:
+
+            check_upload(stdout, upload)
+            checked_paths.add(upload.source.replace("file://", "", 1))
+
+    return checked_paths
+
+
+def check_upload(stdout, upload):
+    try:
+        audio_file = upload.get_audio_file()
+    except FileNotFoundError:
+        stdout.write(
+            "  Removing file #{} missing from disk at {}".format(
+                upload.pk, upload.source
+            )
+        )
+        return upload.delete()
+
+    checksum = common_utils.get_file_hash(audio_file)
+    if upload.checksum != checksum:
+        stdout.write(
+            "  File #{} at {} was modified, updating metadata…".format(
+                upload.pk, upload.source
+            )
+        )
+        if upload.library.actor_id != upload.track.attributed_to_id:
+            stdout.write(
+                "  Cannot update track metadata, track belongs to someone else".format(
+                    upload.pk
+                )
+            )
+        else:
+            track = models.Track.objects.select_related("artist", "album__artist").get(
+                pk=upload.track_id
+            )
+            try:
+                tasks.update_track_metadata(upload.get_metadata(), track)
+            except serializers.ValidationError as e:
+                stdout.write("  Invalid metadata: {}".format(e))
+                return
+            else:
+                upload.checksum = checksum
+                upload.save(update_fields=["checksum"])
--- a/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py
+++ b/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py
@ -0,0 +1,23 @@
+# Generated by Django 3.0.4 on 2020-05-05 08:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('music', '0051_auto_20200319_1249'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='upload',
+            name='checksum',
+            field=models.CharField(blank=True, db_index=True, max_length=100, null=True),
+        ),
+        migrations.AlterField(
+            model_name='uploadversion',
+            name='mimetype',
+            field=models.CharField(choices=[('audio/mp3', 'mp3'), ('audio/mpeg3', 'mp3'), ('audio/x-mp3', 'mp3'), ('audio/mpeg', 'mp3'), ('video/ogg', 'ogg'), ('audio/ogg', 'ogg'), ('audio/opus', 'opus'), ('audio/x-m4a', 'aac'), ('audio/x-m4a', 'm4a'), ('audio/x-flac', 'flac'), ('audio/flac', 'flac')], max_length=50),
+        ),
+    ]
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@ -655,6 +655,14 @@ class Track(APIModelMixin):


 class UploadQuerySet(common_models.NullsLastQuerySet):
+    def in_place(self, include=True):
+        query = models.Q(source__startswith="file://") & (
+            models.Q(audio_file="") | models.Q(audio_file=None)
+        )
+        if not include:
+            query = ~query
+        return self.filter(query)
+
    def playable_by(self, actor, include=True):
        libraries = Library.objects.viewable_by(actor)

@ -754,6 +762,9 @@ class Upload(models.Model):
    )
    downloads_count = models.PositiveIntegerField(default=0)

+    # stores checksums such as `sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
+    checksum = models.CharField(max_length=100, db_index=True, null=True, blank=True)
+
    objects = UploadQuerySet.as_manager()

    @property
@ -833,7 +844,7 @@ class Upload(models.Model):
    def get_audio_file(self):
        if self.audio_file:
            return self.audio_file.open()
-        if self.source.startswith("file://"):
+        if self.source and self.source.startswith("file://"):
            return open(self.source.replace("file://", "", 1), "rb")

    def get_audio_data(self):
@ -866,6 +877,15 @@ class Upload(models.Model):
                self.mimetype = mimetypes.guess_type(self.source)[0]
        if not self.size and self.audio_file:
            self.size = self.audio_file.size
+        if not self.checksum:
+            try:
+                audio_file = self.get_audio_file()
+            except FileNotFoundError:
+                pass
+            else:
+                if audio_file:
+                    self.checksum = common_utils.get_file_hash(audio_file)
+
        if not self.pk and not self.fid and self.library.actor.get_user():
            self.fid = self.get_federation_id()
        return super().save(**kwargs)
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@ -851,3 +851,71 @@ def update_library_entity(obj, data):
    obj.save(update_fields=list(data.keys()))

    return obj
+
+
+UPDATE_CONFIG = {
+    "track": {
+        "position": {},
+        "title": {},
+        "mbid": {},
+        "disc_number": {},
+        "copyright": {},
+        "license": {
+            "getter": lambda data, field: licenses.match(
+                data.get("license"), data.get("copyright")
+            )
+        },
+    },
+    "album": {"title": {}, "mbid": {}, "release_date": {}},
+    "artist": {"name": {}, "mbid": {}},
+    "album_artist": {"name": {}, "mbid": {}},
+}
+
+
+@transaction.atomic
+def update_track_metadata(audio_metadata, track):
+    # XXX: implement this to support updating metadata when an imported files
+    # is updated by an outside tool (e.g beets).
+    serializer = metadata.TrackMetadataSerializer(data=audio_metadata)
+    serializer.is_valid(raise_exception=True)
+    new_data = serializer.validated_data
+
+    to_update = [
+        ("track", track, lambda data: data),
+        ("album", track.album, lambda data: data["album"]),
+        ("artist", track.artist, lambda data: data["artists"][0]),
+        (
+            "album_artist",
+            track.album.artist if track.album else None,
+            lambda data: data["album"]["artists"][0],
+        ),
+    ]
+    for id, obj, data_getter in to_update:
+        if not obj:
+            continue
+        obj_updated_fields = []
+        try:
+            obj_data = data_getter(new_data)
+        except IndexError:
+            continue
+        for field, config in UPDATE_CONFIG[id].items():
+            getter = config.get(
+                "getter", lambda data, field: data[config.get("field", field)]
+            )
+            try:
+                new_value = getter(obj_data, field)
+            except KeyError:
+                continue
+            old_value = getattr(obj, field)
+            if new_value == old_value:
+                continue
+            obj_updated_fields.append(field)
+            setattr(obj, field, new_value)
+
+        if obj_updated_fields:
+            obj.save(update_fields=obj_updated_fields)
+
+    if track.album and "album" in new_data and new_data["album"].get("cover_data"):
+        common_utils.attach_file(
+            track.album, "attachment_cover", new_data["album"].get("cover_data")
+        )
--- a/api/requirements/base.txt
+++ b/api/requirements/base.txt
@ -83,3 +83,4 @@ service_identity==18.1.0
 markdown>=3.2,<4
 bleach>=3,<4
 feedparser==6.0.0b3
+watchdog==0.10.2
--- a/api/tests/common/test_utils.py
+++ b/api/tests/common/test_utils.py
@ -258,3 +258,12 @@ def test_monkey_patch_request_build_absolute_uri(
    request = fake_request.get("/", **meta)

    assert request.build_absolute_uri(path) == expected
+
+
+def test_get_file_hash(tmpfile, settings):
+    settings.HASHING_ALGORITHM = "sha256"
+    content = b"hello"
+    tmpfile.write(content)
+    # echo -n "hello" | sha256sum
+    expected = "sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
+    assert utils.get_file_hash(tmpfile) == expected
--- a/api/tests/music/test_commands.py
+++ b/api/tests/music/test_commands.py
@ -1,6 +1,7 @@
 import os
 import pytest

+from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music.management.commands import check_inplace_files
 from funkwhale_api.music.management.commands import fix_uploads
 from funkwhale_api.music.management.commands import prune_library
@ -18,7 +19,7 @@ def test_fix_uploads_bitrate_length(factories, mocker):
        return_value={"bitrate": 42, "length": 43},
    )

-    c.fix_file_data(dry_run=False)
+    c.fix_file_data(dry_run=False, batch_size=100)

    upload1.refresh_from_db()
    upload2.refresh_from_db()
@ -41,7 +42,7 @@ def test_fix_uploads_size(factories, mocker):

    mocker.patch("funkwhale_api.music.models.Upload.get_file_size", return_value=2)

-    c.fix_file_size(dry_run=False)
+    c.fix_file_size(dry_run=False, batch_size=100)

    upload1.refresh_from_db()
    upload2.refresh_from_db()
@ -69,7 +70,7 @@ def test_fix_uploads_mimetype(factories, mocker):
        mimetype="audio/something",
    )
    c = fix_uploads.Command()
-    c.fix_mimetypes(dry_run=False)
+    c.fix_mimetypes(dry_run=False, batch_size=100)

    upload1.refresh_from_db()
    upload2.refresh_from_db()
@ -78,6 +79,25 @@ def test_fix_uploads_mimetype(factories, mocker):
    assert upload2.mimetype == "audio/something"


+def test_fix_uploads_checksum(factories, mocker):
+    upload1 = factories["music.Upload"]()
+    upload2 = factories["music.Upload"]()
+    upload1.__class__.objects.filter(pk=upload1.pk).update(checksum="test")
+    upload2.__class__.objects.filter(pk=upload2.pk).update(checksum=None)
+    c = fix_uploads.Command()
+
+    c.fix_file_checksum(dry_run=False, batch_size=100)
+
+    upload1.refresh_from_db()
+    upload2.refresh_from_db()
+
+    # not updated
+    assert upload1.checksum == "test"
+
+    # updated
+    assert upload2.checksum == common_utils.get_file_hash(upload2.audio_file)
+
+
 def test_prune_library_dry_run(factories):
    prunable = factories["music.Track"]()
    not_prunable = factories["music.Track"]()
--- a/api/tests/music/test_models.py
+++ b/api/tests/music/test_models.py
@ -5,6 +5,7 @@ import pytest
 from django.utils import timezone
 from django.urls import reverse

+from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music import importers, models, tasks
 from funkwhale_api.federation import utils as federation_utils

@ -164,6 +165,17 @@ def test_audio_track_mime_type(extention, mimetype, factories):
    assert upload.mimetype == mimetype


+@pytest.mark.parametrize("name", ["test.ogg", "test.mp3"])
+def test_audio_track_checksum(name, factories):
+
+    path = os.path.join(DATA_DIR, name)
+    upload = factories["music.Upload"](audio_file__from_path=path, mimetype=None)
+
+    with open(path, "rb") as f:
+        expected = common_utils.get_file_hash(f)
+    assert upload.checksum == expected
+
+
 def test_upload_file_name(factories):
    name = "test.mp3"
    path = os.path.join(DATA_DIR, name)
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@ -1329,3 +1329,40 @@ def test_can_import_track_with_same_position_in_same_discs_skipped(factories, mo
    new_upload.refresh_from_db()

    assert new_upload.import_status == "skipped"
+
+
+def test_update_track_metadata(factories):
+    track = factories["music.Track"]()
+    data = {
+        "title": "Peer Gynt Suite no. 1, op. 46: I. Morning",
+        "artist": "Edvard Grieg",
+        "album_artist": "Edvard Grieg; Musopen Symphony Orchestra",
+        "album": "Peer Gynt Suite no. 1, op. 46",
+        "date": "2012-08-15",
+        "position": "4",
+        "disc_number": "2",
+        "musicbrainz_albumid": "a766da8b-8336-47aa-a3ee-371cc41ccc75",
+        "mbid": "bd21ac48-46d8-4e78-925f-d9cc2a294656",
+        "musicbrainz_artistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823",
+        "musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f",
+        "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
+        "copyright": "Someone",
+        "comment": "hello there",
+    }
+    tasks.update_track_metadata(metadata.FakeMetadata(data), track)
+
+    track.refresh_from_db()
+
+    assert track.title == data["title"]
+    assert track.position == int(data["position"])
+    assert track.disc_number == int(data["disc_number"])
+    assert track.license.code == "cc-by-sa-4.0"
+    assert track.copyright == data["copyright"]
+    assert str(track.mbid) == data["mbid"]
+    assert track.album.title == data["album"]
+    assert track.album.release_date == datetime.date(2012, 8, 15)
+    assert str(track.album.mbid) == data["musicbrainz_albumid"]
+    assert track.artist.name == data["artist"]
+    assert str(track.artist.mbid) == data["musicbrainz_artistid"]
+    assert track.album.artist.name == "Edvard Grieg"
+    assert str(track.album.artist.mbid) == "013c8e5b-d72a-4cd3-8dee-6c64d6125823"
--- a/api/tests/test_import_audio_file.py
+++ b/api/tests/test_import_audio_file.py
@ -4,6 +4,8 @@ import pytest
 from django.core.management import call_command
 from django.core.management.base import CommandError

+from funkwhale_api.common import utils as common_utils
+from funkwhale_api.music.management.commands import import_files

 DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "files")

@ -159,3 +161,194 @@ def test_import_files_in_place(factories, mocker, settings):
 def test_storage_rename_utf_8_files(factories):
    upload = factories["music.Upload"](audio_file__filename="été.ogg")
    assert upload.audio_file.name.endswith("ete.ogg")
+
+
+@pytest.mark.parametrize("name", ["modified", "moved", "created", "deleted"])
+def test_handle_event(name, mocker):
+    handler = mocker.patch.object(import_files, "handle_{}".format(name))
+
+    event = {"type": name}
+    stdout = mocker.Mock()
+    kwargs = {"hello": "world"}
+    import_files.handle_event(event, stdout, **kwargs)
+
+    handler.assert_called_once_with(event=event, stdout=stdout, **kwargs)
+
+
+def test_handle_created(mocker):
+    handle_modified = mocker.patch.object(import_files, "handle_modified")
+
+    event = mocker.Mock()
+    stdout = mocker.Mock()
+    kwargs = {"hello": "world"}
+    import_files.handle_created(event, stdout, **kwargs)
+
+    handle_modified.assert_called_once_with(event, stdout, **kwargs)
+
+
+def test_handle_deleted(factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": "/path.mp3",
+    }
+    library = factories["music.Library"]()
+    deleted = factories["music.Upload"](
+        library=library,
+        source="file://{}".format(event["path"]),
+        import_status="finished",
+        audio_file=None,
+    )
+    kept = [
+        factories["music.Upload"](
+            library=library,
+            source="file://{}".format(event["path"]),
+            import_status="finished",
+        ),
+        factories["music.Upload"](
+            source="file://{}".format(event["path"]),
+            import_status="finished",
+            audio_file=None,
+        ),
+    ]
+
+    import_files.handle_deleted(
+        event=event, stdout=stdout, library=library, in_place=True
+    )
+
+    with pytest.raises(deleted.DoesNotExist):
+        deleted.refresh_from_db()
+
+    for upload in kept:
+        upload.refresh_from_db()
+
+
+def test_handle_moved(factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "src_path": "/path.mp3",
+        "dest_path": "/new_path.mp3",
+    }
+    library = factories["music.Library"]()
+    updated = factories["music.Upload"](
+        library=library,
+        source="file://{}".format(event["src_path"]),
+        import_status="finished",
+        audio_file=None,
+    )
+    untouched = [
+        factories["music.Upload"](
+            library=library,
+            source="file://{}".format(event["src_path"]),
+            import_status="finished",
+        ),
+        factories["music.Upload"](
+            source="file://{}".format(event["src_path"]),
+            import_status="finished",
+            audio_file=None,
+        ),
+    ]
+
+    import_files.handle_moved(
+        event=event, stdout=stdout, library=library, in_place=True
+    )
+
+    updated.refresh_from_db()
+    assert updated.source == "file://{}".format(event["dest_path"])
+    for upload in untouched:
+        source = upload.source
+        upload.refresh_from_db()
+        assert source == upload.source
+
+
+def test_handle_modified_creates_upload(tmpfile, factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    process_upload = mocker.patch("funkwhale_api.music.tasks.process_upload")
+    library = factories["music.Library"]()
+    import_files.handle_modified(
+        event=event,
+        stdout=stdout,
+        library=library,
+        in_place=True,
+        reference="hello",
+        replace=False,
+        dispatch_outbox=False,
+        broadcast=False,
+    )
+    upload = library.uploads.latest("id")
+    assert upload.source == "file://{}".format(event["path"])
+
+    process_upload.assert_called_once_with(upload_id=upload.pk)
+
+
+def test_handle_modified_skips_existing_checksum(tmpfile, factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    tmpfile.write(b"hello")
+
+    library = factories["music.Library"]()
+    factories["music.Upload"](
+        checksum=common_utils.get_file_hash(tmpfile),
+        library=library,
+        import_status="finished",
+    )
+    import_files.handle_modified(
+        event=event, stdout=stdout, library=library, in_place=True,
+    )
+    assert library.uploads.count() == 1
+
+
+def test_handle_modified_update_existing_path_if_found(tmpfile, factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    update_track_metadata = mocker.patch(
+        "funkwhale_api.music.tasks.update_track_metadata"
+    )
+    get_metadata = mocker.patch("funkwhale_api.music.models.Upload.get_metadata")
+    library = factories["music.Library"]()
+    track = factories["music.Track"](attributed_to=library.actor)
+    upload = factories["music.Upload"](
+        source="file://{}".format(event["path"]),
+        track=track,
+        checksum="old",
+        library=library,
+        import_status="finished",
+        audio_file=None,
+    )
+    import_files.handle_modified(
+        event=event, stdout=stdout, library=library, in_place=True,
+    )
+    update_track_metadata.assert_called_once_with(
+        get_metadata.return_value, upload.track,
+    )
+
+
+def test_handle_modified_update_existing_path_if_found_and_attributed_to(
+    tmpfile, factories, mocker
+):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    update_track_metadata = mocker.patch(
+        "funkwhale_api.music.tasks.update_track_metadata"
+    )
+    library = factories["music.Library"]()
+    factories["music.Upload"](
+        source="file://{}".format(event["path"]),
+        checksum="old",
+        library=library,
+        track__attributed_to=factories["federation.Actor"](),
+        import_status="finished",
+        audio_file=None,
+    )
+    import_files.handle_modified(
+        event=event, stdout=stdout, library=library, in_place=True,
+    )
+    update_track_metadata.assert_not_called()
--- a/changes/changelog.d/1093.bugfix
+++ b/changes/changelog.d/1093.bugfix
@ -1 +1 @@
-Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetypes`` to set proper mimetypes on existing uploads.
+Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetype`` to set proper mimetypes on existing uploads.
--- a/changes/changelog.d/721.feature
+++ b/changes/changelog.d/721.feature
@ -0,0 +1 @@
+Support a --watch mode with ``import_files`` to automatically add, update and remove files when filesystem is updated (#721)
--- a/docs/admin/importing-music.rst
+++ b/docs/admin/importing-music.rst
@ -1,15 +1,21 @@
-Importing music
-================
+Importing music from the server
+===============================

-From music directory on the server
----------------------------------
-
-You can import music files in Funkwhale assuming they are located on the server
-and readable by the Funkwhale application. Your music files should contain at
+Funkwhale can import music files that are located on the server assuming
+they readable by the Funkwhale application. Your music files should contain at
 least an ``artist``, ``album`` and ``title`` tags, but we recommend you tag
 it extensively using a proper tool, such as Beets or Musicbrainz Picard.

-You can import those tracks as follows, assuming they are located in
+Funkwhale supports two different import modes:
+
+- copy (the default): files are copied into Funkwhale's internal storage. This means importing a 1GB library will result in the same amount of space being used by Funkwhale.
+- :ref:`in-place <in-place-import>` (when the ``--in-place`` is provided): files are referenced in Funkwhale's DB but not copied or touched in anyway. This is useful if you have a huge library, or one that is updated by an external tool such as Beets..
+
+.. note::
+
+    In Funkwhale 1.0, **the default behaviour will change to in-place import**
+
+Regardless of the mode you're choosing, import works as described below, assuming your files are located in
 ``/srv/funkwhale/data/music``:

 .. code-block:: bash
@ -17,6 +23,17 @@ You can import those tracks as follows, assuming they are located in
    export LIBRARY_ID="<your_libary_id>"
    python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput

+.. note::
+    You'll have to create a library in the Web UI before to get your library ID. Simply visit
+    https://yourdomain/content/libraries/ to create one.
+
+    Library IDs are available in library urls or sharing link. In this example:
+    https://funkwhale.instance/content/libraries/769a2ae3-eb3d-4aff-9f94-2c4d80d5c2d1,
+    the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1
+
+    You can use only the first characters of the ID when calling the command, like that:
+    ``export LIBRARY_ID="769a2bc3"``
+
 When you use docker, the ``/srv/funkwhale/data/music`` is mounted from the host
 to the ``/music`` directory on the container:

@ -32,16 +49,6 @@ When you installed Funkwhale via ansible, you need to call a script instead of P
    export LIBRARY_ID="<your_libary_id>"
    /srv/funkwhale/manage import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput

-.. note::
-    You'll have to create a library in the Web UI before to get your library ID. Simply visit
-    https://yourdomain/content/libraries/ to create one.
-
-    Library IDs are available in library urls or sharing link. In this example:
-    https://funkwhale.instance/content/libraries/769a2ae3-eb3d-4aff-9f94-2c4d80d5c2d1,
-    the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1
-
-    You can use only the first characters of the ID when calling the command, like that:
-    ``export LIBRARY_ID="769a2bc3"``

 The import command supports several options, and you can check the help to
 get details::
@ -63,6 +70,7 @@ get details::
    At the moment, only Flac, OGG/Vorbis and MP3 files with ID3 tags are supported


+
 .. _in-place-import:

 In-place import
@ -88,14 +96,6 @@ configuration options to ensure the webserver can serve them properly:
 - :ref:`setting-MUSIC_DIRECTORY_PATH`
 - :ref:`setting-MUSIC_DIRECTORY_SERVE_PATH`

-.. warning::
-
-    While in-place import is faster and less disk-space-hungry, it's also
-    more fragile: if, for some reason, you move or rename the source files,
-    Funkwhale will not be able to serve those files anymore.
-
-    Thus, be especially careful when you manipulate the source files.
-
 We recommend you symlink all your music directories into ``/srv/funkwhale/data/music``
 and run the `import_files` command from that directory. This will make it possible
 to use multiple music directories, without any additional configuration
@ -134,6 +134,49 @@ If you want to go with symlinks, ensure each symlinked directory is mounted as a
      # add your symlinked dirs here
      - /media/nfsshare:/media/nfsshare:ro

+Metadata updates
+^^^^^^^^^^^^^^^^
+
+When doing an import with in ``in-place`` mode, the importer will also check and update existing entries
+found in the database. For instance, if a file was imported, the ID3 Title tag was updated, and you rerun a scan,
+Funkwhale will pick up the new title. The following fields can be updated this way:
+
+- Track mbid
+- Track title
+- Track position and disc number
+- Track license and copyright
+- Album cover
+- Album title
+- Album mbid
+- Album release date
+- Artist name
+- Artist mbid
+- Album artist name
+- Album artist mbid
+
+
+React to filesystem events with ``--watch``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you have a really big library or one that is updated quite often, running the ``import_files`` command by hand
+may not be practical. To help with this use case, the ``import_files`` command supports a ``--watch`` flag that will observes filesystem events
+instead of performing a full import.
+
+File creation, move, update and removal are handled when ``--watch`` is provided:
+
+- Files created in the watched directory are imported immediatly
+- If using ``in-place`` mode, files updates trigger a metadata update on the corresponding entries
+- If using ``in-place`` mode, files that are moved and known by Funkwhale will see their path updated in Funkwhale's DB
+- If using ``in-place`` mode, files that are removed and known by Funkwhale will be removed from Funkwhale's DB
+
+Pruning dangling metadata with ``--prune``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Funkwhale is, by design, conservative with music metadata in its database. If you remove a file from Funkwhale's DB,
+the corresponding artist, album and track object won't be deleted by default.
+
+If you want to prune dangling metadata from the database once the ``import_files`` command is over, simply add the ``--prune`` flag.
+This also works in with ``--watch``.

 Album covers
 ^^^^^^^^^^^^
@ -159,9 +202,3 @@ under creative commons (courtesy of Jamendo):
    ./download-tracks.sh music.txt

 This will download a bunch of zip archives (one per album) under the ``data/music`` directory and unzip their content.
-
-From other instances
--------------------
-
-Funkwhale also supports importing music from other instances. Please refer
-to :doc:`../federation/index` for more details.
				`@ -0,0 +1 @@`
				Support a --watch mode with ``import_files`` to automatically add, update and remove files when filesystem is updated (#721)