kopia lustrzana https://github.com/bellingcat/auto-archiver
Generalize the yt-dlp entry-to-filename helper into a shared utility so it can be reused
rodzic
c0be41950d
commit
c7a84bc97a
|
@ -20,6 +20,7 @@ from loguru import logger
|
||||||
from auto_archiver.core.extractor import Extractor
|
from auto_archiver.core.extractor import Extractor
|
||||||
from auto_archiver.core import Metadata, Media
|
from auto_archiver.core import Metadata, Media
|
||||||
from auto_archiver.utils import get_datetime_from_str
|
from auto_archiver.utils import get_datetime_from_str
|
||||||
|
from auto_archiver.utils.misc import ydl_entry_to_filename
|
||||||
from .dropin import GenericDropin
|
from .dropin import GenericDropin
|
||||||
|
|
||||||
|
|
||||||
|
@ -382,27 +383,13 @@ class GenericExtractor(Extractor):
|
||||||
entries = [data]
|
entries = [data]
|
||||||
result = Metadata()
|
result = Metadata()
|
||||||
|
|
||||||
def _helper_get_filename(entry: dict) -> str | bool:
    """Locate the file on disk that corresponds to a yt-dlp ``entry``.

    The path predicted by ``ydl.prepare_filename(entry)`` (``ydl`` comes from the
    enclosing scope) is split into directory and base name, and that directory is
    scanned for the first file that either

    * starts with the predicted base name (whatever its extension), or
    * has a stem contained in the entry's ``"url"`` AND a guessed ``video/*`` mimetype.

    Args:
        entry: yt-dlp info dict; only the optional ``"url"`` key is read directly here.

    Returns:
        The path of the first matching file, or ``False`` when nothing matches or
        the directory cannot be listed.
    """
    entry_url = entry.get("url")

    filename = ydl.prepare_filename(entry)
    base_filename, _ = os.path.splitext(filename)  # '/get/path/to/file' ignore '.ext'
    directory = os.path.dirname(base_filename)  # '/get/path/to'
    basename = os.path.basename(base_filename)  # 'file'

    try:
        # A bare relative filename has an empty dirname and os.listdir("") raises,
        # so fall back to the current directory in that case.
        candidates = os.listdir(directory or os.curdir)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing was written where the file was expected: report "not found".
        return False

    for candidate in candidates:
        stem = os.path.splitext(candidate)[0]
        is_video = "video/" in (mimetypes.guess_type(candidate)[0] or "")
        # `and` binds tighter than `or`: the video check only guards the URL match.
        # The parentheses make that existing precedence explicit.
        if candidate.startswith(basename) or (entry_url and stem in entry_url and is_video):
            return os.path.join(directory, candidate)
    return False
|
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
try:
|
try:
|
||||||
filename = _helper_get_filename(entry)
|
filename = ydl_entry_to_filename(ydl, entry)
|
||||||
|
|
||||||
if not filename or not os.path.exists(filename):
|
if not filename:
|
||||||
# file was not downloaded or could not be retrieved, example: sensitive videos on YT without using cookies.
|
# file was not downloaded or could not be retrieved, example: sensitive videos on YT without using cookies.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
@ -116,3 +117,26 @@ def get_timestamp(ts, utc=True, iso=True, dayfirst=True) -> str | datetime | Non
|
||||||
|
|
||||||
def get_current_timestamp() -> str:
|
def get_current_timestamp() -> str:
|
||||||
return get_timestamp(datetime.now())
|
return get_timestamp(datetime.now())
|
||||||
|
|
||||||
|
|
||||||
|
def ydl_entry_to_filename(ydl, entry: dict) -> str:
|
||||||
|
import yt_dlp
|
||||||
|
|
||||||
|
ydl: yt_dlp.YoutubeDL
|
||||||
|
entry_url = entry.get("url")
|
||||||
|
|
||||||
|
filename = ydl.prepare_filename(entry)
|
||||||
|
if os.path.exists(filename):
|
||||||
|
return filename
|
||||||
|
|
||||||
|
base_filename, _ = os.path.splitext(filename) # '/get/path/to/file' ignore '.ext'
|
||||||
|
directory = os.path.dirname(base_filename) # '/get/path/to'
|
||||||
|
basename = os.path.basename(base_filename) # 'file'
|
||||||
|
for f in os.listdir(directory):
|
||||||
|
if (
|
||||||
|
f.startswith(basename)
|
||||||
|
or (entry_url and os.path.splitext(f)[0] in entry_url)
|
||||||
|
and "video/" in (mimetypes.guess_type(f)[0] or "")
|
||||||
|
):
|
||||||
|
return os.path.join(directory, f)
|
||||||
|
return False
|
Ładowanie…
Reference in New Issue