auto-archiver/src/auto_archiver/core/media.py

108 wiersze
4.0 KiB
Python
Czysty Zwykły widok Historia

2022-12-14 19:01:20 +00:00
from __future__ import annotations
2023-07-28 11:19:14 +00:00
import os
import traceback
2023-05-24 10:18:39 +00:00
from typing import Any, List
from dataclasses import dataclass, field
2023-03-23 14:28:45 +00:00
from dataclasses_json import dataclass_json, config
import mimetypes
2022-12-14 19:01:20 +00:00
2023-07-28 11:19:14 +00:00
import ffmpeg
from ffmpeg._run import Error
2023-03-23 14:28:45 +00:00
from .context import ArchivingContext
2023-03-23 14:28:45 +00:00
from loguru import logger
2023-03-23 14:28:45 +00:00
@dataclass_json  # annotation order matters
@dataclass
class Media:
    """A media file together with its storage URLs and free-form properties.

    Values in ``properties`` may themselves be ``Media`` instances (directly
    or inside lists), for example transformations of the original media;
    ``all_inner_media`` walks those.
    """

    filename: str  # path passed to mimetypes, ffmpeg.probe and os.path.getsize
    key: str = None
    urls: List[str] = field(default_factory=list)  # appended to by add_url
    properties: dict = field(default_factory=dict)  # accessed through get/set
    _mimetype: str = None  # eg: image/jpeg
    _stored: bool = field(default=False, repr=False, metadata=config(exclude=lambda _: True))  # always exclude

    def store(self: Media, override_storages: List = None, url: str = "url-not-available"):
        """Store this media and every nested media in each available storage.

        Args:
            override_storages: storages to use instead of the ones found in
                the archiving context; ignored when empty or None.
            url: forwarded unchanged to each storage's ``store`` call.

        Returns:
            None. When no storages are available a warning is logged and
            nothing is stored.
        """
        storages = override_storages or ArchivingContext.get("storages")
        # Truthiness check (rather than len()) so that a missing context
        # entry (None) reaches the warning below instead of raising TypeError.
        if not storages:
            logger.warning(f"No storages found in local context or provided directly for {self.filename}.")
            return

        # all_inner_media recurses, so nested media at any depth are stored.
        for storage in storages:
            for any_media in self.all_inner_media(include_self=True):
                storage.store(any_media, url)

    def all_inner_media(self, include_self=False):
        """Yield every Media nested inside this media's properties.

        Media can live inside media properties, e.g. transformations of the
        original media. Property values that are Media, or lists containing
        Media, are traversed recursively; other values are ignored.

        Args:
            include_self: when True, this instance is yielded first.
        """
        if include_self:
            yield self
        for prop in self.properties.values():
            if isinstance(prop, Media):
                yield from prop.all_inner_media(include_self=True)
            elif isinstance(prop, list):
                for item in prop:
                    if isinstance(item, Media):
                        yield from item.all_inner_media(include_self=True)

    def is_stored(self) -> bool:
        """Return True when there is at least one URL and exactly one URL per context storage."""
        if not self.urls:
            return False
        # Fall back to an empty list so a missing "storages" entry yields
        # False instead of a TypeError from len(None).
        storages = ArchivingContext.get("storages") or []
        return len(self.urls) == len(storages)

    def set(self, key: str, value: Any) -> Media:
        """Set a property and return self so calls can be chained."""
        self.properties[key] = value
        return self

    def get(self, key: str, default: Any = None) -> Any:
        """Return the property stored under ``key``, or ``default`` if absent."""
        return self.properties.get(key, default)

    def add_url(self, url: str) -> None:
        """Record a URL for this media; the url can be remote, local, ..."""
        self.urls.append(url)

    @property  # getter .mimetype
    def mimetype(self) -> str:
        """Return the mimetype, guessing it from the filename on first use.

        Returns an empty string (never None) when there is no filename or
        the type cannot be guessed.
        """
        if not self.filename:
            logger.warning(f"cannot get mimetype from media without filename: {self}")
            return ""
        if not self._mimetype:
            # guess_type returns (type, encoding); only the type is kept.
            self._mimetype = mimetypes.guess_type(self.filename)[0]
        return self._mimetype or ""

    @mimetype.setter  # setter .mimetype
    def mimetype(self, v: str) -> None:
        self._mimetype = v

    def is_video(self) -> bool:
        """Return True when the mimetype starts with "video"."""
        return self.mimetype.startswith("video")

    def is_audio(self) -> bool:
        """Return True when the mimetype starts with "audio"."""
        return self.mimetype.startswith("audio")

    def is_image(self) -> bool:
        """Return True when the mimetype starts with "image"."""
        return self.mimetype.startswith("image")

    def is_valid_video(self) -> bool:
        """Check for video streams with ffmpeg, or a minimum file size.

        ``self.is_video()`` should be used together with this method.

        Returns:
            True if ffmpeg reports a video stream with a positive
            ``duration_ts``; False if ffmpeg raises its own ``Error``. On any
            other probing failure, True only if the file is larger than
            20,000 bytes, or True if even the size cannot be read.
        """
        try:
            streams = ffmpeg.probe(self.filename, select_streams='v')['streams']
            logger.warning(f"STREAMS FOR {self.filename} {streams}")
            return any(s.get("duration_ts", 0) > 0 for s in streams)
        except Error:
            return False  # ffmpeg errors when reading bad files
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())
            try:
                # Probing failed for a non-ffmpeg reason: fall back to a
                # minimum-size heuristic.
                return os.path.getsize(self.filename) > 20_000
            except Exception as size_error:
                # Best-effort by design: keep the permissive default below,
                # but no longer swallow KeyboardInterrupt/SystemExit.
                logger.debug(f"could not read file size for {self.filename}: {size_error}")
        return True