Merge branch 'main' into wrong_steps

pull/263/head
Patrick Robertson 2025-03-20 18:44:19 +04:00
commit 034857075d
4 zmienionych plików z 13 dodań i 10 usunięć

Wyświetl plik

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[project]
name = "auto-archiver"
version = "0.13.6"
version = "0.13.7"
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
requires-python = ">=3.10,<3.13"

Wyświetl plik

@@ -13,6 +13,7 @@ from loguru import logger
from auto_archiver.core.extractor import Extractor
from auto_archiver.core import Metadata, Media
from auto_archiver.utils import get_datetime_from_str
from .dropin import GenericDropin
@@ -202,7 +203,7 @@ class GenericExtractor(Extractor):
if not result.get("url"):
result.set_url(url)
if "description" in video_data and not result.get_content():
if "description" in video_data and not result.get("content"):
result.set_content(video_data["description"])
# extract comments if enabled
if self.comments:
@@ -219,11 +220,14 @@ class GenericExtractor(Extractor):
)
# then add the common metadata
if timestamp := video_data.pop("timestamp", None) and not result.get("timestamp"):
timestamp = video_data.pop("timestamp", None)
if timestamp and not result.get("timestamp"):
timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
result.set_timestamp(timestamp)
if upload_date := video_data.pop("upload_date", None) and not result.get("upload_date"):
upload_date = datetime.datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
upload_date = video_data.pop("upload_date", None)
if upload_date and not result.get("upload_date"):
upload_date = get_datetime_from_str(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
result.set("upload_date", upload_date)
# then clean away any keys we don't want

Wyświetl plik

@@ -1,13 +1,12 @@
import re
import mimetypes
import json
from datetime import datetime
from loguru import logger
from slugify import slugify
from auto_archiver.core.metadata import Metadata, Media
from auto_archiver.utils import url as UrlUtil
from auto_archiver.utils import url as UrlUtil, get_datetime_from_str
from auto_archiver.core.extractor import Extractor
from .dropin import GenericDropin, InfoExtractor
@@ -38,7 +37,7 @@ class Twitter(GenericDropin):
try:
if not tweet.get("user") or not tweet.get("created_at"):
raise ValueError("Error retreiving post. Are you sure it exists?")
timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
timestamp = get_datetime_from_str(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
except (ValueError, KeyError) as ex:
logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
return False

Wyświetl plik

@@ -2,7 +2,6 @@ import json
import re
import mimetypes
import requests
from datetime import datetime
from loguru import logger
from pytwitter import Api
@@ -10,6 +9,7 @@ from slugify import slugify
from auto_archiver.core import Extractor
from auto_archiver.core import Metadata, Media
from auto_archiver.utils import get_datetime_from_str
class TwitterApiExtractor(Extractor):
@@ -91,7 +91,7 @@ class TwitterApiExtractor(Extractor):
result = Metadata()
result.set_title(tweet.data.text)
result.set_timestamp(datetime.strptime(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
result.set_timestamp(get_datetime_from_str(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
urls = []
if tweet.includes: