Mirror of https://github.com/bellingcat/auto-archiver
commit 034857075d
Merge branch 'main' into wrong_steps
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [project]
 name = "auto-archiver"
-version = "0.13.6"
+version = "0.13.7"
 description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."

 requires-python = ">=3.10,<3.13"
@@ -13,6 +13,7 @@ from loguru import logger

 from auto_archiver.core.extractor import Extractor
 from auto_archiver.core import Metadata, Media
+from auto_archiver.utils import get_datetime_from_str
 from .dropin import GenericDropin

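The newly imported get_datetime_from_str helper is the thread running through this whole commit: the direct datetime.strptime calls in the hunks below are routed through it. Its implementation is not part of this diff; a minimal sketch of what such a helper plausibly does, with the error handling assumed:

# Hypothetical sketch only; the real auto_archiver.utils implementation is
# not shown in this diff and may differ.
from datetime import datetime
from typing import Optional

from loguru import logger


def get_datetime_from_str(dt_str: str, fmt: str) -> Optional[datetime]:
    # Wrap strptime so a single malformed date field logs an error instead
    # of aborting the whole archiving run.
    try:
        return datetime.strptime(dt_str, fmt)
    except ValueError as e:
        logger.error(f"Unable to parse datetime {dt_str!r} with format {fmt!r}: {e}")
        return None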
@@ -202,7 +203,7 @@ class GenericExtractor(Extractor):
         if not result.get("url"):
             result.set_url(url)

-        if "description" in video_data and not result.get_content():
+        if "description" in video_data and not result.get("content"):
             result.set_content(video_data["description"])
         # extract comments if enabled
         if self.comments:
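The switch from result.get_content() to result.get("content") suggests Metadata keeps its fields in a dict behind a generic get(key) accessor rather than exposing a getter per field. A simplified, assumed illustration of that shape:

# Simplified, assumed shape of the Metadata accessors involved here; the
# real class carries many more fields and helpers.
class Metadata:
    def __init__(self):
        self.metadata = {}

    def get(self, key, default=None):
        # Generic accessor: works for any stored field, including "content".
        return self.metadata.get(key, default)

    def set_content(self, content):
        self.metadata["content"] = content
        return self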
@@ -219,11 +220,14 @@ class GenericExtractor(Extractor):
             )

         # then add the common metadata
-        if timestamp := video_data.pop("timestamp", None) and not result.get("timestamp"):
+        timestamp = video_data.pop("timestamp", None)
+        if timestamp and not result.get("timestamp"):
             timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
             result.set_timestamp(timestamp)
-        if upload_date := video_data.pop("upload_date", None) and not result.get("upload_date"):
-            upload_date = datetime.datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
+
+        upload_date = video_data.pop("upload_date", None)
+        if upload_date and not result.get("upload_date"):
+            upload_date = get_datetime_from_str(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
             result.set("upload_date", upload_date)

         # then clean away any keys we don't want
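The hunk above fixes a genuine operator-precedence bug: in "if x := f() and cond:", the walrus operator binds the entire "f() and cond" expression, so x receives a boolean rather than the value returned by f(). A standalone demonstration, with plain dicts standing in for the real video_data and result objects:

# Standalone demo of the precedence bug this hunk removes.
video_data = {"timestamp": 1700000000}
result = {}

# Buggy form: ":=" captures (pop(...) and not get(...)), i.e. a boolean.
if timestamp := video_data.pop("timestamp", None) and not result.get("timestamp"):
    print(timestamp)  # -> True, not 1700000000

# Fixed form, as in the commit: pop first, then test.
video_data = {"timestamp": 1700000000}
timestamp = video_data.pop("timestamp", None)
if timestamp and not result.get("timestamp"):
    print(timestamp)  # -> 1700000000

Downstream the bug was quiet rather than loud: datetime.fromtimestamp(True, tz=...) treats True as the integer 1 and yields 1970-01-01T00:00:01+00:00 instead of raising, which is why it was easy to miss.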
@@ -1,13 +1,12 @@
 import re
 import mimetypes
 import json
-from datetime import datetime

 from loguru import logger
 from slugify import slugify

 from auto_archiver.core.metadata import Metadata, Media
-from auto_archiver.utils import url as UrlUtil
+from auto_archiver.utils import url as UrlUtil, get_datetime_from_str
 from auto_archiver.core.extractor import Extractor

 from .dropin import GenericDropin, InfoExtractor
@@ -38,7 +37,7 @@ class Twitter(GenericDropin):
         try:
             if not tweet.get("user") or not tweet.get("created_at"):
                 raise ValueError("Error retrieving post. Are you sure it exists?")
-            timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
+            timestamp = get_datetime_from_str(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
         except (ValueError, KeyError) as ex:
             logger.warning(f"Unable to parse tweet: {str(ex)}\nRetrieved tweet data: {tweet}")
             return False
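For reference, the format string in this hunk matches the classic Twitter created_at layout. A quick standalone check, using the sample value Twitter's API docs traditionally show; get_datetime_from_str is assumed to parse with the same semantics as strptime on the happy path:

# Standalone sanity check of the format string.
from datetime import datetime

created_at = "Wed Oct 10 20:19:24 +0000 2018"
ts = datetime.strptime(created_at, "%a %b %d %H:%M:%S %z %Y")
print(ts.isoformat())  # 2018-10-10T20:19:24+00:00

Because "%z" appears in the format, the parsed value is already timezone-aware, so no separate replace(tzinfo=...) step is needed in this code path.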
|
@ -2,7 +2,6 @@ import json
|
|||
import re
|
||||
import mimetypes
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
from loguru import logger
|
||||
from pytwitter import Api
|
||||
|
@@ -10,6 +9,7 @@ from slugify import slugify

 from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media
+from auto_archiver.utils import get_datetime_from_str


 class TwitterApiExtractor(Extractor):
@@ -91,7 +91,7 @@ class TwitterApiExtractor(Extractor):

         result = Metadata()
         result.set_title(tweet.data.text)
-        result.set_timestamp(datetime.strptime(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
+        result.set_timestamp(get_datetime_from_str(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))

         urls = []
         if tweet.includes:
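One subtlety of the format used in this last hunk: the trailing "Z" is matched as literal text, not as a timezone, so strptime-style parsing returns a naive datetime. A standalone illustration with a made-up sample value:

# Standalone illustration; the created_at sample is invented.
from datetime import datetime, timezone

created_at = "2023-01-15T12:30:45.000Z"
ts = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S.%fZ")
print(ts.tzinfo)  # None -- "Z" was consumed as a literal character
print(ts.replace(tzinfo=timezone.utc).isoformat())  # 2023-01-15T12:30:45+00:00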