Merge pull request #257 from bellingcat/standardise_parsedates

Standardise parse dates to get_datetime_from_str
pull/259/head
Patrick Robertson 2025-03-18 12:17:51 +00:00 committed by GitHub
commit b64826dc16
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 6 additions and 6 deletions
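All three call sites now go through a single get_datetime_from_str helper in auto_archiver.utils. Its implementation is not part of this diff; the sketch below is only an assumed shape consistent with how the call sites use it (a format-string wrapper around datetime.strptime), not the repository's actual code.

from datetime import datetime

from loguru import logger


def get_datetime_from_str(dt_str: str, fmt: str) -> datetime | None:
    # Assumed shape only: the diff shows just the (value, format) call signature.
    try:
        return datetime.strptime(dt_str, fmt)
    except ValueError as e:
        # Hypothetical error handling; the real helper may raise instead of returning None.
        logger.error(f"Unable to parse datetime from '{dt_str}' with format '{fmt}': {e}")
        return None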

View file

@@ -13,6 +13,7 @@ from loguru import logger
 from auto_archiver.core.extractor import Extractor
 from auto_archiver.core import Metadata, Media
+from auto_archiver.utils import get_datetime_from_str
 from .dropin import GenericDropin
@@ -223,7 +224,7 @@ class GenericExtractor(Extractor):
 timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
 result.set_timestamp(timestamp)
 if upload_date := video_data.pop("upload_date", None) and not result.get("upload_date"):
-upload_date = datetime.datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
+upload_date = get_datetime_from_str(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
 result.set("upload_date", upload_date)
 # then clean away any keys we don't want
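The generic extractor change keeps the existing semantics: a "%Y%m%d" string from video_data parses to a naive datetime, so the explicit .replace(tzinfo=datetime.timezone.utc) is still required. A small illustration with a made-up value ("20250318" is not from the diff):

import datetime

# an upload_date string like "20250318" carries no time or offset information
upload_date = datetime.datetime.strptime("20250318", "%Y%m%d")
assert upload_date.tzinfo is None  # naive until a timezone is attached

# the extractor attaches UTC explicitly before storing the value
upload_date = upload_date.replace(tzinfo=datetime.timezone.utc)
print(upload_date.isoformat())  # 2025-03-18T00:00:00+00:00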

View file

@@ -1,13 +1,12 @@
 import re
 import mimetypes
 import json
-from datetime import datetime
 from loguru import logger
 from slugify import slugify
 from auto_archiver.core.metadata import Metadata, Media
-from auto_archiver.utils import url as UrlUtil
+from auto_archiver.utils import url as UrlUtil, get_datetime_from_str
 from auto_archiver.core.extractor import Extractor
 from .dropin import GenericDropin, InfoExtractor
@@ -38,7 +37,7 @@ class Twitter(GenericDropin):
 try:
 if not tweet.get("user") or not tweet.get("created_at"):
 raise ValueError("Error retreiving post. Are you sure it exists?")
-timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
+timestamp = get_datetime_from_str(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
 except (ValueError, KeyError) as ex:
 logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
 return False
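Twitter's legacy created_at strings include an explicit offset, so the "%a %b %d %H:%M:%S %z %Y" format already yields a timezone-aware value and no extra tzinfo handling is needed. A quick illustration with a made-up timestamp:

from datetime import datetime

created_at = "Tue Mar 18 12:17:51 +0000 2025"  # illustrative value, not from the diff
timestamp = datetime.strptime(created_at, "%a %b %d %H:%M:%S %z %Y")

# %z sets the UTC offset, so the result is timezone-aware
assert timestamp.utcoffset() is not None
print(timestamp.isoformat())  # 2025-03-18T12:17:51+00:00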

View file

@@ -2,7 +2,6 @@ import json
 import re
 import mimetypes
 import requests
-from datetime import datetime
 from loguru import logger
 from pytwitter import Api
@@ -10,6 +9,7 @@ from slugify import slugify
 from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media
+from auto_archiver.utils import get_datetime_from_str
 class TwitterApiExtractor(Extractor):
@@ -91,7 +91,7 @@ class TwitterApiExtractor(Extractor):
 result = Metadata()
 result.set_title(tweet.data.text)
-result.set_timestamp(datetime.strptime(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
+result.set_timestamp(get_datetime_from_str(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
 urls = []
 if tweet.includes:
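In the API extractor's "%Y-%m-%dT%H:%M:%S.%fZ" format the trailing Z is matched as a literal character rather than parsed as an offset, so strptime (and presumably the helper wrapping it) returns a naive datetime here. A short illustration with a made-up value:

from datetime import datetime, timezone

created_at = "2025-03-18T12:17:51.000Z"  # illustrative Twitter API v2 style value
ts = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S.%fZ")

# the literal "Z" does not set an offset, so the parsed value is naive
assert ts.tzinfo is None

# attach UTC explicitly if an aware timestamp is needed downstream
print(ts.replace(tzinfo=timezone.utc).isoformat())  # 2025-03-18T12:17:51+00:00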