kopia lustrzana https://github.com/bellingcat/auto-archiver
Merge pull request #257 from bellingcat/standardise_parsedates
Standardise parse dates to get_datetime_from_str (pull/259/head)
commit
b64826dc16
|
@ -13,6 +13,7 @@ from loguru import logger
|
||||||
|
|
||||||
from auto_archiver.core.extractor import Extractor
|
from auto_archiver.core.extractor import Extractor
|
||||||
from auto_archiver.core import Metadata, Media
|
from auto_archiver.core import Metadata, Media
|
||||||
|
from auto_archiver.utils import get_datetime_from_str
|
||||||
from .dropin import GenericDropin
|
from .dropin import GenericDropin
|
||||||
|
|
||||||
|
|
||||||
|
@ -223,7 +224,7 @@ class GenericExtractor(Extractor):
|
||||||
timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
|
timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
|
||||||
result.set_timestamp(timestamp)
|
result.set_timestamp(timestamp)
|
||||||
if upload_date := video_data.pop("upload_date", None) and not result.get("upload_date"):
|
if upload_date := video_data.pop("upload_date", None) and not result.get("upload_date"):
|
||||||
upload_date = datetime.datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
|
upload_date = get_datetime_from_str(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
|
||||||
result.set("upload_date", upload_date)
|
result.set("upload_date", upload_date)
|
||||||
|
|
||||||
# then clean away any keys we don't want
|
# then clean away any keys we don't want
|
||||||
|
|
|
@ -1,13 +1,12 @@
|
||||||
import re
|
import re
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import json
|
import json
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
|
||||||
from auto_archiver.core.metadata import Metadata, Media
|
from auto_archiver.core.metadata import Metadata, Media
|
||||||
from auto_archiver.utils import url as UrlUtil
|
from auto_archiver.utils import url as UrlUtil, get_datetime_from_str
|
||||||
from auto_archiver.core.extractor import Extractor
|
from auto_archiver.core.extractor import Extractor
|
||||||
|
|
||||||
from .dropin import GenericDropin, InfoExtractor
|
from .dropin import GenericDropin, InfoExtractor
|
||||||
|
@ -38,7 +37,7 @@ class Twitter(GenericDropin):
|
||||||
try:
|
try:
|
||||||
if not tweet.get("user") or not tweet.get("created_at"):
|
if not tweet.get("user") or not tweet.get("created_at"):
|
||||||
raise ValueError("Error retreiving post. Are you sure it exists?")
|
raise ValueError("Error retreiving post. Are you sure it exists?")
|
||||||
timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
timestamp = get_datetime_from_str(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
||||||
except (ValueError, KeyError) as ex:
|
except (ValueError, KeyError) as ex:
|
||||||
logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
|
logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -2,7 +2,6 @@ import json
|
||||||
import re
|
import re
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import requests
|
import requests
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from pytwitter import Api
|
from pytwitter import Api
|
||||||
|
@ -10,6 +9,7 @@ from slugify import slugify
|
||||||
|
|
||||||
from auto_archiver.core import Extractor
|
from auto_archiver.core import Extractor
|
||||||
from auto_archiver.core import Metadata, Media
|
from auto_archiver.core import Metadata, Media
|
||||||
|
from auto_archiver.utils import get_datetime_from_str
|
||||||
|
|
||||||
|
|
||||||
class TwitterApiExtractor(Extractor):
|
class TwitterApiExtractor(Extractor):
|
||||||
|
@ -91,7 +91,7 @@ class TwitterApiExtractor(Extractor):
|
||||||
|
|
||||||
result = Metadata()
|
result = Metadata()
|
||||||
result.set_title(tweet.data.text)
|
result.set_title(tweet.data.text)
|
||||||
result.set_timestamp(datetime.strptime(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
|
result.set_timestamp(get_datetime_from_str(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
if tweet.includes:
|
if tweet.includes:
|
||||||
|
|
Ładowanie…
Reference in New Issue