Mirror of https://github.com/bellingcat/auto-archiver
Merge branch 'main' into wrong_steps
commit 034857075d
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [project]
 name = "auto-archiver"
-version = "0.13.6"
+version = "0.13.7"
 description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
 
 requires-python = ">=3.10,<3.13"
@@ -13,6 +13,7 @@ from loguru import logger
 
 from auto_archiver.core.extractor import Extractor
 from auto_archiver.core import Metadata, Media
+from auto_archiver.utils import get_datetime_from_str
 from .dropin import GenericDropin
 
 
@@ -202,7 +203,7 @@ class GenericExtractor(Extractor):
         if not result.get("url"):
             result.set_url(url)
 
-        if "description" in video_data and not result.get_content():
+        if "description" in video_data and not result.get("content"):
             result.set_content(video_data["description"])
         # extract comments if enabled
         if self.comments:
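The hunk above replaces the `get_content()` call with the generic `get("content")` accessor, matching the `get("url")` and `get("timestamp")` checks used elsewhere in this diff. A minimal sketch of that accessor pattern, assuming a dict-backed store (hypothetical; the real Metadata class in auto_archiver.core carries much more behavior):

    class Metadata:
        """Hypothetical dict-backed stand-in for auto_archiver.core.Metadata."""

        def __init__(self):
            self.metadata = {}

        def get(self, key, default=None):
            # generic accessor seen throughout this diff: get("url"),
            # get("content"), get("timestamp"), get("upload_date")
            return self.metadata.get(key, default)

        def set(self, key, value):
            self.metadata[key] = value
            return self

        # typed setters seen in the diff wrap the generic set()
        def set_content(self, content):
            return self.set("content", content)

        def set_url(self, url):
            return self.set("url", url)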
@@ -219,11 +220,14 @@ class GenericExtractor(Extractor):
         )
 
         # then add the common metadata
-        if timestamp := video_data.pop("timestamp", None) and not result.get("timestamp"):
+        timestamp = video_data.pop("timestamp", None)
+        if timestamp and not result.get("timestamp"):
             timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
             result.set_timestamp(timestamp)
-        if upload_date := video_data.pop("upload_date", None) and not result.get("upload_date"):
-            upload_date = datetime.datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
+        upload_date = video_data.pop("upload_date", None)
+        if upload_date and not result.get("upload_date"):
+            upload_date = get_datetime_from_str(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
             result.set("upload_date", upload_date)
 
         # then clean away any keys we don't want
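This hunk is the substantive bug fix in the commit: `:=` binds more loosely than `and`, so in the old code `timestamp` (and likewise `upload_date`) received the boolean value of the whole expression rather than the popped value. A minimal repro, with plain dicts standing in for the yt-dlp info dict and the Metadata store:

    import datetime

    video_data = {"timestamp": 1700000000}  # stand-in for the yt-dlp info dict
    result = {}                             # stand-in for the Metadata store

    # Old form: the walrus captures the value of the whole "and" expression,
    # so timestamp ends up as the boolean True, not 1700000000.
    if timestamp := video_data.pop("timestamp", None) and not result.get("timestamp"):
        ts = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
        print(ts.isoformat())  # 1970-01-01T00:00:01+00:00 (epoch + int(True))

    # New form: assign first, then test, so the real value survives.
    video_data = {"timestamp": 1700000000}
    timestamp = video_data.pop("timestamp", None)
    if timestamp and not result.get("timestamp"):
        ts = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
        print(ts.isoformat())  # 2023-11-14T22:13:20+00:00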
@@ -1,13 +1,12 @@
 import re
 import mimetypes
 import json
-from datetime import datetime
 
 from loguru import logger
 from slugify import slugify
 
 from auto_archiver.core.metadata import Metadata, Media
-from auto_archiver.utils import url as UrlUtil
+from auto_archiver.utils import url as UrlUtil, get_datetime_from_str
 from auto_archiver.core.extractor import Extractor
 
 from .dropin import GenericDropin, InfoExtractor
@@ -38,7 +37,7 @@ class Twitter(GenericDropin):
         try:
             if not tweet.get("user") or not tweet.get("created_at"):
                 raise ValueError("Error retrieving post. Are you sure it exists?")
-            timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
+            timestamp = get_datetime_from_str(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
         except (ValueError, KeyError) as ex:
             logger.warning(f"Unable to parse tweet: {str(ex)}\nRetrieved tweet data: {tweet}")
             return False
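`get_datetime_from_str` itself is not part of this diff; it lives in `auto_archiver.utils`. A plausible minimal sketch of such a helper, assuming it wraps `datetime.strptime` and logs rather than raises on unparseable input (the details here are guesses, not the repo's actual implementation):

    import datetime
    from loguru import logger

    def get_datetime_from_str(dt_str: str, fmt: str) -> datetime.datetime | None:
        """Parse dt_str with the given strptime format; return None on failure."""
        try:
            return datetime.datetime.strptime(dt_str, fmt)
        except ValueError as e:
            # assumed behavior: log and degrade instead of crashing the archive run
            logger.error(f"Unable to parse datetime from '{dt_str}': {e}")
            return None

Centralizing the parsing this way means every extractor fails the same way on a malformed date, instead of each call site wrapping `strptime` in its own try/except.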
@@ -2,7 +2,6 @@ import json
 import re
 import mimetypes
 import requests
-from datetime import datetime
 
 from loguru import logger
 from pytwitter import Api
@@ -10,6 +9,7 @@ from slugify import slugify
 
 from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media
+from auto_archiver.utils import get_datetime_from_str
 
 
 class TwitterApiExtractor(Extractor):
@@ -91,7 +91,7 @@ class TwitterApiExtractor(Extractor):
 
         result = Metadata()
         result.set_title(tweet.data.text)
-        result.set_timestamp(datetime.strptime(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
+        result.set_timestamp(get_datetime_from_str(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ"))
 
         urls = []
         if tweet.includes:
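One subtlety in this last hunk's format string: `strptime` treats the trailing `Z` as a literal character, not a UTC marker, so the parsed datetime comes back naive; routing every call through one helper gives a single place to normalize that. A quick check:

    import datetime

    dt = datetime.datetime.strptime("2024-01-01T00:00:00.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")
    print(dt.tzinfo)  # None -- the "Z" matched literally, no timezone attached
    print(dt.replace(tzinfo=datetime.timezone.utc).isoformat())
    # 2024-01-01T00:00:00+00:00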