Add Atlos feeder

pull/137/head
R. Miles McCain 2024-03-30 20:51:32 +01:00
rodzic f4827770e6
commit d063a9cd72
3 zmienionych plików z 48 dodań i 2 usunięć

3
.gitignore vendored
Wyświetl plik

@ -27,4 +27,5 @@ instaloader.session
orchestration.yaml
auto_archiver.egg-info*
logs*
*.csv
*.csv
archived/

Wyświetl plik

@ -1,3 +1,4 @@
from.feeder import Feeder
from .gsheet_feeder import GsheetsFeeder
from .cli_feeder import CLIFeeder
from .cli_feeder import CLIFeeder
from .atlos_feeder import AtlosFeeder

Wyświetl plik

@ -0,0 +1,44 @@
from loguru import logger
import requests
from . import Feeder
from ..core import Metadata, ArchivingContext
from ..utils import get_atlos_config_options
class AtlosFeeder(Feeder):
    """Feeder that yields the source-material URLs listed by an Atlos instance.

    Reads ``self.atlos_url`` and ``self.api_token``; these are presumably
    populated by the base-class constructor from the options returned by
    ``configs()`` -- NOTE(review): confirm against the Step/Feeder base class.
    """

    name = "atlos_feeder"

    def __init__(self, config: dict) -> None:
        """Initialise the feeder from ``config`` and validate the API token.

        Raises:
            Exception: if ``self.api_token`` is not a string after the base
                class has been initialised.
        """
        # without this STEP.__init__ is not called
        super().__init__(config)
        if not isinstance(self.api_token, str):
            raise Exception("Atlos Feeder did not receive an Atlos API token")

    @staticmethod
    def configs() -> dict:
        """Return the configuration options shared by the Atlos steps."""
        return get_atlos_config_options()

    def __iter__(self) -> Metadata:
        """Yield one ``Metadata`` per source-material item that has a URL.

        Pages through ``{atlos_url}/api/v2/source_material`` using the
        ``next`` cursor returned by each response. Every yielded item has
        its URL set to the item's ``source_url`` and carries the item's
        ``id`` under the ``atlos_id`` key. Items whose ``source_url`` is
        ``None`` or empty are skipped.

        Raises:
            requests.HTTPError: if the API answers with an error status.
        """
        # Get all the urls from the Atlos API
        count = 0
        cursor = None
        while True:
            response = requests.get(
                f"{self.atlos_url}/api/v2/source_material",
                headers={"Authorization": f"Bearer {self.api_token}"},
                params={"cursor": cursor},
            )
            # Check the status before decoding: an error response may not be
            # JSON, and a decode failure would hide the real HTTP error.
            response.raise_for_status()
            data = response.json()
            cursor = data["next"]
            results = data["results"]

            for item in results:
                if item["source_url"] not in [None, ""]:
                    yield Metadata().set_url(item["source_url"]).set("atlos_id", item["id"])
                    count += 1

            # Stop on an empty page, or when there is no further cursor:
            # requesting again with cursor=None would omit the parameter and
            # restart from the first page, looping forever.
            if not results or cursor is None:
                break

        logger.success(f"Processed {count} URL(s)")