Update manifests and modules

pull/183/head
erinhmclark 2025-01-24 12:58:16 +00:00
rodzic ba4b330881
commit aa7ca93a43
95 zmienionych plików z 172 dodań i 115 usunięć

Wyświetl plik

@ -0,0 +1 @@
# Re-export the module's class from the package. The import is explicitly
# package-relative: a bare `from api_db import ...` is an absolute import in
# Python 3 and only resolves if this directory itself is on sys.path.
from .api_db import AAApiDb

Wyświetl plik

@ -15,7 +15,9 @@
"group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
"allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
"store_results": {"default": True, "help": "when set, will send the results to the API database."},
"tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
"tags": {"default": [], "help": "what tags to add to the archived URL",
"type": lambda val: set(val.split(",")),
}
},
"description": """
Provides integration with the Auto-Archiver API for querying and storing archival data.

Wyświetl plik

@ -2,7 +2,7 @@ from typing import Union
import requests, os
from loguru import logger
from auto_archiver.base_modules import Database
from auto_archiver.base_processors import Database
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .atlos import AtlosStorage

Wyświetl plik

@ -15,12 +15,12 @@
"api_token": {
"default": None,
"help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
"cli_set": lambda cli_val, _: cli_val,
"type": str,
},
"atlos_url": {
"default": "https://platform.atlos.org",
"help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
"cli_set": lambda cli_val, _: cli_val,
"type": str,
},
},
"description": """

Wyświetl plik

@ -5,7 +5,7 @@ import requests
import hashlib
from auto_archiver.core import Media, Metadata
from auto_archiver.base_modules import Storage
from auto_archiver.base_processors import Storage
from auto_archiver.utils import get_atlos_config_options

Wyświetl plik

@ -0,0 +1 @@
# Re-export the module's class from the package. The import is explicitly
# package-relative: a bare `from atlos_db import ...` is an absolute import in
# Python 3 and only resolves if this directory itself is on sys.path.
from .atlos_db import AtlosDb

Wyświetl plik

@ -11,12 +11,11 @@
"api_token": {
"default": None,
"help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
"cli_set": lambda cli_val, _: cli_val
},
"atlos_url": {
"default": "https://platform.atlos.org",
"help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
"cli_set": lambda cli_val, _: cli_val
"type": str
},
},
"description": """

Wyświetl plik

@ -6,7 +6,7 @@ from csv import DictWriter
from dataclasses import asdict
import requests
from auto_archiver.base_modules import Database
from auto_archiver.base_processors import Database
from auto_archiver.core import Metadata
from auto_archiver.utils import get_atlos_config_options

Wyświetl plik

@ -3,11 +3,11 @@ def get_atlos_config_options():
"api_token": {
"default": None,
"help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
"cli_set": lambda cli_val, _: cli_val
"type": str
},
"atlos_url": {
"default": "https://platform.atlos.org",
"help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
"cli_set": lambda cli_val, _: cli_val
"type": str
},
}

Wyświetl plik

@ -0,0 +1 @@
from .atlos_feeder import AtlosFeeder

Wyświetl plik

@ -9,12 +9,12 @@
"api_token": {
"default": None,
"help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
"cli_set": lambda cli_val, _: cli_val
"type": str
},
"atlos_url": {
"default": "https://platform.atlos.org",
"help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
"cli_set": lambda cli_val, _: cli_val
"type": str
},
},
"description": """

Wyświetl plik

@ -1,7 +1,7 @@
from loguru import logger
import requests
from auto_archiver.base_modules import Feeder
from auto_archiver.base_processors import Feeder
from auto_archiver.core import Metadata, ArchivingContext
from auto_archiver.utils import get_atlos_config_options

Wyświetl plik

@ -0,0 +1 @@
from .cli_feeder import CLIFeeder

Wyświetl plik

@ -9,7 +9,7 @@
"urls": {
"default": None,
"help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
"type": lambda val: set(val.split(",")),
},
},
"description": """

Wyświetl plik

@ -1,6 +1,6 @@
from loguru import logger
from auto_archiver.base_modules import Feeder
from auto_archiver.base_processors import Feeder
from auto_archiver.core import Metadata, ArchivingContext

Wyświetl plik

@ -0,0 +1 @@
from .console_db import ConsoleDb

Wyświetl plik

@ -1,6 +1,6 @@
from loguru import logger
from auto_archiver.base_modules import Database
from auto_archiver.base_processors import Database
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .csv_db import CSVDb

Wyświetl plik

@ -3,7 +3,7 @@ from loguru import logger
from csv import DictWriter
from dataclasses import asdict
from auto_archiver.base_modules import Database
from auto_archiver.base_processors import Database
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .csv_feeder import CSVFeeder

Wyświetl plik

@ -11,7 +11,7 @@
"default": None,
"help": "Path to the input file(s) to read the URLs from, comma separated. \
Input files should be formatted with one URL per line",
"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
"type": lambda val: set(val.split(",")),
},
"column": {
"default": None,

Wyświetl plik

@ -1,7 +1,7 @@
from loguru import logger
import csv
from auto_archiver.base_modules import Feeder
from auto_archiver.base_processors import Feeder
from auto_archiver.core import Metadata, ArchivingContext
from auto_archiver.utils import url_or_none
@ -17,7 +17,7 @@ class CSVFeeder(Feeder):
"default": None,
"help": "Path to the input file(s) to read the URLs from, comma separated. \
Input files should be formatted with one URL per line",
"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
"type": lambda val: set(val.split(",")),
},
"column": {
"default": None,

Wyświetl plik

@ -0,0 +1 @@
from .gdrive_storage import GDriveStorage

Wyświetl plik

@ -1,4 +1,4 @@
m = {
{
"name": "Google Drive Storage",
"type": ["storage"],
"requires_setup": True,
@ -12,15 +12,16 @@ m = {
],
},
"configs": {
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
},
# TODO: get base storage configs
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
"choices": ["flat", "url", "random"],
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"choices": ["random", "static"],
},
"root_folder_id": {"default": None, "help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'"},
"oauth_token": {"default": None, "help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account."},
"service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path, same as used for Google Sheets. NOTE: storage used will count towards the developer account."},

Wyświetl plik

@ -10,7 +10,7 @@ from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from auto_archiver.core import Media
from auto_archiver.base_modules import Storage
from auto_archiver.base_processors import Storage
class GDriveStorage(Storage):

Wyświetl plik

@ -1,6 +1,6 @@
from loguru import logger
from auto_archiver.base_modules.extractor import Extractor
from auto_archiver.base_processors.extractor import Extractor
from auto_archiver.core.metadata import Metadata, Media
from .dropin import GenericDropin, InfoExtractor

Wyświetl plik

@ -1,6 +1,6 @@
from yt_dlp.extractor.common import InfoExtractor
from auto_archiver.core.metadata import Metadata
from auto_archiver.base_modules.extractor import Extractor
from auto_archiver.base_processors.extractor import Extractor
class GenericDropin:
"""Base class for dropins for the generic extractor.

Wyświetl plik

@ -5,7 +5,7 @@ from yt_dlp.extractor.common import InfoExtractor
from loguru import logger
from auto_archiver.base_modules.extractor import Extractor
from auto_archiver.base_processors.extractor import Extractor
from ...core import Metadata, Media, ArchivingContext
class GenericExtractor(Extractor):

Wyświetl plik

@ -2,7 +2,7 @@ from typing import Type
from auto_archiver.utils import traverse_obj
from auto_archiver.core.metadata import Metadata, Media
from auto_archiver.base_modules.extractor import Extractor
from auto_archiver.base_processors.extractor import Extractor
from yt_dlp.extractor.common import InfoExtractor
from dateutil.parser import parse as parse_dt

Wyświetl plik

@ -6,7 +6,7 @@ from slugify import slugify
from auto_archiver.core.metadata import Metadata, Media
from auto_archiver.utils import UrlUtil
from auto_archiver.base_modules.extractor import Extractor
from auto_archiver.base_processors.extractor import Extractor
from .dropin import GenericDropin, InfoExtractor

Wyświetl plik

@ -0,0 +1 @@
from .gsheet_db import GsheetsDb

Wyświetl plik

@ -0,0 +1,38 @@
# Manifest for the Google Sheets database module: display name, module type,
# dependencies, configurable options and a human-readable description.
{
    "name": "Google Sheets Database",
    "type": ["database"],
    "requires_setup": True,
    "external_dependencies": {
        "python": ["loguru", "gspread", "python-slugify"],
    },
    "configs": {
        # Both worksheet filters take a comma-separated string and turn it
        # into a set of worksheet names.
        "allow_worksheets": {
            "default": set(),
            "help": "(CSV) only worksheets whose name is included in allow are included (overrides block_worksheets), leave empty so all are allowed",
            "type": lambda val: set(val.split(",")),
        },
        "block_worksheets": {
            "default": set(),
            "help": "(CSV) explicitly block some worksheets from being processed",
            "type": lambda val: set(val.split(",")),
        },
        "use_sheet_names_in_stored_paths": {
            "default": True,
            "help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
        },
    },
    "description": """
GsheetsDb:
Handles integration with Google Sheets for tracking archival tasks.
### Features
- Updates a Google Sheet with the status of the archived URLs, including in progress, success or failure, and method used.
- Saves metadata such as title, text, timestamp, hashes, screenshots, and media URLs to designated columns.
- Formats media-specific metadata, such as thumbnails and PDQ hashes for the sheet.
- Skips redundant updates for empty or invalid data fields.
### Notes
- Currently works only with metadata provided by GsheetsFeeder.
- Requires configuration of a linked Google Sheet and appropriate API credentials.
""",
}

Wyświetl plik

@ -5,7 +5,7 @@ from urllib.parse import quote
from loguru import logger
from auto_archiver.base_modules import Database
from auto_archiver.base_processors import Database
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.utils import GWorksheet
@ -105,5 +105,4 @@ class GsheetsDb(Database):
elif self.sheet_id:
print(self.sheet_id)
return gw, row

Wyświetl plik

@ -0,0 +1 @@
from .gsheet_feeder import GsheetsFeeder

Wyświetl plik

@ -9,12 +9,12 @@
"allow_worksheets": {
"default": set(),
"help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
"cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
"type": lambda val: set(val.split(",")),
},
"block_worksheets": {
"default": set(),
"help": "(CSV) explicitly block some worksheets from being processed",
"cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
"type": lambda val: set(val.split(",")),
},
"use_sheet_names_in_stored_paths": {
"default": True,
@ -22,11 +22,7 @@
}
},
"description": """
Google Sheets Module.
Handles feeding from a google sheet as well as an optional write back to the sheet.
## GsheetsFeeder
GsheetsFeeder
A Google Sheets-based feeder for the Auto Archiver.
This reads data from Google Sheets and filters rows based on user-defined rules.
@ -41,18 +37,5 @@
### Notes
- Requires a Google Service Account JSON file for authentication. Suggested location is `secrets/gsheets_service_account.json`.
- Create the sheet using the template provided in the docs.
## GsheetsDatabase:
Handles integration with Google Sheets for tracking archival tasks.
### Features
- Updates a Google Sheet with the status of the archived URLs, including in progress, success or failure, and method used.
- Saves metadata such as title, text, timestamp, hashes, screenshots, and media URLs to designated columns.
- Formats media-specific metadata, such as thumbnails and PDQ hashes for the sheet.
- Skips redundant updates for empty or invalid data fields.
### Notes
- Currently works only with metadata provided by GsheetFeeder.
- Requires configuration of a linked Google Sheet and appropriate API credentials.
"""
}

Wyświetl plik

@ -13,7 +13,7 @@ import gspread, os
from loguru import logger
from slugify import slugify
from auto_archiver.base_modules import Feeder
from auto_archiver.base_processors import Feeder
from auto_archiver.core import Metadata, ArchivingContext
from auto_archiver.utils import Gsheets, GWorksheet

Wyświetl plik

@ -1 +1 @@
from hash_enricher import HashEnricher
from .hash_enricher import HashEnricher

Wyświetl plik

@ -10,7 +10,7 @@ making it suitable for handling large files efficiently.
import hashlib
from loguru import logger
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Metadata, ArchivingContext

Wyświetl plik

@ -0,0 +1 @@
from .html_formatter import HtmlFormatter

Wyświetl plik

@ -1,4 +1,4 @@
m = {
{
"name": "HTML Formatter",
"type": ["formatter"],
"requires_setup": False,

Wyświetl plik

@ -9,7 +9,7 @@ import base64
from auto_archiver.version import __version__
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.base_modules import Formatter
from auto_archiver.base_processors import Formatter
from auto_archiver.modules.hash_enricher import HashEnricher
from auto_archiver.utils.misc import random_str

Wyświetl plik

@ -0,0 +1 @@
from .instagram_api_archiver import InstagramAPIExtractor

Wyświetl plik

@ -16,7 +16,7 @@ from loguru import logger
from retrying import retry
from tqdm import tqdm
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
from auto_archiver.core import Media
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .instagram_archiver import InstagramExtractor

Wyświetl plik

@ -7,7 +7,7 @@ import re, os, shutil, traceback
import instaloader # https://instaloader.github.io/as-module.html
from loguru import logger
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
from auto_archiver.core import Metadata
from auto_archiver.core import Media

Wyświetl plik

@ -0,0 +1 @@
from .instagram_tbot_archiver import InstagramTbotExtractor

Wyświetl plik

@ -15,7 +15,7 @@ from sqlite3 import OperationalError
from loguru import logger
from telethon.sync import TelegramClient
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.utils import random_str

Wyświetl plik

@ -0,0 +1 @@
from .local import LocalStorage

Wyświetl plik

@ -1,4 +1,4 @@
m = {
{
"name": "Local Storage",
"type": ["storage"],
"requires_setup": False,
@ -9,10 +9,12 @@ m = {
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
"choices": ["flat", "url", "random"],
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"choices": ["random", "static"],
},
"save_to": {"default": "./archived", "help": "folder where to save archived content"},
"save_absolute": {"default": False, "help": "whether the path to the stored file is absolute or relative in the output result inc. formatters (WARN: leaks the file structure)"},

Wyświetl plik

@ -5,7 +5,7 @@ import os
from loguru import logger
from auto_archiver.core import Media
from auto_archiver.base_modules import Storage
from auto_archiver.base_processors import Storage
class LocalStorage(Storage):

Wyświetl plik

@ -0,0 +1 @@
from .meta_enricher import MetaEnricher

Wyświetl plik

@ -2,7 +2,7 @@ import datetime
import os
from loguru import logger
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .metadata_enricher import MetadataEnricher

Wyświetl plik

@ -2,7 +2,7 @@ import subprocess
import traceback
from loguru import logger
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .mute_formatter import MuteFormatter

Wyświetl plik

@ -0,0 +1 @@
from .pdq_hash_enricher import PdqHashEnricher

Wyświetl plik

@ -16,7 +16,7 @@ import numpy as np
from PIL import Image, UnidentifiedImageError
from loguru import logger
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .s3 import S3Storage

Wyświetl plik

@ -1,4 +1,4 @@
m = {
{
"name": "S3 Storage",
"type": ["storage"],
"requires_setup": True,
@ -6,29 +6,31 @@ m = {
"python": ["boto3", "loguru"],
},
"configs": {
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
},
"bucket": {"default": None, "help": "S3 bucket name"},
"region": {"default": None, "help": "S3 region name"},
"key": {"default": None, "help": "S3 API key"},
"secret": {"default": None, "help": "S3 API secret"},
"random_no_duplicate": {"default": False, "help": f"if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. Creates a new root folder path `{NO_DUPLICATES_FOLDER}`"},
"endpoint_url": {
"default": 'https://{region}.digitaloceanspaces.com',
"help": "S3 bucket endpoint, {region} are inserted at runtime"
},
"cdn_url": {
"default": 'https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}',
"help": "S3 CDN url, {bucket}, {region} and {key} are inserted at runtime"
},
"private": {"default": False, "help": "if true S3 files will not be readable online"},
},
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
"choices": ["flat", "url", "random"],
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"choices": ["random", "static"],
},
"bucket": {"default": None, "help": "S3 bucket name"},
"region": {"default": None, "help": "S3 region name"},
"key": {"default": None, "help": "S3 API key"},
"secret": {"default": None, "help": "S3 API secret"},
"random_no_duplicate": {"default": False, "help": f"if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. Creates a new root folder path `{NO_DUPLICATES_FOLDER}`"},
"endpoint_url": {
"default": 'https://{region}.digitaloceanspaces.com',
"help": "S3 bucket endpoint, {region} are inserted at runtime"
},
"cdn_url": {
"default": 'https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}',
"help": "S3 CDN url, {bucket}, {region} and {key} are inserted at runtime"
},
"private": {"default": False, "help": "if true S3 files will not be readable online"},
},
"description": """
S3Storage: A storage module for saving media files to an S3-compatible object storage.

Wyświetl plik

@ -4,7 +4,7 @@ import boto3, os
from auto_archiver.utils.misc import random_str
from auto_archiver.core import Media
from auto_archiver.base_modules import Storage
from auto_archiver.base_processors import Storage
# TODO
from auto_archiver.modules.hash_enricher import HashEnricher
from loguru import logger

Wyświetl plik

@ -0,0 +1 @@
from .screenshot_enricher import ScreenshotEnricher

Wyświetl plik

@ -5,7 +5,7 @@ import base64
from selenium.common.exceptions import TimeoutException
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.utils import Webdriver, UrlUtil, random_str
from auto_archiver.core import Media, Metadata, ArchivingContext

Wyświetl plik

@ -0,0 +1 @@
from .ssl_enricher import SSLEnricher

Wyświetl plik

@ -3,7 +3,7 @@ from slugify import slugify
from urllib.parse import urlparse
from loguru import logger
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Metadata, ArchivingContext, Media

Wyświetl plik

@ -0,0 +1 @@
from .telegram_extractor import TelegramExtractor

Wyświetl plik

@ -2,7 +2,7 @@ import requests, re, html
from bs4 import BeautifulSoup
from loguru import logger
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
from auto_archiver.core import Metadata, Media

Wyświetl plik

@ -0,0 +1 @@
from .telethon_archiver import TelethonArchiver

Wyświetl plik

@ -1,4 +1,4 @@
# TODO rm dependency on json
import json
{
"name": "telethon_extractor",
"type": ["extractor"],
@ -19,8 +19,7 @@
"channel_invites": {
"default": {},
"help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup",
# TODO
"cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
"type": lambda x: json.loads(x),
}
},
"description": """

Wyświetl plik

@ -8,7 +8,7 @@ from loguru import logger
from tqdm import tqdm
import re, time, json, os
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.utils import random_str

Wyświetl plik

@ -0,0 +1 @@
from .thumbnail_enricher import ThumbnailEnricher

Wyświetl plik

@ -9,7 +9,7 @@ and identify important moments without watching the entire video.
import ffmpeg, os
from loguru import logger
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Media, Metadata, ArchivingContext
from auto_archiver.utils.misc import random_str

Wyświetl plik

@ -0,0 +1 @@
from .timestamping_enricher import TimestampingEnricher

Wyświetl plik

@ -21,7 +21,7 @@
"http://tss.accv.es:8318/tsa"
],
"help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.",
"cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
"type": lambda val: set(val.split(",")),
}
},
"description": """

Wyświetl plik

@ -8,9 +8,9 @@ from certvalidator import CertificateValidator, ValidationContext
from asn1crypto import pem
import certifi
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Metadata, ArchivingContext, Media
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
class TimestampingEnricher(Enricher):

Wyświetl plik

@ -0,0 +1 @@
from .twitter_api_archiver import TwitterApiExtractor

Wyświetl plik

@ -12,7 +12,7 @@
"configs": {
"bearer_token": {"default": None, "help": "[deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret"},
"bearer_tokens": {"default": [], "help": " a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line",
"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))},
"type": lambda val: set(val.split(",")),},
"consumer_key": {"default": None, "help": "twitter API consumer_key"},
"consumer_secret": {"default": None, "help": "twitter API consumer_secret"},
"access_token": {"default": None, "help": "twitter API access_token"},

Wyświetl plik

@ -8,7 +8,7 @@ from loguru import logger
from pytwitter import Api
from slugify import slugify
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
from auto_archiver.core import Metadata,Media
class TwitterApiExtractor(Extractor):

Wyświetl plik

@ -0,0 +1 @@
from .vk_archiver import VkExtractor

Wyświetl plik

@ -2,7 +2,7 @@ from loguru import logger
from vk_url_scraper import VkScraper
from auto_archiver.utils.misc import dump_payload
from auto_archiver.base_modules import Extractor
from auto_archiver.base_processors import Extractor
from auto_archiver.core import Metadata, Media, ArchivingContext

Wyświetl plik

@ -0,0 +1 @@
from .wacz_enricher import WaczExtractorEnricher

Wyświetl plik

@ -6,7 +6,7 @@ from loguru import logger
from warcio.archiveiterator import ArchiveIterator
from auto_archiver.core import Media, Metadata, ArchivingContext
from auto_archiver.base_modules import Extractor, Enricher
from auto_archiver.base_processors import Extractor, Enricher
from auto_archiver.utils import UrlUtil, random_str

Wyświetl plik

@ -0,0 +1 @@
from .wayback_enricher import WaybackExtractorEnricher

Wyświetl plik

@ -2,7 +2,7 @@ import json
from loguru import logger
import time, requests
from auto_archiver.base_modules import Extractor, Enricher
from auto_archiver.base_processors import Extractor, Enricher
from auto_archiver.utils import UrlUtil
from auto_archiver.core import Metadata

Wyświetl plik

@ -0,0 +1 @@
from .whisper_enricher import WhisperEnricher

Wyświetl plik

@ -2,9 +2,9 @@ import traceback
import requests, time
from loguru import logger
from auto_archiver.base_modules import Enricher
from auto_archiver.base_processors import Enricher
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.modules import S3Storage
from auto_archiver.modules.s3_storage import S3Storage
class WhisperEnricher(Enricher):

Wyświetl plik

@ -1,7 +1,7 @@
import pytest
from auto_archiver.core.metadata import Metadata
from auto_archiver.base_modules.extractor import Extractor
from auto_archiver.base_processors.extractor import Extractor
class TestArchiverBase(object):
archiver_class: str = None