pull/72/head
msramalho 2023-02-09 12:32:55 +00:00
rodzic 224ebe7ee8
commit d6dbdec6ac
1 zmienionych plików z 64 dodań i 64 usunięć

Wyświetl plik

@ -1,80 +1,80 @@
steps: steps:
# only 1 feeder allowed # only 1 feeder allowed
# a feeder could be in an "infinite loop", for example: a gsheets_infinite feeder which holds -> this could be an easy logic addition by changing the "for each" into "while not feeder.done()" if it becomes necessary # feeder: cli_feeder # default feeder
feeder: gsheet_feeder # default -> only expects URL from CLI feeder: gsheet_feeder # default -> only expects URL from CLI
archivers: # order matters archivers: # order matters
- telethon # - vk_archiver
# - tiktok # - telethon_archiver
# - twitter # - telegram_archiver
# - instagram # - twitter_archiver
# - webarchive # this way it runs as a failsafe only # - twitter_api_archiver
# enrichers: # # - instagram_archiver
# - screenshot # - tiktok_archiver
# - wacz - youtubedl_enricher
# - webarchive # this way it runs for every case, webarchive extends archiver and enrichment # - wayback_archiver_enricher
# - thumbnails enrichers:
formatters: - hash_enricher
- HTMLFormater - screenshot_enricher
- PdfFormater - thumbnail_enricher
# - wayback_archiver_enricher
# - wacz_enricher
formatter: html_formatter # defaults to mute_formatter
storages: storages:
- local_storage - local_storage
- s3 # - s3_storage
# - gdrive_storage
databases: databases:
- gsheets_db # - console_db
- mongo_db # - csv_db
- gsheet_db
# - mongo_db
configurations: configurations:
global: #TODO: implement this logic, does nothing ATM
- save_logs: false
gsheet_feeder: gsheet_feeder:
sheet: my-auto-archiver sheet: auto-archiver-test
header: 2 # defaults to 1 in GSheetsFeeder header: 2 # defaults to 1 in GSheetsFeeder
service_account: "secrets/service_account.json" service_account: "secrets/service_account.json"
# allow_worksheets: "allowed" use_sheet_names_in_stored_paths: false
# block_worksheets: "blocked1,blocked2"
columns: columns:
'url': 'link' url: link
'status': 'archive status' status: archive status
'folder': 'destination folder' folder: destination folder
'archive': 'archive location' archive: archive location
'date': 'archive date' date: archive date
'thumbnail': 'thumbnail' thumbnail: thumbnail
'thumbnail_index': 'thumbnail index' thumbnail_index: thumbnail index
'timestamp': 'upload timestamp' timestamp: upload timestamp
'title': 'upload title' title: upload title
'duration': 'duration' text: textual content
'screenshot': 'screenshot' duration: duration
'hash': 'hash' screenshot: screenshot
'wacz': 'wacz' hash: hash
'replaywebpage': 'replaywebpage' wacz: wacz
telethon: replaywebpage: replaywebpage
api_id: "1234567"
api_hash: "examplehash"
session_file: "secrets/anon"
channel_invites:
- invite: https://t.me/+XXXXXXXXXXXXXX
id: 1000000000
- invite: https://t.me/joinchat/XXXXXXXXXXXXXX
id: 1000000001
tiktok: screenshot_enricher:
api_keys:
- username: 1
password: 2
- username: 3
password: 4
username: "abc"
password: "123"
token: "here"
screenshot:
width: 1280 width: 1280
height: 4600 height: 2300
wacz: wayback_archiver_enricher:
profile: secrets/profile.tar.gz timeout: 10
webarchive: key: ""
api_key: "12345" secret: ""
s3: hash_enricher:
- bucket: 123 algorithm: "SHA3-512"
- region: "nyc3" # wacz:
- cdn: "{region}{bucket}" # profile: secrets/profile.tar.gz
local_storage:
save_to: "./local_archive"
save_absolute: true
filename_generator: static
path_generator: flat
gdrive_storage:
path_generator: url
filename_generator: random
root_folder_id: TODO
oauth_token: secrets/gd-token.json
service_account: "secrets/service_account.json"