kopia lustrzana https://github.com/bellingcat/auto-archiver
83 wiersze
2.1 KiB
YAML
83 wiersze
2.1 KiB
YAML
# Modules selected for each step. Several of these names (gsheet_feeder,
# telethon, s3, ...) reappear as keys under `configurations`.
steps:
  # Only 1 feeder allowed.
  # A feeder could be in an "infinite loop", for example a gsheets_infinite
  # feeder which holds -> this could be an easy logic addition by modifying
  # "for each" to "while not feeder.done()" if it becomes necessary.
  feeder: gsheet_feeder  # default -> only expects URL from CLI
  archivers:  # order matters
    - telethon
    # - tiktok
    # - twitter
    # - instagram
    # - webarchive  # this way it runs as a failsafe only
  # enrichers:
  #   - screenshot
  #   - wacz
  #   - webarchive  # this way it runs for every case, webarchive extends archiver and enrichment
  #   - thumbnails
  # NOTE(review): "Formater" spelling kept exactly as found; confirm it matches
  # the names the loader looks up.
  formatters:
    - HTMLFormater
    - PdfFormater
  storages:
    - local_storage
    - s3
  databases:
    - gsheets_db
    - mongo_db
|
|
|
|
|
|
|
|
# Per-module settings, one section per module name.
configurations:
  # NOTE(review): `global` is a one-item list of mappings, not a plain mapping
  # like most sections below; confirm the loader expects this shape.
  global:
    - save_logs: false
  gsheet_feeder:
    sheet: my-auto-archiver
    header: 2  # defaults to 1 in GSheetsFeeder
    service_account: "secrets/service_account.json"
    # allow_worksheets: "allowed"
    # block_worksheets: "blocked1,blocked2"
    # Presumably field name -> column header in the sheet; verify against the
    # feeder implementation.
    columns:
      'url': 'link'
      'status': 'archive status'
      'folder': 'destination folder'
      'archive': 'archive location'
      'date': 'archive date'
      'thumbnail': 'thumbnail'
      'thumbnail_index': 'thumbnail index'
      'timestamp': 'upload timestamp'
      'title': 'upload title'
      'duration': 'duration'
      'screenshot': 'screenshot'
      'hash': 'hash'
      'wacz': 'wacz'
      'replaywebpage': 'replaywebpage'
  telethon:
    api_id: "1234567"
    api_hash: "examplehash"
    session_file: "secrets/anon"
    channel_invites:
      - invite: https://t.me/+XXXXXXXXXXXXXX
        id: 1000000000
      - invite: https://t.me/joinchat/XXXXXXXXXXXXXX
        id: 1000000001

  tiktok:
    # Placeholders are quoted so they load as strings, consistent with the
    # quoted username/password just below.
    api_keys:
      - username: "1"
        password: "2"
      - username: "3"
        password: "4"
    username: "abc"
    password: "123"
    token: "here"
  screenshot:
    width: 1280
    height: 4600
  wacz:
    profile: "secrets/profile.tar.gz"
  webarchive:
    api_key: "12345"
  # NOTE(review): `s3` is a list of single-key mappings, unlike the plain
  # mappings used by most other sections; confirm the loader expects this shape.
  s3:
    - bucket: "123"  # quoted: a bare 123 would load as an integer
    - region: "nyc3"
    - cdn: "{region}{bucket}"
|
|