kopia lustrzana https://github.com/bellingcat/auto-archiver
example
rodzic
224ebe7ee8
commit
d6dbdec6ac
|
@ -1,80 +1,80 @@
|
||||||
steps:
|
steps:
|
||||||
# only 1 feeder allowed
|
# only 1 feeder allowed
|
||||||
# a feeder could be in an "infinite loop", for example a gsheets_infinite feeder which holds -> this could be an easy logic addition by changing the for-each into `while not feeder.done()` if it becomes necessary
|
# feeder: cli_feeder # default feeder
|
||||||
feeder: gsheet_feeder # default -> only expects URL from CLI
|
feeder: gsheet_feeder # default -> only expects URL from CLI
|
||||||
archivers: # order matters
|
archivers: # order matters
|
||||||
- telethon
|
# - vk_archiver
|
||||||
# - tiktok
|
# - telethon_archiver
|
||||||
# - twitter
|
# - telegram_archiver
|
||||||
# - instagram
|
# - twitter_archiver
|
||||||
# - webarchive # this way it runs as a failsafe only
|
# - twitter_api_archiver
|
||||||
# enrichers:
|
# # - instagram_archiver
|
||||||
# - screenshot
|
# - tiktok_archiver
|
||||||
# - wacz
|
- youtubedl_enricher
|
||||||
# - webarchive # this way it runs for every case, webarchive extends archiver and enrichment
|
# - wayback_archiver_enricher
|
||||||
# - thumbnails
|
enrichers:
|
||||||
formatters:
|
- hash_enricher
|
||||||
- HTMLFormater
|
- screenshot_enricher
|
||||||
- PdfFormater
|
- thumbnail_enricher
|
||||||
|
# - wayback_archiver_enricher
|
||||||
|
# - wacz_enricher
|
||||||
|
|
||||||
|
formatter: html_formatter # defaults to mute_formatter
|
||||||
storages:
|
storages:
|
||||||
- local_storage
|
- local_storage
|
||||||
- s3
|
# - s3_storage
|
||||||
|
# - gdrive_storage
|
||||||
databases:
|
databases:
|
||||||
- gsheets_db
|
# - console_db
|
||||||
- mongo_db
|
# - csv_db
|
||||||
|
- gsheet_db
|
||||||
|
# - mongo_db
|
||||||
|
|
||||||
configurations:
|
configurations:
|
||||||
|
global: #TODO: implement this logic, does nothing ATM
|
||||||
|
- save_logs: false
|
||||||
gsheet_feeder:
|
gsheet_feeder:
|
||||||
sheet: my-auto-archiver
|
sheet: auto-archiver-test
|
||||||
header: 2 # defaults to 1 in GSheetsFeeder
|
header: 2 # defaults to 1 in GSheetsFeeder
|
||||||
service_account: "secrets/service_account.json"
|
service_account: "secrets/service_account.json"
|
||||||
# allow_worksheets: "allowed"
|
use_sheet_names_in_stored_paths: false
|
||||||
# block_worksheets: "blocked1,blocked2"
|
|
||||||
columns:
|
columns:
|
||||||
'url': 'link'
|
url: link
|
||||||
'status': 'archive status'
|
status: archive status
|
||||||
'folder': 'destination folder'
|
folder: destination folder
|
||||||
'archive': 'archive location'
|
archive: archive location
|
||||||
'date': 'archive date'
|
date: archive date
|
||||||
'thumbnail': 'thumbnail'
|
thumbnail: thumbnail
|
||||||
'thumbnail_index': 'thumbnail index'
|
thumbnail_index: thumbnail index
|
||||||
'timestamp': 'upload timestamp'
|
timestamp: upload timestamp
|
||||||
'title': 'upload title'
|
title: upload title
|
||||||
'duration': 'duration'
|
text: textual content
|
||||||
'screenshot': 'screenshot'
|
duration: duration
|
||||||
'hash': 'hash'
|
screenshot: screenshot
|
||||||
'wacz': 'wacz'
|
hash: hash
|
||||||
'replaywebpage': 'replaywebpage'
|
wacz: wacz
|
||||||
telethon:
|
replaywebpage: replaywebpage
|
||||||
api_id: "1234567"
|
|
||||||
api_hash: "examplehash"
|
|
||||||
session_file: "secrets/anon"
|
|
||||||
channel_invites:
|
|
||||||
- invite: https://t.me/+XXXXXXXXXXXXXX
|
|
||||||
id: 1000000000
|
|
||||||
- invite: https://t.me/joinchat/XXXXXXXXXXXXXX
|
|
||||||
id: 1000000001
|
|
||||||
|
|
||||||
tiktok:
|
screenshot_enricher:
|
||||||
api_keys:
|
|
||||||
- username: 1
|
|
||||||
password: 2
|
|
||||||
- username: 3
|
|
||||||
password: 4
|
|
||||||
username: "abc"
|
|
||||||
password: "123"
|
|
||||||
token: "here"
|
|
||||||
screenshot:
|
|
||||||
width: 1280
|
width: 1280
|
||||||
height: 4600
|
height: 2300
|
||||||
wacz:
|
wayback_archiver_enricher:
|
||||||
profile: secrets/profile.tar.gz
|
timeout: 10
|
||||||
webarchive:
|
key: ""
|
||||||
api_key: "12345"
|
secret: ""
|
||||||
s3:
|
hash_enricher:
|
||||||
- bucket: 123
|
algorithm: "SHA3-512"
|
||||||
- region: "nyc3"
|
# wacz:
|
||||||
- cdn: "{region}{bucket}"
|
# profile: secrets/profile.tar.gz
|
||||||
|
local_storage:
|
||||||
|
save_to: "./local_archive"
|
||||||
|
save_absolute: true
|
||||||
|
filename_generator: static
|
||||||
|
path_generator: flat
|
||||||
|
|
||||||
|
gdrive_storage:
|
||||||
|
path_generator: url
|
||||||
|
filename_generator: random
|
||||||
|
root_folder_id: TODO
|
||||||
|
oauth_token: secrets/gd-token.json
|
||||||
|
service_account: "secrets/service_account.json"
|
||||||
|
|
Ładowanie…
Reference in New Issue