kopia lustrzana https://github.com/bellingcat/auto-archiver
updating example config
rodzic
1970fa3c82
commit
d347b26d37
|
@ -1,22 +1,21 @@
|
||||||
steps:
|
steps:
|
||||||
# only 1 feeder allowed
|
# only 1 feeder allowed
|
||||||
# feeder: cli_feeder # default feeder
|
feeder: gsheet_feeder # defaults to cli_feeder
|
||||||
feeder: gsheet_feeder # default -> only expects URL from CLI
|
archivers: # order matters, uncomment to activate
|
||||||
archivers: # order matters
|
|
||||||
# - vk_archiver
|
# - vk_archiver
|
||||||
# - telethon_archiver
|
# - telethon_archiver
|
||||||
# - telegram_archiver
|
# - telegram_archiver
|
||||||
# - twitter_archiver
|
# - twitter_archiver
|
||||||
# - twitter_api_archiver
|
# - twitter_api_archiver
|
||||||
# - instagram_archiver
|
|
||||||
# - instagram_tbot_archiver
|
# - instagram_tbot_archiver
|
||||||
|
# - instagram_archiver
|
||||||
# - tiktok_archiver
|
# - tiktok_archiver
|
||||||
- youtubedl_archiver
|
- youtubedl_archiver
|
||||||
# - wayback_archiver_enricher
|
- wayback_archiver_enricher
|
||||||
enrichers:
|
enrichers:
|
||||||
- hash_enricher
|
- hash_enricher
|
||||||
- screenshot_enricher
|
# - screenshot_enricher
|
||||||
- thumbnail_enricher
|
# - thumbnail_enricher
|
||||||
# - wayback_archiver_enricher
|
# - wayback_archiver_enricher
|
||||||
# - wacz_enricher
|
# - wacz_enricher
|
||||||
|
|
||||||
|
@ -26,16 +25,18 @@ steps:
|
||||||
# - s3_storage
|
# - s3_storage
|
||||||
# - gdrive_storage
|
# - gdrive_storage
|
||||||
databases:
|
databases:
|
||||||
# - console_db
|
- console_db
|
||||||
# - csv_db
|
# - csv_db
|
||||||
- gsheet_db
|
# - gsheet_db
|
||||||
# - mongo_db
|
# - mongo_db
|
||||||
|
|
||||||
configurations:
|
configurations:
|
||||||
gsheet_feeder:
|
gsheet_feeder:
|
||||||
sheet: auto-archiver-test
|
sheet: "your sheet name"
|
||||||
header: 2 # defaults to 1 in GSheetsFeeder
|
header: 1
|
||||||
service_account: "secrets/service_account.json"
|
service_account: "secrets/service_account.json"
|
||||||
|
# allow_worksheets: "only parse this worksheet"
|
||||||
|
# block_worksheets: "blocked sheet 1,blocked sheet 2"
|
||||||
use_sheet_names_in_stored_paths: false
|
use_sheet_names_in_stored_paths: false
|
||||||
columns:
|
columns:
|
||||||
url: link
|
url: link
|
||||||
|
@ -53,27 +54,70 @@ configurations:
|
||||||
hash: hash
|
hash: hash
|
||||||
wacz: wacz
|
wacz: wacz
|
||||||
replaywebpage: replaywebpage
|
replaywebpage: replaywebpage
|
||||||
|
instagram_tbot_archiver:
|
||||||
|
api_id: "TELEGRAM_BOT_API_ID"
|
||||||
|
api_hash: "TELEGRAM_BOT_API_HASH"
|
||||||
|
# session_file: "secrets/anon"
|
||||||
|
telethon_archiver:
|
||||||
|
api_id: "TELEGRAM_BOT_API_ID"
|
||||||
|
api_hash: "TELEGRAM_BOT_API_HASH"
|
||||||
|
# session_file: "secrets/anon"
|
||||||
|
join_channels: false
|
||||||
|
channel_invites: # if you want to archive from private channels
|
||||||
|
- invite: https://t.me/+123456789
|
||||||
|
id: 0000000001
|
||||||
|
- invite: https://t.me/+123456788
|
||||||
|
id: 0000000002
|
||||||
|
|
||||||
|
twitter_api_archiver:
|
||||||
|
# either bearer_token only
|
||||||
|
bearer_token: "TWITTER_BEARER_TOKEN"
|
||||||
|
# OR all of the below
|
||||||
|
# consumer_key: ""
|
||||||
|
# consumer_secret: ""
|
||||||
|
# access_token: ""
|
||||||
|
# access_secret: ""
|
||||||
|
instagram_archiver:
|
||||||
|
username: "INSTAGRAM_USERNAME"
|
||||||
|
password: "INSTAGRAM_PASSWORD"
|
||||||
|
# session_file: "secrets/instaloader.session"
|
||||||
|
|
||||||
|
vk_archiver:
|
||||||
|
username: "or phone number"
|
||||||
|
password: "vk pass"
|
||||||
|
session_file: "secrets/vk_config.v2.json"
|
||||||
|
|
||||||
screenshot_enricher:
|
screenshot_enricher:
|
||||||
width: 1280
|
width: 1280
|
||||||
height: 2300
|
height: 2300
|
||||||
wayback_archiver_enricher:
|
wayback_archiver_enricher:
|
||||||
timeout: 10
|
timeout: 10
|
||||||
key: ""
|
key: "wayback key"
|
||||||
secret: ""
|
secret: "wayback secret"
|
||||||
hash_enricher:
|
hash_enricher:
|
||||||
algorithm: "SHA3-512"
|
algorithm: "SHA3-512" # can also be SHA-256
|
||||||
# wacz:
|
wacz_enricher:
|
||||||
# profile: secrets/profile.tar.gz
|
profile: secrets/profile.tar.gz
|
||||||
local_storage:
|
local_storage:
|
||||||
save_to: "./local_archive"
|
save_to: "./local_archive"
|
||||||
save_absolute: true
|
save_absolute: true
|
||||||
filename_generator: static
|
filename_generator: static
|
||||||
path_generator: flat
|
path_generator: flat
|
||||||
|
s3_storage:
|
||||||
|
bucket: your-bucket-name
|
||||||
|
region: reg1
|
||||||
|
key: S3_KEY
|
||||||
|
secret: S3_SECRET
|
||||||
|
endpoint_url: "https://{region}.digitaloceanspaces.com"
|
||||||
|
cdn_url: "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}"
|
||||||
|
# if private is true, S3 URLs will not be readable online
|
||||||
|
private: false
|
||||||
|
# with 'random', a random UUID is used in the URL instead of a predictable path; useful for files that should stay public but unlisted. The alternative is 'default', which also applies when key_path is omitted from the config
|
||||||
|
key_path: random
|
||||||
|
|
||||||
gdrive_storage:
|
gdrive_storage:
|
||||||
path_generator: url
|
path_generator: url
|
||||||
filename_generator: random
|
filename_generator: random
|
||||||
root_folder_id: TODO
|
root_folder_id: folder_id_from_url
|
||||||
oauth_token: secrets/gd-token.json
|
oauth_token: secrets/gd-token.json # needs to be generated with scripts/create_update_gdrive_oauth_token.py
|
||||||
service_account: "secrets/service_account.json"
|
service_account: "secrets/service_account.json"
|
||||||
|
|
Ładowanie…
Reference in New Issue