# Pipeline definition: which step runs at each stage.
# List entries that are commented out are inactive; uncomment to activate.
steps:
  # only 1 feeder allowed
  feeder: gsheet_feeder  # defaults to cli_feeder

  # order matters, uncomment to activate
  archivers:
    # - vk_archiver
    # - telethon_archiver
    # - telegram_archiver
    # - twitter_archiver
    # - twitter_api_archiver
    # - instagram_api_archiver
    # - instagram_tbot_archiver
    # - instagram_archiver
    # - tiktok_archiver
    - youtubedl_archiver
    # - wayback_archiver_enricher
    # - wacz_archiver_enricher

  enrichers:
    - hash_enricher
    # - metadata_enricher
    # - screenshot_enricher
    # - thumbnail_enricher
    # - wayback_archiver_enricher
    # - wacz_archiver_enricher
    # if you want to calculate hashes for thumbnails, include
    # pdq_hash_enricher after thumbnail_enricher
    # - pdq_hash_enricher

  formatter: html_formatter  # defaults to mute_formatter

  storages:
    - local_storage
    # - s3_storage
    # - gdrive_storage

  databases:
    - console_db
    # - csv_db
    # - gsheet_db
    # - mongo_db
# Per-step settings. Every key directly under `configurations` is the name of
# a step that can be listed under `steps`. The values are example
# placeholders; replace them with your own before use.
configurations:
  gsheet_feeder:
    sheet: "your sheet name"
    header: 1
    service_account: "secrets/service_account.json"
    # allow_worksheets: "only parse this worksheet"
    # block_worksheets: "blocked sheet 1,blocked sheet 2"
    use_sheet_names_in_stored_paths: false
    # NOTE(review): presumably maps each internal field name (left) to the
    # spreadsheet column header used for it (right) — confirm with the feeder.
    columns:
      url: link
      status: archive status
      folder: destination folder
      archive: archive location
      date: archive date
      thumbnail: thumbnail
      timestamp: upload timestamp
      title: upload title
      text: textual content
      screenshot: screenshot
      hash: hash
      pdq_hash: perceptual hashes
      wacz: wacz
      replaywebpage: replaywebpage

  instagram_tbot_archiver:
    api_id: "TELEGRAM_BOT_API_ID"
    api_hash: "TELEGRAM_BOT_API_HASH"
    # session_file: "secrets/anon"

  telethon_archiver:
    api_id: "TELEGRAM_BOT_API_ID"
    api_hash: "TELEGRAM_BOT_API_HASH"
    # session_file: "secrets/anon"
    join_channels: false
    # if you want to archive from private channels
    # NOTE(review): the unquoted ids below have leading zeros and will load as
    # the integers 1 and 2; quote them if the literal digits matter — confirm.
    channel_invites:
      - invite: https://t.me/+123456789
        id: 0000000001
      - invite: https://t.me/+123456788
        id: 0000000002

  twitter_api_archiver:
    # either bearer_token only
    bearer_token: "TWITTER_BEARER_TOKEN"
    # OR all of the below
    # consumer_key: ""
    # consumer_secret: ""
    # access_token: ""
    # access_secret: ""

  instagram_archiver:
    username: "INSTAGRAM_USERNAME"
    password: "INSTAGRAM_PASSWORD"
    # session_file: "secrets/instaloader.session"

  vk_archiver:
    username: "or phone number"
    password: "vk pass"
    session_file: "secrets/vk_config.v2.json"

  screenshot_enricher:
    width: 1280
    height: 2300

  wayback_archiver_enricher:
    timeout: 10
    key: "wayback key"
    secret: "wayback secret"

  hash_enricher:
    algorithm: "SHA3-512"  # can also be SHA-256

  wacz_archiver_enricher:
    profile: secrets/profile.tar.gz

  local_storage:
    save_to: "./local_archive"
    save_absolute: true
    filename_generator: static
    path_generator: flat

  s3_storage:
    bucket: your-bucket-name
    region: reg1
    key: S3_KEY
    secret: S3_SECRET
    endpoint_url: "https://{region}.digitaloceanspaces.com"
    cdn_url: "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}"
    # if private:true S3 urls will not be readable online
    private: false
    # 'random' generates a random UUID for the URL instead of a predictable
    # path, useful for files that should stay public but unlisted; the
    # alternative is 'default', or omitting this key from the config
    key_path: random

  gdrive_storage:
    path_generator: url
    filename_generator: random
    root_folder_id: folder_id_from_url
    # needs to be generated with scripts/create_update_gdrive_oauth_token.py
    oauth_token: secrets/gd-token.json
    service_account: "secrets/service_account.json"

  csv_db:
    csv_file: "./local_archive/db.csv"