Merge pull request #40 from bellingcat/vk-archiver

pull/33/head
Miguel Sozinho Ramalho 2022-06-16 16:18:48 +01:00 zatwierdzone przez GitHub
commit b7f1ec5404
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
17 zmienionych plików z 357 dodań i 80 usunięć

3
.gitignore vendored
Wyświetl plik

@ -15,4 +15,5 @@ config-*.json
config.yaml config.yaml
config-*.yaml config-*.yaml
logs/* logs/*
local_archive/ local_archive/
vk_config*.json

Wyświetl plik

@ -22,6 +22,8 @@ google-auth-oauthlib = "*"
oauth2client = "*" oauth2client = "*"
python-slugify = "*" python-slugify = "*"
pyyaml = "*" pyyaml = "*"
vk-api = "*"
dateparser = "*"
[requires] [requires]
python_version = "3.9" python_version = "3.9"

166
Pipfile.lock wygenerowano
Wyświetl plik

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "602a05a8fa475181c24714ab57188a417fdfddf373a7dab4fa0ba0fcb7ce8d0a" "sha256": "d06498403429a8fffcd6d049b314872c0095abee7fb9c6ffd3ba3d7b0c31c8cd"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@ -50,19 +50,19 @@
}, },
"boto3": { "boto3": {
"hashes": [ "hashes": [
"sha256:28ab0947c49a6fb2409004d4a10b2828aec231cb95ca1d800cb1411e191cc201", "sha256:0821212ff521cb934801b1f655cef3c0e976775324b1018f1751700d0f42dbb4",
"sha256:833e67edfb73f2cc22ff27a1c33728686dc90a9e81ba2551f9462ea2d1b04f41" "sha256:87d34861727699c795bf8d65703f2435e75f12879bdd483e08b35b7c5510e8c8"
], ],
"index": "pypi", "index": "pypi",
"version": "==1.24.8" "version": "==1.24.9"
}, },
"botocore": { "botocore": {
"hashes": [ "hashes": [
"sha256:ad92702930d6cb7b587fc2f619672feb74d5218f8de387a28c2905820db79027", "sha256:5669b982b0583e73daef1fe0a4df311055e6287326f857dbb1dcc2de1d8412ad",
"sha256:db6667b8dfd175d16187653942cd91dd1f0cf36adc0ea9d7a0805ba4d2a3321f" "sha256:7a7588b0170e571317496ac4104803329d5bc792bc008e8a757ffd440f1b6fa6"
], ],
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.7'",
"version": "==1.27.8" "version": "==1.27.9"
}, },
"brotli": { "brotli": {
"hashes": [ "hashes": [
@ -152,7 +152,7 @@
"sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7", "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7",
"sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a" "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==2022.5.18.1" "version": "==2022.5.18.1"
}, },
"cffi": { "cffi": {
@ -267,6 +267,14 @@
], ],
"version": "==37.0.2" "version": "==37.0.2"
}, },
"dateparser": {
"hashes": [
"sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9",
"sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628"
],
"index": "pypi",
"version": "==1.1.1"
},
"ffmpeg-python": { "ffmpeg-python": {
"hashes": [ "hashes": [
"sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127", "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127",
@ -303,7 +311,7 @@
"sha256:958024c6aa3460b08f35741231076a4dd9a4c819a6a39d44da9627febe8b28f0", "sha256:958024c6aa3460b08f35741231076a4dd9a4c819a6a39d44da9627febe8b28f0",
"sha256:ce1daa49644b50398093d2a9ad886501aa845e2602af70c3001b9f402a9d7359" "sha256:ce1daa49644b50398093d2a9ad886501aa845e2602af70c3001b9f402a9d7359"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==2.8.1" "version": "==2.8.1"
}, },
"google-api-python-client": { "google-api-python-client": {
@ -316,11 +324,11 @@
}, },
"google-auth": { "google-auth": {
"hashes": [ "hashes": [
"sha256:8a954960f852d5f19e6af14dd8e75c20159609e85d8db37e4013cc8c3824a7e1", "sha256:819b70140d05501739e1387291d39f0de3b4dff3b00ae4aff8e7a05369957f89",
"sha256:df549a1433108801b11bdcc0e312eaf0d5f0500db42f0523e4d65c78722e8475" "sha256:9b1da39ab8731c3061f36fefde9f8bb902dbee9eb28e3a67e8cfa7dc1be76227"
], ],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==2.7.0" "version": "==2.8.0"
}, },
"google-auth-httplib2": { "google-auth-httplib2": {
"hashes": [ "hashes": [
@ -343,7 +351,7 @@
"sha256:023eaea9d8c1cceccd9587c6af6c20f33eeeb05d4148670f2b0322dc1511700c", "sha256:023eaea9d8c1cceccd9587c6af6c20f33eeeb05d4148670f2b0322dc1511700c",
"sha256:b09b56f5463070c2153753ef123f07d2e49235e89148e9b2459ec8ed2f68d7d3" "sha256:b09b56f5463070c2153753ef123f07d2e49235e89148e9b2459ec8ed2f68d7d3"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==1.56.2" "version": "==1.56.2"
}, },
"gspread": { "gspread": {
@ -359,7 +367,7 @@
"sha256:70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06", "sha256:70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06",
"sha256:8ddd78563b633ca55346c8cd41ec0af27d3c79931828beffb46ce70a379e7442" "sha256:8ddd78563b633ca55346c8cd41ec0af27d3c79931828beffb46ce70a379e7442"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==0.13.0" "version": "==0.13.0"
}, },
"httplib2": { "httplib2": {
@ -554,7 +562,7 @@
"sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2", "sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2",
"sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe" "sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==3.2.0" "version": "==3.2.0"
}, },
"outcome": { "outcome": {
@ -682,7 +690,7 @@
"sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb", "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb",
"sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519" "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==2.12.0" "version": "==2.12.0"
}, },
"pyopenssl": { "pyopenssl": {
@ -724,6 +732,21 @@
"index": "pypi", "index": "pypi",
"version": "==6.1.2" "version": "==6.1.2"
}, },
"pytz": {
"hashes": [
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
],
"version": "==2022.1"
},
"pytz-deprecation-shim": {
"hashes": [
"sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6",
"sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==0.1.0.post0"
},
"pyyaml": { "pyyaml": {
"hashes": [ "hashes": [
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
@ -763,7 +786,88 @@
"index": "pypi", "index": "pypi",
"version": "==6.0" "version": "==6.0"
}, },
"regex": {
"hashes": [
"sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14",
"sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9",
"sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204",
"sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f",
"sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737",
"sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b",
"sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3",
"sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4",
"sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac",
"sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f",
"sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29",
"sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772",
"sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1",
"sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863",
"sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66",
"sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed",
"sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47",
"sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f",
"sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f",
"sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008",
"sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d",
"sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571",
"sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0",
"sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a",
"sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3",
"sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7",
"sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447",
"sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493",
"sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4",
"sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede",
"sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640",
"sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd",
"sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c",
"sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee",
"sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30",
"sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b",
"sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec",
"sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1",
"sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e",
"sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8",
"sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9",
"sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231",
"sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7",
"sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729",
"sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960",
"sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056",
"sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357",
"sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7",
"sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3",
"sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7",
"sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573",
"sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0",
"sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178",
"sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f",
"sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834",
"sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c",
"sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015",
"sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0",
"sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57",
"sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635",
"sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07",
"sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2",
"sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1",
"sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b",
"sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2",
"sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5",
"sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b",
"sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86",
"sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5",
"sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93",
"sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0",
"sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f",
"sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d",
"sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==2022.3.2"
},
"requests": { "requests": {
"extras": [],
"hashes": [ "hashes": [
"sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f", "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f",
"sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b" "sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b"
@ -799,7 +903,7 @@
"sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17", "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17",
"sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb" "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"
], ],
"markers": "python_version >= '3.6' and python_version < '4'", "markers": "python_version < '4' and python_full_version >= '3.6.0'",
"version": "==4.8" "version": "==4.8"
}, },
"s3transfer": { "s3transfer": {
@ -853,7 +957,7 @@
"sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759", "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759",
"sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d" "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==2.3.2.post1" "version": "==2.3.2.post1"
}, },
"telethon": { "telethon": {
@ -902,12 +1006,28 @@
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.5'",
"version": "==0.9.2" "version": "==0.9.2"
}, },
"tzdata": {
"hashes": [
"sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9",
"sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==2022.1"
},
"tzlocal": {
"hashes": [
"sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745",
"sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==4.2"
},
"uritemplate": { "uritemplate": {
"hashes": [ "hashes": [
"sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0", "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0",
"sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e" "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"
], ],
"markers": "python_version >= '3.6'", "markers": "python_full_version >= '3.6.0'",
"version": "==4.1.1" "version": "==4.1.1"
}, },
"urllib3": { "urllib3": {
@ -922,6 +1042,14 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.9" "version": "==1.26.9"
}, },
"vk-api": {
"hashes": [
"sha256:11c731e214ebc7fa911db81efb021f97587493a5402b992f24748fe1cd9d7afc",
"sha256:d0ae766fa93a40d47c5da045d94201721bf766dbde122a1d2253516b35c5edf3"
],
"index": "pypi",
"version": "==11.9.8"
},
"websockets": { "websockets": {
"hashes": [ "hashes": [
"sha256:07cdc0a5b2549bcfbadb585ad8471ebdc7bdf91e32e34ae3889001c1c106a6af", "sha256:07cdc0a5b2549bcfbadb585ad8471ebdc7bdf91e32e34ae3889001c1c106a6af",

Wyświetl plik

@ -140,7 +140,7 @@ With this configuration, the archiver should archive and store all media added t
# auto_auto_archiver # auto_auto_archiver
To make it easier to set up new auto-archiver sheets, the auto-auto-archiver will look at a particular sheet and run the auto-archiver on every sheet name in column A, starting from row 11. (It starts here to support instructional text in the first rows of the sheet, as shown below.) This script takes one command line argument, with `--sheet`, the name of the sheet. It must be shared with the same service account. To make it easier to set up new auto-archiver sheets, the auto-auto-archiver will look at a particular sheet and run the auto-archiver on every sheet name in column A, starting from row 11. (It starts here to support instructional text in the first rows of the sheet, as shown below.) You can simply use your default config as for `auto_archiver.py` but use `--sheet` to specify the name of the sheet that lists the names of sheets to archive. It must be shared with the same service account.
![A screenshot of a Google Spreadsheet configured to show instructional text and a list of sheet names to check with auto-archiver.](docs/auto-auto.png) ![A screenshot of a Google Spreadsheet configured to show instructional text and a list of sheet names to check with auto-archiver.](docs/auto-auto.png)
@ -152,15 +152,16 @@ Code is split into functional concepts:
1. [GWorksheet](utils/gworksheet.py) - facilitates some of the reading/writing tasks for a Google Worksheet 1. [GWorksheet](utils/gworksheet.py) - facilitates some of the reading/writing tasks for a Google Worksheet
### Current Archivers ### Current Archivers
Archivers are tested in a meaningful order with Wayback Machine being the default, that can easily be changed in the code. Archivers are tested in a meaningful order with Wayback Machine being the failsafe, that can easily be changed in the code.
```mermaid ```mermaid
graph TD graph TD
A(Archiver) -->|parent of| B(YoutubeDLArchiver) A(Archiver) -->|parent of| B(TelethonArchiver)
A -->|parent of| C(TikTokArchiver) A -->|parent of| C(TiktokArchiver)
A -->|parent of| D(TwitterArchiver) A -->|parent of| D(YoutubeDLArchiver)
A -->|parent of| E(TelegramArchiver) A -->|parent of| E(TelegramArchiver)
A -->|parent of| F(TelethonArchiver) A -->|parent of| F(TwitterArchiver)
A -->|parent of| G(WaybackArchiver) A -->|parent of| G(VkArchiver)
A -->|parent of| H(WaybackArchiver)
``` ```
### Current Storages ### Current Storages
```mermaid ```mermaid

Wyświetl plik

@ -5,4 +5,5 @@ from .telethon_archiver import TelethonArchiver
from .tiktok_archiver import TiktokArchiver from .tiktok_archiver import TiktokArchiver
from .wayback_archiver import WaybackArchiver from .wayback_archiver import WaybackArchiver
from .youtubedl_archiver import YoutubeDLArchiver from .youtubedl_archiver import YoutubeDLArchiver
from .twitter_archiver import TwitterArchiver from .twitter_archiver import TwitterArchiver
from .vk_archiver import VkArchiver

Wyświetl plik

@ -1,4 +1,4 @@
import os, datetime, shutil, hashlib, time, requests, re import os, datetime, shutil, hashlib, time, requests, re, mimetypes
from dataclasses import dataclass from dataclasses import dataclass
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from urllib.parse import urlparse from urllib.parse import urlparse
@ -58,7 +58,13 @@ class Archiver(ABC):
<h3><a href="{url}">{url}</a></h3><ul>''' <h3><a href="{url}">{url}</a></h3><ul>'''
for url_info in urls_info: for url_info in urls_info:
page += f'''<li><a href="{url_info['cdn_url']}">{url_info['key']}</a>: {url_info['hash']}</li>''' mime_global = self._guess_file_type(url_info["key"])
preview = ""
if mime_global == "image":
preview = f'<img src="{url_info["cdn_url"]}" style="max-height:200px;max-width:400px;"></img>'
elif mime_global == "video":
preview = f'<video src="{url_info["cdn_url"]}" controls style="max-height:400px;max-width:400px;"></video>'
page += f'''<li><a href="{url_info['cdn_url']}">{preview}{url_info['key']}</a>: {url_info['hash']}</li>'''
page += f"</ul><h2>{self.name} object data:</h2><code>{object}</code>" page += f"</ul><h2>{self.name} object data:</h2><code>{object}</code>"
page += f"</body></html>" page += f"</body></html>"
@ -77,7 +83,18 @@ class Archiver(ABC):
page_cdn = self.storage.get_cdn_url(page_key) page_cdn = self.storage.get_cdn_url(page_key)
return (page_cdn, page_hash, thumbnail) return (page_cdn, page_hash, thumbnail)
def _guess_file_type(self, path: str):
"""
Receives a URL or filename and returns global mimetype like 'image' or 'video'
see https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
"""
mime = mimetypes.guess_type(path)[0]
if mime is not None:
return mime.split("/")[0]
return ""
# eg images in a tweet save to cloud storage # eg images in a tweet save to cloud storage
def generate_media_page(self, urls, url, object): def generate_media_page(self, urls, url, object):
""" """
For a list of media urls, fetch them, upload them For a list of media urls, fetch them, upload them
@ -208,12 +225,11 @@ class Archiver(ABC):
key = key_folder + fname key = key_folder + fname
self.storage.upload(thumbnail_filename, key) self.storage.upload(thumbnail_filename, key)
cdn_url = self.storage.get_cdn_url(key) cdn_url = self.storage.get_cdn_url(key)
cdn_urls.append(cdn_url) cdn_urls.append(cdn_url)
if len(cdn_urls) == 0: if len(cdn_urls) == 0:
return ('None', 'None') return ('', '')
key_thumb = cdn_urls[int(len(cdn_urls) * 0.1)] key_thumb = cdn_urls[int(len(cdn_urls) * 0.1)]

Wyświetl plik

@ -53,7 +53,6 @@ class TelegramArchiver(Archiver):
key = self.get_key(video_id) key = self.get_key(video_id)
filename = os.path.join(Storage.TMP_FOLDER, key) filename = os.path.join(Storage.TMP_FOLDER, key)
cdn_url = self.storage.get_cdn_url(key)
if check_if_exists and self.storage.exists(key): if check_if_exists and self.storage.exists(key):
status = 'already archived' status = 'already archived'
@ -84,5 +83,6 @@ class TelegramArchiver(Archiver):
filename, key, duration=duration) filename, key, duration=duration)
os.remove(filename) os.remove(filename)
cdn_url = self.storage.get_cdn_url(key)
return ArchiveResult(status=status, cdn_url=cdn_url, thumbnail=key_thumb, thumbnail_index=thumb_index, return ArchiveResult(status=status, cdn_url=cdn_url, thumbnail=key_thumb, thumbnail_index=thumb_index,
duration=duration, title=original_url, timestamp=s.find_all('time')[1].get('datetime'), hash=hash, screenshot=screenshot) duration=duration, title=original_url, timestamp=s.find_all('time')[1].get('datetime'), hash=hash, screenshot=screenshot)

Wyświetl plik

@ -8,6 +8,7 @@ from telethon.errors import ChannelInvalidError
from storages import Storage from storages import Storage
from .base_archiver import Archiver, ArchiveResult from .base_archiver import Archiver, ArchiveResult
from configs import TelethonConfig from configs import TelethonConfig
from utils import getattr_or
class TelethonArchiver(Archiver): class TelethonArchiver(Archiver):
@ -16,8 +17,9 @@ class TelethonArchiver(Archiver):
def __init__(self, storage: Storage, driver, config: TelethonConfig): def __init__(self, storage: Storage, driver, config: TelethonConfig):
super().__init__(storage, driver) super().__init__(storage, driver)
self.client = TelegramClient("./anon", config.api_id, config.api_hash) if config:
self.bot_token = config.bot_token self.client = TelegramClient("./anon", config.api_id, config.api_hash)
self.bot_token = config.bot_token
def _get_media_posts_in_group(self, chat, original_post, max_amp=10): def _get_media_posts_in_group(self, chat, original_post, max_amp=10):
""" """
@ -26,8 +28,8 @@ class TelethonArchiver(Archiver):
of `max_amp` both ways of `max_amp` both ways
Returns a list of [post] where each post has media and is in the same grouped_id Returns a list of [post] where each post has media and is in the same grouped_id
""" """
if original_post.grouped_id is None: if getattr_or(original_post, "grouped_id") is None:
return [original_post] if original_post.media is not None else [] return [original_post] if getattr_or(original_post, "media") else []
search_ids = [i for i in range(original_post.id - max_amp, original_post.id + max_amp + 1)] search_ids = [i for i in range(original_post.id - max_amp, original_post.id + max_amp + 1)]
posts = self.client.get_messages(chat, ids=search_ids) posts = self.client.get_messages(chat, ids=search_ids)
@ -38,6 +40,10 @@ class TelethonArchiver(Archiver):
return media return media
def download(self, url, check_if_exists=False): def download(self, url, check_if_exists=False):
if not hasattr(self, "client"):
logger.error('Missing Telethon config')
return False
# detect URLs that we definitely cannot handle # detect URLs that we definitely cannot handle
matches = self.link_pattern.findall(url) matches = self.link_pattern.findall(url)
if not len(matches): if not len(matches):
@ -61,12 +67,14 @@ class TelethonArchiver(Archiver):
logger.error(f"Could not fetch telegram {url}. This error can be fixed if you setup a bot_token in addition to api_id and api_hash: {e}") logger.error(f"Could not fetch telegram {url}. This error can be fixed if you setup a bot_token in addition to api_id and api_hash: {e}")
return False return False
if post is None: return False
media_posts = self._get_media_posts_in_group(chat, post) media_posts = self._get_media_posts_in_group(chat, post)
logger.debug(f'got {len(media_posts)=} for {url=}') logger.debug(f'got {len(media_posts)=} for {url=}')
screenshot = self.get_screenshot(url) screenshot = self.get_screenshot(url)
if len(media_posts) > 1: if len(media_posts) > 0:
key = self.get_html_key(url) key = self.get_html_key(url)
if check_if_exists and self.storage.exists(key): if check_if_exists and self.storage.exists(key):
@ -78,7 +86,7 @@ class TelethonArchiver(Archiver):
group_id = post.grouped_id if post.grouped_id is not None else post.id group_id = post.grouped_id if post.grouped_id is not None else post.id
uploaded_media = [] uploaded_media = []
message = post.message message = post.message
for mp in media_posts: for i, mp in enumerate(media_posts):
if len(mp.message) > len(message): message = mp.message if len(mp.message) > len(message): message = mp.message
filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', str(mp.id)) filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', str(mp.id))
filename = self.client.download_media(mp.media, filename_dest) filename = self.client.download_media(mp.media, filename_dest)
@ -87,22 +95,13 @@ class TelethonArchiver(Archiver):
hash = self.get_hash(filename) hash = self.get_hash(filename)
cdn_url = self.storage.get_cdn_url(key) cdn_url = self.storage.get_cdn_url(key)
uploaded_media.append({'cdn_url': cdn_url, 'key': key, 'hash': hash}) uploaded_media.append({'cdn_url': cdn_url, 'key': key, 'hash': hash})
if i == 0:
key_thumb, thumb_index = self.get_thumbnails(filename, key)
os.remove(filename) os.remove(filename)
page_cdn, page_hash, _ = self.generate_media_page_html(url, uploaded_media, html.escape(str(post))) page_cdn, page_hash, _ = self.generate_media_page_html(url, uploaded_media, html.escape(str(post)))
return ArchiveResult(status=status, cdn_url=page_cdn, title=message, timestamp=post.date, hash=page_hash, screenshot=screenshot) return ArchiveResult(status=status, cdn_url=page_cdn, title=message, timestamp=post.date, hash=page_hash, screenshot=screenshot, thumbnail=key_thumb, thumbnail_index=thumb_index)
elif len(media_posts) == 1:
key = self.get_key(f'{chat}_{post_id}')
filename = self.client.download_media(post.media, os.path.join(Storage.TMP_FOLDER, key))
key = filename.split(Storage.TMP_FOLDER)[1].replace(" ", "")
self.storage.upload(filename, key)
hash = self.get_hash(filename)
cdn_url = self.storage.get_cdn_url(key)
key_thumb, thumb_index = self.get_thumbnails(filename, key)
os.remove(filename)
return ArchiveResult(status=status, cdn_url=cdn_url, title=post.message, thumbnail=key_thumb, thumbnail_index=thumb_index, timestamp=post.date, hash=hash, screenshot=screenshot)
page_cdn, page_hash, _ = self.generate_media_page_html(url, [], html.escape(str(post))) page_cdn, page_hash, _ = self.generate_media_page_html(url, [], html.escape(str(post)))
return ArchiveResult(status=status, cdn_url=page_cdn, title=post.message, timestamp=post.date, hash=page_hash, screenshot=screenshot) return ArchiveResult(status=status, cdn_url=page_cdn, title=post.message, timestamp=getattr_or(post, "date"), hash=page_hash, screenshot=screenshot)

Wyświetl plik

@ -0,0 +1,89 @@
import re, json, requests
import vk_api, dateparser
from bs4 import BeautifulSoup
from loguru import logger
from storages import Storage
from .base_archiver import Archiver, ArchiveResult
from configs import VkConfig
class VkArchiver(Archiver):
    """
    VK videos are handled by YoutubeDLArchiver, this archiver gets posts text and images.
    Currently only works for /wall and /photo posts.
    """
    name = "vk"
    # "wall-123_456" / "photo-123_456"; ".{0,1}" tolerates the optional "-" used by communities
    wall_pattern = re.compile(r"(wall.{0,1}\d+_\d+)")
    photo_pattern = re.compile(r"(photo.{0,1}\d+_\d+)")
    onclick_pattern = re.compile(r"({.*})")

    def __init__(self, storage: Storage, driver, config: VkConfig):
        super().__init__(storage, driver)
        # config is None when VK credentials are not configured; download() guards on
        # the resulting absence of self.vk_session instead of failing with AttributeError
        if config is not None:
            self.vk_session = vk_api.VkApi(config.username, config.password)
            self.vk_session.auth(token_only=True)

    def download(self, url, check_if_exists=False):
        """Archive a VK wall or photo URL; returns ArchiveResult or False if unhandled."""
        if not hasattr(self, "vk_session"):
            logger.error('Missing VK config')
            return False
        # detect URLs that this archiver can handle
        _id, method = None, None
        if has_wall := self.wall_pattern.search(url):
            _id = has_wall[0]
            method = self.archive_wall
        elif has_photo := self.photo_pattern.search(url):
            _id = has_photo[0]
            method = self.archive_photo
        else:
            return False
        logger.info(f"found valid {_id=} from {url=}")
        # canonicalize so the same post reached via different URLs maps to one key
        proper_url = f'https://vk.com/{_id}'

        # if check if exists will not download again
        key = self.get_html_key(proper_url)
        if check_if_exists and self.storage.exists(key):
            screenshot = self.get_screenshot(proper_url)
            cdn_url = self.storage.get_cdn_url(key)
            return ArchiveResult(status="already archived", cdn_url=cdn_url, screenshot=screenshot)

        try:
            return method(proper_url, _id)
        except Exception as e:
            logger.error(f"something went wrong with vk archive, possibly 404 causing index out of range, or missing key: {e}")
        return False

    def archive_photo(self, photo_url, photo_id):
        """Fetch a single photo post via the VK API and archive its original image."""
        # NOTE: these are API query parameters (requests.get's 2nd positional arg is
        # `params`), not HTTP headers — the original name `headers` was misleading
        params = {"access_token": self.vk_session.token["access_token"], "photos": photo_id.replace("photo", ""), "extended": "1", "v": self.vk_session.api_version}
        req = requests.get("https://api.vk.com/method/photos.getById", params)
        res = req.json()["response"][0]
        title = res["text"][:200]  # more on the page
        img_url = res["orig_photo"]["url"]
        time = dateparser.parse(str(res["date"]), settings={"RETURN_AS_TIMEZONE_AWARE": True, "TO_TIMEZONE": "UTC"})

        page_cdn, page_hash, thumbnail = self.generate_media_page([img_url], photo_url, res)
        screenshot = self.get_screenshot(photo_url)
        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=time, title=title)

    def archive_wall(self, wall_url, wall_id):
        """Fetch a wall post via the VK API and archive its text plus attached images."""
        # API query parameters, not HTTP headers (see archive_photo)
        params = {"access_token": self.vk_session.token["access_token"], "posts": wall_id.replace("wall", ""), "extended": "1", "copy_history_depth": "2", "v": self.vk_session.api_version}
        req = requests.get("https://api.vk.com/method/wall.getById", params)
        res = req.json()["response"]
        wall = res["items"][0]
        img_urls = []
        if "attachments" in wall:
            for a in wall["attachments"]:
                attachment = a[a["type"]]
                # prefer the thumbnail container when present (e.g. video attachments)
                if "thumb" in attachment:
                    attachment = attachment["thumb"]
                if "sizes" in attachment:
                    # last entry of "sizes" is the largest available rendition
                    try:
                        img_urls.append(attachment["sizes"][-1]["url"])
                    except Exception as e:
                        logger.warning(f"could not get image from attachment: {e}")

        title = wall["text"][:200]  # more on the page
        time = dateparser.parse(str(wall["date"]), settings={"RETURN_AS_TIMEZONE_AWARE": True, "TO_TIMEZONE": "UTC"})

        page_cdn, page_hash, thumbnail = self.generate_media_page(img_urls, wall_url, res)
        screenshot = self.get_screenshot(wall_url)
        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=time, title=title)

Wyświetl plik

@ -18,10 +18,12 @@ class WaybackArchiver(Archiver):
def __init__(self, storage: Storage, driver, config: WaybackConfig): def __init__(self, storage: Storage, driver, config: WaybackConfig):
super(WaybackArchiver, self).__init__(storage, driver) super(WaybackArchiver, self).__init__(storage, driver)
self.config = config self.config = config
# TODO: this logic should live at the auto-archiver level
self.seen_urls = {} self.seen_urls = {}
def download(self, url, check_if_exists=False): def download(self, url, check_if_exists=False):
if self.config is None:
logger.error('Missing Wayback config')
return False
if check_if_exists: if check_if_exists:
if url in self.seen_urls: return self.seen_urls[url] if url in self.seen_urls: return self.seen_urls[url]
@ -57,7 +59,7 @@ class WaybackArchiver(Archiver):
retries += 1 retries += 1
if status_r.status_code != 200: if status_r.status_code != 200:
return ArchiveResult(status="Internet archive failed", screenshot=screenshot) return ArchiveResult(status=f"Internet archive failed: check https://web.archive.org/save/status/{job_id}", screenshot=screenshot)
status_json = status_r.json() status_json = status_r.json()
if status_json['status'] != 'success': if status_json['status'] != 'success':

Wyświetl plik

@ -106,11 +106,11 @@ class YoutubeDLArchiver(Archiver):
os.remove(filename) os.remove(filename)
timestamp = datetime.datetime.utcfromtimestamp(info['timestamp']).replace(tzinfo=datetime.timezone.utc).isoformat() \ timestamp = None
if 'timestamp' in info else \ if 'timestamp' in info and info['timestamp'] is not None:
datetime.datetime.strptime(info['upload_date'], '%Y%m%d').replace(tzinfo=datetime.timezone.utc) \ timestamp = datetime.datetime.utcfromtimestamp(info['timestamp']).replace(tzinfo=datetime.timezone.utc).isoformat()
if 'upload_date' in info and info['upload_date'] is not None else \ elif 'upload_date' in info and info['upload_date'] is not None:
None timestamp = datetime.datetime.strptime(info['upload_date'], '%Y%m%d').replace(tzinfo=datetime.timezone.utc)
return ArchiveResult(status=status, cdn_url=cdn_url, thumbnail=key_thumb, thumbnail_index=thumb_index, duration=duration, return ArchiveResult(status=status, cdn_url=cdn_url, thumbnail=key_thumb, thumbnail_index=thumb_index, duration=duration,
title=info['title'] if 'title' in info else None, timestamp=timestamp, hash=hash, screenshot=screenshot) title=info['title'] if 'title' in info else None, timestamp=timestamp, hash=hash, screenshot=screenshot)

Wyświetl plik

@ -3,7 +3,7 @@ import os, datetime, shutil, traceback, random
from loguru import logger from loguru import logger
from slugify import slugify from slugify import slugify
from archivers import TelethonArchiver, TelegramArchiver, TiktokArchiver, YoutubeDLArchiver, TwitterArchiver, WaybackArchiver, ArchiveResult, Archiver from archivers import TelethonArchiver, TelegramArchiver, TiktokArchiver, YoutubeDLArchiver, TwitterArchiver, VkArchiver, WaybackArchiver, ArchiveResult, Archiver
from utils import GWorksheet, mkdir_if_not_exists, expand_url from utils import GWorksheet, mkdir_if_not_exists, expand_url
from configs import Config from configs import Config
from storages import Storage from storages import Storage
@ -95,6 +95,7 @@ def process_sheet(c: Config):
YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie), YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie),
TelegramArchiver(storage, c.webdriver), TelegramArchiver(storage, c.webdriver),
TwitterArchiver(storage, c.webdriver), TwitterArchiver(storage, c.webdriver),
VkArchiver(storage, c.webdriver, c.vk_config),
WaybackArchiver(storage, c.webdriver, c.wayback_config) WaybackArchiver(storage, c.webdriver, c.wayback_config)
] ]

Wyświetl plik

@ -1,29 +1,30 @@
import gspread import shutil
import argparse
import auto_archive import auto_archive
from loguru import logger from loguru import logger
from configs import Config
from storages import Storage
from utils import mkdir_if_not_exists
def main(): def main():
parser = argparse.ArgumentParser( c = Config()
description="Automatically use youtube-dl to download media from a Google Sheet") c.parse()
parser.add_argument("--sheet", action="store", dest="sheet") logger.info(f'Opening document {c.sheet} to look for sheet names to archive')
args = parser.parse_args() gc = c.gsheets_client
sh = gc.open(c.sheet)
logger.info("Opening document " + args.sheet)
gc = gspread.service_account(filename='service_account.json')
sh = gc.open(args.sheet)
wks = sh.get_worksheet(0) wks = sh.get_worksheet(0)
values = wks.get_all_values() values = wks.get_all_values()
mkdir_if_not_exists(Storage.TMP_FOLDER)
for i in range(11, len(values)): for i in range(11, len(values)):
sheet_name = values[i][0] c.sheet = values[i][0]
logger.info(f"Processing {c.sheet}")
auto_archive.process_sheet(c)
c.destroy_webdriver()
shutil.rmtree(Storage.TMP_FOLDER)
logger.info("Processing " + sheet_name)
auto_archive.process_sheet(sheet_name)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

Wyświetl plik

@ -1,4 +1,5 @@
from .config import Config from .config import Config
from .selenium_config import SeleniumConfig from .selenium_config import SeleniumConfig
from .telethon_config import TelethonConfig from .telethon_config import TelethonConfig
from .wayback_config import WaybackConfig from .wayback_config import WaybackConfig
from .vk_config import VkConfig

Wyświetl plik

@ -4,11 +4,13 @@ import gspread
from loguru import logger from loguru import logger
from selenium import webdriver from selenium import webdriver
from dataclasses import asdict from dataclasses import asdict
from selenium.common.exceptions import TimeoutException
from utils import GWorksheet, getattr_or from utils import GWorksheet, getattr_or
from .wayback_config import WaybackConfig from .wayback_config import WaybackConfig
from .telethon_config import TelethonConfig from .telethon_config import TelethonConfig
from .selenium_config import SeleniumConfig from .selenium_config import SeleniumConfig
from .vk_config import VkConfig
from storages import Storage, S3Config, S3Storage, GDStorage, GDConfig, LocalStorage, LocalConfig from storages import Storage, S3Config, S3Storage, GDStorage, GDConfig, LocalStorage, LocalConfig
@ -120,6 +122,7 @@ class Config:
secret=secrets["wayback"]["secret"], secret=secrets["wayback"]["secret"],
) )
else: else:
self.wayback_config = None
logger.debug(f"'wayback' key not present in the {self.config_file=}") logger.debug(f"'wayback' key not present in the {self.config_file=}")
# telethon config # telethon config
@ -130,8 +133,19 @@ class Config:
bot_token=secrets["telegram"].get("bot_token", None) bot_token=secrets["telegram"].get("bot_token", None)
) )
else: else:
self.telegram_config = None
logger.debug(f"'telegram' key not present in the {self.config_file=}") logger.debug(f"'telegram' key not present in the {self.config_file=}")
# vk config
if "vk" in secrets:
self.vk_config = VkConfig(
username=secrets["vk"]["username"],
password=secrets["vk"]["password"]
)
else:
self.vk_config = None
logger.debug(f"'vk' key not present in the {self.config_file=}")
del self.config["secrets"] # delete to prevent leaks del self.config["secrets"] # delete to prevent leaks
def set_log_files(self): def set_log_files(self):
@ -197,16 +211,23 @@ class Config:
def destroy_webdriver(self): def destroy_webdriver(self):
if self.webdriver is not None and type(self.webdriver) != str: if self.webdriver is not None and type(self.webdriver) != str:
self.webdriver.quit() self.webdriver.quit()
del self.webdriver
def recreate_webdriver(self): def recreate_webdriver(self):
self.destroy_webdriver()
options = webdriver.FirefoxOptions() options = webdriver.FirefoxOptions()
options.headless = True options.headless = True
options.set_preference('network.protocol-handler.external.tg', False) options.set_preference('network.protocol-handler.external.tg', False)
self.webdriver = webdriver.Firefox(options=options) try:
self.webdriver.set_window_size(self.selenium_config.window_width, new_webdriver = webdriver.Firefox(options=options)
# only destroy if creation is successful
self.destroy_webdriver()
self.webdriver = new_webdriver
self.webdriver.set_window_size(self.selenium_config.window_width,
self.selenium_config.window_height) self.selenium_config.window_height)
self.webdriver.set_page_load_timeout(self.selenium_config.timeout_seconds) self.webdriver.set_page_load_timeout(self.selenium_config.timeout_seconds)
except TimeoutException as e:
logger.error(f"failed to get new webdriver, possibly due to insufficient system resources or timeout settings: {e}")
def __str__(self) -> str: def __str__(self) -> str:
return json.dumps({ return json.dumps({
@ -225,6 +246,7 @@ class Config:
"local_config": hasattr(self, "local_config"), "local_config": hasattr(self, "local_config"),
"wayback_config": self.wayback_config != None, "wayback_config": self.wayback_config != None,
"telegram_config": self.telegram_config != None, "telegram_config": self.telegram_config != None,
"vk_config": self.vk_config != None,
"gsheets_client": self.gsheets_client != None, "gsheets_client": self.gsheets_client != None,
"column_names": self.column_names, "column_names": self.column_names,
}, ensure_ascii=False, indent=4) }, ensure_ascii=False, indent=4)

Wyświetl plik

@ -0,0 +1,8 @@
from dataclasses import dataclass
@dataclass
class VkConfig:
username: str
password: str

Wyświetl plik

@ -39,6 +39,11 @@ secrets:
# optional, but allows access to more content such as large videos, talk to @botfather # optional, but allows access to more content such as large videos, talk to @botfather
bot_token: your bot-token bot_token: your bot-token
# vkontakte (vk.com) credentials
vk:
username: "phone number or email"
password: "password"
google_sheets: google_sheets:
# local filename: defaults to service_account.json, see https://gspread.readthedocs.io/en/latest/oauth2.html#for-bots-using-service-account # local filename: defaults to service_account.json, see https://gspread.readthedocs.io/en/latest/oauth2.html#for-bots-using-service-account
service_account: "service_account.json" service_account: "service_account.json"