kopia lustrzana https://github.com/bellingcat/auto-archiver
new archiver, new hack, ready
rodzic
b4e9d6a2a8
commit
ffe1c425a0
1
Pipfile
1
Pipfile
|
@ -24,6 +24,7 @@ python-slugify = "*"
|
|||
pyyaml = "*"
|
||||
dateparser = "*"
|
||||
vk-url-scraper = "*"
|
||||
python-twitter-v2 = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "eacd9633c33d4d526d7737fc6bf83ab713205f28f819530f549378fbd14da3d8"
|
||||
"sha256": "1ed953d08e31d891de0f887e520f12025d109a20718b27dd8f9b361f73c95651"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -29,7 +29,7 @@
|
|||
"sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b",
|
||||
"sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"markers": "python_full_version >= '3.5.0'",
|
||||
"version": "==1.10"
|
||||
},
|
||||
"attrs": {
|
||||
|
@ -40,6 +40,13 @@
|
|||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.4.0"
|
||||
},
|
||||
"authlib": {
|
||||
"hashes": [
|
||||
"sha256:b83cf6360c8e92b0e9df0d1f32d675790bcc4e3c03977499b1eed24dcdef4252",
|
||||
"sha256:ecf4a7a9f2508c0bb07e93a752dd3c495cfaffc20e864ef0ffc95e3f40d2abaf"
|
||||
],
|
||||
"version": "==0.15.5"
|
||||
},
|
||||
"beautifulsoup4": {
|
||||
"hashes": [
|
||||
"sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30",
|
||||
|
@ -50,19 +57,19 @@
|
|||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:13efff22f1cb6d25ec7027edaccdfdd515ba593e093173beb09094cff898a8cc",
|
||||
"sha256:945d49941541a3cbb02710361be64b22f98e68c2e447229f0d51f7c215009e28"
|
||||
"sha256:a547880008f0031834fe0122e91cc064438f54d15b9c34729672c53203a0c740",
|
||||
"sha256:bcbf31eff02bc01f9c55e2d428b4f6a27701c86b4600cbe4e9d45aa1dd61f036"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.24.13"
|
||||
"version": "==1.24.17"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:df75e53576b061818bbce4bd70221749e40cc91d16a2b6c03fbeec8023665734",
|
||||
"sha256:fbc09558c02d415e8646520f95db7e8d313460938780fa6040b00865f098fd55"
|
||||
"sha256:af9d44592b4d0d6509b355b2ec5cb14fd23eadf7c33d13b880266dede22759ac",
|
||||
"sha256:baf60b803ffd7b1dbc9c93dd2049fe2372699e4c993c9d33713667acdea64d1f"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.27.13"
|
||||
"version": "==1.27.17"
|
||||
},
|
||||
"brotli": {
|
||||
"hashes": [
|
||||
|
@ -215,7 +222,7 @@
|
|||
"sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597",
|
||||
"sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"markers": "python_full_version >= '3.5.0'",
|
||||
"version": "==2.0.12"
|
||||
},
|
||||
"click": {
|
||||
|
@ -265,8 +272,17 @@
|
|||
"sha256:f224ad253cc9cea7568f49077007d2263efa57396a2f2f78114066fd54b5c68e",
|
||||
"sha256:f8ec91983e638a9bcd75b39f1396e5c0dc2330cbd9ce4accefe68717e6779e0a"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==37.0.2"
|
||||
},
|
||||
"dataclasses-json": {
|
||||
"hashes": [
|
||||
"sha256:bc285b5f892094c3a53d558858a88553dd6a61a11ab1a8128a0e554385dcc5dd",
|
||||
"sha256:c2c11bc8214fbf709ffc369d11446ff6945254a7f09128154a7620613d8fda90"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==0.5.7"
|
||||
},
|
||||
"dateparser": {
|
||||
"hashes": [
|
||||
"sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9",
|
||||
|
@ -348,11 +364,11 @@
|
|||
},
|
||||
"googleapis-common-protos": {
|
||||
"hashes": [
|
||||
"sha256:023eaea9d8c1cceccd9587c6af6c20f33eeeb05d4148670f2b0322dc1511700c",
|
||||
"sha256:b09b56f5463070c2153753ef123f07d2e49235e89148e9b2459ec8ed2f68d7d3"
|
||||
"sha256:6f1369b58ed6cf3a4b7054a44ebe8d03b29c309257583a2bbdc064cd1e4a1442",
|
||||
"sha256:87955d7b3a73e6e803f2572a33179de23989ebba725e05ea42f24838b792e461"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.56.2"
|
||||
"version": "==1.56.3"
|
||||
},
|
||||
"gspread": {
|
||||
"hashes": [
|
||||
|
@ -383,16 +399,16 @@
|
|||
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
|
||||
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"markers": "python_full_version >= '3.5.0'",
|
||||
"version": "==3.3"
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:5d26852efe48c0a32b0509ffbc583fda1a2266545a78d104a6f4aff3db17d700",
|
||||
"sha256:c58c8eb8a762858f49e18436ff552e83914778e50e9d2f1660535ffb364552ec"
|
||||
"sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670",
|
||||
"sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"
|
||||
],
|
||||
"markers": "python_version < '3.10'",
|
||||
"version": "==4.11.4"
|
||||
"version": "==4.12.0"
|
||||
},
|
||||
"itsdangerous": {
|
||||
"hashes": [
|
||||
|
@ -541,14 +557,36 @@
|
|||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.1.1"
|
||||
},
|
||||
"marshmallow": {
|
||||
"hashes": [
|
||||
"sha256:53a1e0ee69f79e1f3e80d17393b25cfc917eda52f859e8183b4af72c3390c1f1",
|
||||
"sha256:a762c1d8b2bcb0e5c8e964850d03f9f3bffd6a12b626f3c14b9d6b1841999af5"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.16.0"
|
||||
},
|
||||
"marshmallow-enum": {
|
||||
"hashes": [
|
||||
"sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58",
|
||||
"sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"
|
||||
],
|
||||
"version": "==1.5.1"
|
||||
},
|
||||
"mutagen": {
|
||||
"hashes": [
|
||||
"sha256:6397602efb3c2d7baebd2166ed85731ae1c1d475abca22090b7141ff5034b3e1",
|
||||
"sha256:9c9f243fcec7f410f138cb12c21c84c64fde4195481a30c9bfb05b5f003adfed"
|
||||
],
|
||||
"markers": "python_version >= '3.5' and python_version < '4'",
|
||||
"markers": "python_version < '4' and python_full_version >= '3.5.0'",
|
||||
"version": "==1.45.1"
|
||||
},
|
||||
"mypy-extensions": {
|
||||
"hashes": [
|
||||
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
|
||||
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
|
||||
],
|
||||
"version": "==0.4.3"
|
||||
},
|
||||
"oauth2client": {
|
||||
"hashes": [
|
||||
"sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac",
|
||||
|
@ -573,35 +611,33 @@
|
|||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.2.0"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
|
||||
"sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==21.3"
|
||||
},
|
||||
"protobuf": {
|
||||
"hashes": [
|
||||
"sha256:06059eb6953ff01e56a25cd02cca1a9649a75a7e65397b5b9b4e929ed71d10cf",
|
||||
"sha256:097c5d8a9808302fb0da7e20edf0b8d4703274d140fd25c5edabddcde43e081f",
|
||||
"sha256:284f86a6207c897542d7e956eb243a36bb8f9564c1742b253462386e96c6b78f",
|
||||
"sha256:32ca378605b41fd180dfe4e14d3226386d8d1b002ab31c969c366549e66a2bb7",
|
||||
"sha256:3cc797c9d15d7689ed507b165cd05913acb992d78b379f6014e013f9ecb20996",
|
||||
"sha256:62f1b5c4cd6c5402b4e2d63804ba49a327e0c386c99b1675c8a0fefda23b2067",
|
||||
"sha256:69ccfdf3657ba59569c64295b7d51325f91af586f8d5793b734260dfe2e94e2c",
|
||||
"sha256:6f50601512a3d23625d8a85b1638d914a0970f17920ff39cec63aaef80a93fb7",
|
||||
"sha256:7403941f6d0992d40161aa8bb23e12575637008a5a02283a930addc0508982f9",
|
||||
"sha256:755f3aee41354ae395e104d62119cb223339a8f3276a0cd009ffabfcdd46bb0c",
|
||||
"sha256:77053d28427a29987ca9caf7b72ccafee011257561259faba8dd308fda9a8739",
|
||||
"sha256:7e371f10abe57cee5021797126c93479f59fccc9693dafd6bd5633ab67808a91",
|
||||
"sha256:9016d01c91e8e625141d24ec1b20fed584703e527d28512aa8c8707f105a683c",
|
||||
"sha256:9be73ad47579abc26c12024239d3540e6b765182a91dbc88e23658ab71767153",
|
||||
"sha256:adc31566d027f45efe3f44eeb5b1f329da43891634d61c75a5944e9be6dd42c9",
|
||||
"sha256:adfc6cf69c7f8c50fd24c793964eef18f0ac321315439d94945820612849c388",
|
||||
"sha256:af0ebadc74e281a517141daad9d0f2c5d93ab78e9d455113719a45a49da9db4e",
|
||||
"sha256:cb29edb9eab15742d791e1025dd7b6a8f6fcb53802ad2f6e3adcb102051063ab",
|
||||
"sha256:cd68be2559e2a3b84f517fb029ee611546f7812b1fdd0aa2ecc9bc6ec0e4fdde",
|
||||
"sha256:cdee09140e1cd184ba9324ec1df410e7147242b94b5f8b0c64fc89e38a8ba531",
|
||||
"sha256:db977c4ca738dd9ce508557d4fce0f5aebd105e158c725beec86feb1f6bc20d8",
|
||||
"sha256:dd5789b2948ca702c17027c84c2accb552fc30f4622a98ab5c51fcfe8c50d3e7",
|
||||
"sha256:e250a42f15bf9d5b09fe1b293bdba2801cd520a9f5ea2d7fb7536d4441811d20",
|
||||
"sha256:ff8d8fa42675249bb456f5db06c00de6c2f4c27a065955917b28c4f15978b9c3"
|
||||
"sha256:095fda15fe04a79c9f0edab09b424be46dd057b15986d235b84c8cea91659df7",
|
||||
"sha256:29eaf8e9db33bc3bae14576ad61370aa2b64ea5d6e6cd705042692e5e0404b10",
|
||||
"sha256:4758b9c22ad0486639a68cea58d38571f233019a73212d78476ec648f68a49a3",
|
||||
"sha256:57a593e40257ab4f164fe6e171651b1386c98f8ec5f5a8643642889c50d4f3c4",
|
||||
"sha256:5f8c7488e74024fa12b46aab4258f707d7d6e94c8d322d7c45cc13770f66ab59",
|
||||
"sha256:7b2dcca25d88ec77358eed3d031c8260b5bf3023fff03a31c9584591c5910833",
|
||||
"sha256:853708afc3a7eed4df28a8d4bd4812f829f8d736c104dd8d584ccff27969e311",
|
||||
"sha256:863f65e137d9de4a76cac39ae731a19bea1c30997f512ecf0dc9348112313401",
|
||||
"sha256:9b42afb67e19010cdda057e439574ccd944902ea14b0d52ba0bfba2aad50858d",
|
||||
"sha256:b82ac05b0651a4d2b9d56f5aeef3d711f5858eb4b71c13d77553739e5930a74a",
|
||||
"sha256:d622dc75e289e8b3031dd8b4e87df508f11a6b3d86a49fb50256af7ce030d35b",
|
||||
"sha256:e3d3df3292ab4bae85213b9ebef566b5aedb45f97425a92fac5b2e431d31e71c",
|
||||
"sha256:ef0768a609a02b2b412fa0f59f1242f1597e9bb15188d043f3fde09115ca6c69",
|
||||
"sha256:f2f43ae8dff452aee3026b59ea0a09245ab2529a55a0984992e76bcf848610e1"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.20.1"
|
||||
"version": "==4.21.2"
|
||||
},
|
||||
"pyaes": {
|
||||
"hashes": [
|
||||
|
@ -654,36 +690,39 @@
|
|||
},
|
||||
"pycryptodomex": {
|
||||
"hashes": [
|
||||
"sha256:1ca8e1b4c62038bb2da55451385246f51f412c5f5eabd64812c01766a5989b4a",
|
||||
"sha256:298c00ea41a81a491d5b244d295d18369e5aac4b61b77b2de5b249ca61cd6659",
|
||||
"sha256:2aa887683eee493e015545bd69d3d21ac8d5ad582674ec98f4af84511e353e45",
|
||||
"sha256:2ce76ed0081fd6ac8c74edc75b9d14eca2064173af79843c24fa62573263c1f2",
|
||||
"sha256:3da13c2535b7aea94cc2a6d1b1b37746814c74b6e80790daddd55ca5c120a489",
|
||||
"sha256:406ec8cfe0c098fadb18d597dc2ee6de4428d640c0ccafa453f3d9b2e58d29e2",
|
||||
"sha256:4d0db8df9ffae36f416897ad184608d9d7a8c2b46c4612c6bc759b26c073f750",
|
||||
"sha256:530756d2faa40af4c1f74123e1d889bd07feae45bac2fd32f259a35f7aa74151",
|
||||
"sha256:77931df40bb5ce5e13f4de2bfc982b2ddc0198971fbd947776c8bb5050896eb2",
|
||||
"sha256:797a36bd1f69df9e2798e33edb4bd04e5a30478efc08f9428c087f17f65a7045",
|
||||
"sha256:8085bd0ad2034352eee4d4f3e2da985c2749cb7344b939f4d95ead38c2520859",
|
||||
"sha256:8536bc08d130cae6dcba1ea689f2913dfd332d06113904d171f2f56da6228e89",
|
||||
"sha256:a4d412eba5679ede84b41dbe48b1bed8f33131ab9db06c238a235334733acc5e",
|
||||
"sha256:aebecde2adc4a6847094d3bd6a8a9538ef3438a5ea84ac1983fcb167db614461",
|
||||
"sha256:b276cc4deb4a80f9dfd47a41ebb464b1fe91efd8b1b8620cf5ccf8b824b850d6",
|
||||
"sha256:b5a185ae79f899b01ca49f365bdf15a45d78d9856f09b0de1a41b92afce1a07f",
|
||||
"sha256:c4d8977ccda886d88dc3ca789de2f1adc714df912ff3934b3d0a3f3d777deafb",
|
||||
"sha256:c5dd3ffa663c982d7f1be9eb494a8924f6d40e2e2f7d1d27384cfab1b2ac0662",
|
||||
"sha256:ca88f2f7020002638276439a01ffbb0355634907d1aa5ca91f3dc0c2e44e8f3b",
|
||||
"sha256:d2cce1c82a7845d7e2e8a0956c6b7ed3f1661c9acf18eb120fc71e098ab5c6fe",
|
||||
"sha256:d709572d64825d8d59ea112e11cc7faf6007f294e9951324b7574af4251e4de8",
|
||||
"sha256:da8db8374295fb532b4b0c467e66800ef17d100e4d5faa2bbbd6df35502da125",
|
||||
"sha256:e36c7e3b5382cd5669cf199c4a04a0279a43b2a3bdd77627e9b89778ac9ec08c",
|
||||
"sha256:e95a4a6c54d27a84a4624d2af8bb9ee178111604653194ca6880c98dcad92f48",
|
||||
"sha256:ee835def05622e0c8b1435a906491760a43d0c462f065ec9143ec4b8d79f8bff",
|
||||
"sha256:f75009715dcf4a3d680c2338ab19dac5498f8121173a929872950f4fb3a48fbf",
|
||||
"sha256:f8524b8bc89470cec7ac51734907818d3620fb1637f8f8b542d650ebec42a126"
|
||||
"sha256:04cc393045a8f19dd110c975e30f38ed7ab3faf21ede415ea67afebd95a22380",
|
||||
"sha256:0776bfaf2c48154ab54ea45392847c1283d2fcf64e232e85565f858baedfc1fa",
|
||||
"sha256:0fadb9f7fa3150577800eef35f62a8a24b9ddf1563ff060d9bd3af22d3952c8c",
|
||||
"sha256:18e2ab4813883ae63396c0ffe50b13554b32bb69ec56f0afaf052e7a7ae0d55b",
|
||||
"sha256:191e73bc84a8064ad1874dba0ebadedd7cce4dedee998549518f2c74a003b2e1",
|
||||
"sha256:35a8f7afe1867118330e2e0e0bf759c409e28557fb1fc2fbb1c6c937297dbe9a",
|
||||
"sha256:3709f13ca3852b0b07fc04a2c03b379189232b24007c466be0f605dd4723e9d4",
|
||||
"sha256:4540904c09704b6f831059c0dfb38584acb82cb97b0125cd52688c1f1e3fffa6",
|
||||
"sha256:463119d7d22d0fc04a0f9122e9d3e6121c6648bcb12a052b51bd1eed1b996aa2",
|
||||
"sha256:46b3f05f2f7ac7841053da4e0f69616929ca3c42f238c405f6c3df7759ad2780",
|
||||
"sha256:48697790203909fab02a33226fda546604f4e2653f9d47bc5d3eb40879fa7c64",
|
||||
"sha256:5676a132169a1c1a3712edf25250722ebc8c9102aa9abd814df063ca8362454f",
|
||||
"sha256:65204412d0c6a8e3c41e21e93a5e6054a74fea501afa03046a388cf042e3377a",
|
||||
"sha256:67e1e6a92151023ccdfcfbc0afb3314ad30080793b4c27956ea06ab1fb9bcd8a",
|
||||
"sha256:6f5b6ba8aefd624834bc177a2ac292734996bb030f9d1b388e7504103b6fcddf",
|
||||
"sha256:7341f1bb2dadb0d1a0047f34c3a58208a92423cdbd3244d998e4b28df5eac0ed",
|
||||
"sha256:78d9621cf0ea35abf2d38fa2ca6d0634eab6c991a78373498ab149953787e5e5",
|
||||
"sha256:8eecdf9cdc7343001d047f951b9cc805cd68cb6cd77b20ea46af5bffc5bd3dfb",
|
||||
"sha256:94c7b60e1f52e1a87715571327baea0733708ab4723346598beca4a3b6879794",
|
||||
"sha256:996e1ba717077ce1e6d4849af7a1426f38b07b3d173b879e27d5e26d2e958beb",
|
||||
"sha256:a07a64709e366c2041cd5cfbca592b43998bf4df88f7b0ca73dca37071ccf1bd",
|
||||
"sha256:b6306403228edde6e289f626a3908a2f7f67c344e712cf7c0a508bab3ad9e381",
|
||||
"sha256:b9279adc16e4b0f590ceff581f53a80179b02cba9056010d733eb4196134a870",
|
||||
"sha256:c4cb9cb492ea7dcdf222a8d19a1d09002798ea516aeae8877245206d27326d86",
|
||||
"sha256:dd452a5af7014e866206d41751886c9b4bf379a339fdf2dbfc7dd16c0fb4f8e0",
|
||||
"sha256:e2b12968522a0358b8917fc7b28865acac002f02f4c4c6020fcb264d76bfd06d",
|
||||
"sha256:e3164a18348bd53c69b4435ebfb4ac8a4076291ffa2a70b54f0c4b80c7834b1d",
|
||||
"sha256:e47bf8776a7e15576887f04314f5228c6527b99946e6638cf2f16da56d260cab",
|
||||
"sha256:f8be976cec59b11f011f790b88aca67b4ea2bd286578d0bd3e31bcd19afcd3e4",
|
||||
"sha256:fc9bc7a9b79fe5c750fc81a307052f8daabb709bdaabb0fb18fb136b66b653b5"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==3.14.1"
|
||||
"version": "==3.15.0"
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
|
@ -729,7 +768,7 @@
|
|||
"sha256:b7e3b04a59693c42c36f9ab1cc2acc46fa5df8c78e178fc33a8d4cd05c8d498f",
|
||||
"sha256:d92a187be61fe482e4fd675b6d52200e7be63a12b724abbf931a40ce4fa92938"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"markers": "python_full_version >= '3.5.0'",
|
||||
"version": "==0.20.0"
|
||||
},
|
||||
"python-slugify": {
|
||||
|
@ -740,6 +779,14 @@
|
|||
"index": "pypi",
|
||||
"version": "==6.1.2"
|
||||
},
|
||||
"python-twitter-v2": {
|
||||
"hashes": [
|
||||
"sha256:0b6ab9abff4bc447ece4a2cc2439bc8776d306a3415a73d89013436e9a77894d",
|
||||
"sha256:f9fabdb2b34e7c49e9014e3acfd52ae5199248e8948567033fb4b73b927cfb0d"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.7.7"
|
||||
},
|
||||
"pytz": {
|
||||
"hashes": [
|
||||
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
|
||||
|
@ -923,10 +970,10 @@
|
|||
},
|
||||
"selenium": {
|
||||
"hashes": [
|
||||
"sha256:ba5b2633f43cf6fe9d308fa4a6996e00a101ab9cb1aad6fd91ae1f3dbe57f56f"
|
||||
"sha256:f67402b8f973aaa98d9c55b8f9aa63532009cd1859b2222a8b9800354942d8bc"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.2.0"
|
||||
"version": "==4.3.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
|
@ -941,7 +988,7 @@
|
|||
"sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663",
|
||||
"sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"markers": "python_full_version >= '3.5.0'",
|
||||
"version": "==1.2.0"
|
||||
},
|
||||
"snscrape": {
|
||||
|
@ -1010,9 +1057,25 @@
|
|||
"sha256:5b558f6e83cc20a37c3b61202476c5295d1addf57bd65543364e0337e37ed2bc",
|
||||
"sha256:a3d34de8fac26023eee701ed1e7bf4da9a8326b61a62934ec9e53b64970fd8fe"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"markers": "python_full_version >= '3.5.0'",
|
||||
"version": "==0.9.2"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708",
|
||||
"sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.2.0"
|
||||
},
|
||||
"typing-inspect": {
|
||||
"hashes": [
|
||||
"sha256:047d4097d9b17f46531bf6f014356111a1b6fb821a24fe7ac909853ca2a782aa",
|
||||
"sha256:3cd7d4563e997719a710a3bfe7ffb544c6b72069b6812a02e9b414a8fa3aaa6b",
|
||||
"sha256:b1f56c0783ef0f25fb064a01be6e5407e54cf4a4bf4f3ba3fe51e0bd6dcea9e5"
|
||||
],
|
||||
"version": "==0.7.1"
|
||||
},
|
||||
"tzdata": {
|
||||
"hashes": [
|
||||
"sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9",
|
||||
|
@ -1038,7 +1101,10 @@
|
|||
"version": "==4.1.1"
|
||||
},
|
||||
"urllib3": {
|
||||
"extras": [],
|
||||
"extras": [
|
||||
"secure",
|
||||
"socks"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
|
||||
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
|
||||
|
@ -1055,11 +1121,11 @@
|
|||
},
|
||||
"vk-url-scraper": {
|
||||
"hashes": [
|
||||
"sha256:181c8a4b69e395a68bdf00e3dc1717e5218960c9fda6e90eea9633ff26fc9257",
|
||||
"sha256:9cfc6bc3d7259f18508c3822955efac21ff9bad5bd886010b10f098ea10ad551"
|
||||
"sha256:7caf8d788fc268d311b13c06ff0cbd9413dd8978f463af970459b9e7e2f42ba5",
|
||||
"sha256:c4593d86b5096e75e2845e4838f46ce2cf0ac34b2fe1c4476d2eeb6744b18a11"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.3.2"
|
||||
"version": "==0.3.5"
|
||||
},
|
||||
"websockets": {
|
||||
"hashes": [
|
||||
|
|
|
@ -160,6 +160,7 @@ graph TD
|
|||
A -->|parent of| F(TwitterArchiver)
|
||||
A -->|parent of| G(VkArchiver)
|
||||
A -->|parent of| H(WaybackArchiver)
|
||||
F -->|parent of| I(TwitterApiArchiver)
|
||||
```
|
||||
### Current Storages
|
||||
```mermaid
|
||||
|
|
|
@ -6,4 +6,5 @@ from .tiktok_archiver import TiktokArchiver
|
|||
from .wayback_archiver import WaybackArchiver
|
||||
from .youtubedl_archiver import YoutubeDLArchiver
|
||||
from .twitter_archiver import TwitterArchiver
|
||||
from .vk_archiver import VkArchiver
|
||||
from .vk_archiver import VkArchiver
|
||||
from .twitter_api_archiver import TwitterApiArchiver
|
|
@ -149,9 +149,13 @@ class Archiver(ABC):
|
|||
if a string is passed in @with_extension the slug is appended with it if there is no "." in the slug
|
||||
if @append_datetime is true, the key adds a timestamp after the URL slug and before the extension
|
||||
"""
|
||||
slug = slugify(urlparse(url).path)
|
||||
url_path = urlparse(url).path
|
||||
path, ext = os.path.splitext(url_path)
|
||||
slug = slugify(path)
|
||||
if append_datetime:
|
||||
slug += "-" + slugify(datetime.datetime.utcnow().isoformat())
|
||||
if len(ext):
|
||||
slug += ext
|
||||
if with_extension is not None:
|
||||
if "." not in slug:
|
||||
slug += with_extension
|
||||
|
|
|
@ -41,7 +41,7 @@ class TelethonArchiver(Archiver):
|
|||
|
||||
def download(self, url, check_if_exists=False):
|
||||
if not hasattr(self, "client"):
|
||||
logger.error('Missing Telethon config')
|
||||
logger.warning('Missing Telethon config')
|
||||
return False
|
||||
|
||||
# detect URLs that we definitely cannot handle
|
||||
|
@ -80,7 +80,6 @@ class TelethonArchiver(Archiver):
|
|||
if check_if_exists and self.storage.exists(key):
|
||||
# only s3 storage supports storage.exists as not implemented on gd
|
||||
cdn_url = self.storage.get_cdn_url(key)
|
||||
status = 'already archived'
|
||||
return ArchiveResult(status='already archived', cdn_url=cdn_url, title=post.message, timestamp=post.date, screenshot=screenshot)
|
||||
|
||||
group_id = post.grouped_id if post.grouped_id is not None else post.id
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from pytwitter import Api
|
||||
|
||||
from storages.base_storage import Storage
|
||||
from configs import TwitterApiConfig
|
||||
from .base_archiver import ArchiveResult
|
||||
from .twitter_archiver import TwitterArchiver
|
||||
|
||||
|
||||
class TwitterApiArchiver(TwitterArchiver):
    """Archive tweets by looking them up through ``pytwitter.Api``.

    When a media item yields no usable URL the work is handed over to
    ``download_alternative`` inherited from ``TwitterArchiver``.
    """
    name = "twitter_api"

    def __init__(self, storage: Storage, driver, config: TwitterApiConfig):
        """Create the API client when usable credentials are supplied.

        ``config`` may be ``None`` (the caller passes ``None`` when no
        "twitter" secrets are configured). In that case, or when neither a
        bearer token nor the full consumer/access credential set is present,
        ``self.api`` is left unset and ``download`` declines every URL.
        """
        super().__init__(storage, driver)

        # Without this guard a missing "twitter" section crashed with
        # AttributeError on ``None.bearer_token`` at construction time.
        if config is None:
            return

        if config.bearer_token:
            self.api = Api(bearer_token=config.bearer_token)
        elif config.consumer_key and config.consumer_secret and config.access_token and config.access_secret:
            self.api = Api(
                consumer_key=config.consumer_key,
                consumer_secret=config.consumer_secret,
                access_token=config.access_token,
                access_secret=config.access_secret,
            )

    def download(self, url, check_if_exists=False):
        """Archive the tweet at ``url`` using the API client.

        Returns ``False`` when no API client is configured or ``url`` is not
        recognised by ``get_username_tweet_id``; otherwise returns an
        ``ArchiveResult`` (possibly the one produced by
        ``download_alternative``).
        """
        if not hasattr(self, "api"):
            logger.warning('Missing Twitter API config')
            return False

        username, tweet_id = self.get_username_tweet_id(url)
        if not username:
            return False

        tweet = self.api.get_tweet(
            tweet_id,
            expansions=["attachments.media_keys"],
            media_fields=["type", "duration_ms", "url", "variants"],
            tweet_fields=["attachments", "author_id", "created_at", "entities", "id", "text", "possibly_sensitive"],
        )
        timestamp = datetime.strptime(tweet.data.created_at, "%Y-%m-%dT%H:%M:%S.%fZ")

        # check if exists
        key = self.get_html_key(url)
        if check_if_exists and self.storage.exists(key):
            # only s3 storage supports storage.exists as not implemented on gd
            cdn_url = self.storage.get_cdn_url(key)
            screenshot = self.get_screenshot(url)
            return ArchiveResult(status='already archived', cdn_url=cdn_url, title=tweet.data.text, timestamp=timestamp, screenshot=screenshot)

        urls = []
        if tweet.includes:
            for m in tweet.includes.media:
                if m.url:
                    urls.append(m.url)
                elif hasattr(m, "variants"):
                    # NOTE(review): choose_variant indexes each variant as a
                    # mapping with "type"/"src" keys - confirm the variants
                    # returned by the API client have that shape.
                    urls.append(self.choose_variant(m.variants))
                else:
                    urls.append(None)  # will trigger the fallback below

        # A single unresolved media item sends the whole tweet to the fallback.
        if any(u is None for u in urls):
            logger.error(f"Should not have gotten None url for {tweet.includes.media=}")
            return self.download_alternative(url, tweet_id)
        logger.debug(f"found {urls=}")

        output = json.dumps({
            "id": tweet.data.id,
            "text": tweet.data.text,
            "created_at": tweet.data.created_at,
            "author_id": tweet.data.author_id,
            "geo": tweet.data.geo,
            "lang": tweet.data.lang,
            "media": urls
        }, ensure_ascii=False, indent=4)

        screenshot = self.get_screenshot(url)
        page_cdn, page_hash, thumbnail = self.generate_media_page(urls, url, output)
        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=timestamp, title=tweet.data.text)
|
|
@ -1,6 +1,5 @@
|
|||
|
||||
import html
|
||||
from urllib.parse import urlparse
|
||||
import html, re, requests
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from snscrape.modules.twitter import TwitterTweetScraper, Video, Gif, Photo
|
||||
|
||||
|
@ -9,20 +8,21 @@ from .base_archiver import Archiver, ArchiveResult
|
|||
|
||||
class TwitterArchiver(Archiver):
|
||||
name = "twitter"
|
||||
link_pattern = re.compile(r"twitter.com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
|
||||
|
||||
def get_username_tweet_id(self, url):
    """Extract the ``(username, tweet_id)`` pair from a tweet URL.

    Returns ``(False, False)`` when ``url`` does not match
    ``self.link_pattern``; only the first match is used.
    """
    # detect URLs that we definitely cannot handle
    found = self.link_pattern.findall(url)
    if len(found) == 0:
        return False, False

    # only one URL supported
    first_match = found[0]
    username, tweet_id = first_match
    logger.debug(f"Found {username=} and {tweet_id=} in {url=}")

    return username, tweet_id
|
||||
|
||||
def download(self, url, check_if_exists=False):
|
||||
|
||||
if 'twitter.com' != self.get_netloc(url):
|
||||
logger.debug(f'{url=} is not from twitter')
|
||||
return False
|
||||
|
||||
tweet_id = urlparse(url).path.split('/')
|
||||
if 'status' in tweet_id:
|
||||
i = tweet_id.index('status')
|
||||
tweet_id = tweet_id[i + 1]
|
||||
else:
|
||||
logger.debug(f'{url=} does not contain "status"')
|
||||
return False
|
||||
username, tweet_id = self.get_username_tweet_id(url)
|
||||
if not username: return False
|
||||
|
||||
scr = TwitterTweetScraper(tweet_id)
|
||||
|
||||
|
@ -30,7 +30,7 @@ class TwitterArchiver(Archiver):
|
|||
tweet = next(scr.get_items())
|
||||
except Exception as ex:
|
||||
logger.warning(f"can't get tweet: {type(ex).__name__} occurred. args: {ex.args}")
|
||||
return False
|
||||
return self.download_alternative(url, tweet_id)
|
||||
|
||||
if tweet.media is None:
|
||||
logger.debug(f'No media found, archiving tweet text only')
|
||||
|
@ -57,3 +57,40 @@ class TwitterArchiver(Archiver):
|
|||
screenshot = self.get_screenshot(url)
|
||||
|
||||
return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=tweet.date, title=tweet.content)
|
||||
|
||||
def download_alternative(self, url, tweet_id):
    """Archive a tweet through the ``cdn.syndication.twimg.com`` endpoint.

    Returns ``False`` when the endpoint does not answer with HTTP 200,
    otherwise an ``ArchiveResult`` built from the returned JSON payload.
    """
    logger.debug(f"Trying twitter hack for {url=}")

    hack_url = f"https://cdn.syndication.twimg.com/tweet?id={tweet_id}"
    r = requests.get(hack_url)
    if r.status_code != 200:
        return False
    tweet = r.json()

    # "photos" and "video" are treated as optional: a payload without them
    # used to raise KeyError instead of being archived.
    urls = [p["url"] for p in tweet.get("photos", [])]

    # 1 tweet has 1 video max
    video = tweet.get("video")
    if video:
        video_url = self.choose_variant(video.get("variants", []))
        # choose_variant returns None when no variant is usable; do not
        # pass a None entry on to the media page.
        if video_url:
            urls.append(video_url)

    logger.debug(f"Twitter hack got {urls=}")

    timestamp = datetime.strptime(tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
    screenshot = self.get_screenshot(url)
    page_cdn, page_hash, thumbnail = self.generate_media_page(urls, url, r.text)
    return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=timestamp, title=tweet["text"])
|
||||
|
||||
def choose_variant(self, variants):
    """Return the ``src`` of the best candidate among ``variants``.

    Each variant is read as a mapping with ``"type"`` and ``"src"`` keys.
    ``video/mp4`` entries whose ``src`` contains a ``/<width>x<height>/``
    segment compete on resolution; any other entry is only used while
    nothing has been selected yet. Returns ``None`` when no variant offers
    a ``src`` (including empty or ``None`` input).
    """
    # choosing the highest quality possible
    variant, width, height = None, 0, 0
    for var in variants or []:
        src = var.get("src")
        if not src:
            # nothing downloadable in this entry
            continue

        resolution = None
        if var.get("type") == "video/mp4":
            resolution = re.search(r"\/(\d+)x(\d+)\/", src)

        if resolution:
            w, h = int(resolution[1]), int(resolution[2])
            if w > width or h > height:
                width, height = w, h
                variant = src
        elif not variant:
            # non-mp4 entries, or mp4 URLs without a resolution segment:
            # keep the first one as a fallback so a lone candidate is not lost
            variant = src
    return variant
|
||||
|
|
|
@ -3,7 +3,7 @@ import os, datetime, traceback, random, tempfile
|
|||
from loguru import logger
|
||||
from slugify import slugify
|
||||
|
||||
from archivers import TelethonArchiver, TelegramArchiver, TiktokArchiver, YoutubeDLArchiver, TwitterArchiver, VkArchiver, WaybackArchiver, ArchiveResult, Archiver
|
||||
from archivers import TelethonArchiver, TelegramArchiver, TiktokArchiver, YoutubeDLArchiver, TwitterArchiver, TwitterApiArchiver, VkArchiver, WaybackArchiver, ArchiveResult, Archiver
|
||||
from utils import GWorksheet, mkdir_if_not_exists, expand_url
|
||||
from configs import Config
|
||||
from storages import Storage
|
||||
|
@ -92,6 +92,7 @@ def process_sheet(c: Config):
|
|||
active_archivers = [
|
||||
TelethonArchiver(storage, c.webdriver, c.telegram_config),
|
||||
TiktokArchiver(storage, c.webdriver),
|
||||
TwitterApiArchiver(storage, c.webdriver, c.twitter_config),
|
||||
YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie),
|
||||
TelegramArchiver(storage, c.webdriver),
|
||||
TwitterArchiver(storage, c.webdriver),
|
||||
|
|
|
@ -2,4 +2,5 @@ from .config import Config
|
|||
from .selenium_config import SeleniumConfig
|
||||
from .telethon_config import TelethonConfig
|
||||
from .wayback_config import WaybackConfig
|
||||
from .twitter_api_config import TwitterApiConfig
|
||||
from .vk_config import VkConfig
|
|
@ -11,7 +11,8 @@ from .wayback_config import WaybackConfig
|
|||
from .telethon_config import TelethonConfig
|
||||
from .selenium_config import SeleniumConfig
|
||||
from .vk_config import VkConfig
|
||||
from storages import Storage, S3Config, S3Storage, GDStorage, GDConfig, LocalStorage, LocalConfig
|
||||
from .twitter_api_config import TwitterApiConfig
|
||||
from storages import S3Config, S3Storage, GDStorage, GDConfig, LocalStorage, LocalConfig
|
||||
|
||||
|
||||
class Config:
|
||||
|
@ -135,6 +136,19 @@ class Config:
|
|||
self.telegram_config = None
|
||||
logger.debug(f"'telegram' key not present in the {self.config_file=}")
|
||||
|
||||
# twitter config
|
||||
if "twitter" in secrets:
|
||||
self.twitter_config = TwitterApiConfig(
|
||||
bearer_token=secrets["twitter"].get("bearer_token"),
|
||||
consumer_key=secrets["twitter"].get("consumer_key"),
|
||||
consumer_secret=secrets["twitter"].get("consumer_secret"),
|
||||
access_token=secrets["twitter"].get("access_token"),
|
||||
access_secret=secrets["twitter"].get("access_secret"),
|
||||
)
|
||||
else:
|
||||
self.twitter_config = None
|
||||
logger.debug(f"'twitter' key not present in the {self.config_file=}")
|
||||
|
||||
# vk config
|
||||
if "vk" in secrets:
|
||||
self.vk_config = VkConfig(
|
||||
|
@ -223,12 +237,11 @@ class Config:
|
|||
self.destroy_webdriver()
|
||||
self.webdriver = new_webdriver
|
||||
self.webdriver.set_window_size(self.selenium_config.window_width,
|
||||
self.selenium_config.window_height)
|
||||
self.selenium_config.window_height)
|
||||
self.webdriver.set_page_load_timeout(self.selenium_config.timeout_seconds)
|
||||
except TimeoutException as e:
|
||||
logger.error(f"failed to get new webdriver, possibly due to insufficient system resources or timeout settings: {e}")
|
||||
|
||||
|
||||
def __str__(self) -> str:
|
||||
return json.dumps({
|
||||
"config_file": self.config_file,
|
||||
|
@ -245,6 +258,7 @@ class Config:
|
|||
"local_config": hasattr(self, "local_config"),
|
||||
"wayback_config": self.wayback_config != None,
|
||||
"telegram_config": self.telegram_config != None,
|
||||
"twitter_config": self.twitter_config != None,
|
||||
"vk_config": self.vk_config != None,
|
||||
"gsheets_client": self.gsheets_client != None,
|
||||
"column_names": self.column_names,
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
|
||||
from dataclasses import dataclass
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class TwitterApiConfig:
    """Credentials for the Twitter API archiver.

    Either ``bearer_token`` alone, or all four of ``consumer_key``,
    ``consumer_secret``, ``access_token`` and ``access_secret`` are needed.
    Every field is optional because the values are read from the secrets
    with ``dict.get`` and may therefore be ``None``.
    """
    bearer_token: Optional[str] = None
    consumer_key: Optional[str] = None
    consumer_secret: Optional[str] = None
    access_token: Optional[str] = None
    access_secret: Optional[str] = None
|
|
@ -8,7 +8,7 @@ secrets:
|
|||
key: "s3 API key"
|
||||
secret: "s3 API secret"
|
||||
# use region format like such
|
||||
endpoint_url: 'https://{region}.digitaloceanspaces.com'
|
||||
endpoint_url: "https://{region}.digitaloceanspaces.com"
|
||||
# use bucket, region, and key (key is the archived file path generated when executing), in a format such as:
|
||||
cdn_url: "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}"
|
||||
# if private:true S3 urls will not be readable online
|
||||
|
@ -24,7 +24,7 @@ secrets:
|
|||
|
||||
# needed if you use storage=local
|
||||
local:
|
||||
# local path to save files in
|
||||
# local path to save files in
|
||||
save_to: "./local_archive"
|
||||
|
||||
wayback:
|
||||
|
@ -34,11 +34,20 @@ secrets:
|
|||
|
||||
telegram:
|
||||
# to get credentials see: https://telegra.ph/How-to-get-Telegram-APP-ID--API-HASH-05-27
|
||||
api_id: your API key, see
|
||||
api_id: your API key, see
|
||||
api_hash: your API hash
|
||||
# optional, but allows access to more content such as large videos, talk to @botfather
|
||||
bot_token: your bot-token
|
||||
|
||||
twitter:
|
||||
# twitter configuration - API V2 only - either bearer_token only
|
||||
bearer_token: ""
|
||||
# OR all of the below
|
||||
consumer_key: ""
|
||||
consumer_secret: ""
|
||||
access_token: ""
|
||||
access_secret: ""
|
||||
|
||||
# vkontakte (vk.com) credentials
|
||||
vk:
|
||||
username: "phone number or email"
|
||||
|
@ -49,7 +58,7 @@ secrets:
|
|||
service_account: "service_account.json"
|
||||
|
||||
facebook:
|
||||
# optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'
|
||||
# optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'
|
||||
cookie: ""
|
||||
execution:
|
||||
# can be overwritten with CMD --sheet=
|
||||
|
|
Ładowanie…
Reference in New Issue