kopia lustrzana https://github.com/bellingcat/auto-archiver
working
rodzic
d66ec70ca5
commit
0f4bff23d7
|
|
@ -42,27 +42,27 @@
|
|||
},
|
||||
"beautifulsoup4": {
|
||||
"hashes": [
|
||||
"sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf",
|
||||
"sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"
|
||||
"sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30",
|
||||
"sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.10.0"
|
||||
"version": "==4.11.1"
|
||||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:9d8ddfefe0c4a993423e2c40831034c78fcb7b3425bf3610cf0087301dd9098b",
|
||||
"sha256:c06b9b29f80da8cf6d9fac8f41d74a74d0f5347927acf11b15428b295fcbdd31"
|
||||
"sha256:895fb88c69be78f82cfee58a79c97a3ad8d4a2a1209041a411d7d6b9fc5393e4",
|
||||
"sha256:bcb541175a7d190dd919a0af0e807ee6e9d26f135551e741b10d94343f2d7588"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.21.33"
|
||||
"version": "==1.21.42"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:16ca4a2b72fef8caaf0eeb423dbf6cd64938442c4b9f96deb672468229e5e3f9",
|
||||
"sha256:ea5fd180082030a6c33fa19bf011d72970f3ed23cfff1b41413069e325768103"
|
||||
"sha256:14aee41c8bf59d2dd2d89e8751fa37d3c95dcb92707d1966aa02697e914c1417",
|
||||
"sha256:a2baa9484bbaee96ef312c049b8e360badcab58329e487b57567644a571b5f4a"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.24.33"
|
||||
"version": "==1.24.42"
|
||||
},
|
||||
"brotli": {
|
||||
"hashes": [
|
||||
|
|
@ -298,11 +298,11 @@
|
|||
},
|
||||
"google-auth": {
|
||||
"hashes": [
|
||||
"sha256:3ba4d63cb29c1e6d5ffcc1c0623c03cf02ede6240a072f213084749574e691ab",
|
||||
"sha256:60d449f8142c742db760f4c0be39121bc8d9be855555d784c252deaca1ced3f5"
|
||||
"sha256:04e224f241c0566477bb35a8a93be8c635210de743bde454d49393cfb605266d",
|
||||
"sha256:9a88ee548f6fd49467e2e443dfbfe10344e5a270629a137a3a0b3437ec6b02a6"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==2.6.2"
|
||||
"version": "==2.6.5"
|
||||
},
|
||||
"google-auth-oauthlib": {
|
||||
"hashes": [
|
||||
|
|
@ -314,11 +314,11 @@
|
|||
},
|
||||
"gspread": {
|
||||
"hashes": [
|
||||
"sha256:a347197628fa1885dcc860701fb1b3f5471386aa863a71cfe232b6473c6fea1b",
|
||||
"sha256:be2220e19723570ed98e8b8eb6a5b6e04afa0f08ec1f08b89e217c354488a047"
|
||||
"sha256:319766d90db05056293f7ee0ad2b35503a1a40683a75897a2922398cd2016283",
|
||||
"sha256:c719e1c024a2a6f3b7d818fbe07c3886b26fd6504b64d1b1359cf242968213cd"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.3.0"
|
||||
"version": "==5.3.2"
|
||||
},
|
||||
"h11": {
|
||||
"hashes": [
|
||||
|
|
@ -604,11 +604,11 @@
|
|||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
"sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea",
|
||||
"sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"
|
||||
"sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954",
|
||||
"sha256:ef7b523f6356f763771559412c0d7134753f037822dad1b16945b7b846f7ad06"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.0.7"
|
||||
"markers": "python_full_version >= '3.6.8'",
|
||||
"version": "==3.0.8"
|
||||
},
|
||||
"pysocks": {
|
||||
"hashes": [
|
||||
|
|
@ -713,11 +713,11 @@
|
|||
},
|
||||
"soupsieve": {
|
||||
"hashes": [
|
||||
"sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb",
|
||||
"sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"
|
||||
"sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759",
|
||||
"sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2.3.1"
|
||||
"version": "==2.3.2.post1"
|
||||
},
|
||||
"telethon": {
|
||||
"hashes": [
|
||||
|
|
@ -748,7 +748,6 @@
|
|||
"version": "==0.9.2"
|
||||
},
|
||||
"urllib3": {
|
||||
"extras": [],
|
||||
"hashes": [
|
||||
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
|
||||
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
|
||||
|
|
@ -758,57 +757,57 @@
|
|||
},
|
||||
"websockets": {
|
||||
"hashes": [
|
||||
"sha256:038afef2a05893578d10dadbdbb5f112bd115c46347e1efe99f6a356ff062138",
|
||||
"sha256:05f6e9757017270e7a92a2975e2ae88a9a582ffc4629086fd6039aa80e99cd86",
|
||||
"sha256:0b66421f9f13d4df60cd48ab977ed2c2b6c9147ae1a33caf5a9f46294422fda1",
|
||||
"sha256:0cd02f36d37e503aca88ab23cc0a1a0e92a263d37acf6331521eb38040dcf77b",
|
||||
"sha256:0f73cb2526d6da268e86977b2c4b58f2195994e53070fe567d5487c6436047e6",
|
||||
"sha256:117383d0a17a0dda349f7a8790763dde75c1508ff8e4d6e8328b898b7df48397",
|
||||
"sha256:1c1f3b18c8162e3b09761d0c6a0305fd642934202541cc511ef972cb9463261e",
|
||||
"sha256:1c9031e90ebfc486e9cdad532b94004ade3aa39a31d3c46c105bb0b579cd2490",
|
||||
"sha256:2349fa81b6b959484bb2bda556ccb9eb70ba68987646a0f8a537a1a18319fb03",
|
||||
"sha256:24b879ba7db12bb525d4e58089fcbe6a3df3ce4666523183654170e86d372cbe",
|
||||
"sha256:2aa9b91347ecd0412683f28aabe27f6bad502d89bd363b76e0a3508b1596402e",
|
||||
"sha256:56d48eebe9e39ce0d68701bce3b21df923aa05dcc00f9fd8300de1df31a7c07c",
|
||||
"sha256:5a38a0175ae82e4a8c4bac29fc01b9ee26d7d5a614e5ee11e7813c68a7d938ce",
|
||||
"sha256:5b04270b5613f245ec84bb2c6a482a9d009aefad37c0575f6cda8499125d5d5c",
|
||||
"sha256:6193bbc1ee63aadeb9a4d81de0e19477401d150d506aee772d8380943f118186",
|
||||
"sha256:669e54228a4d9457abafed27cbf0e2b9f401445c4dfefc12bf8e4db9751703b8",
|
||||
"sha256:6a009eb551c46fd79737791c0c833fc0e5b56bcd1c3057498b262d660b92e9cd",
|
||||
"sha256:71a4491cfe7a9f18ee57d41163cb6a8a3fa591e0f0564ca8b0ed86b2a30cced4",
|
||||
"sha256:7b38a5c9112e3dbbe45540f7b60c5204f49b3cb501b40950d6ab34cd202ab1d0",
|
||||
"sha256:7bb9d8a6beca478c7e9bdde0159bd810cc1006ad6a7cb460533bae39da692ca2",
|
||||
"sha256:82bc33db6d8309dc27a3bee11f7da2288ad925fcbabc2a4bb78f7e9c56249baf",
|
||||
"sha256:8351c3c86b08156337b0e4ece0e3c5ec3e01fcd14e8950996832a23c99416098",
|
||||
"sha256:8beac786a388bb99a66c3be4ab0fb38273c0e3bc17f612a4e0a47c4fc8b9c045",
|
||||
"sha256:97950c7c844ec6f8d292440953ae18b99e3a6a09885e09d20d5e7ecd9b914cf8",
|
||||
"sha256:98f57b3120f8331cd7440dbe0e776474f5e3632fdaa474af1f6b754955a47d71",
|
||||
"sha256:9ca2ca05a4c29179f06cf6727b45dba5d228da62623ec9df4184413d8aae6cb9",
|
||||
"sha256:a03a25d95cc7400bd4d61a63460b5d85a7761c12075ee2f51de1ffe73aa593d3",
|
||||
"sha256:a10c0c1ee02164246f90053273a42d72a3b2452a7e7486fdae781138cf7fbe2d",
|
||||
"sha256:a72b92f96e5e540d5dda99ee3346e199ade8df63152fa3c737260da1730c411f",
|
||||
"sha256:ac081aa0307f263d63c5ff0727935c736c8dad51ddf2dc9f5d0c4759842aefaa",
|
||||
"sha256:b22bdc795e62e71118b63e14a08bacfa4f262fd2877de7e5b950f5ac16b0348f",
|
||||
"sha256:b4059e2ccbe6587b6dc9a01db5fc49ead9a884faa4076eea96c5ec62cb32f42a",
|
||||
"sha256:b7fe45ae43ac814beb8ca09d6995b56800676f2cfa8e23f42839dc69bba34a42",
|
||||
"sha256:bef03a51f9657fb03d8da6ccd233fe96e04101a852f0ffd35f5b725b28221ff3",
|
||||
"sha256:bffc65442dd35c473ca9790a3fa3ba06396102a950794f536783f4b8060af8dd",
|
||||
"sha256:c21a67ab9a94bd53e10bba21912556027fea944648a09e6508415ad14e37c325",
|
||||
"sha256:c67d9cacb3f6537ca21e9b224d4fd08481538e43bcac08b3d93181b0816def39",
|
||||
"sha256:c6e56606842bb24e16e36ae7eb308d866b4249cf0be8f63b212f287eeb76b124",
|
||||
"sha256:cb316b87cbe3c0791c2ad92a5a36bf6adc87c457654335810b25048c1daa6fd5",
|
||||
"sha256:cef40a1b183dcf39d23b392e9dd1d9b07ab9c46aadf294fff1350fb79146e72b",
|
||||
"sha256:cf931c33db9c87c53d009856045dd524e4a378445693382a920fa1e0eb77c36c",
|
||||
"sha256:d4d110a84b63c5cfdd22485acc97b8b919aefeecd6300c0c9d551e055b9a88ea",
|
||||
"sha256:d5396710f86a306cf52f87fd8ea594a0e894ba0cc5a36059eaca3a477dc332aa",
|
||||
"sha256:f09f46b1ff6d09b01c7816c50bd1903cf7d02ebbdb63726132717c2fcda835d5",
|
||||
"sha256:f14bd10e170abc01682a9f8b28b16e6f20acf6175945ef38db6ffe31b0c72c3f",
|
||||
"sha256:f5c335dc0e7dc271ef36df3f439868b3c790775f345338c2f61a562f1074187b",
|
||||
"sha256:f8296b8408ec6853b26771599990721a26403e62b9de7e50ac0a056772ac0b5e",
|
||||
"sha256:fa35c5d1830d0fb7b810324e9eeab9aa92e8f273f11fdbdc0741dcded6d72b9f"
|
||||
"sha256:07cdc0a5b2549bcfbadb585ad8471ebdc7bdf91e32e34ae3889001c1c106a6af",
|
||||
"sha256:210aad7fdd381c52e58777560860c7e6110b6174488ef1d4b681c08b68bf7f8c",
|
||||
"sha256:28dd20b938a57c3124028680dc1600c197294da5db4292c76a0b48efb3ed7f76",
|
||||
"sha256:2f94fa3ae454a63ea3a19f73b95deeebc9f02ba2d5617ca16f0bbdae375cda47",
|
||||
"sha256:31564a67c3e4005f27815634343df688b25705cccb22bc1db621c781ddc64c69",
|
||||
"sha256:347974105bbd4ea068106ec65e8e8ebd86f28c19e529d115d89bd8cc5cda3079",
|
||||
"sha256:379e03422178436af4f3abe0aa8f401aa77ae2487843738542a75faf44a31f0c",
|
||||
"sha256:3eda1cb7e9da1b22588cefff09f0951771d6ee9fa8dbe66f5ae04cc5f26b2b55",
|
||||
"sha256:51695d3b199cd03098ae5b42833006a0f43dc5418d3102972addc593a783bc02",
|
||||
"sha256:54c000abeaff6d8771a4e2cef40900919908ea7b6b6a30eae72752607c6db559",
|
||||
"sha256:5b936bf552e4f6357f5727579072ff1e1324717902127ffe60c92d29b67b7be3",
|
||||
"sha256:6075fd24df23133c1b078e08a9b04a3bc40b31a8def4ee0b9f2c8865acce913e",
|
||||
"sha256:661f641b44ed315556a2fa630239adfd77bd1b11cb0b9d96ed8ad90b0b1e4978",
|
||||
"sha256:6ea6b300a6bdd782e49922d690e11c3669828fe36fc2471408c58b93b5535a98",
|
||||
"sha256:6ed1d6f791eabfd9808afea1e068f5e59418e55721db8b7f3bfc39dc831c42ae",
|
||||
"sha256:7934e055fd5cd9dee60f11d16c8d79c4567315824bacb1246d0208a47eca9755",
|
||||
"sha256:7ab36e17af592eec5747c68ef2722a74c1a4a70f3772bc661079baf4ae30e40d",
|
||||
"sha256:7f6d96fdb0975044fdd7953b35d003b03f9e2bcf85f2d2cf86285ece53e9f991",
|
||||
"sha256:83e5ca0d5b743cde3d29fda74ccab37bdd0911f25bd4cdf09ff8b51b7b4f2fa1",
|
||||
"sha256:85506b3328a9e083cc0a0fb3ba27e33c8db78341b3eb12eb72e8afd166c36680",
|
||||
"sha256:8af75085b4bc0b5c40c4a3c0e113fa95e84c60f4ed6786cbb675aeb1ee128247",
|
||||
"sha256:8b1359aba0ff810d5830d5ab8e2c4a02bebf98a60aa0124fb29aa78cfdb8031f",
|
||||
"sha256:8fbd7d77f8aba46d43245e86dd91a8970eac4fb74c473f8e30e9c07581f852b2",
|
||||
"sha256:907e8247480f287aa9bbc9391bd6de23c906d48af54c8c421df84655eef66af7",
|
||||
"sha256:93d5ea0b5da8d66d868b32c614d2b52d14304444e39e13a59566d4acb8d6e2e4",
|
||||
"sha256:97bc9d41e69a7521a358f9b8e44871f6cdeb42af31815c17aed36372d4eec667",
|
||||
"sha256:994cdb1942a7a4c2e10098d9162948c9e7b235df755de91ca33f6e0481366fdb",
|
||||
"sha256:a141de3d5a92188234afa61653ed0bbd2dde46ad47b15c3042ffb89548e77094",
|
||||
"sha256:a1e15b230c3613e8ea82c9fc6941b2093e8eb939dd794c02754d33980ba81e36",
|
||||
"sha256:aad5e300ab32036eb3fdc350ad30877210e2f51bceaca83fb7fef4d2b6c72b79",
|
||||
"sha256:b529fdfa881b69fe563dbd98acce84f3e5a67df13de415e143ef053ff006d500",
|
||||
"sha256:b9c77f0d1436ea4b4dc089ed8335fa141e6a251a92f75f675056dac4ab47a71e",
|
||||
"sha256:bb621ec2dbbbe8df78a27dbd9dd7919f9b7d32a73fafcb4d9252fc4637343582",
|
||||
"sha256:c7250848ce69559756ad0086a37b82c986cd33c2d344ab87fea596c5ac6d9442",
|
||||
"sha256:c8d1d14aa0f600b5be363077b621b1b4d1eb3fbf90af83f9281cda668e6ff7fd",
|
||||
"sha256:d1655a6fc7aecd333b079d00fb3c8132d18988e47f19740c69303bf02e9883c6",
|
||||
"sha256:d6353ba89cfc657a3f5beabb3b69be226adbb5c6c7a66398e17809b0ce3c4731",
|
||||
"sha256:da4377904a3379f0c1b75a965fff23b28315bcd516d27f99a803720dfebd94d4",
|
||||
"sha256:e49ea4c1a9543d2bd8a747ff24411509c29e4bdcde05b5b0895e2120cb1a761d",
|
||||
"sha256:e4e08305bfd76ba8edab08dcc6496f40674f44eb9d5e23153efa0a35750337e8",
|
||||
"sha256:e6fa05a680e35d0fcc1470cb070b10e6fe247af54768f488ed93542e71339d6f",
|
||||
"sha256:e7e6f2d6fd48422071cc8a6f8542016f350b79cc782752de531577d35e9bd677",
|
||||
"sha256:e904c0381c014b914136c492c8fa711ca4cced4e9b3d110e5e7d436d0fc289e8",
|
||||
"sha256:ec2b0ab7edc8cd4b0eb428b38ed89079bdc20c6bdb5f889d353011038caac2f9",
|
||||
"sha256:ef5ce841e102278c1c2e98f043db99d6755b1c58bde475516aef3a008ed7f28e",
|
||||
"sha256:f351c7d7d92f67c0609329ab2735eee0426a03022771b00102816a72715bb00b",
|
||||
"sha256:fab7c640815812ed5f10fbee7abbf58788d602046b7bb3af9b1ac753a6d5e916",
|
||||
"sha256:fc06cc8073c8e87072138ba1e431300e2d408f054b27047d047b549455066ff4"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==10.2"
|
||||
"version": "==10.3"
|
||||
},
|
||||
"werkzeug": {
|
||||
"hashes": [
|
||||
|
|
@ -828,11 +827,11 @@
|
|||
},
|
||||
"yt-dlp": {
|
||||
"hashes": [
|
||||
"sha256:05179f0f2c34f06910003bb9f80af68ff798b072ca0f826c0e6704a3fbd5b306",
|
||||
"sha256:68546578c18e6ce87450b53769d5d5b7f5a23e5209784976db6c7ccbf7954b21"
|
||||
"sha256:6edefe326b1e1478fdbe627a66203e5248a6b0dd50c101e682cf700ab70cdf72",
|
||||
"sha256:8758d016509d4574b90fbde975aa70adaef71ed5e7a195141588f6d6945205ba"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2022.3.8.2"
|
||||
"version": "==2022.4.8"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
|
|
|
|||
52
README.md
52
README.md
|
|
@ -105,15 +105,23 @@ Below is a list of archivers in order of what the `auto_archive.py` script tries
|
|||
|
||||
# Telethon (Telegram API)
|
||||
|
||||
asfd
|
||||
https://telethonn.readthedocs.io/en/latest/extra/basic/creating-a-client.html#
|
||||
|
||||
https://my.telegram.org/apps
|
||||
|
||||
- Needs API key and hash to be put into .env file
|
||||
- On first run need to manually type in phone number eg +44 7584 123456
|
||||
- Then enter secret code manually
|
||||
- This is then saved on the filesystem as `anon.session`, which is a SQLite3 database.
|
||||
- The app may stall waiting for input (let's monitor when a session expires and we are re-prompted)
|
||||
|
||||
# Telegram
|
||||
|
||||
asdf
|
||||
not tested as the API is getting all so far
|
||||
|
||||
# TikTok
|
||||
|
||||
asdf
|
||||
not tested yet
|
||||
|
||||
|
||||
# Twitter Video - YoutubeDL
|
||||
|
|
@ -139,29 +147,43 @@ As of 1st April 2022 I have noticed
|
|||
|
||||
# Facebook Video - YoutubeDL
|
||||
|
||||
- The videos are generally downloaded well
|
||||
- Public videos generally downloaded well
|
||||
|
||||
- TODO - explore the ones which are failing
|
||||
- Public videos: the cookie popup that appeared in screenshots ("Allow the use of cookies from Facebook in this browser") is now worked around in code. This is handled by the `get` in `base_archiver.py`, which uses Selenium.Webdriver.Firefox, which is also configured in `base_archiver.py`
|
||||
|
||||
- However the screenshots have "Allow the use of cookies from Facebook in this browser". This is handled by `base_archiver.py` get which uses Selenium.Webdriver.Firefox which is configured in `base_archiver.py`
|
||||
|
||||
- Potentially could pass cookies using [https://www.selenium.dev/documentation/webdriver/browser/cookies/](https://www.selenium.dev/documentation/webdriver/browser/cookies/)
|
||||
|
||||
- And/or just click the button, as described in [https://stackoverflow.com/questions/67070686/popup-blocking-to-login-to-facebook](https://stackoverflow.com/questions/67070686/popup-blocking-to-login-to-facebook)
|
||||
- Private videos need to set the ytdlp facebook cookie.
|
||||
|
||||
- Private video screenshots are not working because a login prompt is shown instead
|
||||
|
||||
|
||||
# Facebook Images
|
||||
|
||||
- DONT WORK
|
||||
|
||||
- doesn't download image
|
||||
|
||||
- Uses WaybackArchiver and only displays screenshot with facebook cookies images
|
||||
|
||||
- WaybackArchiver getting rate limit problem?
|
||||
- Newly created snapshot doesn't appear in URL (takes more than 30s?)
|
||||
|
||||
todo - https://gist.github.com/pcardune/1332911 uses facebook's fbconsole which may help.
|
||||
|
||||
# Wayback
|
||||
|
||||
If telethon, telegram, tiktok, youtube and twitter all fail, then fall back to WaybackArchiver
|
||||
|
||||
- Sends a request to snapshot that page every time using an IA API key
|
||||
|
||||
- Uses beautiful soup to take a snapshot of the page (have facebook cookie issue)
|
||||
|
||||
- Text rendering issues - squares. eg https://web.archive.org/web/20220421133815/https://www.kanbawzatainews.com/2021/09/mytel_25.html renders on chrome. But screenshot shows squares. UTF-8?
|
||||
|
||||
|
||||
# Update
|
||||
|
||||
To update dependencies
|
||||
|
||||
```bash
|
||||
pipenv update
|
||||
```
|
||||
|
||||
# Code PR's
|
||||
|
||||
|
|
@ -173,4 +195,6 @@ Twitter exception catch better error
|
|||
|
||||
youtube archiver - catch for twitter when embedded url contains video.. don't want.
|
||||
|
||||
youtubedl - 4wwww to 3www fix for facebook cookie
|
||||
youtubedl - 4wwww to 3www fix for facebook cookie
|
||||
|
||||
fb catch - cookie click on homepage
|
||||
|
|
@ -12,6 +12,8 @@ import requests
|
|||
from storages import Storage
|
||||
from utils import mkdir_if_not_exists
|
||||
|
||||
from selenium.webdriver.common.by import By
|
||||
from loguru import logger
|
||||
|
||||
@dataclass
|
||||
class ArchiveResult:
|
||||
|
|
@ -45,8 +47,10 @@ class Archiver(ABC):
|
|||
def get_html_key(self, url):
|
||||
return self.get_key(urlparse(url).path.replace("/", "_") + ".html")
|
||||
|
||||
# DM added UTF
|
||||
# https://github.com/bellingcat/auto-archiver/pull/21/commits/576f1a8f687199cf38864f7271b9a63e65de8692
|
||||
def generate_media_page_html(self, url, urls_info: dict, object, thumbnail=None):
|
||||
page = f'''<html><head><title>{url}</title></head>
|
||||
page = f'''<html><head><title>{url}</title><meta charset="UTF-8"></head>
|
||||
<body>
|
||||
<h2>Archived media from {self.name}</h2>
|
||||
<h3><a href="{url}">{url}</a></h3><ul>'''
|
||||
|
|
@ -127,6 +131,15 @@ class Archiver(ABC):
|
|||
"/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
|
||||
filename = 'tmp/' + key
|
||||
|
||||
|
||||
# DM - Accept cookies popup dismiss
|
||||
if 'facebook.com' in url:
|
||||
try:
|
||||
self.driver.get("http://www.facebook.com")
|
||||
self.driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']").click()
|
||||
except:
|
||||
logger.error('Failed on fb accept cookies')
|
||||
|
||||
self.driver.get(url)
|
||||
time.sleep(6)
|
||||
|
||||
|
|
@ -174,7 +187,9 @@ class Archiver(ABC):
|
|||
|
||||
key_thumb = cdn_urls[int(len(cdn_urls) * 0.1)]
|
||||
|
||||
index_page = f'''<html><head><title>{filename}</title></head>
|
||||
# DM added UTF
|
||||
# https://github.com/bellingcat/auto-archiver/pull/21/commits/576f1a8f687199cf38864f7271b9a63e65de8692
|
||||
index_page = f'''<html><head><title>{filename}</title><meta charset="UTF-8"></head>
|
||||
<body>'''
|
||||
|
||||
for t in cdn_urls:
|
||||
|
|
|
|||
|
|
@ -38,7 +38,10 @@ class TelethonArchiver(Archiver):
|
|||
posts = self.client.get_messages(chat, ids=search_ids)
|
||||
media = []
|
||||
for post in posts:
|
||||
if post.grouped_id == original_post.grouped_id and post.media is not None:
|
||||
# DM fix from PR
|
||||
# https://github.com/bellingcat/auto-archiver/pull/21/commits/8358ab0bfc4db0e318caf421b1d232b925e64708
|
||||
# if post.grouped_id == original_post.grouped_id and post.media is not None:
|
||||
if post is not None and post.grouped_id == original_post.grouped_id and post.media is not None:
|
||||
media.append(post)
|
||||
return media
|
||||
|
||||
|
|
@ -51,6 +54,7 @@ class TelethonArchiver(Archiver):
|
|||
status = "success"
|
||||
screenshot = self.get_screenshot(url)
|
||||
|
||||
# app will ask (stall for user input!) for phone number and auth code if anon.session not found
|
||||
with self.client.start():
|
||||
matches = list(matches[0])
|
||||
chat, post_id = matches[1], matches[2]
|
||||
|
|
@ -76,7 +80,9 @@ class TelethonArchiver(Archiver):
|
|||
uploaded_media = []
|
||||
message = post.message
|
||||
for mp in media_posts:
|
||||
if len(mp.message) > message: message = mp.message
|
||||
#DM from PR
|
||||
if len(mp.message) > len(message): message = mp.message
|
||||
|
||||
filename = self.client.download_media(mp.media, f'tmp/{chat}_{group_id}/{mp.id}')
|
||||
key = filename.split('tmp/')[1]
|
||||
self.storage.upload(filename, key)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ from bs4 import BeautifulSoup
|
|||
from storages import Storage
|
||||
from .base_archiver import Archiver, ArchiveResult
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class WaybackArchiver(Archiver):
|
||||
name = "wayback"
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ class YoutubeDLArchiver(Archiver):
|
|||
|
||||
def download(self, url, check_if_exists=False):
|
||||
netloc = self.get_netloc(url)
|
||||
# DM to set env variable: export FB_COOKIE="paste"
|
||||
if netloc in ['facebook.com', 'www.facebook.com'] and os.getenv('FB_COOKIE'):
|
||||
logger.info('Using Facebook cookie')
|
||||
yt_dlp.utils.std_headers['cookie'] = os.getenv('FB_COOKIE')
|
||||
|
|
|
|||
|
|
@ -102,6 +102,7 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
|
|||
|
||||
# order matters, first to succeed excludes remaining
|
||||
active_archivers = [
|
||||
# telethon is the API for telegram eg t.me url's
|
||||
archivers.TelethonArchiver(s3_client, driver, telegram_config),
|
||||
archivers.TelegramArchiver(s3_client, driver),
|
||||
archivers.TiktokArchiver(s3_client, driver),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
from selenium import webdriver
|
||||
import time
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
options = webdriver.FirefoxOptions()
|
||||
options.headless = True
|
||||
driver = webdriver.Firefox(options=options)
|
||||
driver.set_window_size(1400, 2000)
|
||||
|
||||
# Navigate to Facebook
|
||||
driver.get("http://www.facebook.com")
|
||||
|
||||
# click the cookie banner button that accepts only essential cookies
|
||||
foo = driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']")
|
||||
foo.click()
|
||||
|
||||
# Search & Enter the Email or Phone field & Enter Password
|
||||
username = driver.find_element(By.ID,"email")
|
||||
password = driver.find_element(By.ID,"pass")
|
||||
submit = driver.find_element(By.NAME,"login")
|
||||
|
||||
username.send_keys("test@gmail.com")
|
||||
password.send_keys("password")
|
||||
|
||||
# Click Login
|
||||
submit.click()
|
||||
|
||||
# now am logged in, go to original page
|
||||
driver.get("https://www.facebook.com/watch/?v=343188674422293")
|
||||
time.sleep(6)
|
||||
|
||||
# save a screenshot
|
||||
driver.save_screenshot("screenshot.png")
|
||||
|
|
@ -10,7 +10,9 @@
|
|||
|
||||
# git clone https://github.com/djhmateer/auto-archiver ; sudo chmod +x ~/auto-archiver/infra/server-build.sh ; ./auto-archiver/infra/server-build.sh
|
||||
|
||||
# Use Filezilla to copy secrets - .env and service-account.json
|
||||
# Use Filezilla to copy secrets - `.env` and `service-account.json` and `anon.session`
|
||||
|
||||
# export FB_COOKIE="cookie: datr=asdf"
|
||||
|
||||
## Python
|
||||
sudo apt update -y
|
||||
|
|
|
|||
Ładowanie…
Reference in New Issue