kopia lustrzana https://github.com/bellingcat/auto-archiver
				
				
				
			working
							rodzic
							
								
									d66ec70ca5
								
							
						
					
					
						commit
						0f4bff23d7
					
				|  | @ -42,27 +42,27 @@ | ||||||
|         }, |         }, | ||||||
|         "beautifulsoup4": { |         "beautifulsoup4": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf", |                 "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30", | ||||||
|                 "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891" |                 "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==4.10.0" |             "version": "==4.11.1" | ||||||
|         }, |         }, | ||||||
|         "boto3": { |         "boto3": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:9d8ddfefe0c4a993423e2c40831034c78fcb7b3425bf3610cf0087301dd9098b", |                 "sha256:895fb88c69be78f82cfee58a79c97a3ad8d4a2a1209041a411d7d6b9fc5393e4", | ||||||
|                 "sha256:c06b9b29f80da8cf6d9fac8f41d74a74d0f5347927acf11b15428b295fcbdd31" |                 "sha256:bcb541175a7d190dd919a0af0e807ee6e9d26f135551e741b10d94343f2d7588" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==1.21.33" |             "version": "==1.21.42" | ||||||
|         }, |         }, | ||||||
|         "botocore": { |         "botocore": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:16ca4a2b72fef8caaf0eeb423dbf6cd64938442c4b9f96deb672468229e5e3f9", |                 "sha256:14aee41c8bf59d2dd2d89e8751fa37d3c95dcb92707d1966aa02697e914c1417", | ||||||
|                 "sha256:ea5fd180082030a6c33fa19bf011d72970f3ed23cfff1b41413069e325768103" |                 "sha256:a2baa9484bbaee96ef312c049b8e360badcab58329e487b57567644a571b5f4a" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '3.6'", |             "markers": "python_version >= '3.6'", | ||||||
|             "version": "==1.24.33" |             "version": "==1.24.42" | ||||||
|         }, |         }, | ||||||
|         "brotli": { |         "brotli": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  | @ -298,11 +298,11 @@ | ||||||
|         }, |         }, | ||||||
|         "google-auth": { |         "google-auth": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:3ba4d63cb29c1e6d5ffcc1c0623c03cf02ede6240a072f213084749574e691ab", |                 "sha256:04e224f241c0566477bb35a8a93be8c635210de743bde454d49393cfb605266d", | ||||||
|                 "sha256:60d449f8142c742db760f4c0be39121bc8d9be855555d784c252deaca1ced3f5" |                 "sha256:9a88ee548f6fd49467e2e443dfbfe10344e5a270629a137a3a0b3437ec6b02a6" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", |             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", | ||||||
|             "version": "==2.6.2" |             "version": "==2.6.5" | ||||||
|         }, |         }, | ||||||
|         "google-auth-oauthlib": { |         "google-auth-oauthlib": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  | @ -314,11 +314,11 @@ | ||||||
|         }, |         }, | ||||||
|         "gspread": { |         "gspread": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:a347197628fa1885dcc860701fb1b3f5471386aa863a71cfe232b6473c6fea1b", |                 "sha256:319766d90db05056293f7ee0ad2b35503a1a40683a75897a2922398cd2016283", | ||||||
|                 "sha256:be2220e19723570ed98e8b8eb6a5b6e04afa0f08ec1f08b89e217c354488a047" |                 "sha256:c719e1c024a2a6f3b7d818fbe07c3886b26fd6504b64d1b1359cf242968213cd" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==5.3.0" |             "version": "==5.3.2" | ||||||
|         }, |         }, | ||||||
|         "h11": { |         "h11": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  | @ -604,11 +604,11 @@ | ||||||
|         }, |         }, | ||||||
|         "pyparsing": { |         "pyparsing": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea", |                 "sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954", | ||||||
|                 "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484" |                 "sha256:ef7b523f6356f763771559412c0d7134753f037822dad1b16945b7b846f7ad06" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '3.6'", |             "markers": "python_full_version >= '3.6.8'", | ||||||
|             "version": "==3.0.7" |             "version": "==3.0.8" | ||||||
|         }, |         }, | ||||||
|         "pysocks": { |         "pysocks": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  | @ -713,11 +713,11 @@ | ||||||
|         }, |         }, | ||||||
|         "soupsieve": { |         "soupsieve": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb", |                 "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759", | ||||||
|                 "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9" |                 "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '3.6'", |             "markers": "python_version >= '3.6'", | ||||||
|             "version": "==2.3.1" |             "version": "==2.3.2.post1" | ||||||
|         }, |         }, | ||||||
|         "telethon": { |         "telethon": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  | @ -748,7 +748,6 @@ | ||||||
|             "version": "==0.9.2" |             "version": "==0.9.2" | ||||||
|         }, |         }, | ||||||
|         "urllib3": { |         "urllib3": { | ||||||
|             "extras": [], |  | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", |                 "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", | ||||||
|                 "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" |                 "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" | ||||||
|  | @ -758,57 +757,57 @@ | ||||||
|         }, |         }, | ||||||
|         "websockets": { |         "websockets": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:038afef2a05893578d10dadbdbb5f112bd115c46347e1efe99f6a356ff062138", |                 "sha256:07cdc0a5b2549bcfbadb585ad8471ebdc7bdf91e32e34ae3889001c1c106a6af", | ||||||
|                 "sha256:05f6e9757017270e7a92a2975e2ae88a9a582ffc4629086fd6039aa80e99cd86", |                 "sha256:210aad7fdd381c52e58777560860c7e6110b6174488ef1d4b681c08b68bf7f8c", | ||||||
|                 "sha256:0b66421f9f13d4df60cd48ab977ed2c2b6c9147ae1a33caf5a9f46294422fda1", |                 "sha256:28dd20b938a57c3124028680dc1600c197294da5db4292c76a0b48efb3ed7f76", | ||||||
|                 "sha256:0cd02f36d37e503aca88ab23cc0a1a0e92a263d37acf6331521eb38040dcf77b", |                 "sha256:2f94fa3ae454a63ea3a19f73b95deeebc9f02ba2d5617ca16f0bbdae375cda47", | ||||||
|                 "sha256:0f73cb2526d6da268e86977b2c4b58f2195994e53070fe567d5487c6436047e6", |                 "sha256:31564a67c3e4005f27815634343df688b25705cccb22bc1db621c781ddc64c69", | ||||||
|                 "sha256:117383d0a17a0dda349f7a8790763dde75c1508ff8e4d6e8328b898b7df48397", |                 "sha256:347974105bbd4ea068106ec65e8e8ebd86f28c19e529d115d89bd8cc5cda3079", | ||||||
|                 "sha256:1c1f3b18c8162e3b09761d0c6a0305fd642934202541cc511ef972cb9463261e", |                 "sha256:379e03422178436af4f3abe0aa8f401aa77ae2487843738542a75faf44a31f0c", | ||||||
|                 "sha256:1c9031e90ebfc486e9cdad532b94004ade3aa39a31d3c46c105bb0b579cd2490", |                 "sha256:3eda1cb7e9da1b22588cefff09f0951771d6ee9fa8dbe66f5ae04cc5f26b2b55", | ||||||
|                 "sha256:2349fa81b6b959484bb2bda556ccb9eb70ba68987646a0f8a537a1a18319fb03", |                 "sha256:51695d3b199cd03098ae5b42833006a0f43dc5418d3102972addc593a783bc02", | ||||||
|                 "sha256:24b879ba7db12bb525d4e58089fcbe6a3df3ce4666523183654170e86d372cbe", |                 "sha256:54c000abeaff6d8771a4e2cef40900919908ea7b6b6a30eae72752607c6db559", | ||||||
|                 "sha256:2aa9b91347ecd0412683f28aabe27f6bad502d89bd363b76e0a3508b1596402e", |                 "sha256:5b936bf552e4f6357f5727579072ff1e1324717902127ffe60c92d29b67b7be3", | ||||||
|                 "sha256:56d48eebe9e39ce0d68701bce3b21df923aa05dcc00f9fd8300de1df31a7c07c", |                 "sha256:6075fd24df23133c1b078e08a9b04a3bc40b31a8def4ee0b9f2c8865acce913e", | ||||||
|                 "sha256:5a38a0175ae82e4a8c4bac29fc01b9ee26d7d5a614e5ee11e7813c68a7d938ce", |                 "sha256:661f641b44ed315556a2fa630239adfd77bd1b11cb0b9d96ed8ad90b0b1e4978", | ||||||
|                 "sha256:5b04270b5613f245ec84bb2c6a482a9d009aefad37c0575f6cda8499125d5d5c", |                 "sha256:6ea6b300a6bdd782e49922d690e11c3669828fe36fc2471408c58b93b5535a98", | ||||||
|                 "sha256:6193bbc1ee63aadeb9a4d81de0e19477401d150d506aee772d8380943f118186", |                 "sha256:6ed1d6f791eabfd9808afea1e068f5e59418e55721db8b7f3bfc39dc831c42ae", | ||||||
|                 "sha256:669e54228a4d9457abafed27cbf0e2b9f401445c4dfefc12bf8e4db9751703b8", |                 "sha256:7934e055fd5cd9dee60f11d16c8d79c4567315824bacb1246d0208a47eca9755", | ||||||
|                 "sha256:6a009eb551c46fd79737791c0c833fc0e5b56bcd1c3057498b262d660b92e9cd", |                 "sha256:7ab36e17af592eec5747c68ef2722a74c1a4a70f3772bc661079baf4ae30e40d", | ||||||
|                 "sha256:71a4491cfe7a9f18ee57d41163cb6a8a3fa591e0f0564ca8b0ed86b2a30cced4", |                 "sha256:7f6d96fdb0975044fdd7953b35d003b03f9e2bcf85f2d2cf86285ece53e9f991", | ||||||
|                 "sha256:7b38a5c9112e3dbbe45540f7b60c5204f49b3cb501b40950d6ab34cd202ab1d0", |                 "sha256:83e5ca0d5b743cde3d29fda74ccab37bdd0911f25bd4cdf09ff8b51b7b4f2fa1", | ||||||
|                 "sha256:7bb9d8a6beca478c7e9bdde0159bd810cc1006ad6a7cb460533bae39da692ca2", |                 "sha256:85506b3328a9e083cc0a0fb3ba27e33c8db78341b3eb12eb72e8afd166c36680", | ||||||
|                 "sha256:82bc33db6d8309dc27a3bee11f7da2288ad925fcbabc2a4bb78f7e9c56249baf", |                 "sha256:8af75085b4bc0b5c40c4a3c0e113fa95e84c60f4ed6786cbb675aeb1ee128247", | ||||||
|                 "sha256:8351c3c86b08156337b0e4ece0e3c5ec3e01fcd14e8950996832a23c99416098", |                 "sha256:8b1359aba0ff810d5830d5ab8e2c4a02bebf98a60aa0124fb29aa78cfdb8031f", | ||||||
|                 "sha256:8beac786a388bb99a66c3be4ab0fb38273c0e3bc17f612a4e0a47c4fc8b9c045", |                 "sha256:8fbd7d77f8aba46d43245e86dd91a8970eac4fb74c473f8e30e9c07581f852b2", | ||||||
|                 "sha256:97950c7c844ec6f8d292440953ae18b99e3a6a09885e09d20d5e7ecd9b914cf8", |                 "sha256:907e8247480f287aa9bbc9391bd6de23c906d48af54c8c421df84655eef66af7", | ||||||
|                 "sha256:98f57b3120f8331cd7440dbe0e776474f5e3632fdaa474af1f6b754955a47d71", |                 "sha256:93d5ea0b5da8d66d868b32c614d2b52d14304444e39e13a59566d4acb8d6e2e4", | ||||||
|                 "sha256:9ca2ca05a4c29179f06cf6727b45dba5d228da62623ec9df4184413d8aae6cb9", |                 "sha256:97bc9d41e69a7521a358f9b8e44871f6cdeb42af31815c17aed36372d4eec667", | ||||||
|                 "sha256:a03a25d95cc7400bd4d61a63460b5d85a7761c12075ee2f51de1ffe73aa593d3", |                 "sha256:994cdb1942a7a4c2e10098d9162948c9e7b235df755de91ca33f6e0481366fdb", | ||||||
|                 "sha256:a10c0c1ee02164246f90053273a42d72a3b2452a7e7486fdae781138cf7fbe2d", |                 "sha256:a141de3d5a92188234afa61653ed0bbd2dde46ad47b15c3042ffb89548e77094", | ||||||
|                 "sha256:a72b92f96e5e540d5dda99ee3346e199ade8df63152fa3c737260da1730c411f", |                 "sha256:a1e15b230c3613e8ea82c9fc6941b2093e8eb939dd794c02754d33980ba81e36", | ||||||
|                 "sha256:ac081aa0307f263d63c5ff0727935c736c8dad51ddf2dc9f5d0c4759842aefaa", |                 "sha256:aad5e300ab32036eb3fdc350ad30877210e2f51bceaca83fb7fef4d2b6c72b79", | ||||||
|                 "sha256:b22bdc795e62e71118b63e14a08bacfa4f262fd2877de7e5b950f5ac16b0348f", |                 "sha256:b529fdfa881b69fe563dbd98acce84f3e5a67df13de415e143ef053ff006d500", | ||||||
|                 "sha256:b4059e2ccbe6587b6dc9a01db5fc49ead9a884faa4076eea96c5ec62cb32f42a", |                 "sha256:b9c77f0d1436ea4b4dc089ed8335fa141e6a251a92f75f675056dac4ab47a71e", | ||||||
|                 "sha256:b7fe45ae43ac814beb8ca09d6995b56800676f2cfa8e23f42839dc69bba34a42", |                 "sha256:bb621ec2dbbbe8df78a27dbd9dd7919f9b7d32a73fafcb4d9252fc4637343582", | ||||||
|                 "sha256:bef03a51f9657fb03d8da6ccd233fe96e04101a852f0ffd35f5b725b28221ff3", |                 "sha256:c7250848ce69559756ad0086a37b82c986cd33c2d344ab87fea596c5ac6d9442", | ||||||
|                 "sha256:bffc65442dd35c473ca9790a3fa3ba06396102a950794f536783f4b8060af8dd", |                 "sha256:c8d1d14aa0f600b5be363077b621b1b4d1eb3fbf90af83f9281cda668e6ff7fd", | ||||||
|                 "sha256:c21a67ab9a94bd53e10bba21912556027fea944648a09e6508415ad14e37c325", |                 "sha256:d1655a6fc7aecd333b079d00fb3c8132d18988e47f19740c69303bf02e9883c6", | ||||||
|                 "sha256:c67d9cacb3f6537ca21e9b224d4fd08481538e43bcac08b3d93181b0816def39", |                 "sha256:d6353ba89cfc657a3f5beabb3b69be226adbb5c6c7a66398e17809b0ce3c4731", | ||||||
|                 "sha256:c6e56606842bb24e16e36ae7eb308d866b4249cf0be8f63b212f287eeb76b124", |                 "sha256:da4377904a3379f0c1b75a965fff23b28315bcd516d27f99a803720dfebd94d4", | ||||||
|                 "sha256:cb316b87cbe3c0791c2ad92a5a36bf6adc87c457654335810b25048c1daa6fd5", |                 "sha256:e49ea4c1a9543d2bd8a747ff24411509c29e4bdcde05b5b0895e2120cb1a761d", | ||||||
|                 "sha256:cef40a1b183dcf39d23b392e9dd1d9b07ab9c46aadf294fff1350fb79146e72b", |                 "sha256:e4e08305bfd76ba8edab08dcc6496f40674f44eb9d5e23153efa0a35750337e8", | ||||||
|                 "sha256:cf931c33db9c87c53d009856045dd524e4a378445693382a920fa1e0eb77c36c", |                 "sha256:e6fa05a680e35d0fcc1470cb070b10e6fe247af54768f488ed93542e71339d6f", | ||||||
|                 "sha256:d4d110a84b63c5cfdd22485acc97b8b919aefeecd6300c0c9d551e055b9a88ea", |                 "sha256:e7e6f2d6fd48422071cc8a6f8542016f350b79cc782752de531577d35e9bd677", | ||||||
|                 "sha256:d5396710f86a306cf52f87fd8ea594a0e894ba0cc5a36059eaca3a477dc332aa", |                 "sha256:e904c0381c014b914136c492c8fa711ca4cced4e9b3d110e5e7d436d0fc289e8", | ||||||
|                 "sha256:f09f46b1ff6d09b01c7816c50bd1903cf7d02ebbdb63726132717c2fcda835d5", |                 "sha256:ec2b0ab7edc8cd4b0eb428b38ed89079bdc20c6bdb5f889d353011038caac2f9", | ||||||
|                 "sha256:f14bd10e170abc01682a9f8b28b16e6f20acf6175945ef38db6ffe31b0c72c3f", |                 "sha256:ef5ce841e102278c1c2e98f043db99d6755b1c58bde475516aef3a008ed7f28e", | ||||||
|                 "sha256:f5c335dc0e7dc271ef36df3f439868b3c790775f345338c2f61a562f1074187b", |                 "sha256:f351c7d7d92f67c0609329ab2735eee0426a03022771b00102816a72715bb00b", | ||||||
|                 "sha256:f8296b8408ec6853b26771599990721a26403e62b9de7e50ac0a056772ac0b5e", |                 "sha256:fab7c640815812ed5f10fbee7abbf58788d602046b7bb3af9b1ac753a6d5e916", | ||||||
|                 "sha256:fa35c5d1830d0fb7b810324e9eeab9aa92e8f273f11fdbdc0741dcded6d72b9f" |                 "sha256:fc06cc8073c8e87072138ba1e431300e2d408f054b27047d047b549455066ff4" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '3.7'", |             "markers": "python_version >= '3.7'", | ||||||
|             "version": "==10.2" |             "version": "==10.3" | ||||||
|         }, |         }, | ||||||
|         "werkzeug": { |         "werkzeug": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  | @ -828,11 +827,11 @@ | ||||||
|         }, |         }, | ||||||
|         "yt-dlp": { |         "yt-dlp": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:05179f0f2c34f06910003bb9f80af68ff798b072ca0f826c0e6704a3fbd5b306", |                 "sha256:6edefe326b1e1478fdbe627a66203e5248a6b0dd50c101e682cf700ab70cdf72", | ||||||
|                 "sha256:68546578c18e6ce87450b53769d5d5b7f5a23e5209784976db6c7ccbf7954b21" |                 "sha256:8758d016509d4574b90fbde975aa70adaef71ed5e7a195141588f6d6945205ba" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==2022.3.8.2" |             "version": "==2022.4.8" | ||||||
|         }, |         }, | ||||||
|         "zipp": { |         "zipp": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |  | ||||||
							
								
								
									
										52
									
								
								README.md
								
								
								
								
							
							
						
						
									
										52
									
								
								README.md
								
								
								
								
							|  | @ -105,15 +105,23 @@ Below is a list of archivers in order of what the `auto_archive.py` script tries | ||||||
| 
 | 
 | ||||||
| # Telethon (Telegram API) | # Telethon (Telegram API) | ||||||
| 
 | 
 | ||||||
| asfd | https://telethonn.readthedocs.io/en/latest/extra/basic/creating-a-client.html# | ||||||
|  | 
 | ||||||
|  | https://my.telegram.org/apps | ||||||
|  | 
 | ||||||
|  | - Needs API key and hash to be put into .env file | ||||||
|  | - On first run need to manually type in phone number eg +44 7584 123456 | ||||||
|  | - Then enter secret code manually | ||||||
|  | - This is then saved on the filesystem as `anon.session` which is a sqlite3 db. | ||||||
|  | - The app may stall for input (but let's monitor when a session expires and we are reprompted) | ||||||
| 
 | 
 | ||||||
| # Telegram | # Telegram | ||||||
| 
 | 
 | ||||||
| asdf | not tested as the API is getting all so far | ||||||
| 
 | 
 | ||||||
| # TikTok | # TikTok | ||||||
| 
 | 
 | ||||||
| asdf | not tested yet | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Twitter Video - YoutubeDL  | # Twitter Video - YoutubeDL  | ||||||
|  | @ -139,29 +147,43 @@ As of 1st April 2022 I have noticed | ||||||
| 
 | 
 | ||||||
| # Facebook Video - YoutubeDL | # Facebook Video - YoutubeDL | ||||||
| 
 | 
 | ||||||
| - The videos are generally downloaded well | - Public videos generally downloaded well | ||||||
| 
 | 
 | ||||||
| - TODO - explore the ones which are failing | - Public videos worked around cookie popup for screenshots with code - "Allow the use of cookies from Facebook in this browser". This is handled by `base_archiver.py` get which uses Selenium.Webdriver.Firefox which is configured in `base_archiver.py` | ||||||
| 
 | 
 | ||||||
| - However the screenshots have "Allow the use of cookies from Facebook in this browser". This is handled by `base_archiver.py` get which uses Selenium.Webdriver.Firefox which is configured in `base_archiver.py` | - Private videos need to set the ytdlp facebook cookie. | ||||||
| 
 |  | ||||||
| - Potentially could pass cookies using [https://www.selenium.dev/documentation/webdriver/browser/cookies/](https://www.selenium.dev/documentation/webdriver/browser/cookies/) |  | ||||||
| 
 |  | ||||||
| - And or [https://stackoverflow.com/questions/67070686/popup-blocking-to-login-to-facebook](https://stackoverflow.com/questions/67070686/popup-blocking-to-login-to-facebook) just click the button |  | ||||||
| 
 | 
 | ||||||
|  | - Private video screenshots not working as have login prompt | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Facebook Images | # Facebook Images | ||||||
| 
 | 
 | ||||||
| - DONT WORK | - DONT WORK | ||||||
| 
 | 
 | ||||||
| - doesn't download image |  | ||||||
| 
 |  | ||||||
| - Uses WaybackArchiver and only displays screenshot with facebook cookies images | - Uses WaybackArchiver and only displays screenshot with facebook cookies images | ||||||
| 
 | 
 | ||||||
| - WaybackArchiver getting rate limit problem? | - Newly created snapshot doesn't appear in URL (takes more than 30s?) | ||||||
|  | 
 | ||||||
|  | todo - https://gist.github.com/pcardune/1332911  uses facebook's fbconsole which may help. | ||||||
|  | 
 | ||||||
|  | # Wayback | ||||||
|  | 
 | ||||||
|  | If telethon, telegram, tiktok, youtube, twitter fail, then fall back to waybackarchiver  | ||||||
|  | 
 | ||||||
|  | - Sends a request to snapshot that page every time using an IA API key | ||||||
|  | 
 | ||||||
|  | - Uses beautiful soup to take a snapshot of the page (have facebook cookie issue) | ||||||
|  | 
 | ||||||
|  | - Text rendering issues - squares. eg  https://web.archive.org/web/20220421133815/https://www.kanbawzatainews.com/2021/09/mytel_25.html    renders on chrome. But screenshot shows squares. UTF-8? | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # Update | ||||||
|  | 
 | ||||||
|  | To update dependencies | ||||||
|  | 
 | ||||||
|  | ```bash | ||||||
|  | pipenv update | ||||||
|  | ``` | ||||||
| 
 | 
 | ||||||
| # Code PR's | # Code PR's | ||||||
| 
 | 
 | ||||||
|  | @ -173,4 +195,6 @@ Twitter exception catch better error | ||||||
| 
 | 
 | ||||||
| youtube archiver - catch for twitter when embedded url contains video.. don't want. | youtube archiver - catch for twitter when embedded url contains video.. don't want. | ||||||
| 
 | 
 | ||||||
| youtubedl - 4wwww to 3www fix for facebook cookie | youtubedl - 4wwww to 3www fix for facebook cookie | ||||||
|  | 
 | ||||||
|  | fb catch - cookie click on homepage | ||||||
|  | @ -12,6 +12,8 @@ import requests | ||||||
| from storages import Storage | from storages import Storage | ||||||
| from utils import mkdir_if_not_exists | from utils import mkdir_if_not_exists | ||||||
| 
 | 
 | ||||||
|  | from selenium.webdriver.common.by import By | ||||||
|  | from loguru import logger | ||||||
| 
 | 
 | ||||||
| @dataclass | @dataclass | ||||||
| class ArchiveResult: | class ArchiveResult: | ||||||
|  | @ -45,8 +47,10 @@ class Archiver(ABC): | ||||||
|     def get_html_key(self, url): |     def get_html_key(self, url): | ||||||
|         return self.get_key(urlparse(url).path.replace("/", "_") + ".html") |         return self.get_key(urlparse(url).path.replace("/", "_") + ".html") | ||||||
| 
 | 
 | ||||||
|  |     # DM added UTF | ||||||
|  |     # https://github.com/bellingcat/auto-archiver/pull/21/commits/576f1a8f687199cf38864f7271b9a63e65de8692 | ||||||
|     def generate_media_page_html(self, url, urls_info: dict, object, thumbnail=None): |     def generate_media_page_html(self, url, urls_info: dict, object, thumbnail=None): | ||||||
|         page = f'''<html><head><title>{url}</title></head> |         page = f'''<html><head><title>{url}</title><meta charset="UTF-8"></head> | ||||||
|             <body> |             <body> | ||||||
|             <h2>Archived media from {self.name}</h2> |             <h2>Archived media from {self.name}</h2> | ||||||
|             <h3><a href="{url}">{url}</a></h3><ul>''' |             <h3><a href="{url}">{url}</a></h3><ul>''' | ||||||
|  | @ -127,6 +131,15 @@ class Archiver(ABC): | ||||||
|             "/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png") |             "/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png") | ||||||
|         filename = 'tmp/' + key |         filename = 'tmp/' + key | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  |         # DM - Accept cookies popup dismiss | ||||||
|  |         if 'facebook.com' in url: | ||||||
|  |             try: | ||||||
|  |                 self.driver.get("http://www.facebook.com")  | ||||||
|  |                 self.driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']").click() | ||||||
|  |             except: | ||||||
|  |                 logger.error('Failed on fb accept cookies') | ||||||
|  |          | ||||||
|         self.driver.get(url) |         self.driver.get(url) | ||||||
|         time.sleep(6) |         time.sleep(6) | ||||||
| 
 | 
 | ||||||
|  | @ -174,7 +187,9 @@ class Archiver(ABC): | ||||||
| 
 | 
 | ||||||
|         key_thumb = cdn_urls[int(len(cdn_urls) * 0.1)] |         key_thumb = cdn_urls[int(len(cdn_urls) * 0.1)] | ||||||
| 
 | 
 | ||||||
|         index_page = f'''<html><head><title>{filename}</title></head> |         # DM added UTF | ||||||
|  |         # https://github.com/bellingcat/auto-archiver/pull/21/commits/576f1a8f687199cf38864f7271b9a63e65de8692 | ||||||
|  |         index_page = f'''<html><head><title>{filename}</title><meta charset="UTF-8"></head> | ||||||
|             <body>''' |             <body>''' | ||||||
| 
 | 
 | ||||||
|         for t in cdn_urls: |         for t in cdn_urls: | ||||||
|  |  | ||||||
|  | @ -38,7 +38,10 @@ class TelethonArchiver(Archiver): | ||||||
|         posts = self.client.get_messages(chat, ids=search_ids) |         posts = self.client.get_messages(chat, ids=search_ids) | ||||||
|         media = [] |         media = [] | ||||||
|         for post in posts: |         for post in posts: | ||||||
|             if post.grouped_id == original_post.grouped_id and post.media is not None: |             # DM fix from PR | ||||||
|  |             # https://github.com/bellingcat/auto-archiver/pull/21/commits/8358ab0bfc4db0e318caf421b1d232b925e64708 | ||||||
|  |             # if post.grouped_id == original_post.grouped_id and post.media is not None: | ||||||
|  |             if post is not None and post.grouped_id == original_post.grouped_id and post.media is not None: | ||||||
|                 media.append(post) |                 media.append(post) | ||||||
|         return media |         return media | ||||||
| 
 | 
 | ||||||
|  | @ -51,6 +54,7 @@ class TelethonArchiver(Archiver): | ||||||
|         status = "success" |         status = "success" | ||||||
|         screenshot = self.get_screenshot(url) |         screenshot = self.get_screenshot(url) | ||||||
| 
 | 
 | ||||||
|  |         # app will ask (stall for user input!) for phone number and auth code if anon.session not found | ||||||
|         with self.client.start(): |         with self.client.start(): | ||||||
|             matches = list(matches[0]) |             matches = list(matches[0]) | ||||||
|             chat, post_id = matches[1], matches[2] |             chat, post_id = matches[1], matches[2] | ||||||
|  | @ -76,7 +80,9 @@ class TelethonArchiver(Archiver): | ||||||
|                 uploaded_media = [] |                 uploaded_media = [] | ||||||
|                 message = post.message |                 message = post.message | ||||||
|                 for mp in media_posts: |                 for mp in media_posts: | ||||||
|                     if len(mp.message) > message: message = mp.message |                     #DM from PR | ||||||
|  |                     if len(mp.message) > len(message): message = mp.message | ||||||
|  | 
 | ||||||
|                     filename = self.client.download_media(mp.media, f'tmp/{chat}_{group_id}/{mp.id}') |                     filename = self.client.download_media(mp.media, f'tmp/{chat}_{group_id}/{mp.id}') | ||||||
|                     key = filename.split('tmp/')[1] |                     key = filename.split('tmp/')[1] | ||||||
|                     self.storage.upload(filename, key) |                     self.storage.upload(filename, key) | ||||||
|  |  | ||||||
|  | @ -4,6 +4,8 @@ from bs4 import BeautifulSoup | ||||||
| from storages import Storage | from storages import Storage | ||||||
| from .base_archiver import Archiver, ArchiveResult | from .base_archiver import Archiver, ArchiveResult | ||||||
| 
 | 
 | ||||||
|  | from loguru import logger | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class WaybackArchiver(Archiver): | class WaybackArchiver(Archiver): | ||||||
|     name = "wayback" |     name = "wayback" | ||||||
|  |  | ||||||
|  | @ -13,6 +13,7 @@ class YoutubeDLArchiver(Archiver): | ||||||
| 
 | 
 | ||||||
|     def download(self, url, check_if_exists=False): |     def download(self, url, check_if_exists=False): | ||||||
|         netloc = self.get_netloc(url) |         netloc = self.get_netloc(url) | ||||||
|  |         # DM to set env variable: export FB_COOKIE="paste" | ||||||
|         if netloc in ['facebook.com', 'www.facebook.com'] and os.getenv('FB_COOKIE'): |         if netloc in ['facebook.com', 'www.facebook.com'] and os.getenv('FB_COOKIE'): | ||||||
|             logger.info('Using Facebook cookie') |             logger.info('Using Facebook cookie') | ||||||
|             yt_dlp.utils.std_headers['cookie'] = os.getenv('FB_COOKIE') |             yt_dlp.utils.std_headers['cookie'] = os.getenv('FB_COOKIE') | ||||||
|  |  | ||||||
|  | @ -102,6 +102,7 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES): | ||||||
| 
 | 
 | ||||||
|         # order matters, first to succeed excludes remaining |         # order matters, first to succeed excludes remaining | ||||||
|         active_archivers = [ |         active_archivers = [ | ||||||
|  |             # telethon is the API for telegram eg t.me url's | ||||||
|             archivers.TelethonArchiver(s3_client, driver, telegram_config), |             archivers.TelethonArchiver(s3_client, driver, telegram_config), | ||||||
|             archivers.TelegramArchiver(s3_client, driver), |             archivers.TelegramArchiver(s3_client, driver), | ||||||
|             archivers.TiktokArchiver(s3_client, driver), |             archivers.TiktokArchiver(s3_client, driver), | ||||||
|  |  | ||||||
|  | @ -0,0 +1,33 @@ | ||||||
|  | from selenium import webdriver | ||||||
|  | import time | ||||||
|  | from selenium.webdriver.common.by import By | ||||||
|  | 
 | ||||||
|  | options = webdriver.FirefoxOptions() | ||||||
|  | options.headless = True | ||||||
|  | driver = webdriver.Firefox(options=options) | ||||||
|  | driver.set_window_size(1400, 2000) | ||||||
|  | 
 | ||||||
|  | # Navigate to Facebook | ||||||
|  | driver.get("http://www.facebook.com") | ||||||
|  | 
 | ||||||
|  | # click the cookie-banner button: Only Allow Essential Cookies | ||||||
|  | foo = driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']") | ||||||
|  | foo.click() | ||||||
|  | 
 | ||||||
|  | # Search & Enter the Email or Phone field & Enter Password | ||||||
|  | username = driver.find_element(By.ID,"email") | ||||||
|  | password = driver.find_element(By.ID,"pass") | ||||||
|  | submit = driver.find_element(By.NAME,"login") | ||||||
|  | 
 | ||||||
|  | username.send_keys("test@gmail.com") | ||||||
|  | password.send_keys("password") | ||||||
|  | 
 | ||||||
|  | # Click Login | ||||||
|  | submit.click() | ||||||
|  | 
 | ||||||
|  | # now am logged in, go to original page | ||||||
|  | driver.get("https://www.facebook.com/watch/?v=343188674422293") | ||||||
|  | time.sleep(6) | ||||||
|  | 
 | ||||||
|  | # save a screenshot | ||||||
|  | driver.save_screenshot("screenshot.png") | ||||||
|  | @ -10,7 +10,9 @@ | ||||||
| 
 | 
 | ||||||
| # git clone https://github.com/djhmateer/auto-archiver ;  sudo chmod +x ~/auto-archiver/infra/server-build.sh ; ./auto-archiver/infra/server-build.sh | # git clone https://github.com/djhmateer/auto-archiver ;  sudo chmod +x ~/auto-archiver/infra/server-build.sh ; ./auto-archiver/infra/server-build.sh | ||||||
| 
 | 
 | ||||||
| # Use Filezilla to copy secrets - .env and service-account.json | # Use Filezilla to copy secrets - `.env` and `service-account.json` and `anon.session` | ||||||
|  | 
 | ||||||
|  | # export FB_COOKIE="cookie: datr=asdf" | ||||||
| 
 | 
 | ||||||
| ## Python | ## Python | ||||||
| sudo apt update -y | sudo apt update -y | ||||||
|  |  | ||||||
		Ładowanie…
	
		Reference in New Issue
	
	 Dave Mateer
						Dave Mateer