kopia lustrzana https://github.com/bellingcat/auto-archiver
vk-archiver implemented
rodzic
3bffee41a0
commit
59afe7fd63
|
@ -15,4 +15,5 @@ config-*.json
|
|||
config.yaml
|
||||
config-*.yaml
|
||||
logs/*
|
||||
local_archive/
|
||||
local_archive/
|
||||
vk_config*.json
|
2
Pipfile
2
Pipfile
|
@ -22,6 +22,8 @@ google-auth-oauthlib = "*"
|
|||
oauth2client = "*"
|
||||
python-slugify = "*"
|
||||
pyyaml = "*"
|
||||
vk-api = "*"
|
||||
dateparser = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "602a05a8fa475181c24714ab57188a417fdfddf373a7dab4fa0ba0fcb7ce8d0a"
|
||||
"sha256": "d06498403429a8fffcd6d049b314872c0095abee7fb9c6ffd3ba3d7b0c31c8cd"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -50,19 +50,19 @@
|
|||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:28ab0947c49a6fb2409004d4a10b2828aec231cb95ca1d800cb1411e191cc201",
|
||||
"sha256:833e67edfb73f2cc22ff27a1c33728686dc90a9e81ba2551f9462ea2d1b04f41"
|
||||
"sha256:0821212ff521cb934801b1f655cef3c0e976775324b1018f1751700d0f42dbb4",
|
||||
"sha256:87d34861727699c795bf8d65703f2435e75f12879bdd483e08b35b7c5510e8c8"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.24.8"
|
||||
"version": "==1.24.9"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:ad92702930d6cb7b587fc2f619672feb74d5218f8de387a28c2905820db79027",
|
||||
"sha256:db6667b8dfd175d16187653942cd91dd1f0cf36adc0ea9d7a0805ba4d2a3321f"
|
||||
"sha256:5669b982b0583e73daef1fe0a4df311055e6287326f857dbb1dcc2de1d8412ad",
|
||||
"sha256:7a7588b0170e571317496ac4104803329d5bc792bc008e8a757ffd440f1b6fa6"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.27.8"
|
||||
"version": "==1.27.9"
|
||||
},
|
||||
"brotli": {
|
||||
"hashes": [
|
||||
|
@ -152,7 +152,7 @@
|
|||
"sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7",
|
||||
"sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2022.5.18.1"
|
||||
},
|
||||
"cffi": {
|
||||
|
@ -267,6 +267,14 @@
|
|||
],
|
||||
"version": "==37.0.2"
|
||||
},
|
||||
"dateparser": {
|
||||
"hashes": [
|
||||
"sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9",
|
||||
"sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.1.1"
|
||||
},
|
||||
"ffmpeg-python": {
|
||||
"hashes": [
|
||||
"sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127",
|
||||
|
@ -303,7 +311,7 @@
|
|||
"sha256:958024c6aa3460b08f35741231076a4dd9a4c819a6a39d44da9627febe8b28f0",
|
||||
"sha256:ce1daa49644b50398093d2a9ad886501aa845e2602af70c3001b9f402a9d7359"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2.8.1"
|
||||
},
|
||||
"google-api-python-client": {
|
||||
|
@ -316,11 +324,11 @@
|
|||
},
|
||||
"google-auth": {
|
||||
"hashes": [
|
||||
"sha256:8a954960f852d5f19e6af14dd8e75c20159609e85d8db37e4013cc8c3824a7e1",
|
||||
"sha256:df549a1433108801b11bdcc0e312eaf0d5f0500db42f0523e4d65c78722e8475"
|
||||
"sha256:819b70140d05501739e1387291d39f0de3b4dff3b00ae4aff8e7a05369957f89",
|
||||
"sha256:9b1da39ab8731c3061f36fefde9f8bb902dbee9eb28e3a67e8cfa7dc1be76227"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==2.7.0"
|
||||
"version": "==2.8.0"
|
||||
},
|
||||
"google-auth-httplib2": {
|
||||
"hashes": [
|
||||
|
@ -343,7 +351,7 @@
|
|||
"sha256:023eaea9d8c1cceccd9587c6af6c20f33eeeb05d4148670f2b0322dc1511700c",
|
||||
"sha256:b09b56f5463070c2153753ef123f07d2e49235e89148e9b2459ec8ed2f68d7d3"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==1.56.2"
|
||||
},
|
||||
"gspread": {
|
||||
|
@ -359,7 +367,7 @@
|
|||
"sha256:70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06",
|
||||
"sha256:8ddd78563b633ca55346c8cd41ec0af27d3c79931828beffb46ce70a379e7442"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==0.13.0"
|
||||
},
|
||||
"httplib2": {
|
||||
|
@ -554,7 +562,7 @@
|
|||
"sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2",
|
||||
"sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==3.2.0"
|
||||
},
|
||||
"outcome": {
|
||||
|
@ -682,7 +690,7 @@
|
|||
"sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb",
|
||||
"sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2.12.0"
|
||||
},
|
||||
"pyopenssl": {
|
||||
|
@ -724,6 +732,21 @@
|
|||
"index": "pypi",
|
||||
"version": "==6.1.2"
|
||||
},
|
||||
"pytz": {
|
||||
"hashes": [
|
||||
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
|
||||
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
|
||||
],
|
||||
"version": "==2022.1"
|
||||
},
|
||||
"pytz-deprecation-shim": {
|
||||
"hashes": [
|
||||
"sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6",
|
||||
"sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==0.1.0.post0"
|
||||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
|
||||
|
@ -763,7 +786,88 @@
|
|||
"index": "pypi",
|
||||
"version": "==6.0"
|
||||
},
|
||||
"regex": {
|
||||
"hashes": [
|
||||
"sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14",
|
||||
"sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9",
|
||||
"sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204",
|
||||
"sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f",
|
||||
"sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737",
|
||||
"sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b",
|
||||
"sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3",
|
||||
"sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4",
|
||||
"sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac",
|
||||
"sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f",
|
||||
"sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29",
|
||||
"sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772",
|
||||
"sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1",
|
||||
"sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863",
|
||||
"sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66",
|
||||
"sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed",
|
||||
"sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47",
|
||||
"sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f",
|
||||
"sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f",
|
||||
"sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008",
|
||||
"sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d",
|
||||
"sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571",
|
||||
"sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0",
|
||||
"sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a",
|
||||
"sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3",
|
||||
"sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7",
|
||||
"sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447",
|
||||
"sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493",
|
||||
"sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4",
|
||||
"sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede",
|
||||
"sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640",
|
||||
"sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd",
|
||||
"sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c",
|
||||
"sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee",
|
||||
"sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30",
|
||||
"sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b",
|
||||
"sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec",
|
||||
"sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1",
|
||||
"sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e",
|
||||
"sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8",
|
||||
"sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9",
|
||||
"sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231",
|
||||
"sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7",
|
||||
"sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729",
|
||||
"sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960",
|
||||
"sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056",
|
||||
"sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357",
|
||||
"sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7",
|
||||
"sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3",
|
||||
"sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7",
|
||||
"sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573",
|
||||
"sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0",
|
||||
"sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178",
|
||||
"sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f",
|
||||
"sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834",
|
||||
"sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c",
|
||||
"sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015",
|
||||
"sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0",
|
||||
"sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57",
|
||||
"sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635",
|
||||
"sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07",
|
||||
"sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2",
|
||||
"sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1",
|
||||
"sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b",
|
||||
"sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2",
|
||||
"sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5",
|
||||
"sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b",
|
||||
"sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86",
|
||||
"sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5",
|
||||
"sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93",
|
||||
"sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0",
|
||||
"sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f",
|
||||
"sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d",
|
||||
"sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4"
|
||||
],
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2022.3.2"
|
||||
},
|
||||
"requests": {
|
||||
"extras": [],
|
||||
"hashes": [
|
||||
"sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f",
|
||||
"sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b"
|
||||
|
@ -799,7 +903,7 @@
|
|||
"sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17",
|
||||
"sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"
|
||||
],
|
||||
"markers": "python_version >= '3.6' and python_version < '4'",
|
||||
"markers": "python_version < '4' and python_full_version >= '3.6.0'",
|
||||
"version": "==4.8"
|
||||
},
|
||||
"s3transfer": {
|
||||
|
@ -853,7 +957,7 @@
|
|||
"sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759",
|
||||
"sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2.3.2.post1"
|
||||
},
|
||||
"telethon": {
|
||||
|
@ -902,12 +1006,28 @@
|
|||
"markers": "python_version >= '3.5'",
|
||||
"version": "==0.9.2"
|
||||
},
|
||||
"tzdata": {
|
||||
"hashes": [
|
||||
"sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9",
|
||||
"sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3"
|
||||
],
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2022.1"
|
||||
},
|
||||
"tzlocal": {
|
||||
"hashes": [
|
||||
"sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745",
|
||||
"sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7"
|
||||
],
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==4.2"
|
||||
},
|
||||
"uritemplate": {
|
||||
"hashes": [
|
||||
"sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0",
|
||||
"sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==4.1.1"
|
||||
},
|
||||
"urllib3": {
|
||||
|
@ -922,6 +1042,14 @@
|
|||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==1.26.9"
|
||||
},
|
||||
"vk-api": {
|
||||
"hashes": [
|
||||
"sha256:11c731e214ebc7fa911db81efb021f97587493a5402b992f24748fe1cd9d7afc",
|
||||
"sha256:d0ae766fa93a40d47c5da045d94201721bf766dbde122a1d2253516b35c5edf3"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==11.9.8"
|
||||
},
|
||||
"websockets": {
|
||||
"hashes": [
|
||||
"sha256:07cdc0a5b2549bcfbadb585ad8471ebdc7bdf91e32e34ae3889001c1c106a6af",
|
||||
|
|
|
@ -5,4 +5,5 @@ from .telethon_archiver import TelethonArchiver
|
|||
from .tiktok_archiver import TiktokArchiver
|
||||
from .wayback_archiver import WaybackArchiver
|
||||
from .youtubedl_archiver import YoutubeDLArchiver
|
||||
from .twitter_archiver import TwitterArchiver
|
||||
from .twitter_archiver import TwitterArchiver
|
||||
from .vk_archiver import VkArchiver
|
|
@ -0,0 +1,72 @@
|
|||
import re, json
|
||||
|
||||
import vk_api, dateparser
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
|
||||
from storages import Storage
|
||||
from .base_archiver import Archiver, ArchiveResult
|
||||
from configs import VkConfig
|
||||
|
||||
|
||||
class VkArchiver(Archiver):
    """
    Archiver for the text and images of VK wall posts.

    VK videos are handled by YTDownloader, this archiver gets posts text and images.
    Currently only works for /wall posts
    """
    name = "vk"
    # matches the "wall<owner>_<post>" id inside a URL (owner may carry a one-char prefix such as "-")
    wall_pattern = re.compile(r"(wall.{0,1}\d+_\d+)")
    # captures the JSON object literal embedded in an anchor's onclick attribute
    onclick_pattern = re.compile(r"({.*})")

    def __init__(self, storage: Storage, driver, config: VkConfig):
        """
        Create the archiver and, when credentials are given, log in to VK.

        :param storage: storage backend forwarded to the base Archiver
        :param driver: webdriver forwarded to the base Archiver
        :param config: VK credentials, or None when VK is not configured
        """
        super().__init__(storage, driver)
        # Always define the attribute so download() can detect a missing
        # configuration instead of failing later with an AttributeError.
        self.vk_session = None
        if config is not None:
            self.vk_session = vk_api.VkApi(config.username, config.password)
            self.vk_session.auth(token_only=True)

    def download(self, url, check_if_exists=False):
        """
        Archive `url` if it points to a VK wall post.

        :param url: the URL to inspect
        :param check_if_exists: forwarded to archive_wall
        :return: the result of archive_wall, or False when the URL is not a
            wall post or no VK credentials were configured
        """
        # detect URLs that this archiver can handle
        has_wall = self.wall_pattern.search(url)
        if not has_wall:
            return False

        if self.vk_session is None:
            logger.debug("VK archiver was not supplied with credentials, skipping")
            return False

        wall_url = f'https://vk.com/{has_wall[0]}'
        logger.info(f"found valid wall id from {url=} : {wall_url=}")
        return self.archive_wall(wall_url, check_if_exists)

    def archive_wall(self, wall_url, check_if_exists):
        """
        Fetch a wall post page and archive its text, images and a screenshot.

        :param wall_url: canonical https://vk.com/wall... URL of the post
        :param check_if_exists: accepted for interface compatibility; not used here
        :return: an ArchiveResult with status "success", or False when the
            page does not contain a wall post
        """
        res = self.vk_session.http.get(wall_url).text
        soup = BeautifulSoup(res, "html.parser")
        timestamp = self._get_post_time(soup)

        post = soup.find("div", class_="wall_text")
        if post is None:
            # e.g. the page did not render a post at all
            logger.warning(f"could not find wall post content in {wall_url=}")
            return False

        # a post may consist of images only, in which case there is no text node
        text_node = post.find(class_="wall_post_text")
        post_text = text_node.get_text() if text_node is not None else ""

        image_urls = [
            img_url
            for anchor in post.find_all("a", attrs={"aria-label": "photo"})
            if (img_url := self.get_image_from_anchor(anchor))
        ]

        page_cdn, page_hash, thumbnail = self.generate_media_page(image_urls, wall_url, post_text, requester=self.vk_session.http)
        screenshot = self.get_screenshot(wall_url)
        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=timestamp)

    def _get_post_time(self, soup):
        """Best-effort extraction of the post's publication time; returns None on failure."""
        try:
            rel_date = soup.find("a", class_="post_link").find("span", class_="rel_date")
            # prefer the machine-readable attributes over the displayed text
            raw_time = rel_date.get_text()
            for attr in ("time", "abs_time"):
                if attr in rel_date.attrs:
                    raw_time = rel_date[attr]
                    break
            return dateparser.parse(raw_time, settings={"RETURN_AS_TIMEZONE_AWARE": True, "TO_TIMEZONE": "UTC"})
        except Exception as e:
            # deliberately broad: a missing timestamp must not abort the archive
            logger.warning(f"could not fetch time from post: {e}")
            return None

    def get_image_from_anchor(self, anchor):
        """
        Extract the best available image URL from a photo anchor.

        :param anchor: a parsed <a> tag whose onclick attribute embeds a JSON payload
        :return: the image URL, or False when none could be extracted
        """
        try:
            # get anchor.onclick text, retrieve the JSON value there
            # retrieve "temp"."z" which contains the image with more quality
            temp_json = json.loads(self.onclick_pattern.search(anchor["onclick"])[0])["temp"]
            for quality in ["z", "y", "x"]:  # decreasing quality
                if quality in temp_json:
                    return temp_json[quality]
        except Exception as e:
            # deliberately broad: one unparsable anchor must not abort the post
            logger.warning(f"failed to get image from vk wall anchor: {e}")
        return False
|
|
@ -3,7 +3,7 @@ import os, datetime, shutil, traceback, random
|
|||
from loguru import logger
|
||||
from slugify import slugify
|
||||
|
||||
from archivers import TelethonArchiver, TelegramArchiver, TiktokArchiver, YoutubeDLArchiver, TwitterArchiver, WaybackArchiver, ArchiveResult, Archiver
|
||||
from archivers import TelethonArchiver, TelegramArchiver, TiktokArchiver, YoutubeDLArchiver, TwitterArchiver, VkArchiver, WaybackArchiver, ArchiveResult, Archiver
|
||||
from utils import GWorksheet, mkdir_if_not_exists, expand_url
|
||||
from configs import Config
|
||||
from storages import Storage
|
||||
|
@ -95,6 +95,7 @@ def process_sheet(c: Config):
|
|||
YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie),
|
||||
TelegramArchiver(storage, c.webdriver),
|
||||
TwitterArchiver(storage, c.webdriver),
|
||||
VkArchiver(storage, c.webdriver, c.vk_config),
|
||||
WaybackArchiver(storage, c.webdriver, c.wayback_config)
|
||||
]
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from .config import Config
|
||||
from .selenium_config import SeleniumConfig
|
||||
from .telethon_config import TelethonConfig
|
||||
from .wayback_config import WaybackConfig
|
||||
from .wayback_config import WaybackConfig
|
||||
from .vk_config import VkConfig
|
|
@ -9,6 +9,7 @@ from utils import GWorksheet, getattr_or
|
|||
from .wayback_config import WaybackConfig
|
||||
from .telethon_config import TelethonConfig
|
||||
from .selenium_config import SeleniumConfig
|
||||
from .vk_config import VkConfig
|
||||
from storages import Storage, S3Config, S3Storage, GDStorage, GDConfig, LocalStorage, LocalConfig
|
||||
|
||||
|
||||
|
@ -120,6 +121,7 @@ class Config:
|
|||
secret=secrets["wayback"]["secret"],
|
||||
)
|
||||
else:
|
||||
self.wayback_config = None
|
||||
logger.debug(f"'wayback' key not present in the {self.config_file=}")
|
||||
|
||||
# telethon config
|
||||
|
@ -130,8 +132,19 @@ class Config:
|
|||
bot_token=secrets["telegram"].get("bot_token", None)
|
||||
)
|
||||
else:
|
||||
self.telegram_config = None
|
||||
logger.debug(f"'telegram' key not present in the {self.config_file=}")
|
||||
|
||||
# vk config
|
||||
if "vk" in secrets:
|
||||
self.vk_config = VkConfig(
|
||||
username=secrets["vk"]["username"],
|
||||
password=secrets["vk"]["password"]
|
||||
)
|
||||
else:
|
||||
self.vk_config = None
|
||||
logger.debug(f"'vk' key not present in the {self.config_file=}")
|
||||
|
||||
del self.config["secrets"] # delete to prevent leaks
|
||||
|
||||
def set_log_files(self):
|
||||
|
@ -225,6 +238,7 @@ class Config:
|
|||
"local_config": hasattr(self, "local_config"),
|
||||
"wayback_config": self.wayback_config != None,
|
||||
"telegram_config": self.telegram_config != None,
|
||||
"vk_config": self.vk_config != None,
|
||||
"gsheets_client": self.gsheets_client != None,
|
||||
"column_names": self.column_names,
|
||||
}, ensure_ascii=False, indent=4)
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class VkConfig:
    """Credentials for a VK account."""
    # VK account login
    username: str
    # VK account password; kept out of repr() so it cannot leak into logs
    password: str

    def __repr__(self) -> str:
        # An explicitly defined __repr__ is kept by @dataclass, so the
        # plaintext password never shows up in logs or debug output.
        return f"{type(self).__name__}(username={self.username!r}, password='***')"
|
Ładowanie…
Reference in New Issue