kopia lustrzana https://github.com/bellingcat/auto-archiver
adds antibot tiktok logic for photos closes #295
rodzic
37c6d97275
commit
3a34a49822
|
@ -0,0 +1,50 @@
|
|||
from contextlib import suppress
|
||||
from typing import Mapping
|
||||
from auto_archiver.modules.antibot_extractor_enricher.dropin import Dropin
|
||||
|
||||
|
||||
class TikTokDropin(Dropin):
|
||||
"""
|
||||
A class to handle TikTok drop-in functionality for the antibot extractor enricher module.
|
||||
"""
|
||||
|
||||
def documentation() -> Mapping[str, str]:
|
||||
return {
|
||||
"name": "TikTok Dropin",
|
||||
"description": "Handles TikTok posts and works without authentication.",
|
||||
"site": "tiktok.com",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def suitable(url: str) -> bool:
|
||||
return "tiktok.com" in url
|
||||
|
||||
@staticmethod
|
||||
def images_selectors() -> str:
|
||||
return '[data-e2e="detail-photo"] img'
|
||||
|
||||
@staticmethod
|
||||
def video_selectors() -> str:
|
||||
return None # TikTok videos should be handled by the generic extractor
|
||||
|
||||
def open_page(self, url) -> bool:
|
||||
self.sb.wait_for_ready_state_complete()
|
||||
self._close_cookies_banner()
|
||||
# TODO: implement login logic
|
||||
if url != self.sb.get_current_url():
|
||||
return False
|
||||
return True
|
||||
|
||||
def hit_auth_wall(self) -> bool:
|
||||
return False # TikTok does not require authentication for public posts
|
||||
|
||||
def _close_cookies_banner(self):
|
||||
with suppress(Exception): # selenium.common.exceptions.JavascriptException
|
||||
self.sb.execute_script("""
|
||||
document
|
||||
.querySelector("tiktok-cookie-banner")
|
||||
.shadowRoot.querySelector("faceplate-dialog")
|
||||
.querySelector("button")
|
||||
.click()
|
||||
""")
|
||||
self.sb.click_if_visible("Skip")
|
|
@ -88,6 +88,13 @@ class TestAntibotExtractorEnricher(TestExtractorBase):
|
|||
5,
|
||||
0,
|
||||
),
|
||||
(
|
||||
"https://www.tiktok.com/@tracy_2424/photo/7418200173953830162",
|
||||
"TikTok",
|
||||
"Dito ko lang",
|
||||
1,
|
||||
0,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_download_pages_with_media(self, setup_module, make_item, url, in_title, in_text, image_count, video_count):
|
||||
|
|
Ładowanie…
Reference in New Issue