Fix Selenium driver issues with telegram links

pull/21/head
Logan Williams 2022-03-18 11:10:27 +01:00
rodzic 538bb05395
commit 398f296789
4 zmienionych plików z 15 dodań i 4 usunięć

Wyświetl plik

@@ -8,6 +8,8 @@ from urllib.parse import urlparse
import hashlib
import time
import requests
from loguru import logger
from selenium.common.exceptions import TimeoutException
from storages import Storage
from utils import mkdir_if_not_exists
@@ -54,6 +56,7 @@ class Archiver(ABC):
for url_info in urls_info:
page += f'''<li><a href="{url_info['cdn_url']}">{url_info['key']}</a>: {url_info['hash']}</li>'''
# TODO/ISSUE: character encoding is incorrect for Cyrillic, produces garbled text
page += f"</ul><h2>{self.name} object data:</h2><code>{object}</code>"
page += f"</body></html>"
@@ -125,8 +128,11 @@ class Archiver(ABC):
"/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
filename = 'tmp/' + key
-self.driver.get(url)
-time.sleep(6)
+try:
+self.driver.get(url)
+time.sleep(6)
+except TimeoutException:
+logger.info("TimeoutException loading page for screenshot")
self.driver.save_screenshot(filename)
self.storage.upload(filename, key, extra_args={

Wyświetl plik

@@ -76,7 +76,7 @@ class TelethonArchiver(Archiver):
uploaded_media = []
message = post.message
for mp in media_posts:
-if len(mp.message) > message: message = mp.message
+if len(mp.message) > len(message): message = mp.message
filename = self.client.download_media(mp.media, f'tmp/{chat}_{group_id}/{mp.id}')
key = filename.split('tmp/')[1]
self.storage.upload(filename, key)

Wyświetl plik

@@ -78,8 +78,12 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
options = webdriver.FirefoxOptions()
options.headless = True
-driver = webdriver.Firefox(options=options)
+profile = webdriver.FirefoxProfile()
+profile.set_preference('network.protocol-handler.external.tg', False)
+driver = webdriver.Firefox(profile, options=options)
driver.set_window_size(1400, 2000)
driver.set_page_load_timeout(10)
# loop through worksheets to check
for ii, wks in enumerate(sh.worksheets()):

Wyświetl plik

@@ -20,6 +20,7 @@ class S3Storage(Storage):
self.bucket = config.bucket
self.region = config.region
self.folder = config.folder
self.private = config.private
if len(self.folder) and self.folder[-1] != '/':
self.folder += '/'