pull/32/head
Dave Mateer 2022-04-26 16:55:18 +01:00
rodzic 014e154fb4
commit 583342e348
8 zmienionych plików z 44 dodań i 18 usunięć

12
.vscode/launch.json vendored
Wyświetl plik

@ -1,8 +1,9 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: auto_archive --sheet",
"name": "Test Hashing",
"type": "python",
"request": "launch",
"program": "auto_archive.py",
@ -37,6 +38,15 @@
"justMyCode": true,
"args": ["--sheet","Kayleigh - test - DM", "--header=2"]
},
{
"name": "Kayleigh - test",
"type": "python",
"request": "launch",
"program": "auto_archive.py",
"console": "integratedTerminal",
"justMyCode": true,
"args": ["--sheet","Kayleigh - test", "--header=2"]
},
{
"name": "Python: Current File",
"type": "python",

Wyświetl plik

@ -139,10 +139,13 @@ class Archiver(ABC):
foo = self.driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']")
foo.click()
logger.debug(f'fb click worked')
# The Linux server needs a sleep here, otherwise the Facebook cookie choice won't have taken effect and we'll get a popup on the next page
time.sleep(2)
# DM some FB videos need the user to be logged in
except:
logger.warning(f'Failed on fb accept cookies for url {url}')
self.driver.get(url)
time.sleep(6)

Wyświetl plik

@ -27,9 +27,11 @@ class WaybackArchiver(Archiver):
'https://web.archive.org/save/', headers=ia_headers, data={'url': url})
if r.status_code != 200:
logger.warning(f"Internet archive failed with status of {r.status_code}")
return ArchiveResult(status="Internet archive failed")
if 'job_id' not in r.json() and 'message' in r.json():
logger.warning(f"Internet archive failed json \n {r.json()}")
return ArchiveResult(status=f"Internet archive failed: {r.json()['message']}")
job_id = r.json()['job_id']

Wyświetl plik

@ -124,7 +124,7 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
driver = webdriver.Firefox(options=options)
driver.set_window_size(1400, 2000)
# DM put in for telegram screenshots which don't come back
driver.set_page_load_timeout(60)
driver.set_page_load_timeout(120)
# order matters, first to succeed excludes remaining
active_archivers = [

31
dm.py
Wyświetl plik

@ -7,27 +7,28 @@ options.headless = True
driver = webdriver.Firefox(options=options)
driver.set_window_size(1400, 2000)
# Navigate to Facebook
driver.get("http://www.facebook.com")
# fonts showing up as boxes with numbers
driver.get("https://www.kanbawzatainews.com/2021/09/mytel_25.html")
# driver.get("http://www.chinatoday.com.cn/")
# click the cookie-banner button: accept only essential cookies
foo = driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']")
foo.click()
# foo = driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']")
# foo.click()
# Search & Enter the Email or Phone field & Enter Password
username = driver.find_element(By.ID,"email")
password = driver.find_element(By.ID,"pass")
submit = driver.find_element(By.NAME,"login")
# # Search & Enter the Email or Phone field & Enter Password
# username = driver.find_element(By.ID,"email")
# password = driver.find_element(By.ID,"pass")
# submit = driver.find_element(By.NAME,"login")
username.send_keys("test@gmail.com")
password.send_keys("password")
# username.send_keys("test@gmail.com")
# password.send_keys("password")
# Click Login
submit.click()
# # Click Login
# submit.click()
# now that we are logged in, go to the original page
driver.get("https://www.facebook.com/watch/?v=343188674422293")
time.sleep(6)
# # now am logged in, go to original page
# driver.get("https://www.facebook.com/watch/?v=343188674422293")
time.sleep(1)
# save a screenshot
driver.save_screenshot("screenshot.png")

4
go.sh 100644
Wyświetl plik

@ -0,0 +1,4 @@
#!/bin/bash
# for debugging on server
pipenv run python auto_archive.py --sheet "Test Hashing"

Wyświetl plik

@ -77,7 +77,7 @@ sudo chmod +x ~/auto-archiver/infra/cron.sh
# to stop errors
# https://askubuntu.com/questions/1383506/deprecation-warnings-python3-8-packages
#/usr/local/lib/python3.8/dist-packages/pkg_resources/__init__.py:123: PkgResourcesDeprecationWarning: 0.23ubuntu1 is an invalid version and will not be supported in a future release
sudo mv /usr/local/lib/python3.8/dist-packages/pkg_resources /usr/local/lib/python3.8/dist-packages/pkg_resources/pkg_resources_back
sudo mv /usr/local/lib/python3.8/dist-packages/pkg_resources /usr/local/lib/python3.8/dist-packages/pkg_resources_back
# don't want service to run until a reboot
@ -94,6 +94,12 @@ sudo mv run-auto-archive /etc/cron.d
sudo chown root /etc/cron.d/run-auto-archive
sudo chmod 600 /etc/cron.d/run-auto-archive
sudo chmod 600 ~/auto-archive/go.sh
# install fonts (e.g. Burmese, Chinese) so that they render in Selenium Firefox
# https://stackoverflow.com/questions/72015245/firefox-unicode-boxes-in-selenium-screenshot-instead-of-characters/72015719#72015719
sudo apt install fonts-noto -y
sudo reboot now
# MONITORING

BIN
screenshot.png 100644

Plik binarny nie jest wyświetlany.

Po

Szerokość:  |  Wysokość:  |  Rozmiar: 972 KiB