kopia lustrzana https://github.com/bellingcat/auto-archiver
auto
rodzic
014e154fb4
commit
583342e348
|
@ -1,8 +1,9 @@
|
|||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "Python: auto_archive --sheet",
|
||||
"name": "Test Hashing",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "auto_archive.py",
|
||||
|
@ -37,6 +38,15 @@
|
|||
"justMyCode": true,
|
||||
"args": ["--sheet","Kayleigh - test - DM", "--header=2"]
|
||||
},
|
||||
{
|
||||
"name": "Kayleigh - test",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "auto_archive.py",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true,
|
||||
"args": ["--sheet","Kayleigh - test", "--header=2"]
|
||||
},
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "python",
|
||||
|
|
|
@ -139,10 +139,13 @@ class Archiver(ABC):
|
|||
foo = self.driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']")
|
||||
foo.click()
|
||||
logger.debug(f'fb click worked')
|
||||
# the Linux server needs a sleep here, otherwise the Facebook cookie won't have taken effect and we'll get a popup on the next page
|
||||
time.sleep(2)
|
||||
# DM: some FB videos need the user to be logged in
|
||||
except:
|
||||
logger.warning(f'Failed on fb accept cookies for url {url}')
|
||||
|
||||
|
||||
self.driver.get(url)
|
||||
|
||||
time.sleep(6)
|
||||
|
|
|
@ -27,9 +27,11 @@ class WaybackArchiver(Archiver):
|
|||
'https://web.archive.org/save/', headers=ia_headers, data={'url': url})
|
||||
|
||||
if r.status_code != 200:
|
||||
logger.warning(f"Internet archive failed with status of {r.status_code}")
|
||||
return ArchiveResult(status="Internet archive failed")
|
||||
|
||||
if 'job_id' not in r.json() and 'message' in r.json():
|
||||
logger.warning(f"Internet archive failed json \n {r.json()}")
|
||||
return ArchiveResult(status=f"Internet archive failed: {r.json()['message']}")
|
||||
|
||||
job_id = r.json()['job_id']
|
||||
|
|
|
@ -124,7 +124,7 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
|
|||
driver = webdriver.Firefox(options=options)
|
||||
driver.set_window_size(1400, 2000)
|
||||
# DM put in for telegram screenshots which don't come back
|
||||
driver.set_page_load_timeout(60)
|
||||
driver.set_page_load_timeout(120)
|
||||
|
||||
# order matters, first to succeed excludes remaining
|
||||
active_archivers = [
|
||||
|
|
31
dm.py
31
dm.py
|
@ -7,27 +7,28 @@ options.headless = True
|
|||
driver = webdriver.Firefox(options=options)
|
||||
driver.set_window_size(1400, 2000)
|
||||
|
||||
# Navigate to Facebook
|
||||
driver.get("http://www.facebook.com")
|
||||
# fonts showing up as boxes with numbers
|
||||
driver.get("https://www.kanbawzatainews.com/2021/09/mytel_25.html")
|
||||
# driver.get("http://www.chinatoday.com.cn/")
|
||||
|
||||
# click the cookie-banner button that accepts only essential cookies
|
||||
foo = driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']")
|
||||
foo.click()
|
||||
# foo = driver.find_element(By.XPATH,"//button[@data-cookiebanner='accept_only_essential_button']")
|
||||
# foo.click()
|
||||
|
||||
# Search & Enter the Email or Phone field & Enter Password
|
||||
username = driver.find_element(By.ID,"email")
|
||||
password = driver.find_element(By.ID,"pass")
|
||||
submit = driver.find_element(By.NAME,"login")
|
||||
# # Search & Enter the Email or Phone field & Enter Password
|
||||
# username = driver.find_element(By.ID,"email")
|
||||
# password = driver.find_element(By.ID,"pass")
|
||||
# submit = driver.find_element(By.NAME,"login")
|
||||
|
||||
username.send_keys("test@gmail.com")
|
||||
password.send_keys("password")
|
||||
# username.send_keys("test@gmail.com")
|
||||
# password.send_keys("password")
|
||||
|
||||
# Click Login
|
||||
submit.click()
|
||||
# # Click Login
|
||||
# submit.click()
|
||||
|
||||
# now am logged in, go to original page
|
||||
driver.get("https://www.facebook.com/watch/?v=343188674422293")
|
||||
time.sleep(6)
|
||||
# # now am logged in, go to original page
|
||||
# driver.get("https://www.facebook.com/watch/?v=343188674422293")
|
||||
time.sleep(1)
|
||||
|
||||
# save a screenshot
|
||||
driver.save_screenshot("screenshot.png")
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/bash
|
||||
|
||||
# for debugging on server
|
||||
pipenv run python auto_archive.py --sheet "Test Hashing"
|
|
@ -77,7 +77,7 @@ sudo chmod +x ~/auto-archiver/infra/cron.sh
|
|||
# to stop errors
|
||||
# https://askubuntu.com/questions/1383506/deprecation-warnings-python3-8-packages
|
||||
#/usr/local/lib/python3.8/dist-packages/pkg_resources/__init__.py:123: PkgResourcesDeprecationWarning: 0.23ubuntu1 is an invalid version and will not be supported in a future release
|
||||
sudo mv /usr/local/lib/python3.8/dist-packages/pkg_resources /usr/local/lib/python3.8/dist-packages/pkg_resources/pkg_resources_back
|
||||
sudo mv /usr/local/lib/python3.8/dist-packages/pkg_resources /usr/local/lib/python3.8/dist-packages/pkg_resources_back
|
||||
|
||||
|
||||
# don't want service to run until a reboot
|
||||
|
@ -94,6 +94,12 @@ sudo mv run-auto-archive /etc/cron.d
|
|||
sudo chown root /etc/cron.d/run-auto-archive
|
||||
sudo chmod 600 /etc/cron.d/run-auto-archive
|
||||
|
||||
sudo chmod 600 ~/auto-archive/go.sh
|
||||
|
||||
# install fonts (e.g. Burmese, Chinese) so they render correctly in Selenium Firefox screenshots
|
||||
# https://stackoverflow.com/questions/72015245/firefox-unicode-boxes-in-selenium-screenshot-instead-of-characters/72015719#72015719
|
||||
sudo apt install fonts-noto -y
|
||||
|
||||
sudo reboot now
|
||||
|
||||
# MONITORING
|
||||
|
|
Plik binarny nie jest wyświetlany.
Po Szerokość: | Wysokość: | Rozmiar: 972 KiB |
Ładowanie…
Reference in New Issue