kopia lustrzana https://github.com/bellingcat/auto-archiver
cleanup
rodzic
2a01038c0c
commit
ea261635a2
|
@ -1,4 +1,5 @@
|
|||
tmp/
|
||||
temp/
|
||||
.env*
|
||||
.DS_Store
|
||||
expmt/
|
||||
|
@ -10,4 +11,5 @@ anu.html
|
|||
.pytest_cach
|
||||
anon*
|
||||
config.json
|
||||
config-*.json
|
||||
config-*.json
|
||||
logs/*
|
|
@ -1,13 +1,10 @@
|
|||
import time, requests, os
|
||||
import time, requests
|
||||
from bs4 import BeautifulSoup
|
||||
# from dataclasses import dataclass
|
||||
|
||||
from storages import Storage
|
||||
from .base_archiver import Archiver, ArchiveResult
|
||||
|
||||
from configs import WaybackConfig
|
||||
|
||||
# TODO: use WaybackConfig
|
||||
class WaybackArchiver(Archiver):
|
||||
name = "wayback"
|
||||
|
||||
|
|
|
@ -133,7 +133,7 @@ def main():
|
|||
mkdir_if_not_exists(c.tmp_folder)
|
||||
process_sheet(c, c.sheet, header=c.header, columns=c.column_names)
|
||||
shutil.rmtree(c.tmp_folder)
|
||||
c.webdriver.quit()
|
||||
c.destroy_webdriver()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
51
test.py
51
test.py
|
@ -1,51 +0,0 @@
|
|||
import os
|
||||
import datetime
|
||||
import argparse
|
||||
import requests
|
||||
import shutil
|
||||
import gspread
|
||||
from loguru import logger
|
||||
from dotenv import load_dotenv
|
||||
from selenium import webdriver
|
||||
import traceback
|
||||
|
||||
import archivers
|
||||
from storages import S3Storage, S3Config
|
||||
from utils import GWorksheet, mkdir_if_not_exists
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
options = webdriver.FirefoxOptions()
|
||||
options.headless = True
|
||||
driver = webdriver.Firefox(options=options)
|
||||
driver.set_window_size(1400, 2000)
|
||||
|
||||
s3_config = S3Config(
|
||||
bucket=os.getenv('DO_BUCKET'),
|
||||
region=os.getenv('DO_SPACES_REGION'),
|
||||
key=os.getenv('DO_SPACES_KEY'),
|
||||
secret=os.getenv('DO_SPACES_SECRET'),
|
||||
folder="temp"
|
||||
)
|
||||
s3_client = S3Storage(s3_config)
|
||||
telegram_config = archivers.TelegramConfig(
|
||||
api_id=os.getenv('TELEGRAM_API_ID'),
|
||||
api_hash=os.getenv('TELEGRAM_API_HASH')
|
||||
)
|
||||
|
||||
archiver = archivers.TelethonArchiver(s3_client, driver, telegram_config)
|
||||
|
||||
URLs = [
|
||||
# "https://t.me/c/1226032830/24864",
|
||||
# "https://t.me/truexanewsua/32650",
|
||||
"https://t.me/informatsia_obstanovka/5239",
|
||||
# "https://t.me/informatsia_obstanovka/5240",
|
||||
# "https://t.me/informatsia_obstanovka/5241",
|
||||
# "https://t.me/informatsia_obstanovka/5242"
|
||||
]
|
||||
|
||||
|
||||
for url in URLs:
|
||||
print(url)
|
||||
print(archiver.download(url, False))
|
Ładowanie…
Reference in New Issue