kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix generator programmatic setup (#197)
* Fix returning a generator of a generator
* Move download test to pytest.mark.download
pull/210/head
rodzic
9297697ef5
commit
6d43bc7d4d
|
@ -317,9 +317,11 @@ class ArchivingOrchestrator:
|
|||
exit()
|
||||
|
||||
return read_yaml(config_file)
|
||||
|
||||
def run(self, args: list) -> Generator[Metadata]:
|
||||
|
||||
|
||||
def setup(self, args: list):
|
||||
"""
|
||||
Main entry point for the orchestrator, sets up the basic parser, loads the config file, and sets up the complete parser
|
||||
"""
|
||||
self.setup_basic_parser()
|
||||
|
||||
# parse the known arguments for now (basically, we want the config file)
|
||||
|
@ -342,8 +344,10 @@ class ArchivingOrchestrator:
|
|||
for module_type in BaseModule.MODULE_TYPES:
|
||||
logger.info(f"{module_type.upper()}S: " + ", ".join(m.display_name for m in getattr(self, f"{module_type}s")))
|
||||
|
||||
for result in self.feed():
|
||||
yield result
|
||||
def run(self, args: list) -> Generator[Metadata]:
    """
    Set the orchestrator up from ``args`` and hand back the feed generator.

    ``self.setup(args)`` is executed immediately when ``run`` is called;
    this method deliberately contains no ``yield`` so that it stays a plain
    function rather than becoming a generator itself.

    :param args: argument list forwarded unchanged to ``self.setup``.
    :return: whatever ``self.feed()`` returns, passed through untouched.
    """
    self.setup(args)
    results = self.feed()
    return results
|
||||
|
||||
def cleanup(self) -> None:
|
||||
logger.info("Cleaning up")
|
||||
|
@ -351,7 +355,7 @@ class ArchivingOrchestrator:
|
|||
e.cleanup()
|
||||
|
||||
def feed(self) -> Generator[Metadata]:
|
||||
|
||||
|
||||
url_count = 0
|
||||
for feeder in self.feeders:
|
||||
for item in feeder:
|
||||
|
|
|
@ -23,7 +23,6 @@ class TestTwitterApiExtractor(TestExtractorBase):
|
|||
}
|
||||
|
||||
@pytest.mark.parametrize("url, expected", [
|
||||
("https://t.co/yl3oOJatFp", "https://www.bellingcat.com/category/resources/"), # t.co URL
|
||||
("https://x.com/bellingcat/status/1874097816571961839", "https://x.com/bellingcat/status/1874097816571961839"), # x.com urls unchanged
|
||||
("https://twitter.com/bellingcat/status/1874097816571961839", "https://twitter.com/bellingcat/status/1874097816571961839"), # twitter urls unchanged
|
||||
("https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w", "https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w"), # don't strip params from twitter urls (changed Jan 2025)
|
||||
|
@ -32,7 +31,11 @@ class TestTwitterApiExtractor(TestExtractorBase):
|
|||
])
|
||||
def test_sanitize_url(self, url, expected):
|
||||
assert expected == self.extractor.sanitize_url(url)
|
||||
|
||||
|
||||
@pytest.mark.download
def test_sanitize_url_download(self):
    """
    Check that a t.co short link is sanitized to its destination URL.

    Carries the ``download`` marker so it can be selected or skipped
    separately from the other tests (presumably because resolving the
    short link needs network access -- confirm against the pytest config).
    """
    # The short link is the input and the expanded URL is the expectation;
    # the expected value goes on the left, as in test_sanitize_url.
    assert "https://www.bellingcat.com/category/resources/" == self.extractor.sanitize_url("https://t.co/yl3oOJatFp")
|
||||
|
||||
@pytest.mark.parametrize("url, exptected_username, exptected_tweetid", [
|
||||
("https://twitter.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
|
||||
("https://x.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
|
||||
|
|
Ładowanie…
Reference in New Issue