kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix running 'cleanup' method on extractors that fail to start
rodzic
580de88366
commit
17d2d14680
|
@ -277,6 +277,9 @@ class LazyBaseModule:
|
||||||
# finally, get the class instance
|
# finally, get the class instance
|
||||||
instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)()
|
instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)()
|
||||||
|
|
||||||
|
# save the instance for future easy loading
|
||||||
|
self._instance = instance
|
||||||
|
|
||||||
# set the name, display name and module factory
|
# set the name, display name and module factory
|
||||||
instance.name = self.name
|
instance.name = self.name
|
||||||
instance.display_name = self.display_name
|
instance.display_name = self.display_name
|
||||||
|
@ -289,8 +292,6 @@ class LazyBaseModule:
|
||||||
instance.config_setup(config)
|
instance.config_setup(config)
|
||||||
instance.setup()
|
instance.setup()
|
||||||
|
|
||||||
# save the instance for future easy loading
|
|
||||||
self._instance = instance
|
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
|
@ -387,8 +387,10 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
|
||||||
except (KeyboardInterrupt, Exception) as e:
|
except (KeyboardInterrupt, Exception) as e:
|
||||||
if not isinstance(e, KeyboardInterrupt) and not isinstance(e, SetupError):
|
if not isinstance(e, KeyboardInterrupt) and not isinstance(e, SetupError):
|
||||||
logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}")
|
logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}")
|
||||||
if loaded_module and module_type == "extractor":
|
|
||||||
loaded_module.cleanup()
|
# access lazy_module._instance here because loaded_module may never be assigned if loading the module raises an error
|
||||||
|
if lazy_module._instance and module_type == "extractor":
|
||||||
|
lazy_module._instance.cleanup()
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
if not loaded_module:
|
if not loaded_module:
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
from auto_archiver.core import Extractor
|
from auto_archiver.core import Extractor
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
class ExampleExtractor(Extractor):
|
class ExampleExtractor(Extractor):
|
||||||
def download(self, item):
|
def download(self, item):
|
||||||
print("download")
|
logger.info("download")
|
||||||
|
|
||||||
|
def cleanup(self):
|
||||||
|
logger.info("cleanup")
|
||||||
|
|
|
@ -1,27 +1,29 @@
|
||||||
from auto_archiver.core import Extractor, Enricher, Feeder, Database, Storage, Formatter, Metadata
|
from auto_archiver.core import Extractor, Enricher, Feeder, Database, Storage, Formatter, Metadata
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter):
|
class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter):
|
||||||
def download(self, item):
|
def download(self, item):
|
||||||
print("download")
|
logger.info("download")
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
yield Metadata().set_url("https://example.com")
|
yield Metadata().set_url("https://example.com")
|
||||||
|
|
||||||
def done(self, result):
|
def done(self, result):
|
||||||
print("done")
|
logger.info("done")
|
||||||
|
|
||||||
def enrich(self, to_enrich):
|
def enrich(self, to_enrich):
|
||||||
print("enrich")
|
logger.info("enrich")
|
||||||
|
|
||||||
def get_cdn_url(self, media):
|
def get_cdn_url(self, media):
|
||||||
return "nice_url"
|
return "nice_url"
|
||||||
|
|
||||||
def save(self, item):
|
def save(self, item):
|
||||||
print("save")
|
logger.info("save")
|
||||||
|
|
||||||
def uploadf(self, file, key, **kwargs):
|
def uploadf(self, file, key, **kwargs):
|
||||||
print("uploadf")
|
logger.info("uploadf")
|
||||||
|
|
||||||
def format(self, item):
|
def format(self, item):
|
||||||
print("format")
|
logger.info("format")
|
||||||
|
|
|
@ -237,3 +237,23 @@ def test_wrong_step_type(test_args, caplog):
|
||||||
with pytest.raises(SetupError) as err:
|
with pytest.raises(SetupError) as err:
|
||||||
orchestrator.setup(args)
|
orchestrator.setup(args)
|
||||||
assert "Module 'example_extractor' is not a feeder" in str(err.value)
|
assert "Module 'example_extractor' is not a feeder" in str(err.value)
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_failed_extractor_cleanup(test_args, mocker, caplog):
|
||||||
|
orchestrator = ArchivingOrchestrator()
|
||||||
|
|
||||||
|
# hack to set up the paths so we can patch properly
|
||||||
|
orchestrator.module_factory.setup_paths([TEST_MODULES])
|
||||||
|
|
||||||
|
# patch ExampleExtractor.setup (in the example_extractor module) to throw an exception
|
||||||
|
mocker.patch(
|
||||||
|
"auto_archiver.modules.example_extractor.example_extractor.ExampleExtractor.setup",
|
||||||
|
side_effect=Exception("Test exception"),
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
orchestrator.setup(test_args + ["--extractors", "example_extractor"])
|
||||||
|
|
||||||
|
assert "Error during setup of modules: Test exception" in caplog.text
|
||||||
|
# make sure the 'cleanup' is called
|
||||||
|
assert "cleanup" in caplog.text
|
||||||
|
|
Ładowanie…
Reference in New Issue