kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix running 'cleanup' method on extractors that fail to start
rodzic
580de88366
commit
17d2d14680
|
@ -277,6 +277,9 @@ class LazyBaseModule:
|
|||
# finally, get the class instance
|
||||
instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)()
|
||||
|
||||
# save the instance for future easy loading
|
||||
self._instance = instance
|
||||
|
||||
# set the name, display name and module factory
|
||||
instance.name = self.name
|
||||
instance.display_name = self.display_name
|
||||
|
@ -289,8 +292,6 @@ class LazyBaseModule:
|
|||
instance.config_setup(config)
|
||||
instance.setup()
|
||||
|
||||
# save the instance for future easy loading
|
||||
self._instance = instance
|
||||
return instance
|
||||
|
||||
def __repr__(self):
|
||||
|
|
|
@ -387,8 +387,10 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
|
|||
except (KeyboardInterrupt, Exception) as e:
|
||||
if not isinstance(e, KeyboardInterrupt) and not isinstance(e, SetupError):
|
||||
logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}")
|
||||
if loaded_module and module_type == "extractor":
|
||||
loaded_module.cleanup()
|
||||
|
||||
# access lazy_module._instance here because loaded_module may never be assigned if loading raises an error
|
||||
if lazy_module._instance and module_type == "extractor":
|
||||
lazy_module._instance.cleanup()
|
||||
raise e
|
||||
|
||||
if not loaded_module:
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
from auto_archiver.core import Extractor
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class ExampleExtractor(Extractor):
|
||||
def download(self, item):
|
||||
print("download")
|
||||
logger.info("download")
|
||||
|
||||
def cleanup(self):
|
||||
logger.info("cleanup")
|
||||
|
|
|
@ -1,27 +1,29 @@
|
|||
from auto_archiver.core import Extractor, Enricher, Feeder, Database, Storage, Formatter, Metadata
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter):
|
||||
def download(self, item):
|
||||
print("download")
|
||||
logger.info("download")
|
||||
|
||||
def __iter__(self):
|
||||
yield Metadata().set_url("https://example.com")
|
||||
|
||||
def done(self, result):
|
||||
print("done")
|
||||
logger.info("done")
|
||||
|
||||
def enrich(self, to_enrich):
|
||||
print("enrich")
|
||||
logger.info("enrich")
|
||||
|
||||
def get_cdn_url(self, media):
|
||||
return "nice_url"
|
||||
|
||||
def save(self, item):
|
||||
print("save")
|
||||
logger.info("save")
|
||||
|
||||
def uploadf(self, file, key, **kwargs):
|
||||
print("uploadf")
|
||||
logger.info("uploadf")
|
||||
|
||||
def format(self, item):
|
||||
print("format")
|
||||
logger.info("format")
|
||||
|
|
|
@ -237,3 +237,23 @@ def test_wrong_step_type(test_args, caplog):
|
|||
with pytest.raises(SetupError) as err:
|
||||
orchestrator.setup(args)
|
||||
assert "Module 'example_extractor' is not a feeder" in str(err.value)
|
||||
|
||||
|
||||
def test_load_failed_extractor_cleanup(test_args, mocker, caplog):
|
||||
orchestrator = ArchivingOrchestrator()
|
||||
|
||||
# hack to set up the paths so we can patch properly
|
||||
orchestrator.module_factory.setup_paths([TEST_MODULES])
|
||||
|
||||
# patch ExampleExtractor.setup to throw an exception
|
||||
mocker.patch(
|
||||
"auto_archiver.modules.example_extractor.example_extractor.ExampleExtractor.setup",
|
||||
side_effect=Exception("Test exception"),
|
||||
)
|
||||
|
||||
with pytest.raises(Exception):
|
||||
orchestrator.setup(test_args + ["--extractors", "example_extractor"])
|
||||
|
||||
assert "Error during setup of modules: Test exception" in caplog.text
|
||||
# make sure the 'cleanup' is called
|
||||
assert "cleanup" in caplog.text
|
||||
|
|
Ładowanie…
Reference in New Issue