kopia lustrzana https://github.com/bellingcat/auto-archiver
Add a cheat sheet for configs and better folder structure for core modules
rodzic
2f51d3917a
commit
895c843f04
|
@ -15,12 +15,17 @@ type_color = {
|
||||||
'formatter': "<span style='color: #00FFFF'>[formatter](/core_modules.md#formatter-modules)</a></span>",
|
'formatter': "<span style='color: #00FFFF'>[formatter](/core_modules.md#formatter-modules)</a></span>",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TABLE_HEADER = ("Option", "Description", "Default", "Type")
|
||||||
|
|
||||||
def generate_module_docs():
|
def generate_module_docs():
|
||||||
SAVE_FOLDER.mkdir(exist_ok=True)
|
SAVE_FOLDER.mkdir(exist_ok=True)
|
||||||
modules_by_type = {}
|
modules_by_type = {}
|
||||||
|
|
||||||
for module in available_modules(with_manifest=True):
|
header_row = "| " + " | ".join(TABLE_HEADER) + "|\n" + "| --- " * len(TABLE_HEADER) + "|\n"
|
||||||
|
configs_cheatsheet = "\n## Configuration Options\n"
|
||||||
|
configs_cheatsheet += header_row
|
||||||
|
|
||||||
|
for module in sorted(available_modules(with_manifest=True), key=lambda x: (x.requires_setup, x.name)):
|
||||||
# generate the markdown file from the __manifest__.py file.
|
# generate the markdown file from the __manifest__.py file.
|
||||||
|
|
||||||
manifest = module.manifest
|
manifest = module.manifest
|
||||||
|
@ -39,8 +44,7 @@ def generate_module_docs():
|
||||||
"""
|
"""
|
||||||
if manifest['configs']:
|
if manifest['configs']:
|
||||||
readme_str += "\n## Configuration Options\n"
|
readme_str += "\n## Configuration Options\n"
|
||||||
readme_str += "| Option | Description | Default | Type |\n"
|
readme_str += header_row
|
||||||
readme_str += "| --- | --- | --- | --- |\n"
|
|
||||||
for key, value in manifest['configs'].items():
|
for key, value in manifest['configs'].items():
|
||||||
type = value.get('type', 'string')
|
type = value.get('type', 'string')
|
||||||
if type == 'auto_archiver.utils.json_loader':
|
if type == 'auto_archiver.utils.json_loader':
|
||||||
|
@ -51,24 +55,30 @@ def generate_module_docs():
|
||||||
help = "**Required**. " if value.get('required', False) else "Optional. "
|
help = "**Required**. " if value.get('required', False) else "Optional. "
|
||||||
help += value.get('help', '')
|
help += value.get('help', '')
|
||||||
readme_str += f"| `{module.name}.{key}` | {help} | {value.get('default', '')} | {type} |\n"
|
readme_str += f"| `{module.name}.{key}` | {help} | {value.get('default', '')} | {type} |\n"
|
||||||
|
configs_cheatsheet += f"| `{module.name}.{key}` | {help} | {value.get('default', '')} | {type} |\n"
|
||||||
|
|
||||||
|
|
||||||
# make type folder if it doesn't exist
|
# make type folder if it doesn't exist
|
||||||
|
|
||||||
|
# create the module.type folder, use the first type just for where to store the file
|
||||||
with open(SAVE_FOLDER / f"{module.name}.md", "w") as f:
|
type_folder = SAVE_FOLDER / module.type[0]
|
||||||
|
type_folder.mkdir(exist_ok=True)
|
||||||
|
with open(type_folder / f"{module.name}.md", "w") as f:
|
||||||
print("writing", SAVE_FOLDER)
|
print("writing", SAVE_FOLDER)
|
||||||
f.write(readme_str)
|
f.write(readme_str)
|
||||||
|
|
||||||
generate_index(modules_by_type)
|
generate_index(modules_by_type)
|
||||||
|
|
||||||
|
with open(SAVE_FOLDER / "configs_cheatsheet.md", "w") as f:
|
||||||
|
f.write(configs_cheatsheet)
|
||||||
|
|
||||||
|
|
||||||
def generate_index(modules_by_type):
|
def generate_index(modules_by_type):
|
||||||
readme_str = ""
|
readme_str = ""
|
||||||
for type in BaseModule.MODULE_TYPES:
|
for type in BaseModule.MODULE_TYPES:
|
||||||
modules = modules_by_type.get(type, [])
|
modules = modules_by_type.get(type, [])
|
||||||
module_str = f"## {type.capitalize()} Modules\n"
|
module_str = f"## {type.capitalize()} Modules\n"
|
||||||
for module in modules:
|
for module in modules:
|
||||||
module_str += f"\n[{module.manifest['name']}](/modules/autogen/{module.name}.md)\n"
|
module_str += f"\n[{module.manifest['name']}](/modules/autogen/{module.type[0]}/{module.name}.md)\n"
|
||||||
with open(SAVE_FOLDER / f"{type}.md", "w") as f:
|
with open(SAVE_FOLDER / f"{type}.md", "w") as f:
|
||||||
print("writing", SAVE_FOLDER / f"{type}.md")
|
print("writing", SAVE_FOLDER / f"{type}.md")
|
||||||
f.write(module_str)
|
f.write(module_str)
|
||||||
|
|
|
@ -18,6 +18,7 @@ These pages describe the core modules that come with `auto-archiver` and provide
|
||||||
:caption: Core Modules
|
:caption: Core Modules
|
||||||
:hidden:
|
:hidden:
|
||||||
|
|
||||||
|
modules/config_cheatsheet
|
||||||
modules/feeder
|
modules/feeder
|
||||||
modules/extractor
|
modules/extractor
|
||||||
modules/enricher
|
modules/enricher
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Configuration Cheat Sheet
|
||||||
|
|
||||||
|
Below is a list of all configurations for the core modules in Auto Archiver.
|
||||||
|
|
||||||
|
```{include} ../modules/autogen/configs_cheatsheet.md
|
||||||
|
```
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
# Configuring
|
# Configuration
|
||||||
|
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
:depth: 1
|
:depth: 1
|
||||||
|
|
||||||
configurations.md
|
configurations.md
|
||||||
|
config_cheatsheet.md
|
||||||
```
|
```
|
||||||
|
|
||||||
There are 3 main ways to use the auto-archiver:
|
There are 3 main ways to use the auto-archiver:
|
||||||
|
|
|
@ -6,3 +6,9 @@ The default (enabled) databases are the CSV Database and the Console Database.
|
||||||
|
|
||||||
```{include} autogen/database.md
|
```{include} autogen/database.md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```{toctree}
|
||||||
|
:depth: 1
|
||||||
|
:glob:
|
||||||
|
autogen/database/*
|
||||||
|
```
|
|
@ -4,4 +4,10 @@ Enricher modules are used to add additional information to the items that have
|
||||||
|
|
||||||
|
|
||||||
```{include} autogen/enricher.md
|
```{include} autogen/enricher.md
|
||||||
|
```
|
||||||
|
|
||||||
|
```{toctree}
|
||||||
|
:depth: 1
|
||||||
|
:glob:
|
||||||
|
autogen/enricher/*
|
||||||
```
|
```
|
|
@ -8,4 +8,10 @@ Extractors that are able to extract content from a wide range of websites includ
|
||||||
3. WACZ Extractor: runs a web browser to 'browse' the URL and save a copy of the page in WACZ format.
|
3. WACZ Extractor: runs a web browser to 'browse' the URL and save a copy of the page in WACZ format.
|
||||||
|
|
||||||
```{include} autogen/extractor.md
|
```{include} autogen/extractor.md
|
||||||
|
```
|
||||||
|
|
||||||
|
```{toctree}
|
||||||
|
:depth: 1
|
||||||
|
:glob:
|
||||||
|
autogen/extractor/*
|
||||||
```
|
```
|
|
@ -5,4 +5,10 @@ Feeder modules are used to feed URLs into the `auto-archiver` for processing. Fe
|
||||||
The default feeder is the command line feeder, which allows you to input URLs directly into the `auto-archiver` from the command line.
|
The default feeder is the command line feeder, which allows you to input URLs directly into the `auto-archiver` from the command line.
|
||||||
|
|
||||||
```{include} autogen/feeder.md
|
```{include} autogen/feeder.md
|
||||||
|
```
|
||||||
|
|
||||||
|
```{toctree}
|
||||||
|
:depth: 1
|
||||||
|
:glob:
|
||||||
|
autogen/feeder/*
|
||||||
```
|
```
|
|
@ -3,4 +3,10 @@
|
||||||
Formatter modules are used to format the data extracted from a URL into a specific format. Currently the most widely-used formatter is the HTML formatter, which formats the data into an easily viewable HTML page.
|
Formatter modules are used to format the data extracted from a URL into a specific format. Currently the most widely-used formatter is the HTML formatter, which formats the data into an easily viewable HTML page.
|
||||||
|
|
||||||
```{include} autogen/formatter.md
|
```{include} autogen/formatter.md
|
||||||
|
```
|
||||||
|
|
||||||
|
```{toctree}
|
||||||
|
:depth: 1
|
||||||
|
:glob:
|
||||||
|
autogen/formatter/*
|
||||||
```
|
```
|
Ładowanie…
Reference in New Issue