kopia lustrzana https://github.com/bellingcat/auto-archiver
Add a cheat sheet for configs and better folder structure for core modules
rodzic
2f51d3917a
commit
895c843f04
|
@ -15,12 +15,17 @@ type_color = {
|
|||
'formatter': "<span style='color: #00FFFF'>[formatter](/core_modules.md#formatter-modules)</a></span>",
|
||||
}
|
||||
|
||||
TABLE_HEADER = ("Option", "Description", "Default", "Type")
|
||||
|
||||
def generate_module_docs():
|
||||
SAVE_FOLDER.mkdir(exist_ok=True)
|
||||
modules_by_type = {}
|
||||
|
||||
for module in available_modules(with_manifest=True):
|
||||
header_row = "| " + " | ".join(TABLE_HEADER) + "|\n" + "| --- " * len(TABLE_HEADER) + "|\n"
|
||||
configs_cheatsheet = "\n## Configuration Options\n"
|
||||
configs_cheatsheet += header_row
|
||||
|
||||
for module in sorted(available_modules(with_manifest=True), key=lambda x: (x.requires_setup, x.name)):
|
||||
# generate the markdown file from the __manifest__.py file.
|
||||
|
||||
manifest = module.manifest
|
||||
|
@ -39,8 +44,7 @@ def generate_module_docs():
|
|||
"""
|
||||
if manifest['configs']:
|
||||
readme_str += "\n## Configuration Options\n"
|
||||
readme_str += "| Option | Description | Default | Type |\n"
|
||||
readme_str += "| --- | --- | --- | --- |\n"
|
||||
readme_str += header_row
|
||||
for key, value in manifest['configs'].items():
|
||||
type = value.get('type', 'string')
|
||||
if type == 'auto_archiver.utils.json_loader':
|
||||
|
@ -51,24 +55,30 @@ def generate_module_docs():
|
|||
help = "**Required**. " if value.get('required', False) else "Optional. "
|
||||
help += value.get('help', '')
|
||||
readme_str += f"| `{module.name}.{key}` | {help} | {value.get('default', '')} | {type} |\n"
|
||||
configs_cheatsheet += f"| `{module.name}.{key}` | {help} | {value.get('default', '')} | {type} |\n"
|
||||
|
||||
|
||||
# make type folder if it doesn't exist
|
||||
|
||||
|
||||
with open(SAVE_FOLDER / f"{module.name}.md", "w") as f:
|
||||
# create a folder named after the module's first type; the first type only determines where the file is stored
|
||||
type_folder = SAVE_FOLDER / module.type[0]
|
||||
type_folder.mkdir(exist_ok=True)
|
||||
with open(type_folder / f"{module.name}.md", "w") as f:
|
||||
print("writing", SAVE_FOLDER)
|
||||
f.write(readme_str)
|
||||
|
||||
generate_index(modules_by_type)
|
||||
|
||||
with open(SAVE_FOLDER / "configs_cheatsheet.md", "w") as f:
|
||||
f.write(configs_cheatsheet)
|
||||
|
||||
|
||||
def generate_index(modules_by_type):
|
||||
readme_str = ""
|
||||
for type in BaseModule.MODULE_TYPES:
|
||||
modules = modules_by_type.get(type, [])
|
||||
module_str = f"## {type.capitalize()} Modules\n"
|
||||
for module in modules:
|
||||
module_str += f"\n[{module.manifest['name']}](/modules/autogen/{module.name}.md)\n"
|
||||
module_str += f"\n[{module.manifest['name']}](/modules/autogen/{module.type[0]}/{module.name}.md)\n"
|
||||
with open(SAVE_FOLDER / f"{type}.md", "w") as f:
|
||||
print("writing", SAVE_FOLDER / f"{type}.md")
|
||||
f.write(module_str)
|
||||
|
|
|
@ -18,6 +18,7 @@ These pages describe the core modules that come with `auto-archiver` and provide
|
|||
:caption: Core Modules
|
||||
:hidden:
|
||||
|
||||
modules/config_cheatsheet
|
||||
modules/feeder
|
||||
modules/extractor
|
||||
modules/enricher
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
# Configuration Cheat Sheet
|
||||
|
||||
Below is a list of all configuration options for the core modules in Auto Archiver.
|
||||
|
||||
```{include} ../modules/autogen/configs_cheatsheet.md
|
||||
```
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
# Configuring
|
||||
# Configuration
|
||||
|
||||
|
||||
```{toctree}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
:depth: 1
|
||||
|
||||
configurations.md
|
||||
config_cheatsheet.md
|
||||
```
|
||||
|
||||
There are 3 main ways to use the auto-archiver:
|
||||
|
|
|
@ -6,3 +6,9 @@ The default (enabled) databases are the CSV Database and the Console Database.
|
|||
|
||||
```{include} autogen/database.md
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:depth: 1
|
||||
:glob:
|
||||
autogen/database/*
|
||||
```
|
|
@ -4,4 +4,10 @@ Enricher modules are used to add additional information to the items that have
|
|||
|
||||
|
||||
```{include} autogen/enricher.md
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:depth: 1
|
||||
:glob:
|
||||
autogen/enricher/*
|
||||
```
|
|
@ -8,4 +8,10 @@ Extractors that are able to extract content from a wide range of websites includ
|
|||
3. WACZ Extractor: runs a web browser to 'browse' the URL and save a copy of the page in WACZ format.
|
||||
|
||||
```{include} autogen/extractor.md
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:depth: 1
|
||||
:glob:
|
||||
autogen/extractor/*
|
||||
```
|
|
@ -5,4 +5,10 @@ Feeder modules are used to feed URLs into the `auto-archiver` for processing. Fe
|
|||
The default feeder is the command line feeder, which allows you to input URLs directly into the `auto-archiver` from the command line.
|
||||
|
||||
```{include} autogen/feeder.md
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:depth: 1
|
||||
:glob:
|
||||
autogen/feeder/*
|
||||
```
|
|
@ -3,4 +3,10 @@
|
|||
Formatter modules are used to format the data extracted from a URL into a specific format. Currently the most widely-used formatter is the HTML formatter, which formats the data into an easily viewable HTML page.
|
||||
|
||||
```{include} autogen/formatter.md
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:depth: 1
|
||||
:glob:
|
||||
autogen/formatter/*
|
||||
```
|
Ładowanie…
Reference in New Issue