kopia lustrzana https://github.com/bellingcat/auto-archiver
Further docs improvements/tidy ups
rodzic
77212e8e3f
commit
091a19e25c
|
@ -3,6 +3,7 @@ from pathlib import Path
|
|||
from auto_archiver.core.module import ModuleFactory
|
||||
from auto_archiver.core.base_module import BaseModule
|
||||
from ruamel.yaml import YAML
|
||||
from ruamel.yaml.comments import CommentedMap
|
||||
import io
|
||||
|
||||
MODULES_FOLDER = Path(__file__).parent.parent.parent.parent / "src" / "auto_archiver" / "modules"
|
||||
|
@ -30,6 +31,7 @@ steps:
|
|||
...
|
||||
|
||||
{config_string}
|
||||
|
||||
"""
|
||||
|
||||
def generate_module_docs():
|
||||
|
@ -38,8 +40,9 @@ def generate_module_docs():
|
|||
modules_by_type = {}
|
||||
|
||||
header_row = "| " + " | ".join(TABLE_HEADER) + "|\n" + "| --- " * len(TABLE_HEADER) + "|\n"
|
||||
configs_cheatsheet = "\n## Configuration Options\n"
|
||||
configs_cheatsheet += header_row
|
||||
global_table = "\n## Configuration Options\n" + header_row
|
||||
|
||||
global_yaml = yaml.load("""\n# Module configuration\nplaceholder: {}""")
|
||||
|
||||
for module in sorted(ModuleFactory().available_modules(), key=lambda x: (x.requires_setup, x.name)):
|
||||
# generate the markdown file from the __manifest__.py file.
|
||||
|
@ -66,6 +69,11 @@ def generate_module_docs():
|
|||
|
||||
config_table = header_row
|
||||
config_yaml = {}
|
||||
|
||||
global_yaml[module.name] = CommentedMap()
|
||||
global_yaml.yaml_set_comment_before_after_key(module.name, f"\n\n{module.display_name} configuration options")
|
||||
|
||||
|
||||
for key, value in manifest['configs'].items():
|
||||
type = value.get('type', 'string')
|
||||
if type == 'json_loader':
|
||||
|
@ -75,10 +83,16 @@ def generate_module_docs():
|
|||
|
||||
default = value.get('default', '')
|
||||
config_yaml[key] = default
|
||||
|
||||
global_yaml[module.name][key] = default
|
||||
|
||||
if value.get('help', ''):
|
||||
global_yaml[module.name].yaml_add_eol_comment(value.get('help', ''), key)
|
||||
|
||||
help = "**Required**. " if value.get('required', False) else "Optional. "
|
||||
help += value.get('help', '')
|
||||
config_table += f"| `{module.name}.{key}` | {help} | {value.get('default', '')} | {type} |\n"
|
||||
configs_cheatsheet += f"| `{module.name}.{key}` | {help} | {default} | {type} |\n"
|
||||
global_table += f"| `{module.name}.{key}` | {help} | {default} | {type} |\n"
|
||||
readme_str += "\n## Configuration Options\n"
|
||||
readme_str += "\n### YAML\n"
|
||||
|
||||
|
@ -103,8 +117,13 @@ def generate_module_docs():
|
|||
f.write(readme_str)
|
||||
generate_index(modules_by_type)
|
||||
|
||||
del global_yaml['placeholder']
|
||||
global_string = io.BytesIO()
|
||||
global_yaml = yaml.dump(global_yaml, global_string)
|
||||
global_string = global_string.getvalue().decode('utf-8')
|
||||
global_yaml = f"```yaml\n{global_string}\n```"
|
||||
with open(SAVE_FOLDER / "configs_cheatsheet.md", "w") as f:
|
||||
f.write(configs_cheatsheet)
|
||||
f.write("### Configuration File\n" + global_yaml + "\n### Command Line\n" + global_table)
|
||||
|
||||
|
||||
def generate_index(modules_by_type):
|
||||
|
|
Plik binarny nie jest wyświetlany.
Po Szerokość: | Wysokość: | Rozmiar: 42 KiB |
|
@ -3,9 +3,11 @@
|
|||
import sys
|
||||
import os
|
||||
from importlib.metadata import metadata
|
||||
from datetime import datetime
|
||||
|
||||
sys.path.append(os.path.abspath('../scripts'))
|
||||
from scripts import generate_module_docs
|
||||
from auto_archiver.version import __version__
|
||||
|
||||
# -- Project Hooks -----------------------------------------------------------
|
||||
# convert the module __manifest__.py files into markdown files
|
||||
|
@ -15,7 +17,8 @@ generate_module_docs()
|
|||
# -- Project information -----------------------------------------------------
|
||||
package_metadata = metadata("auto-archiver")
|
||||
project = package_metadata["name"]
|
||||
authors = "Bellingcat"
|
||||
copyright = str(datetime.now().year)
|
||||
author = "Bellingcat"
|
||||
release = package_metadata["version"]
|
||||
language = 'en'
|
||||
|
||||
|
@ -32,7 +35,7 @@ extensions = [
|
|||
]
|
||||
|
||||
templates_path = ['_templates']
|
||||
exclude_patterns = []
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", ""]
|
||||
|
||||
|
||||
# -- AutoAPI Configuration ---------------------------------------------------
|
||||
|
@ -76,6 +79,14 @@ source_suffix = {
|
|||
html_theme = 'sphinx_book_theme'
|
||||
html_static_path = ["../_static"]
|
||||
html_css_files = ["custom.css"]
|
||||
html_title = f"Auto Archiver v{__version__}"
|
||||
html_logo = "bc.png"
|
||||
html_theme_options = {
|
||||
"repository_url": "https://github.com/bellingcat/auto-archiver",
|
||||
"use_repository_button": True,
|
||||
}
|
||||
|
||||
|
||||
|
||||
copybutton_prompt_text = r">>> |\.\.\."
|
||||
copybutton_prompt_is_regexp = True
|
||||
|
|
|
@ -8,10 +8,10 @@
|
|||
:caption: Contents:
|
||||
|
||||
Overview <self>
|
||||
contributing
|
||||
installation/installation
|
||||
installation/setup
|
||||
core_modules.md
|
||||
how_to
|
||||
contributing
|
||||
development/developer_guidelines
|
||||
autoapi/index.rst
|
||||
```
|
|
@ -1,13 +1,18 @@
|
|||
|
||||
# Configuration
|
||||
|
||||
This section of the documentation provides guidelines for configuring the tool.
|
||||
The recommended way to configure auto-archiver for first-time users is to [run the Auto Archiver](setup.md#running) and have it auto-generate a default configuration for you. Then, if needed, you can edit the configuration file using one of the following methods.
|
||||
|
||||
## Configuring using a file
|
||||
|
||||
The recommended way to configure auto-archiver for long-term and deployed projects is a configuration file, typically called `orchestration.yaml`. This is a YAML file containing all the settings for your entire workflow.
|
||||
## 1. Configuration file
|
||||
|
||||
The structure of the orchestration file is split into 2 parts: `steps` (what [steps](../flow_overview.md) to use) and `configurations` (settings for different modules). Here's a simplification:
|
||||
The configuration file is typically called `orchestration.yaml` and stored in the `secrets` folder on your desktop. The configuration file contains all the settings for your entire Auto Archiver workflow in one easy-to-find place.
|
||||
|
||||
If you want to have Auto Archiver run with the recommended 'basic' setup, you can use the auto-generated default configuration file as it is, without editing it.
|
||||
|
||||
### Advanced Configuration
|
||||
|
||||
The structure of the orchestration file is split into 2 parts: `steps` (what [steps](../flow_overview.md) to use) and `configurations` (settings for individual modules).
|
||||
|
||||
A default `orchestration.yaml` will be created for you the first time you run auto-archiver (without any arguments). Here's what it looks like:
|
||||
|
||||
|
@ -21,7 +26,7 @@ A default `orchestration.yaml` will be created for you the first time you run au
|
|||
|
||||
</details>
|
||||
|
||||
## Configuring from the Command Line
|
||||
## 2. Command Line configuration
|
||||
|
||||
You can run auto-archiver directly from the command line, without the need for a configuration file. Command line arguments are parsed using the format `module_name.config_value`. For example, a config value of `api_key` in the `instagram_extractor` module would be passed on the command line with the flag `--instagram_extractor.api_key=API_KEY`.
|
||||
|
||||
|
|
|
@ -1,81 +1,44 @@
|
|||
# Installing Auto Archiver
|
||||
# Installation
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
There are 3 main ways to use the auto-archiver. We recommend the 'docker' method for most uses. This installs all the requirements in one command.
|
||||
|
||||
configurations.md
|
||||
authentication.md
|
||||
config_cheatsheet.md
|
||||
```
|
||||
|
||||
There are 3 main ways to use the auto-archiver:
|
||||
1. Easiest: [via docker](#installing-with-docker)
|
||||
1. Easiest (recommended): [via docker](#installing-with-docker)
|
||||
2. Local Install: [using pip](#installing-locally-with-pip)
|
||||
3. Developer Install: [see the developer guidelines](../development/developer_guidelines)
|
||||
|
||||
|
||||
But **you always need a configuration/orchestration file**, which is where you'll configure where/what/how to archive. Make sure you read the [](configurations) page.
|
||||
|
||||
|
||||
## Installing with Docker
|
||||
## 1. Installing with Docker
|
||||
|
||||
[](https://hub.docker.com/r/bellingcat/auto-archiver)
|
||||
|
||||
Docker works like a virtual machine running inside your computer: it isolates everything and makes installation simple. Since it is an isolated environment, when you need to pass it your orchestration file or get downloaded media out of docker, you will need to connect folders on your machine with folders inside docker using the `-v` volume flag.
|
||||
Docker works like a virtual machine running inside your computer, making installation simple. You'll need to first set up Docker, and then download the Auto Archiver 'image':
|
||||
|
||||
|
||||
1. Install [docker](https://docs.docker.com/get-docker/)
|
||||
2. Pull the auto-archiver docker [image](https://hub.docker.com/r/bellingcat/auto-archiver) with `docker pull bellingcat/auto-archiver`
|
||||
3. Run the docker image locally in a container: `docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver --config secrets/orchestration.yaml` breaking this command down:
|
||||
1. `docker run` tells docker to start a new container (an instance of the image)
|
||||
2. `--rm` makes sure this container is removed after execution (less garbage locally)
|
||||
3. `-v $PWD/secrets:/app/secrets` - your secrets folder
|
||||
1. `-v` is a volume flag which means a folder that you have on your computer will be connected to a folder inside the docker container
|
||||
2. `$PWD/secrets` points to a `secrets/` folder in your current working directory (where your console points to), we use this folder as a best practice to hold all the secrets/tokens/passwords/... you use
|
||||
3. `/app/secrets` points to the path inside the docker container where this folder can be found
|
||||
4. `-v $PWD/local_archive:/app/local_archive` - (optional) if you use local_storage
|
||||
1. `-v` same as above, this is a volume instruction
|
||||
2. `$PWD/local_archive` is a folder `local_archive/` in case you want to archive locally and have the files accessible outside docker
|
||||
3. `/app/local_archive` is a folder inside docker that you can reference in your orchestration.yml file
|
||||
**a) Download and install docker**
|
||||
|
||||
### Example invocations
|
||||
Go to the [Docker website](https://docs.docker.com/get-docker/) and download the right version for your operating system.
|
||||
|
||||
The invocations below will run the auto-archiver Docker image using a configuration file that you have specified
|
||||
**b) Pull the Auto Archiver docker image**
|
||||
|
||||
Open your command line terminal, and copy-paste / type:
|
||||
|
||||
```bash
|
||||
# all the configurations come from ./secrets/orchestration.yaml
|
||||
docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver --config secrets/orchestration.yaml
|
||||
# uses the same configurations but for another google docs sheet
|
||||
# with a header on row 2 and with some different column names
|
||||
# notice that columns is a dictionary so you need to pass it as JSON and it will override only the values provided
|
||||
docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver --config secrets/orchestration.yaml --gsheet_feeder.sheet="use it on another sheets doc" --gsheet_feeder.header=2 --gsheet_feeder.columns='{"url": "link"}'
|
||||
# all the configurations come from orchestration.yaml and specifies that s3 files should be private
|
||||
docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver --config secrets/orchestration.yaml --s3_storage.private=1
|
||||
docker pull bellingcat/auto-archiver
|
||||
```
|
||||
|
||||
## Installing Locally with Pip
|
||||
This will download the docker image, which may take a while.
|
||||
|
||||
That's it, all done! You're now ready to set up [your configuration file](configurations.md). Or, if you want to use the recommended defaults, then you can [run Auto Archiver immediately](setup.md#running-a-docker-install).
|
||||
|
||||
------------
|
||||
|
||||
## 2. Installing Locally with Pip
|
||||
|
||||
1. Make sure you have python 3.10 or higher installed
|
||||
2. Install the package with your preferred package manager: `pip/pipenv/conda install auto-archiver` or `poetry add auto-archiver`
|
||||
3. Test it's installed with `auto-archiver --help`
|
||||
4. Install other local dependency requirements (for )
|
||||
5. Run it with your orchestration file and pass any flags you want in the command line `auto-archiver --config secrets/orchestration.yaml` if your orchestration file is inside a `secrets/`, which we advise
|
||||
4. Install other local dependency requirements (for example `ffmpeg`, `firefox`)
|
||||
|
||||
### Example invocations
|
||||
|
||||
Once all your [local requirements](#installing-local-requirements) are correctly installed, the following commands show how to run Auto Archiver:
|
||||
|
||||
```bash
|
||||
# all the configurations come from ./secrets/orchestration.yaml
|
||||
auto-archiver --config secrets/orchestration.yaml
|
||||
# uses the same configurations but for another google docs sheet
|
||||
# with a header on row 2 and with some different column names
|
||||
# notice that columns is a dictionary so you need to pass it as JSON and it will override only the values provided
|
||||
auto-archiver --config secrets/orchestration.yaml --gsheet_feeder.sheet="use it on another sheets doc" --gsheet_feeder.header=2 --gsheet_feeder.columns='{"url": "link"}'
|
||||
# all the configurations come from orchestration.yaml and specifies that s3 files should be private
|
||||
auto-archiver --config secrets/orchestration.yaml --s3_storage.private=1
|
||||
```
|
||||
After this, you're ready to set up [your configuration file](configurations.md), or if you want to use the recommended defaults, then you can [run Auto Archiver immediately](setup.md#running-a-local-install).
|
||||
|
||||
### Installing Local Requirements
|
||||
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
# Requirements
|
||||
|
||||
Using the Auto Archiver is very simple, but ideally you have some familiarity with using the command line to run programs. ([Command line crash course](https://developer.mozilla.org/en-US/docs/Learn_web_development/Getting_started/Environment_setup/Command_line)).
|
||||
|
||||
### System Requirements
|
||||
|
||||
* Auto Archiver works on any Windows, macOS and Linux computer
|
||||
* If you're using the **local install** method, then you should make sure to have python3.10+ installed
|
||||
|
||||
### Storage Requirements
|
||||
|
||||
By default, Auto Archiver uses your local computer storage for any downloaded media (videos, images etc.). If you're downloading large files, this may take up a lot of your local computer's space (more than 5GB of space).
|
||||
|
||||
If your storage space is limited, then you may want to set up an [alternative storage method](../modules/storage.md) for your media.
|
|
@ -0,0 +1,76 @@
|
|||
# Getting Started
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
installation.md
|
||||
configurations.md
|
||||
authentication.md
|
||||
requirements.md
|
||||
config_cheatsheet.md
|
||||
```
|
||||
|
||||
## Getting Started
|
||||
|
||||
To get started with Auto Archiver, there are 3 main steps you need to complete.
|
||||
|
||||
1. [Install Auto Archiver](installation.md)
|
||||
2. [Set up your configuration](configurations.md) (if you are ok with the default settings, you can skip this step)
|
||||
3. Run the archiving process<a id="running"></a>
|
||||
|
||||
The way you run the Auto Archiver depends on how you installed it (docker install or local install)
|
||||
|
||||
### Running a Docker Install
|
||||
|
||||
If you installed Auto Archiver using docker, open up your terminal, and copy-paste / type the following command:
|
||||
|
||||
```bash
|
||||
docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver
|
||||
```
|
||||
|
||||
breaking this command down:
|
||||
1. `docker run` tells docker to start a new container (an instance of the image)
|
||||
2. `--rm` makes sure this container is removed after execution (less garbage locally)
|
||||
3. `-v $PWD/secrets:/app/secrets` - your secrets folder with settings
|
||||
1. `-v` is a volume flag which means a folder that you have on your computer will be connected to a folder inside the docker container
|
||||
2. `$PWD/secrets` points to a `secrets/` folder in your current working directory (where your console points to), we use this folder as a best practice to hold all the secrets/tokens/passwords/... you use
|
||||
3. `/app/secrets` points to the path inside the docker container where this folder can be found
|
||||
4. `-v $PWD/local_archive:/app/local_archive` - (optional) if you use local_storage
|
||||
1. `-v` same as above, this is a volume instruction
|
||||
2. `$PWD/local_archive` is a folder `local_archive/` in case you want to archive locally and have the files accessible outside docker
|
||||
3. `/app/local_archive` is a folder inside docker that you can reference in your orchestration.yml file
|
||||
|
||||
### Example invocations
|
||||
|
||||
The invocations below will run the auto-archiver Docker image using a configuration file that you have specified
|
||||
|
||||
```bash
|
||||
# Have auto-archiver run with the default settings, generating a settings file in ./secrets/orchestration.yaml
|
||||
docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver
|
||||
|
||||
# uses the same configuration, but with the `gsheet_feeder`, a header on row 2 and with some different column names
|
||||
# notice that columns is a dictionary so you need to pass it as JSON and it will override only the values provided
|
||||
docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver --feeders=gsheet_feeder --gsheet_feeder.sheet="use it on another sheets doc" --gsheet_feeder.header=2 --gsheet_feeder.columns='{"url": "link"}'
|
||||
# Runs auto-archiver for the first time, but in 'full' mode, enabling all modules to get a full settings file
|
||||
docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver --mode full
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Running a Local Install
|
||||
|
||||
### Example invocations
|
||||
|
||||
Once all your [local requirements](#installing-local-requirements) are correctly installed, the following commands show how to run Auto Archiver:
|
||||
|
||||
```bash
|
||||
# all the configurations come from ./secrets/orchestration.yaml
|
||||
auto-archiver --config secrets/orchestration.yaml
|
||||
# uses the same configurations but for another google docs sheet
|
||||
# with a header on row 2 and with some different column names
|
||||
# notice that columns is a dictionary so you need to pass it as JSON and it will override only the values provided
|
||||
auto-archiver --config secrets/orchestration.yaml --gsheet_feeder.sheet="use it on another sheets doc" --gsheet_feeder.header=2 --gsheet_feeder.columns='{"url": "link"}'
|
||||
# all the configurations come from orchestration.yaml and specifies that s3 files should be private
|
||||
auto-archiver --config secrets/orchestration.yaml --s3_storage.private=1
|
||||
```
|
|
@ -18,7 +18,7 @@ from typing import Any, List, Type, Tuple
|
|||
|
||||
_yaml: YAML = YAML()
|
||||
|
||||
DEFAULT_CONFIG_FILE = "orchestration.yaml"
|
||||
DEFAULT_CONFIG_FILE = "secrets/orchestration.yaml"
|
||||
|
||||
EMPTY_CONFIG = _yaml.load("""
|
||||
# Auto Archiver Configuration
|
||||
|
|
Ładowanie…
Reference in New Issue