mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
Further addition to docs: creating modules, configurations, installation
This commit is contained in:
@@ -21,13 +21,12 @@ language = 'en'
|
|||||||
|
|
||||||
# -- General configuration ---------------------------------------------------
|
# -- General configuration ---------------------------------------------------
|
||||||
extensions = [
|
extensions = [
|
||||||
|
"myst_parser", # Markdown support
|
||||||
"autoapi.extension", # Generate API documentation from docstrings
|
"autoapi.extension", # Generate API documentation from docstrings
|
||||||
"sphinxcontrib.mermaid", # Mermaid diagrams
|
"sphinxcontrib.mermaid", # Mermaid diagrams
|
||||||
"myst_parser", # Markdown support
|
|
||||||
"sphinx.ext.viewcode", # Source code links
|
"sphinx.ext.viewcode", # Source code links
|
||||||
"sphinx.ext.napoleon", # Google-style and NumPy-style docstrings
|
"sphinx.ext.napoleon", # Google-style and NumPy-style docstrings
|
||||||
"sphinx.ext.autosectionlabel",
|
"sphinx.ext.autosectionlabel",
|
||||||
# "sphinx.ext.autodoc", # Include custom docstrings
|
|
||||||
# 'sphinx.ext.autosummary', # Summarize module/class/function docs
|
# 'sphinx.ext.autosummary', # Summarize module/class/function docs
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -55,7 +54,6 @@ autoapi_options = [
|
|||||||
|
|
||||||
# -- Markdown Support --------------------------------------------------------
|
# -- Markdown Support --------------------------------------------------------
|
||||||
myst_enable_extensions = [
|
myst_enable_extensions = [
|
||||||
"colon_fence", # ::: fences
|
|
||||||
"deflist", # Definition lists
|
"deflist", # Definition lists
|
||||||
"html_admonition", # HTML-style admonitions
|
"html_admonition", # HTML-style admonitions
|
||||||
"html_image", # Inline HTML images
|
"html_image", # Inline HTML images
|
||||||
@@ -63,7 +61,6 @@ myst_enable_extensions = [
|
|||||||
"smartquotes", # Smart quotes
|
"smartquotes", # Smart quotes
|
||||||
"linkify", # Auto-detect links
|
"linkify", # Auto-detect links
|
||||||
"substitution", # Text substitutions
|
"substitution", # Text substitutions
|
||||||
"attrs_block",
|
|
||||||
]
|
]
|
||||||
myst_heading_anchors = 2
|
myst_heading_anchors = 2
|
||||||
myst_fence_as_directive = ["mermaid"]
|
myst_fence_as_directive = ["mermaid"]
|
||||||
|
|||||||
59
docs/source/development/creating_modules.md
Normal file
59
docs/source/development/creating_modules.md
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# Creating Your Own Modules
|
||||||
|
|
||||||
|
Modules are what's used to extend `auto-archiver` to process different websites or media, and/or transform the data in a way that suits your needs. In most cases, the [](../core_modules.md) should be sufficient for everyday use, but the most common use-cases for making your own Modules include:
|
||||||
|
|
||||||
|
1. Extracting data from a website which doesn't work with the current core extractors.
|
||||||
|
2. Enriching or altering the data before saving with additional information that the core enrichers do not offer.
|
||||||
|
3. Storing your data in a different format/location from what the core storage providers offer.
|
||||||
|
|
||||||
|
## Setting up the folder structure
|
||||||
|
|
||||||
|
1. First, decide what type of module you wish to create. Check the types of modules on the [](../core_modules.md) page to decide what type you need. (Note: a module can be more than one type, more on that below)
|
||||||
|
2. Create a new python package (a folder) with the name of your module (in this tutorial, we'll call it `awesome_extractor`).
|
||||||
|
3. Create the `__manifest__.py` and the `awesome_extractor.py` files in this folder.
|
||||||
|
|
||||||
|
When done, you should have a module structure as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── awesome_extractor
|
||||||
|
│ ├── __manifest__.py
|
||||||
|
│ └── awesome_extractor.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Check out the [core modules](https://github.com/bellingcat/auto-archiver/tree/main/src/auto_archiver/modules) in the `auto-archiver` repository for examples of the folder structure for real-world modules.
|
||||||
|
|
||||||
|
## Populating the Manifest File
|
||||||
|
|
||||||
|
The manifest file is where you define the core information of your module. It is a python dict containing important information, here's an example file:
|
||||||
|
|
||||||
|
```{code} python
|
||||||
|
:filename: myfile.py
|
||||||
|
|
||||||
|
def setup():
|
||||||
|
pass
|
||||||
|
```
|
||||||
|
|
||||||
|
```{include} ../../../tests/data/test_modules/example_module/__manifest__.py
|
||||||
|
:name: __manifest__.py
|
||||||
|
:literal:
|
||||||
|
:parser: python
|
||||||
|
```
|
||||||
|
|
||||||
|
## Creating the Python Code
|
||||||
|
|
||||||
|
The next step is to create your module code. First, create a class which should subclass the base module types from `auto_archiver.core`, here's an example class for the `awesome_extractor` module which is an `extractor`:
|
||||||
|
|
||||||
|
```{code-block} python
|
||||||
|
:filename: awesome_extractor.py
|
||||||
|
|
||||||
|
from auto_archiver.core import Extractor, Metadata
|
||||||
|
|
||||||
|
class AwesomeExtractor(Extractor):
|
||||||
|
|
||||||
|
def download(self, item: Metadata) -> Metadata | False:
|
||||||
|
url = item.get_url()
|
||||||
|
# download the content and create the metadata object
|
||||||
|
metadata = ...
|
||||||
|
return metadata
|
||||||
|
```
|
||||||
@@ -26,7 +26,7 @@ Install development packages (used for unit tests etc.) using:
|
|||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
:hidden:
|
:hidden:
|
||||||
|
creating_modules
|
||||||
docker_development
|
docker_development
|
||||||
testing
|
testing
|
||||||
docs
|
docs
|
||||||
|
|||||||
79
docs/source/example.orchestration.yaml
Normal file
79
docs/source/example.orchestration.yaml
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
# Auto Archiver Configuration
|
||||||
|
# Steps are the modules that will be run in the order they are defined
|
||||||
|
|
||||||
|
steps:
|
||||||
|
feeders:
|
||||||
|
- cli_feeder
|
||||||
|
extractors:
|
||||||
|
- generic_extractor
|
||||||
|
- telegram_extractor
|
||||||
|
enrichers:
|
||||||
|
- thumbnail_enricher
|
||||||
|
- meta_enricher
|
||||||
|
- pdq_hash_enricher
|
||||||
|
- ssl_enricher
|
||||||
|
- hash_enricher
|
||||||
|
databases:
|
||||||
|
- console_db
|
||||||
|
- csv_db
|
||||||
|
storages:
|
||||||
|
- local_storage
|
||||||
|
formatters:
|
||||||
|
- html_formatter
|
||||||
|
|
||||||
|
# Global configuration
|
||||||
|
|
||||||
|
# Authentication
|
||||||
|
# a dictionary of authentication information that can be used by extractors to log in to a website.
|
||||||
|
# you can use a comma separated list for multiple domains on the same line (common use case: x.com,twitter.com)
|
||||||
|
# Common login 'types' are username/password, cookie, api key/token.
|
||||||
|
# There are two special keys for using cookies, they are: cookies_file and cookies_from_browser.
|
||||||
|
# Some Examples:
|
||||||
|
# facebook.com:
|
||||||
|
# username: "my_username"
|
||||||
|
# password: "my_password"
|
||||||
|
# or for a site that uses an API key:
|
||||||
|
# twitter.com,x.com:
|
||||||
|
# api_key
|
||||||
|
# api_secret
|
||||||
|
# youtube.com:
|
||||||
|
# cookie: "login_cookie=value ; other_cookie=123" # multiple 'key=value' pairs should be separated by ;
|
||||||
|
|
||||||
|
authentication: {}
|
||||||
|
|
||||||
|
# Logging settings for your project. See the logging settings with --help
|
||||||
|
|
||||||
|
logging:
|
||||||
|
level: INFO
|
||||||
|
|
||||||
|
# These are the global configurations that are used by the modules
|
||||||
|
|
||||||
|
file:
|
||||||
|
rotation:
|
||||||
|
local_storage:
|
||||||
|
path_generator: flat
|
||||||
|
filename_generator: static
|
||||||
|
save_to: ./local_archive
|
||||||
|
save_absolute: false
|
||||||
|
html_formatter:
|
||||||
|
detect_thumbnails: true
|
||||||
|
thumbnail_enricher:
|
||||||
|
thumbnails_per_minute: 60
|
||||||
|
max_thumbnails: 16
|
||||||
|
generic_extractor:
|
||||||
|
subtitles: true
|
||||||
|
comments: false
|
||||||
|
livestreams: false
|
||||||
|
live_from_start: false
|
||||||
|
proxy: ''
|
||||||
|
end_means_success: true
|
||||||
|
allow_playlist: false
|
||||||
|
max_downloads: inf
|
||||||
|
csv_db:
|
||||||
|
csv_file: db.csv
|
||||||
|
ssl_enricher:
|
||||||
|
skip_when_nothing_archived: true
|
||||||
|
hash_enricher:
|
||||||
|
algorithm: SHA-256
|
||||||
|
chunksize: 16000000
|
||||||
|
|
||||||
100
docs/source/installation/configurations.md
Normal file
100
docs/source/installation/configurations.md
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
|
||||||
|
# Configuring
|
||||||
|
|
||||||
|
|
||||||
|
```{toctree}
|
||||||
|
:hidden:
|
||||||
|
|
||||||
|
configurations
|
||||||
|
```
|
||||||
|
|
||||||
|
This section of the documentation provides guidelines for configuring the tool.
|
||||||
|
|
||||||
|
## Configuring from the Command Line
|
||||||
|
|
||||||
|
You can run auto-archiver directly from the command line, without the need for a configuration file. Command line arguments are parsed using the format `module_name.config_value`. For example, a config value of `api_key` in the `instagram_extractor` module would be passed on the command line with the flag `--instagram_extractor.api_key=API_KEY`.
|
||||||
|
|
||||||
|
The command line arguments are useful for testing or editing config values and enabling/disabling modules on the fly. When you are happy with your settings, you can store them back in your configuration file by passing the `-s/--store` flag on the command line.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
auto-archiver --instagram_extractor.api_key=123 --other_module.setting --store
|
||||||
|
# will store the new settings into the configuration file (default: orchestration.yaml)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuring using a file
|
||||||
|
|
||||||
|
The recommended way to configure auto-archiver for long-term and deployed projects is a configuration file, typically called `orchestration.yaml`. This is a YAML file containing all the settings for your entire workflow.
|
||||||
|
|
||||||
|
A default `orchestration.yaml` will be created for you the first time you run auto-archiver (without any arguments). Here's what it looks like:
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>View example.orchestration.yaml</summary>
|
||||||
|
|
||||||
|
```{literalinclude} ../example.orchestration.yaml
|
||||||
|
:language: yaml
|
||||||
|
:caption: example.orchestration.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## Core Module Configuration
|
||||||
|
|
||||||
|
View the configurable settings for the core modules on the individual doc pages for each [](../core_modules.md).
|
||||||
|
You can also view all settings available for the modules you have on your system using the `--help` flag in auto-archiver.
|
||||||
|
|
||||||
|
```{code-block} console
|
||||||
|
:caption: Example output when using the --help flag with auto-archiver
|
||||||
|
$ auto-archiver --help
|
||||||
|
...
|
||||||
|
Positional Arguments:
|
||||||
|
urls URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--help, -h show a full help message and exit
|
||||||
|
--version show program's version number and exit
|
||||||
|
--config CONFIG_FILE the filename of the YAML configuration file (defaults to 'config.yaml')
|
||||||
|
--mode {simple,full} the mode to run the archiver in
|
||||||
|
-s, --store, --no-store
|
||||||
|
Store the created config in the config file
|
||||||
|
--module_paths MODULE_PATHS [MODULE_PATHS ...]
|
||||||
|
additional paths to search for modules
|
||||||
|
--feeders STEPS.FEEDERS [STEPS.FEEDERS ...]
|
||||||
|
the feeders to use
|
||||||
|
--enrichers STEPS.ENRICHERS [STEPS.ENRICHERS ...]
|
||||||
|
the enrichers to use
|
||||||
|
--extractors STEPS.EXTRACTORS [STEPS.EXTRACTORS ...]
|
||||||
|
the extractors to use
|
||||||
|
--databases STEPS.DATABASES [STEPS.DATABASES ...]
|
||||||
|
the databases to use
|
||||||
|
--storages STEPS.STORAGES [STEPS.STORAGES ...]
|
||||||
|
the storages to use
|
||||||
|
--formatters STEPS.FORMATTERS [STEPS.FORMATTERS ...]
|
||||||
|
the formatter to use
|
||||||
|
--authentication AUTHENTICATION
|
||||||
|
A dictionary of sites and their authentication methods (token, username etc.) that extractors can use to log into a website. If passing this on the command line, use a JSON string. You may
|
||||||
|
also pass a path to a valid JSON/YAML file which will be parsed.
|
||||||
|
--logging.level {INFO,DEBUG,ERROR,WARNING}
|
||||||
|
the logging level to use
|
||||||
|
--logging.file LOGGING.FILE
|
||||||
|
the logging file to write to
|
||||||
|
--logging.rotation LOGGING.ROTATION
|
||||||
|
the logging rotation to use
|
||||||
|
|
||||||
|
Wayback Machine Enricher:
|
||||||
|
Submits the current URL to the Wayback Machine for archiving and returns either a job ID or the...
|
||||||
|
|
||||||
|
--wayback_extractor_enricher.timeout TIMEOUT
|
||||||
|
seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually.
|
||||||
|
--wayback_extractor_enricher.if_not_archived_within IF_NOT_ARCHIVED_WITHIN
|
||||||
|
only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information:
|
||||||
|
https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA
|
||||||
|
--wayback_extractor_enricher.key KEY
|
||||||
|
wayback API key. to get credentials visit https://archive.org/account/s3.php
|
||||||
|
--wayback_extractor_enricher.secret SECRET
|
||||||
|
wayback API secret. to get credentials visit https://archive.org/account/s3.php
|
||||||
|
--wayback_extractor_enricher.proxy_http PROXY_HTTP
|
||||||
|
http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port
|
||||||
|
--wayback_extractor_enricher.proxy_https PROXY_HTTPS
|
||||||
|
https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port
|
||||||
|
```
|
||||||
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
|
|
||||||
Configurations
|
|
||||||
==============
|
|
||||||
|
|
||||||
|
|
||||||
```{toctree}
|
|
||||||
:hidden:
|
|
||||||
|
|
||||||
configurations
|
|
||||||
```
|
|
||||||
|
|
||||||
This section of the documentation provides guidelines for configuring the tool.
|
|
||||||
|
|
||||||
File Reference
|
|
||||||
--------------
|
|
||||||
|
|
||||||
|
|
||||||
Below is the content of the `example.orchestration.yaml` file:
|
|
||||||
|
|
||||||
.. raw:: html
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary>View example.orchestration.yaml</summary>
|
|
||||||
|
|
||||||
.. literalinclude:: ../../example.orchestration.yaml
|
|
||||||
:language: yaml
|
|
||||||
:caption: example.orchestration.yaml
|
|
||||||
|
|
||||||
.. raw:: html
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
|
|
||||||
Configs
|
|
||||||
-------
|
|
||||||
|
|
||||||
This section of the documentation will show the custom configurations for the individual steps of the tool.
|
|
||||||
|
|
||||||
.. include:: _auto/configs.rst
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,5 +1,10 @@
|
|||||||
# Installing Auto Archiver
|
# Installing Auto Archiver
|
||||||
|
|
||||||
|
```{toctree}
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
configurations.md
|
||||||
|
```
|
||||||
|
|
||||||
There are 3 main ways to use the auto-archiver:
|
There are 3 main ways to use the auto-archiver:
|
||||||
1. Easiest: [via docker](#installing-with-docker)
|
1. Easiest: [via docker](#installing-with-docker)
|
||||||
|
|||||||
@@ -1,156 +0,0 @@
|
|||||||
steps:
|
|
||||||
# only 1 feeder allowed
|
|
||||||
feeder: gsheet_feeder # defaults to cli_feeder
|
|
||||||
archivers: # order matters, uncomment to activate
|
|
||||||
- bluesky_archiver
|
|
||||||
# - vk_archiver
|
|
||||||
# - telethon_archiver
|
|
||||||
# - telegram_archiver
|
|
||||||
# - twitter_archiver
|
|
||||||
# - twitter_api_archiver
|
|
||||||
# - instagram_api_archiver
|
|
||||||
# - instagram_tbot_archiver
|
|
||||||
# - instagram_archiver
|
|
||||||
# - tiktok_archiver
|
|
||||||
- youtubedl_archiver
|
|
||||||
# - wayback_archiver_enricher
|
|
||||||
# - wacz_archiver_enricher
|
|
||||||
enrichers:
|
|
||||||
- hash_enricher
|
|
||||||
# - meta_enricher
|
|
||||||
# - metadata_enricher
|
|
||||||
# - screenshot_enricher
|
|
||||||
# - pdq_hash_enricher
|
|
||||||
# - ssl_enricher
|
|
||||||
# - timestamping_enricher
|
|
||||||
# - whisper_enricher
|
|
||||||
# - thumbnail_enricher
|
|
||||||
# - wayback_archiver_enricher
|
|
||||||
# - wacz_archiver_enricher
|
|
||||||
# - pdq_hash_enricher # if you want to calculate hashes for thumbnails, include this after thumbnail_enricher
|
|
||||||
formatter: html_formatter # defaults to mute_formatter
|
|
||||||
storages:
|
|
||||||
- local_storage
|
|
||||||
# - s3_storage
|
|
||||||
# - gdrive_storage
|
|
||||||
databases:
|
|
||||||
- console_db
|
|
||||||
# - csv_db
|
|
||||||
# - gsheet_db
|
|
||||||
# - mongo_db
|
|
||||||
|
|
||||||
configurations:
|
|
||||||
gsheet_feeder:
|
|
||||||
sheet: "your sheet name"
|
|
||||||
header: 1
|
|
||||||
service_account: "secrets/service_account.json"
|
|
||||||
# allow_worksheets: "only parse this worksheet"
|
|
||||||
# block_worksheets: "blocked sheet 1,blocked sheet 2"
|
|
||||||
use_sheet_names_in_stored_paths: false
|
|
||||||
columns:
|
|
||||||
url: link
|
|
||||||
status: archive status
|
|
||||||
folder: destination folder
|
|
||||||
archive: archive location
|
|
||||||
date: archive date
|
|
||||||
thumbnail: thumbnail
|
|
||||||
timestamp: upload timestamp
|
|
||||||
title: upload title
|
|
||||||
text: textual content
|
|
||||||
screenshot: screenshot
|
|
||||||
hash: hash
|
|
||||||
pdq_hash: perceptual hashes
|
|
||||||
wacz: wacz
|
|
||||||
replaywebpage: replaywebpage
|
|
||||||
instagram_tbot_archiver:
|
|
||||||
api_id: "TELEGRAM_BOT_API_ID"
|
|
||||||
api_hash: "TELEGRAM_BOT_API_HASH"
|
|
||||||
# session_file: "secrets/anon"
|
|
||||||
telethon_archiver:
|
|
||||||
api_id: "TELEGRAM_BOT_API_ID"
|
|
||||||
api_hash: "TELEGRAM_BOT_API_HASH"
|
|
||||||
# session_file: "secrets/anon"
|
|
||||||
join_channels: false
|
|
||||||
channel_invites: # if you want to archive from private channels
|
|
||||||
- invite: https://t.me/+123456789
|
|
||||||
id: 0000000001
|
|
||||||
- invite: https://t.me/+123456788
|
|
||||||
id: 0000000002
|
|
||||||
|
|
||||||
twitter_api_archiver:
|
|
||||||
# either bearer_token only
|
|
||||||
bearer_token: "TWITTER_BEARER_TOKEN"
|
|
||||||
# OR all of the below
|
|
||||||
# consumer_key: ""
|
|
||||||
# consumer_secret: ""
|
|
||||||
# access_token: ""
|
|
||||||
# access_secret: ""
|
|
||||||
instagram_archiver:
|
|
||||||
username: "INSTAGRAM_USERNAME"
|
|
||||||
password: "INSTAGRAM_PASSWORD"
|
|
||||||
# session_file: "secrets/instaloader.session"
|
|
||||||
|
|
||||||
vk_archiver:
|
|
||||||
username: "or phone number"
|
|
||||||
password: "vk pass"
|
|
||||||
session_file: "secrets/vk_config.v2.json"
|
|
||||||
|
|
||||||
youtubedl_archiver:
|
|
||||||
subtitles: true
|
|
||||||
# use one of the following two methods to authenticate in youtube - either provide a cookies file or use the cookies of the given browser
|
|
||||||
# for more information, see https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
|
|
||||||
# cookie_file: "secrets/youtube_cookies.txt"
|
|
||||||
# cookies_from_browser: firefox
|
|
||||||
# proxy: socks5://proxy-user:password@proxy-ip:port
|
|
||||||
|
|
||||||
screenshot_enricher:
|
|
||||||
width: 1280
|
|
||||||
height: 2300
|
|
||||||
# to save as pdf, uncomment the following lines and adjust the print options
|
|
||||||
# save_to_pdf: true
|
|
||||||
# print_options:
|
|
||||||
# for all options see https://www.selenium.dev/selenium/docs/api/py/webdriver/selenium.webdriver.common.print_page_options.html
|
|
||||||
# background: true
|
|
||||||
# orientation: "portrait"
|
|
||||||
# scale: 1
|
|
||||||
# page_width: 8.5in
|
|
||||||
# page_height: 11in
|
|
||||||
# margin_top: 0.4in
|
|
||||||
# margin_bottom: 0.4in
|
|
||||||
# margin_left: 0.4in
|
|
||||||
# margin_right: 0.4in
|
|
||||||
# page_ranges: ""
|
|
||||||
# shrink_to_fit: true
|
|
||||||
|
|
||||||
wayback_archiver_enricher:
|
|
||||||
timeout: 10
|
|
||||||
key: "wayback key"
|
|
||||||
secret: "wayback secret"
|
|
||||||
hash_enricher:
|
|
||||||
algorithm: "SHA3-512" # can also be SHA-256
|
|
||||||
wacz_archiver_enricher:
|
|
||||||
profile: secrets/profile.tar.gz
|
|
||||||
local_storage:
|
|
||||||
save_to: "./local_archive"
|
|
||||||
save_absolute: true
|
|
||||||
filename_generator: static
|
|
||||||
path_generator: flat
|
|
||||||
s3_storage:
|
|
||||||
bucket: your-bucket-name
|
|
||||||
region: reg1
|
|
||||||
key: S3_KEY
|
|
||||||
secret: S3_SECRET
|
|
||||||
endpoint_url: "https://{region}.digitaloceanspaces.com"
|
|
||||||
cdn_url: "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}"
|
|
||||||
# if private:true S3 urls will not be readable online
|
|
||||||
private: false
|
|
||||||
# with 'random' you can generate a random UUID for the URL instead of a predictable path, useful to still have public but unlisted files, alternative is 'default' or not omitted from config
|
|
||||||
key_path: random
|
|
||||||
gdrive_storage:
|
|
||||||
path_generator: url
|
|
||||||
filename_generator: random
|
|
||||||
root_folder_id: folder_id_from_url
|
|
||||||
oauth_token: secrets/gd-token.json # needs to be generated with scripts/create_update_gdrive_oauth_token.py
|
|
||||||
service_account: "secrets/service_account.json"
|
|
||||||
csv_db:
|
|
||||||
csv_file: "./local_archive/db.csv"
|
|
||||||
@@ -48,6 +48,7 @@ authentication: {}
|
|||||||
|
|
||||||
logging:
|
logging:
|
||||||
level: INFO
|
level: INFO
|
||||||
|
|
||||||
""")
|
""")
|
||||||
# note: 'logging' is explicitly added above in order to better format the config file
|
# note: 'logging' is explicitly added above in order to better format the config file
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,29 @@
|
|||||||
{
|
{
|
||||||
|
# Display Name of your module
|
||||||
"name": "Example Module",
|
"name": "Example Module",
|
||||||
|
# The author of your module (optional)
|
||||||
|
"author": "John Doe",
|
||||||
|
# Optional version number, for your own versioning purposes
|
||||||
|
"version": 2.0,
|
||||||
|
# The type of the module, must be one (or more) of the built in module types
|
||||||
"type": ["extractor", "feeder", "formatter", "storage", "enricher", "database"],
|
"type": ["extractor", "feeder", "formatter", "storage", "enricher", "database"],
|
||||||
|
# a boolean indicating whether or not a module requires additional user setup before it can be used
|
||||||
|
# for example: adding API keys, installing additional software etc.
|
||||||
"requires_setup": False,
|
"requires_setup": False,
|
||||||
"dependencies": {"python": ["loguru"]
|
# a dictionary of dependencies for this module, that must be installed before the module is loaded.
|
||||||
},
|
# Can be python dependencies (external packages, or other auto-archiver modules), or you can
|
||||||
|
# provide external bin dependencies (e.g. ffmpeg, docker etc.)
|
||||||
|
"dependencies": {
|
||||||
|
"python": ["loguru"],
|
||||||
|
"bin": ["bash"],
|
||||||
|
},
|
||||||
|
# configurations that this module takes. These are argparse-compliant dictionaries, that are
|
||||||
|
# used to create command line arguments when the programme is run.
|
||||||
|
# The full name of the config option will become: `module_name.config_name`
|
||||||
"configs": {
|
"configs": {
|
||||||
"csv_file": {"default": "db.csv", "help": "CSV file name"},
|
"csv_file": {"default": "db.csv", "help": "CSV file name"},
|
||||||
"required_field": {"required": True, "help": "required field in the CSV file"},
|
"required_field": {"required": True, "help": "required field in the CSV file"},
|
||||||
},
|
},
|
||||||
|
# A description of the module, used for documentation
|
||||||
|
"description": "This is an example module",
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user