mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3e44b9b577 | ||
|
|
1a5797d0f8 | ||
|
|
768b8fce9f | ||
|
|
613b1f1e50 | ||
|
|
919c37bfb6 | ||
|
|
a655b3c987 | ||
|
|
d645b840ee |
2
.github/workflows/docker-publish.yaml
vendored
2
.github/workflows/docker-publish.yaml
vendored
@@ -9,7 +9,7 @@ on:
|
|||||||
release:
|
release:
|
||||||
types: [published]
|
types: [published]
|
||||||
push:
|
push:
|
||||||
branches: [ "main" ]
|
# branches: [ "main" ]
|
||||||
tags: [ "v*.*.*" ]
|
tags: [ "v*.*.*" ]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
|
|||||||
2
.github/workflows/python-publish.yaml
vendored
2
.github/workflows/python-publish.yaml
vendored
@@ -12,7 +12,7 @@ on:
|
|||||||
release:
|
release:
|
||||||
types: [published]
|
types: [published]
|
||||||
push:
|
push:
|
||||||
branches: [ "main" ]
|
# branches: [ "main" ]
|
||||||
tags: [ "v*.*.*" ]
|
tags: [ "v*.*.*" ]
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from ..formatters import Formatter
|
|||||||
from ..storages import Storage
|
from ..storages import Storage
|
||||||
from ..enrichers import Enricher
|
from ..enrichers import Enricher
|
||||||
from . import Step
|
from . import Step
|
||||||
|
from ..utils import update_nested_dict
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -38,7 +39,7 @@ class Config:
|
|||||||
self.cli_ops = {}
|
self.cli_ops = {}
|
||||||
self.config = {}
|
self.config = {}
|
||||||
|
|
||||||
def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs:str={}):
|
def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
|
||||||
"""
|
"""
|
||||||
if yaml_config_filename is provided, the --config argument is ignored,
|
if yaml_config_filename is provided, the --config argument is ignored,
|
||||||
useful for library usage when the config values are preloaded
|
useful for library usage when the config values are preloaded
|
||||||
@@ -81,7 +82,7 @@ class Config:
|
|||||||
|
|
||||||
# 2. read YAML config file (or use provided value)
|
# 2. read YAML config file (or use provided value)
|
||||||
self.yaml_config = self.read_yaml(yaml_config_filename)
|
self.yaml_config = self.read_yaml(yaml_config_filename)
|
||||||
self.yaml_config.update(overwrite_configs) # optional override programmatically
|
update_nested_dict(self.yaml_config, overwrite_configs)
|
||||||
|
|
||||||
# 3. CONFIGS: decide value with priority: CLI >> config.yaml >> default
|
# 3. CONFIGS: decide value with priority: CLI >> config.yaml >> default
|
||||||
self.config = defaultdict(dict)
|
self.config = defaultdict(dict)
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ class ArchivingOrchestrator:
|
|||||||
|
|
||||||
def feed(self) -> None:
|
def feed(self) -> None:
|
||||||
for item in self.feeder:
|
for item in self.feeder:
|
||||||
self.feed_item(item)
|
yield self.feed_item(item)
|
||||||
|
|
||||||
def feed_item(self, item: Metadata) -> Metadata:
|
def feed_item(self, item: Metadata) -> Metadata:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class Step(ABC):
|
|||||||
|
|
||||||
def init(name: str, config: dict, child: Type[Step]) -> Step:
|
def init(name: str, config: dict, child: Type[Step]) -> Step:
|
||||||
"""
|
"""
|
||||||
looks into direct subclasses of child for name and returns such ab object
|
looks into direct subclasses of child for name and returns such an object
|
||||||
TODO: cannot find subclasses of child.subclasses
|
TODO: cannot find subclasses of child.subclasses
|
||||||
"""
|
"""
|
||||||
for sub in child.__subclasses__():
|
for sub in child.__subclasses__():
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class GsheetsFeeder(Gsheets, Feeder):
|
|||||||
})
|
})
|
||||||
|
|
||||||
def __iter__(self) -> Metadata:
|
def __iter__(self) -> Metadata:
|
||||||
sh = self.gsheets_client.open(self.sheet)
|
sh = self.open_sheet()
|
||||||
for ii, wks in enumerate(sh.worksheets()):
|
for ii, wks in enumerate(sh.worksheets()):
|
||||||
if not self.should_process_sheet(wks.title):
|
if not self.should_process_sheet(wks.title):
|
||||||
logger.debug(f"SKIPPED worksheet '{wks.title}' due to allow/block rules")
|
logger.debug(f"SKIPPED worksheet '{wks.title}' due to allow/block rules")
|
||||||
|
|||||||
@@ -10,16 +10,17 @@ class Gsheets(Step):
|
|||||||
# without this STEP.__init__ is not called
|
# without this STEP.__init__ is not called
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.gsheets_client = gspread.service_account(filename=self.service_account)
|
self.gsheets_client = gspread.service_account(filename=self.service_account)
|
||||||
#TODO: config should be responsible for conversions
|
# TODO: config should be responsible for conversions
|
||||||
try: self.header = int(self.header)
|
try: self.header = int(self.header)
|
||||||
except: pass
|
except: pass
|
||||||
assert type(self.header) == int, f"header ({self.header}) value must be an integer not {type(self.header)}"
|
assert type(self.header) == int, f"header ({self.header}) value must be an integer not {type(self.header)}"
|
||||||
assert self.sheet is not None, "You need to define a sheet name in your orchestration file when using gsheets."
|
assert self.sheet is not None or self.sheet_id is not None, "You need to define either a 'sheet' name or a 'sheet_id' in your orchestration file when using gsheets."
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def configs() -> dict:
|
def configs() -> dict:
|
||||||
return {
|
return {
|
||||||
"sheet": {"default": None, "help": "name of the sheet to archive"},
|
"sheet": {"default": None, "help": "name of the sheet to archive"},
|
||||||
|
"sheet_id": {"default": None, "help": "(alternative to sheet name) the id of the sheet to archive"},
|
||||||
"header": {"default": 1, "help": "index of the header row (starts at 1)"},
|
"header": {"default": 1, "help": "index of the header row (starts at 1)"},
|
||||||
"service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
|
"service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
|
||||||
"columns": {
|
"columns": {
|
||||||
@@ -41,4 +42,10 @@ class Gsheets(Step):
|
|||||||
"help": "names of columns in the google sheet (stringified JSON object)",
|
"help": "names of columns in the google sheet (stringified JSON object)",
|
||||||
"cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
|
"cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def open_sheet(self):
|
||||||
|
if self.sheet:
|
||||||
|
return self.gsheets_client.open(self.sheet)
|
||||||
|
else: # self.sheet_id
|
||||||
|
return self.gsheets_client.open_by_key(self.sheet_id)
|
||||||
|
|||||||
@@ -40,3 +40,12 @@ class DateTimeEncoder(json.JSONEncoder):
|
|||||||
|
|
||||||
def dump_payload(p):
|
def dump_payload(p):
|
||||||
return json.dumps(p, ensure_ascii=False, indent=4, cls=DateTimeEncoder)
|
return json.dumps(p, ensure_ascii=False, indent=4, cls=DateTimeEncoder)
|
||||||
|
|
||||||
|
|
||||||
|
def update_nested_dict(dictionary, update_dict):
|
||||||
|
# takes 2 dicts and overwrites the first with the second only on the changed values
|
||||||
|
for key, value in update_dict.items():
|
||||||
|
if key in dictionary and isinstance(value, dict) and isinstance(dictionary[key], dict):
|
||||||
|
update_nested_dict(dictionary[key], value)
|
||||||
|
else:
|
||||||
|
dictionary[key] = value
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "5"
|
_MINOR = "5"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "13"
|
_PATCH = "16"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|||||||
Reference in New Issue
Block a user