mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a184bf7b97 | ||
|
|
e535f44a88 | ||
|
|
0f28bf0e35 | ||
|
|
18a8636552 | ||
|
|
81be65c828 | ||
|
|
0a91863212 | ||
|
|
3ad8349e3f | ||
|
|
2768225cd1 |
@@ -5,7 +5,7 @@ def main():
|
|||||||
config = Config()
|
config = Config()
|
||||||
config.parse()
|
config.parse()
|
||||||
orchestrator = ArchivingOrchestrator(config)
|
orchestrator = ArchivingOrchestrator(config)
|
||||||
orchestrator.feed()
|
for r in orchestrator.feed(): pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from ast import List
|
from typing import Any, List
|
||||||
from typing import Any
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from dataclasses_json import dataclass_json, config
|
from dataclasses_json import dataclass_json, config
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from ast import List, Set
|
from typing import Any, List, Union, Dict
|
||||||
from typing import Any, Union, Dict
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from dataclasses_json import dataclass_json, config
|
from dataclasses_json import dataclass_json, config
|
||||||
import datetime
|
import datetime
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from ast import List
|
from typing import Generator, Union, List
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
from .context import ArchivingContext
|
from .context import ArchivingContext
|
||||||
|
|
||||||
@@ -10,7 +9,6 @@ from ..formatters import Formatter
|
|||||||
from ..storages import Storage
|
from ..storages import Storage
|
||||||
from ..enrichers import Enricher
|
from ..enrichers import Enricher
|
||||||
from ..databases import Database
|
from ..databases import Database
|
||||||
from .media import Media
|
|
||||||
from .metadata import Metadata
|
from .metadata import Metadata
|
||||||
|
|
||||||
import tempfile, traceback
|
import tempfile, traceback
|
||||||
@@ -29,7 +27,7 @@ class ArchivingOrchestrator:
|
|||||||
|
|
||||||
for a in self.archivers: a.setup()
|
for a in self.archivers: a.setup()
|
||||||
|
|
||||||
def feed(self) -> None:
|
def feed(self) -> Generator[Metadata]:
|
||||||
for item in self.feeder:
|
for item in self.feeder:
|
||||||
yield self.feed_item(item)
|
yield self.feed_item(item)
|
||||||
|
|
||||||
|
|||||||
@@ -2,3 +2,4 @@ from .database import Database
|
|||||||
from .gsheet_db import GsheetsDb
|
from .gsheet_db import GsheetsDb
|
||||||
from .console_db import ConsoleDb
|
from .console_db import ConsoleDb
|
||||||
from .csv_db import CSVDb
|
from .csv_db import CSVDb
|
||||||
|
from .api_db import AAApiDb
|
||||||
41
src/auto_archiver/databases/api_db.py
Normal file
41
src/auto_archiver/databases/api_db.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import requests, os
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from . import Database
|
||||||
|
from ..core import Metadata
|
||||||
|
|
||||||
|
|
||||||
|
class AAApiDb(Database):
    """
    Database step that reports finished archives to an auto-archiver-api
    instance by POSTing them to its ``submit-archive`` route.

    The attributes read in ``done`` (``api_endpoint``, ``api_secret``,
    ``public``, ``author_id``, ``group_id``, ``tags``) mirror the keys declared
    in ``configs``; presumably the base step populates them from ``config``
    -- TODO confirm against ``Database``/``Step``.
    """
    name = "auto_archiver_api_db"

    def __init__(self, config: dict) -> None:
        """Initialise the step and validate the two mandatory settings."""
        # without this STEP.__init__ is not called
        super().__init__(config)
        self.assert_valid_string("api_endpoint")
        self.assert_valid_string("api_secret")

    @staticmethod
    def configs() -> dict:
        """Return the configuration options understood by this database step."""
        return {
            "api_endpoint": {"default": None, "help": "API endpoint where calls are made to"},
            "api_secret": {"default": None, "help": "API authentication secret"},
            "public": {"default": False, "help": "whether the URL should be publicly available via the API"},
            "author_id": {"default": None, "help": "which email to assign as author"},
            "group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
            "tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
        }

    @staticmethod
    def _response_body(response):
        """Return the decoded JSON body of *response*, or its raw text when the body is not JSON."""
        try:
            return response.json()
        except ValueError:
            # requests' JSON decode errors derive from ValueError; a proxy or
            # crashing server may answer with HTML/plain text instead of JSON.
            return response.text

    def done(self, item: Metadata) -> None:
        """archival result ready - should be saved to DB"""
        logger.info(f"saving archive of {item.get_url()} to the AA API.")

        payload = {
            'result': item.to_json(),
            'public': self.public,
            'author_id': self.author_id,
            'group_id': self.group_id,
            # tags may be a set (see "cli_set" in configs), which is not JSON serialisable
            'tags': list(self.tags),
        }
        # Build the URL with "/" explicitly: os.path.join would insert the
        # platform path separator (a backslash on Windows).
        submit_url = f"{self.api_endpoint.rstrip('/')}/submit-archive"
        # The secret is sent as the HTTP basic-auth password; "abc" is the
        # username -- presumably ignored by the API, verify against the server.
        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely if the API never answers -- consider adding one.
        response = requests.post(submit_url, json=payload, auth=("abc", self.api_secret))

        body = self._response_body(response)
        if response.status_code == 200:
            logger.success(f"AA API: {body}")
        else:
            logger.error(f"AA API FAIL ({response.status_code}): {body}")
|
||||||
@@ -64,7 +64,7 @@ class GsheetsFeeder(Gsheets, Feeder):
|
|||||||
# All checks done - archival process starts here
|
# All checks done - archival process starts here
|
||||||
m = Metadata().set_url(url)
|
m = Metadata().set_url(url)
|
||||||
ArchivingContext.set("gsheet", {"row": row, "worksheet": gw}, keep_on_reset=True)
|
ArchivingContext.set("gsheet", {"row": row, "worksheet": gw}, keep_on_reset=True)
|
||||||
folder = slugify(gw.get_cell(row, 'folder').strip())
|
folder = slugify(gw.get_cell_or_default(row, 'folder').strip())
|
||||||
if len(folder):
|
if len(folder):
|
||||||
if self.use_sheet_names_in_stored_paths:
|
if self.use_sheet_names_in_stored_paths:
|
||||||
ArchivingContext.set("folder", os.path.join(folder, slugify(self.sheet), slugify(wks.title)), True)
|
ArchivingContext.set("folder", os.path.join(folder, slugify(self.sheet), slugify(wks.title)), True)
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "5"
|
_MINOR = "5"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "16"
|
_PATCH = "20"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|||||||
Reference in New Issue
Block a user