mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-12 13:38:33 +03:00
Merge pull request #68 from bellingcat/config-funcs
Move shared util functions
This commit is contained in:
@@ -1,49 +0,0 @@
|
|||||||
# TODO: code in this file should eventually be moved to the auto-archiver code base
|
|
||||||
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from auto_archiver.core import Media, Metadata
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from app.shared.db import models
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_urls(result: Metadata) -> List[models.ArchiveUrl]:
|
|
||||||
db_urls = []
|
|
||||||
for m in result.media:
|
|
||||||
for i, url in enumerate(m.urls):
|
|
||||||
db_urls.append(
|
|
||||||
models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}"))
|
|
||||||
)
|
|
||||||
for k, prop in m.properties.items():
|
|
||||||
if prop_converted := convert_if_media(prop):
|
|
||||||
for i, url in enumerate(prop_converted.urls):
|
|
||||||
db_urls.append(
|
|
||||||
models.ArchiveUrl(
|
|
||||||
url=url, key=prop_converted.get("id", f"{k}_{i}")
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if isinstance(prop, list):
|
|
||||||
for i, prop_media in enumerate(prop):
|
|
||||||
if prop_media := convert_if_media(prop_media):
|
|
||||||
for j, url in enumerate(prop_media.urls):
|
|
||||||
db_urls.append(
|
|
||||||
models.ArchiveUrl(
|
|
||||||
url=url,
|
|
||||||
key=prop_media.get(
|
|
||||||
"id", f"{k}{prop_media.key}_{i}.{j}"
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return db_urls
|
|
||||||
|
|
||||||
|
|
||||||
def convert_if_media(media):
|
|
||||||
if isinstance(media, Media):
|
|
||||||
return media
|
|
||||||
elif isinstance(media, dict):
|
|
||||||
try:
|
|
||||||
return Media.from_dict(media)
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"error parsing {media} : {e}")
|
|
||||||
return False
|
|
||||||
@@ -6,7 +6,8 @@ from auto_archiver.core import Media, Metadata
|
|||||||
|
|
||||||
from app.shared import schemas
|
from app.shared import schemas
|
||||||
from app.shared.db import models
|
from app.shared.db import models
|
||||||
from app.worker.main import create_archive_task, create_sheet_task, get_all_urls
|
from app.web.utils.misc import get_all_urls
|
||||||
|
from app.worker.main import create_archive_task, create_sheet_task
|
||||||
|
|
||||||
|
|
||||||
class TestCreateArchiveTask:
|
class TestCreateArchiveTask:
|
||||||
|
|||||||
@@ -9,12 +9,12 @@ from loguru import logger
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.shared import business_logic, schemas
|
from app.shared import business_logic, schemas
|
||||||
from app.shared.aa_utils import get_all_urls
|
|
||||||
from app.shared.db import models, worker_crud
|
from app.shared.db import models, worker_crud
|
||||||
from app.shared.db.database import get_db_dependency
|
from app.shared.db.database import get_db_dependency
|
||||||
from app.shared.log import log_error
|
from app.shared.log import log_error
|
||||||
from app.web.config import ALLOW_ANY_EMAIL
|
from app.web.config import ALLOW_ANY_EMAIL
|
||||||
from app.web.security import token_api_key_auth
|
from app.web.security import token_api_key_auth
|
||||||
|
from app.web.utils.misc import get_all_urls
|
||||||
|
|
||||||
|
|
||||||
interoperability_router = APIRouter(
|
interoperability_router = APIRouter(
|
||||||
|
|||||||
@@ -26,7 +26,8 @@ def get_status(
|
|||||||
try:
|
try:
|
||||||
if task.status == "FAILURE":
|
if task.status == "FAILURE":
|
||||||
# *FAILURE* The task raised an exception, or has exceeded the retry limit.
|
# *FAILURE* The task raised an exception, or has exceeded the retry limit.
|
||||||
# The :attr:`result` attribute then contains the exception raised by the task.
|
# The :attr:`result` attribute then contains the exception raised by
|
||||||
|
# the task.
|
||||||
# https://docs.celeryq.dev/en/stable/_modules/celery/result.html#AsyncResult
|
# https://docs.celeryq.dev/en/stable/_modules/celery/result.html#AsyncResult
|
||||||
raise task.result
|
raise task.result
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ from app.web.utils.metrics import EXCEPTION_COUNTER
|
|||||||
async def logging_middleware(request: Request, call_next):
|
async def logging_middleware(request: Request, call_next):
|
||||||
try:
|
try:
|
||||||
response = await call_next(request)
|
response = await call_next(request)
|
||||||
# TODO: use Origin to have summary prometheus metrics on where requests come from
|
# TODO: use Origin to have summary prometheus metrics on where
|
||||||
|
# requests come from
|
||||||
# origin = request.headers.get("origin")
|
# origin = request.headers.get("origin")
|
||||||
logger.info(
|
logger.info(
|
||||||
f"{request.client.host}:{request.client.port} {request.method} {request.url._url} - HTTP {response.status_code}"
|
f"{request.client.host}:{request.client.port} {request.method} {request.url._url} - HTTP {response.status_code}"
|
||||||
@@ -25,7 +26,9 @@ async def logging_middleware(request: Request, call_next):
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|
||||||
async def increase_exceptions_counter(e: Exception, location: str = "cronjob"):
|
async def increase_exceptions_counter(
|
||||||
|
e: Exception, location: str = "cronjob"
|
||||||
|
) -> None:
|
||||||
if location == "cronjob":
|
if location == "cronjob":
|
||||||
try:
|
try:
|
||||||
last_trace = traceback.extract_tb(e.__traceback__)[-1]
|
last_trace = traceback.extract_tb(e.__traceback__)[-1]
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ async def get_token_or_user_auth(
|
|||||||
async def get_user_auth(
|
async def get_user_auth(
|
||||||
credentials: HTTPAuthorizationCredentials = Depends(bearer_security),
|
credentials: HTTPAuthorizationCredentials = Depends(bearer_security),
|
||||||
):
|
):
|
||||||
# validates the Bearer token in the case that it requires it
|
# Validates the Bearer token in the case that it requires it
|
||||||
valid_user, info = authenticate_user(credentials.credentials)
|
valid_user, info = authenticate_user(credentials.credentials)
|
||||||
if valid_user:
|
if valid_user:
|
||||||
return info.lower()
|
return info.lower()
|
||||||
|
|||||||
@@ -37,11 +37,12 @@ DATABASE_METRICS_COUNTER = Counter(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def redis_subscribe_worker_exceptions(REDIS_EXCEPTIONS_CHANNEL: str):
|
async def redis_subscribe_worker_exceptions(redis_exceptions_channel: str):
|
||||||
# Subscribe to Redis channel and increment the counter for each exception with info on the exception and task
|
# Subscribe to Redis channel and increment the counter for each exception
|
||||||
|
# with info on the exception and task
|
||||||
Redis = get_redis()
|
Redis = get_redis()
|
||||||
PubSubExceptions = Redis.pubsub()
|
PubSubExceptions = Redis.pubsub()
|
||||||
PubSubExceptions.subscribe(REDIS_EXCEPTIONS_CHANNEL)
|
PubSubExceptions.subscribe(redis_exceptions_channel)
|
||||||
while True:
|
while True:
|
||||||
message = PubSubExceptions.get_message()
|
message = PubSubExceptions.get_message()
|
||||||
if message and message["type"] == "message":
|
if message and message["type"] == "message":
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
import base64
|
import base64
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from auto_archiver.core import Media, Metadata
|
||||||
from fastapi.encoders import jsonable_encoder
|
from fastapi.encoders import jsonable_encoder
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from app.shared.db import models
|
||||||
|
|
||||||
|
|
||||||
def custom_jsonable_encoder(obj):
|
def custom_jsonable_encoder(obj):
|
||||||
@@ -14,3 +19,44 @@ def convert_priority_to_queue_dict(priority: str) -> dict:
|
|||||||
"priority": 0 if priority == "high" else 10,
|
"priority": 0 if priority == "high" else 10,
|
||||||
"queue": f"{priority}_priority",
|
"queue": f"{priority}_priority",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_if_media(media):
|
||||||
|
if isinstance(media, Media):
|
||||||
|
return media
|
||||||
|
elif isinstance(media, dict):
|
||||||
|
try:
|
||||||
|
return Media.from_dict(media)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"error parsing {media} : {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_urls(result: Metadata) -> List[models.ArchiveUrl]:
|
||||||
|
db_urls = []
|
||||||
|
for m in result.media:
|
||||||
|
for i, url in enumerate(m.urls):
|
||||||
|
db_urls.append(
|
||||||
|
models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}"))
|
||||||
|
)
|
||||||
|
for k, prop in m.properties.items():
|
||||||
|
if prop_converted := convert_if_media(prop):
|
||||||
|
for i, url in enumerate(prop_converted.urls):
|
||||||
|
db_urls.append(
|
||||||
|
models.ArchiveUrl(
|
||||||
|
url=url, key=prop_converted.get("id", f"{k}_{i}")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if isinstance(prop, list):
|
||||||
|
for i, prop_media in enumerate(prop):
|
||||||
|
if prop_media := convert_if_media(prop_media):
|
||||||
|
for j, url in enumerate(prop_media.urls):
|
||||||
|
db_urls.append(
|
||||||
|
models.ArchiveUrl(
|
||||||
|
url=url,
|
||||||
|
key=prop_media.get(
|
||||||
|
"id", f"{k}{prop_media.key}_{i}.{j}"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return db_urls
|
||||||
|
|||||||
@@ -8,12 +8,12 @@ from loguru import logger
|
|||||||
from sqlalchemy import exc
|
from sqlalchemy import exc
|
||||||
|
|
||||||
from app.shared import business_logic, schemas
|
from app.shared import business_logic, schemas
|
||||||
from app.shared.aa_utils import get_all_urls
|
|
||||||
from app.shared.db import models, worker_crud
|
from app.shared.db import models, worker_crud
|
||||||
from app.shared.db.database import get_db
|
from app.shared.db.database import get_db
|
||||||
from app.shared.log import log_error
|
from app.shared.log import log_error
|
||||||
from app.shared.settings import get_settings
|
from app.shared.settings import get_settings
|
||||||
from app.shared.task_messaging import get_celery, get_redis
|
from app.shared.task_messaging import get_celery, get_redis
|
||||||
|
from app.web.utils.misc import get_all_urls
|
||||||
from app.worker.worker_log import setup_celery_logger
|
from app.worker.worker_log import setup_celery_logger
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user