mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
allow for human readable console logs, but defaults to JSON on file logs.
This commit is contained in:
@@ -24,7 +24,7 @@ This will disable all logs from Auto Archiver, but it does not disable logs for
|
||||
|
||||
#### Logging Level
|
||||
|
||||
There are 7 logging levels in total, with 5 of them used in this tool. They are: `DEBUG`, `INFO`, `SUCCESS`, `WARNING` and `ERROR`.
|
||||
There are 7 logging levels in total, with 5 of them used in this tool. They are: `DEBUG`, `INFO`, `SUCCESS`, `WARNING` and `ERROR`. If you select a level, only that and higher (more serious) levels will be included. `DEBUG` is the most verbose, while `ERROR` is the least verbose.
|
||||
|
||||
Change the logging level by setting the value in your orchestration config file:
|
||||
|
||||
@@ -42,6 +42,20 @@ For normal usage, it is recommended to use the `INFO` level, or if you prefer qu
|
||||
```{note} To learn about all logging levels, see the [loguru documentation](https://loguru.readthedocs.io/en/stable/api/logger.html)
|
||||
```
|
||||
|
||||
### Logging Format
|
||||
By default, the console logs are formatted in a human-readable way and the file logs are formatted in JSON. This behaviour is new as of version 1.1.1. If you want the console logs to be formatted as JSON too, you can set the `format:` option in your logging settings.
|
||||
|
||||
```{code} yaml
|
||||
:caption: orchestration.yaml
|
||||
|
||||
logging:
|
||||
format: json
|
||||
```
|
||||
|
||||
When the Auto Archiver is writing logs, it will include context about specific tasks. For example, if you are archiving a URL from a Google Sheet, the URL (along with a unique `trace_id` for that URL's archiving attempt) and the spreadsheet name and row will all be included in the logs. This is useful for debugging and understanding what the Auto Archiver is doing.
|
||||
|
||||
Using JSON allows you to easily parse the logs and extract specific information; tools like [`jq`](https://jqlang.org/) can be used to filter and search through the logs.
|
||||
|
||||
### Logging to a file
|
||||
|
||||
By default, auto-archiver will log to the console. But if you wish to store your logs for future reference, or you are running the auto-archiver from within a code implementation, then you may wish to enable file logging. This can be done by setting the `file:` config value in the logging settings.
|
||||
@@ -84,6 +98,7 @@ The below example logs only `DEBUG` logs to the console and to the file `/my/fil
|
||||
|
||||
logging:
|
||||
level: DEBUG
|
||||
format: json
|
||||
file: /my/file.log
|
||||
rotation: 1 week
|
||||
```
|
||||
@@ -15,7 +15,7 @@ import traceback
|
||||
from copy import copy
|
||||
|
||||
from rich_argparse import RichHelpFormatter
|
||||
from auto_archiver.utils.custom_logger import logger
|
||||
from auto_archiver.utils.custom_logger import format_for_human_readable_console, logger
|
||||
import requests
|
||||
|
||||
from auto_archiver.utils.misc import random_str
|
||||
@@ -348,7 +348,9 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
|
||||
sys.stderr,
|
||||
level=use_level,
|
||||
catch=True,
|
||||
format="<level>{level}</level>: <fg #64FFDA>{message}</fg #64FFDA> {extra[serialize_no_message]}",
|
||||
format="<level>{extra[serialized]}</level>"
|
||||
if logging_config.get("format", "").lower() == "json"
|
||||
else format_for_human_readable_console(),
|
||||
)
|
||||
|
||||
rotation = logging_config["rotation"]
|
||||
|
||||
@@ -2,13 +2,21 @@ from loguru import logger
|
||||
import json
|
||||
|
||||
|
||||
def extract_location(record, short=False):
    """Build a colon-separated source location string for a log record.

    Args:
        record: Mapping with a ``file`` entry exposing ``.name``, plus
            ``function`` and ``line`` entries.
        short: When true, the function name is left out.

    Returns:
        ``"<file>:<line>"`` if ``short`` is set, otherwise
        ``"<file>:<function>:<line>"``.
    """
    pieces = [record["file"].name]
    if not short:
        # The long form places the function name between file and line.
        pieces.append(record["function"])
    pieces.append(record["line"])
    return ":".join(f"{piece}" for piece in pieces)
|
||||
|
||||
|
||||
def extract_log_data(record):
|
||||
subset = {
|
||||
"level": record["level"].name,
|
||||
"time": record["time"].isoformat(timespec="seconds"),
|
||||
}
|
||||
subset["loc"] = f"{record['file'].name}:{record['function']}:{record['line']}"
|
||||
subset["loc"] = extract_location(record)
|
||||
|
||||
# This is where logger.contextualize() parameters can be added to the output
|
||||
for extra_key in ["trace", "url", "worksheet", "row"]:
|
||||
if extra_val := record.get("extra", {}).get(extra_key):
|
||||
subset[extra_key] = extra_val
|
||||
@@ -19,9 +27,14 @@ def extract_log_data(record):
|
||||
return subset
|
||||
|
||||
|
||||
def serialize_no_message(record):
|
||||
def serialize_for_console(record):
    """Serialize the leftover context of a log record for console output.

    Takes the data produced by ``extract_log_data`` and discards the
    ``message``, ``level``, ``loc`` and ``time`` entries.

    Returns:
        A JSON string of the remaining entries (non-ASCII characters kept
        as-is), or an empty string when nothing remains.
    """
    context = extract_log_data(record)
    for dropped_key in ("message", "level", "loc", "time"):
        context.pop(dropped_key, None)
    return json.dumps(context, ensure_ascii=False) if context else ""
|
||||
|
||||
|
||||
@@ -31,7 +44,16 @@ def serialize(record):
|
||||
|
||||
def patching(record):
|
||||
record["extra"]["serialized"] = serialize(record)
|
||||
record["extra"]["serialize_no_message"] = serialize_no_message(record)
|
||||
record["extra"]["serialize_for_console"] = serialize_for_console(record)
|
||||
|
||||
|
||||
def format_for_human_readable_console():
    """Return the format string used for human-readable console logs.

    The string has four ``" | "``-separated columns: timestamp, level
    (left-aligned, padded to 8 characters), ``file:function:line``, and the
    ``serialize_for_console`` extra followed by the message.
    """
    columns = [
        "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green>",
        "<level>{level: <8}</level>",
        "<cyan>{file}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan>",
        "{extra[serialize_for_console]} <level>{message}</level>",
    ]
    return " | ".join(columns)
|
||||
|
||||
|
||||
logger = logger.patch(patching)
|
||||
|
||||
Reference in New Issue
Block a user