mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-13 05:38:29 +03:00
concludes logging standardization refactor
This commit is contained in:
@@ -94,7 +94,7 @@ class Extractor(BaseModule):
|
||||
to_filename = to_filename[-64:]
|
||||
to_filename = os.path.join(self.tmp_dir, to_filename)
|
||||
if verbose:
|
||||
logger.debug(f"downloading {to_filename=}")
|
||||
logger.debug(f"Downloading {to_filename=}")
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ class Media:
|
||||
@property # getter .mimetype
|
||||
def mimetype(self) -> str:
|
||||
if not self.filename or len(self.filename) == 0:
|
||||
logger.warning(f"cannot get mimetype from media without filename: {self}")
|
||||
logger.warning(f"Cannot get mimetype from media without filename: {self}")
|
||||
return ""
|
||||
if not self._mimetype:
|
||||
self._mimetype = mimetypes.guess_type(self.filename)[0]
|
||||
@@ -116,7 +116,7 @@ class Media:
|
||||
# self.is_video() should be used together with this method
|
||||
try:
|
||||
streams = ffmpeg.probe(self.filename, select_streams="v")["streams"]
|
||||
logger.debug(f"STREAMS FOR {self.filename} {streams}")
|
||||
logger.debug(f"Streams for {self.filename}: {streams}")
|
||||
return any(s.get("duration_ts", 0) > 0 for s in streams)
|
||||
except Error:
|
||||
return False # ffmpeg errors when reading bad files
|
||||
|
||||
@@ -539,11 +539,11 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
|
||||
for feeder in self.feeders:
|
||||
for item in feeder:
|
||||
with logger.contextualize(url=item.get_url(), trace=random_str(12)):
|
||||
logger.info("started processing")
|
||||
logger.info("Started processing")
|
||||
yield self.feed_item(item)
|
||||
url_count += 1
|
||||
|
||||
logger.info(f"processed {url_count} URL(s)")
|
||||
logger.info(f"Processed {url_count} URL(s)")
|
||||
self.cleanup()
|
||||
|
||||
def feed_item(self, item: Metadata) -> Metadata:
|
||||
@@ -561,7 +561,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
|
||||
return self.archive(item)
|
||||
except KeyboardInterrupt:
|
||||
# catches keyboard interruptions to do a clean exit
|
||||
logger.warning("caught interrupt")
|
||||
logger.warning("Caught interrupt")
|
||||
for d in self.databases:
|
||||
d.aborted(item)
|
||||
self.cleanup()
|
||||
@@ -620,25 +620,25 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
|
||||
try:
|
||||
d.done(cached_result, cached=True)
|
||||
except Exception as e:
|
||||
logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
|
||||
logger.error(f"Database {d.name}: {e}: {traceback.format_exc()}")
|
||||
return cached_result
|
||||
|
||||
# 3 - call extractors until one succeeds
|
||||
for a in self.extractors:
|
||||
logger.info(f"trying extractor {a.name}")
|
||||
logger.info(f"Trying extractor {a.name}")
|
||||
try:
|
||||
result.merge(a.download(result))
|
||||
if result.is_success():
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"archiver {a.name}: {e}: {traceback.format_exc()}")
|
||||
logger.error(f"Extractor {a.name}: {e}: {traceback.format_exc()}")
|
||||
|
||||
# 4 - call enrichers to work with archived content
|
||||
for e in self.enrichers:
|
||||
try:
|
||||
e.enrich(result)
|
||||
except Exception as exc:
|
||||
logger.error(f"enricher {e.name}: {exc}: {traceback.format_exc()}")
|
||||
logger.error(f"Enricher {e.name}: {exc}: {traceback.format_exc()}")
|
||||
|
||||
# 5 - store all downloaded/generated media
|
||||
result.store(storages=self.storages)
|
||||
@@ -657,7 +657,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
|
||||
try:
|
||||
d.done(result)
|
||||
except Exception as e:
|
||||
logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
|
||||
logger.error(f"Database {d.name}: {e}: {traceback.format_exc()}")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
Reference in New Issue
Block a user