mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-07 19:08:30 +03:00
logging and clean up
This commit is contained in:
@@ -10,7 +10,7 @@ The filtered rows are processed into `Metadata` objects.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Tuple, Union
|
||||
from typing import Tuple, Union, Iterator
|
||||
from urllib.parse import quote
|
||||
|
||||
import gspread
|
||||
@@ -33,10 +33,10 @@ class GsheetsFeederDB(Feeder, Database):
|
||||
def open_sheet(self):
|
||||
if self.sheet:
|
||||
return self.gsheets_client.open(self.sheet)
|
||||
else: # self.sheet_id
|
||||
else:
|
||||
return self.gsheets_client.open_by_key(self.sheet_id)
|
||||
|
||||
def __iter__(self) -> Metadata:
|
||||
def __iter__(self) -> Iterator[Metadata]:
|
||||
sh = self.open_sheet()
|
||||
for ii, worksheet in enumerate(sh.worksheets()):
|
||||
if not self.should_process_sheet(worksheet.title):
|
||||
@@ -45,14 +45,14 @@ class GsheetsFeederDB(Feeder, Database):
|
||||
logger.info(f"Opening worksheet {ii=}: {worksheet.title=} header={self.header}")
|
||||
gw = GWorksheet(worksheet, header_row=self.header, columns=self.columns)
|
||||
if len(missing_cols := self.missing_required_columns(gw)):
|
||||
logger.warning(
|
||||
logger.debug(
|
||||
f"SKIPPED worksheet '{worksheet.title}' due to missing required column(s) for {missing_cols}"
|
||||
)
|
||||
continue
|
||||
|
||||
# process and yield metadata here:
|
||||
yield from self._process_rows(gw)
|
||||
logger.success(f"Finished worksheet {worksheet.title}")
|
||||
logger.info(f"Finished worksheet {worksheet.title}")
|
||||
|
||||
def _process_rows(self, gw: GWorksheet):
|
||||
for row in range(1 + self.header, gw.count_rows() + 1):
|
||||
|
||||
Reference in New Issue
Block a user