mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
Added whitelist and blacklist for worksheets (not spreadsheets)
This commit is contained in:
@@ -58,6 +58,19 @@ def process_sheet(c: Config):
|
||||
|
||||
# loop through worksheets to check
|
||||
for ii, wks in enumerate(sh.worksheets()):
|
||||
|
||||
whitelist = c.worksheet_whitelist
|
||||
if whitelist is not None:
|
||||
if wks.title != whitelist:
|
||||
logger.debug(f'Ignoring worksheet {wks.title} as not in whitelist which is specified as {whitelist}')
|
||||
continue
|
||||
|
||||
blacklist = c.worksheet_blacklist
|
||||
if blacklist is not None:
|
||||
if wks.title == blacklist:
|
||||
logger.debug(f'Ignoring worksheet {wks.title} as in blacklist')
|
||||
continue
|
||||
|
||||
logger.info(f'Opening worksheet {ii=}: {wks.title=} {c.header=}')
|
||||
gw = GWorksheet(wks, header_row=c.header, columns=c.column_names)
|
||||
|
||||
|
||||
@@ -50,6 +50,10 @@ class Config:
|
||||
|
||||
self.sheet = getattr_or(self.args, "sheet", execution.get("sheet"))
|
||||
assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file"
|
||||
|
||||
self.worksheet_whitelist = execution.get("worksheet_whitelist")
|
||||
self.worksheet_blacklist = execution.get("worksheet_blacklist")
|
||||
|
||||
self.header = int(getattr_or(self.args, "header", execution.get("header", 1)))
|
||||
self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3"))
|
||||
self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False)
|
||||
|
||||
@@ -65,6 +65,14 @@ secrets:
|
||||
execution:
|
||||
# can be overwritten with CMD --sheet=
|
||||
sheet: your-sheet-name
|
||||
|
||||
# only check this worksheet rather than iterating through all worksheets in the spreadsheet. If whitelist is used then blacklist is ignored as whitelist is more restrictive.
|
||||
# worksheet_whitelist: Sheet1
|
||||
|
||||
# worksheet to blacklist (skip). Leave blank, which is the default, to blacklist none. Useful if users want to keep a MASTERSHEET that is an exact copy of the working worksheet
|
||||
# worksheet_blacklist: MASTERSHEET
|
||||
|
||||
|
||||
# which row of your tabs contains the header, can be overwritten with CMD --header=
|
||||
header: 1
|
||||
# which storage to use, can be overwritten with CMD --storage=
|
||||
|
||||
Reference in New Issue
Block a user