mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 13:18:28 +03:00
simplifies access to google sheets, single get_values
This commit is contained in:
@@ -7,6 +7,7 @@ import gspread
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
|
import traceback
|
||||||
|
|
||||||
import archivers
|
import archivers
|
||||||
from storages import S3Storage, S3Config
|
from storages import S3Storage, S3Config
|
||||||
@@ -104,12 +105,10 @@ def process_sheet(sheet, header=1):
|
|||||||
archivers.WaybackArchiver(s3_client, driver)
|
archivers.WaybackArchiver(s3_client, driver)
|
||||||
]
|
]
|
||||||
|
|
||||||
values = gw.get_values()
|
|
||||||
# loop through rows in worksheet
|
# loop through rows in worksheet
|
||||||
for row in range(1 + header, gw.count_rows() + 1):
|
for row in range(1 + header, gw.count_rows() + 1):
|
||||||
row_values = values[row-1]
|
url = gw.get_cell(row, 'url')
|
||||||
url = gw.get_cell(row_values, 'url')
|
status = gw.get_cell(row, 'status')
|
||||||
status = gw.get_cell(row_values, 'status')
|
|
||||||
if url != '' and status in ['', None]:
|
if url != '' and status in ['', None]:
|
||||||
gw.set_cell(row, 'status', 'Archive in progress')
|
gw.set_cell(row, 'status', 'Archive in progress')
|
||||||
|
|
||||||
@@ -122,8 +121,7 @@ def process_sheet(sheet, header=1):
|
|||||||
result = archiver.download(url, check_if_exists=True)
|
result = archiver.download(url, check_if_exists=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result = False
|
result = False
|
||||||
logger.error(
|
logger.error(f'Got unexpected error in row {row} with archiver {archiver} for url {url}: {e}\n{traceback.format_exc()}')
|
||||||
f'Got unexpected error in row {row} with archiver {archiver} for url {url}: {e}')
|
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
if result.status in ['success', 'already archived']:
|
if result.status in ['success', 'already archived']:
|
||||||
|
|||||||
@@ -18,7 +18,8 @@ class GWorksheet:
|
|||||||
|
|
||||||
def __init__(self, worksheet, columns=COLUMN_NAMES, header_row=1):
|
def __init__(self, worksheet, columns=COLUMN_NAMES, header_row=1):
|
||||||
self.wks = worksheet
|
self.wks = worksheet
|
||||||
self.headers = [v.lower() for v in self.wks.row_values(header_row)]
|
self.values = self.wks.get_values()
|
||||||
|
self.headers = [v.lower() for v in self.values[header_row - 1]]
|
||||||
self.columns = columns
|
self.columns = columns
|
||||||
|
|
||||||
def _check_col_exists(self, col: str):
|
def _check_col_exists(self, col: str):
|
||||||
@@ -34,14 +35,14 @@ class GWorksheet:
|
|||||||
return self.columns[col] in self.headers
|
return self.columns[col] in self.headers
|
||||||
|
|
||||||
def count_rows(self):
|
def count_rows(self):
|
||||||
return len(self.wks.get_values())
|
return len(self.values)
|
||||||
|
|
||||||
def get_row(self, row: int):
|
def get_row(self, row: int):
|
||||||
# row is 1-based
|
# row is 1-based
|
||||||
return self.wks.row_values(row)
|
return self.values[row - 1]
|
||||||
|
|
||||||
def get_values(self):
|
def get_values(self):
|
||||||
return self.wks.get_values()
|
return self.values
|
||||||
|
|
||||||
def get_cell(self, row, col: str):
|
def get_cell(self, row, col: str):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user