From 6ebce974f016cb102d685d0dcec1c7f4b61333b2 Mon Sep 17 00:00:00 2001
From: Logan Williams
Date: Mon, 28 Feb 2022 08:42:59 +0100
Subject: [PATCH] WIP: Make timezones more consistent in UTC

---
Review notes (text in this section is not part of the commit message):
 - Epoch timestamps are converted with fromtimestamp(..., tz=utc) instead of
   relabelling machine-local wall-clock time with a fixed +01:00 offset or
   with UTC.
 - The archiver now yields an ISO 8601 string (or None) on every branch, and
   update_sheet accepts None, str, int, float and datetime-like timestamps.
 - Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch, and the blob ids on the "index" lines predate these
   changes; re-diff against the tree before applying.

 archivers/youtubedl_archiver.py | 13 ++++++++++++-
 auto_archive.py                 | 37 ++++++++++++++++++++++++-------------
 2 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/archivers/youtubedl_archiver.py b/archivers/youtubedl_archiver.py
index 1e4c496..65c59be 100644
--- a/archivers/youtubedl_archiver.py
+++ b/archivers/youtubedl_archiver.py
@@ -89,7 +89,18 @@ class YoutubeDLArchiver(Archiver):
 
         os.remove(filename)
 
-        timestamp = info['timestamp'] if 'timestamp' in info else datetime.datetime.strptime(info['upload_date'], '%Y%m%d').timestamp() if 'upload_date' in info and info['upload_date'] is not None else None
+        # YoutubeDL's 'timestamp' is epoch seconds, i.e. an absolute instant, so it is
+        # converted straight to UTC without assuming any fixed local offset.
+        # TODO test YoutubeDL's date conventions for a variety of sources (Twitter, Youtube, etc);
+        # 'upload_date' is only a YYYYMMDD day and is presumed to be a UTC date here - confirm.
+        if info.get('timestamp') is not None:
+            timestamp = datetime.datetime.fromtimestamp(
+                info['timestamp'], tz=datetime.timezone.utc).isoformat()
+        elif info.get('upload_date') is not None:
+            timestamp = datetime.datetime.strptime(info['upload_date'], '%Y%m%d').replace(
+                tzinfo=datetime.timezone.utc).isoformat()
+        else:
+            timestamp = None
 
         return ArchiveResult(status=status, cdn_url=cdn_url, thumbnail=key_thumb, thumbnail_index=thumb_index, duration=duration,
                              title=info['title'] if 'title' in info else None, timestamp=timestamp, hash=hash, screenshot=screenshot)
diff --git a/auto_archive.py b/auto_archive.py
index 6b6917d..541c0a1 100644
--- a/auto_archive.py
+++ b/auto_archive.py
@@ -27,7 +27,7 @@ def update_sheet(gw, row, result: archivers.ArchiveResult):
     cell_updates.append((row, 'status', result.status))
 
     batch_if_valid('archive', result.cdn_url)
-    batch_if_valid('date', True, datetime.datetime.now().isoformat())
+    batch_if_valid('date', True, datetime.datetime.now(tz=datetime.timezone.utc).isoformat())
     batch_if_valid('thumbnail', result.thumbnail,
                    f'=IMAGE("{result.thumbnail}")')
     batch_if_valid('thumbnail_index', result.thumbnail_index)
@@ -36,10 +36,21 @@ def update_sheet(gw, row, result: archivers.ArchiveResult):
     batch_if_valid('screenshot', result.screenshot)
     batch_if_valid('hash', result.hash)
 
-    if result.timestamp and type(result.timestamp) != str:
-        result.timestamp = datetime.datetime.fromtimestamp(
-            result.timestamp).isoformat()
-    batch_if_valid('timestamp', result.timestamp)
+    # Normalise the archiver's timestamp to an ISO 8601 string before writing it.
+    if result.timestamp is None or isinstance(result.timestamp, str):
+        # Missing, or already formatted by the archiver: pass through untouched.
+        timestamp_string = result.timestamp
+    elif isinstance(result.timestamp, (int, float)):
+        # Epoch seconds (strptime(...).timestamp() yields a float, not an int).
+        # Convert with an explicit tz; fromtimestamp() without one returns local
+        # wall-clock time, which must not simply be relabelled as UTC.
+        timestamp_string = datetime.datetime.fromtimestamp(
+            result.timestamp, tz=datetime.timezone.utc).isoformat()
+    else:
+        # Anything else is expected to be a datetime-like object.
+        timestamp_string = result.timestamp.isoformat()
+
+    batch_if_valid('timestamp', timestamp_string)
 
     gw.batch_set_cell(cell_updates)
 
@@ -115,14 +126,14 @@ def process_sheet(sheet, header=1):
                 for archiver in active_archivers:
                     logger.debug(f'Trying {archiver} on row {row}')
 
-                    # TODO: add support for multiple videos/images
-                    # try:
-                    result = archiver.download(
-                        url, check_if_exists=True)
-                    # except Exception as e:
-                    #     result = False
-                    # logger.error(
-                    #     f'Got unexpected error in row {row} with archiver {archiver} for url {url}: {e}')
+                    # Catch and log so an unexpected error inside one archiver does not
+                    # crash the run; result = False makes the `if result:` check skip it.
+                    try:
+                        result = archiver.download(url, check_if_exists=True)
+                    except Exception as e:
+                        result = False
+                        logger.error(
+                            f'Got unexpected error in row {row} with archiver {archiver} for url {url}: {e}')
 
                     if result:
                         if result.status in ['success', 'already archived']: