mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 05:08:28 +03:00
fixing auto_auto_archive
This commit is contained in:
15
README.md
15
README.md
@@ -140,7 +140,7 @@ With this configuration, the archiver should archive and store all media added t
|
||||
|
||||
# auto_auto_archiver
|
||||
|
||||
To make it easier to set up new auto-archiver sheets, the auto-auto-archiver will look at a particular sheet and run the auto-archiver on every sheet name in column A, starting from row 11. (It starts here to support instructional text in the first rows of the sheet, as shown below.) This script takes one command line argument, with `--sheet`, the name of the sheet. It must be shared with the same service account.
|
||||
To make it easier to set up new auto-archiver sheets, the auto-auto-archiver will look at a particular sheet and run the auto-archiver on every sheet name in column A, starting from row 11. (It starts here to support instructional text in the first rows of the sheet, as shown below.) You can use the same default config as for `auto_archive.py`, but use `--sheet` to specify the name of the sheet that lists the names of the sheets to archive. That sheet must be shared with the same service account.
|
||||
|
||||

|
||||
|
||||
@@ -152,15 +152,16 @@ Code is split into functional concepts:
|
||||
1. [GWorksheet](utils/gworksheet.py) - facilitates some of the reading/writing tasks for a Google Worksheet
|
||||
|
||||
### Current Archivers
|
||||
Archivers are tested in a meaningful order with Wayback Machine being the default, that can easily be changed in the code.
|
||||
Archivers are tried in a meaningful order, with the Wayback Machine as the failsafe; this order can easily be changed in the code.
|
||||
```mermaid
|
||||
graph TD
|
||||
A(Archiver) -->|parent of| B(YoutubeDLArchiver)
|
||||
A -->|parent of| C(TikTokArchiver)
|
||||
A -->|parent of| D(TwitterArchiver)
|
||||
A(Archiver) -->|parent of| B(TelethonArchiver)
|
||||
A -->|parent of| C(TiktokArchiver)
|
||||
A -->|parent of| D(YoutubeDLArchiver)
|
||||
A -->|parent of| E(TelegramArchiver)
|
||||
A -->|parent of| F(TelethonArchiver)
|
||||
A -->|parent of| G(WaybackArchiver)
|
||||
A -->|parent of| F(TwitterArchiver)
|
||||
A -->|parent of| G(VkArchiver)
|
||||
A -->|parent of| H(WaybackArchiver)
|
||||
```
|
||||
### Current Storages
|
||||
```mermaid
|
||||
|
||||
@@ -1,29 +1,30 @@
|
||||
import gspread
|
||||
import argparse
|
||||
import shutil
|
||||
import auto_archive
|
||||
from loguru import logger
|
||||
from configs import Config
|
||||
from storages import Storage
|
||||
from utils import mkdir_if_not_exists
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Automatically use youtube-dl to download media from a Google Sheet")
|
||||
parser.add_argument("--sheet", action="store", dest="sheet")
|
||||
c = Config()
|
||||
c.parse()
|
||||
logger.info(f'Opening document {c.sheet} to look for sheet names to archive')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
logger.info("Opening document " + args.sheet)
|
||||
|
||||
gc = gspread.service_account(filename='service_account.json')
|
||||
sh = gc.open(args.sheet)
|
||||
gc = c.gsheets_client
|
||||
sh = gc.open(c.sheet)
|
||||
|
||||
wks = sh.get_worksheet(0)
|
||||
values = wks.get_all_values()
|
||||
|
||||
mkdir_if_not_exists(Storage.TMP_FOLDER)
|
||||
for i in range(11, len(values)):
|
||||
sheet_name = values[i][0]
|
||||
c.sheet = values[i][0]
|
||||
logger.info(f"Processing {c.sheet}")
|
||||
auto_archive.process_sheet(c)
|
||||
c.destroy_webdriver()
|
||||
shutil.rmtree(Storage.TMP_FOLDER)
|
||||
|
||||
logger.info("Processing " + sheet_name)
|
||||
|
||||
auto_archive.process_sheet(sheet_name)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user