Limit max # of archived files per session

This commit is contained in:
Logan Williams
2022-04-12 12:57:04 +00:00
parent 36c81c8e17
commit d1f9dd0e01
2 changed files with 3 additions and 1 deletion

2
.gitignore vendored
View File

@@ -9,9 +9,11 @@ docs/source/_*
*.db
.env
*.session
*.session-journal
service_account.json
.vscode/
*.log
*.lock
# Unit test / coverage reports
reports

View File

@@ -421,7 +421,7 @@ class ScraperController:
# This query is really slow (~2.5 minutes) because of the random shuffle. The shuffle is there so that multiple media archivers can work
# simultaneously with a low risk of collision (at least while the number of unarchived items is very large).
posts = session.query(ScraperResult).where(ScraperResult.media_archived == None).order_by(func.random()).limit(10000).all()
posts = session.query(ScraperResult).where(ScraperResult.media_archived == None).order_by(func.random()).limit(4000).all()
logger.info(f"Found {len(posts)} posts without media. Archiving now")