From b023e8044c8744d8d9d02c8dc6ac7985f92e6b5c Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Wed, 26 Oct 2022 13:11:20 +0000 Subject: [PATCH] Scrape snowball_complete sampled channels --- cisticola/scraper/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 9ac7e03..598c01f 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -343,7 +343,7 @@ class ScraperController: session = self.session() # TODO there should be a better/more generic way of selecting scrapeable channels - channels = session.query(Channel).filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).all() + channels = session.query(Channel).filter((Channel.source=='researcher')|(Channel.source=='snowball_it')|(Channel.source=='snowball_complete')).all() session.close() @@ -360,7 +360,7 @@ class ScraperController: # This will sort the channels by the least recently scraped. most_recently_archived = session.query(func.max(RawChannelInfo.date_archived).label("date"), RawChannelInfo.channel.label("channel")).group_by(RawChannelInfo.channel).subquery() channels = session.query(Channel).\ - filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).\ + filter((Channel.source=='researcher')|(Channel.source=='snowball_it')|(Channel.source=='snowball_complete')).\ outerjoin(most_recently_archived, Channel.id == most_recently_archived.c.channel).\ order_by(nullsfirst(most_recently_archived.c.date.asc())).all()