Scrape snowball_complete sampled channels

This commit is contained in:
Logan Williams
2022-10-26 13:11:20 +00:00
parent c15022402d
commit b023e8044c

View File

@@ -343,7 +343,7 @@ class ScraperController:
session = self.session()
# TODO: find a better, more generic way of selecting scrapeable channels
channels = session.query(Channel).filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).all()
channels = session.query(Channel).filter((Channel.source=='researcher')|(Channel.source=='snowball_it')|(Channel.source=='snowball_complete')).all()
session.close()
@@ -360,7 +360,7 @@ class ScraperController:
# Sort the channels so the least recently scraped come first; channels with no archived info yet (NULL date from the outer join) sort ahead of all others.
most_recently_archived = session.query(func.max(RawChannelInfo.date_archived).label("date"), RawChannelInfo.channel.label("channel")).group_by(RawChannelInfo.channel).subquery()
channels = session.query(Channel).\
filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).\
filter((Channel.source=='researcher')|(Channel.source=='snowball_it')|(Channel.source=='snowball_complete')).\
outerjoin(most_recently_archived, Channel.id == most_recently_archived.c.channel).\
order_by(nullsfirst(most_recently_archived.c.date.asc())).all()