mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-11 04:48:33 +03:00
Scrape snowball_complete sampled channels
This commit is contained in:
@@ -343,7 +343,7 @@ class ScraperController:
|
||||
session = self.session()
|
||||
|
||||
# TODO: there should be a better, more generic way of selecting scrapeable channels
|
||||
channels = session.query(Channel).filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).all()
|
||||
channels = session.query(Channel).filter((Channel.source=='researcher')|(Channel.source=='snowball_it')|(Channel.source=='snowball_complete')).all()
|
||||
|
||||
session.close()
|
||||
|
||||
@@ -360,7 +360,7 @@ class ScraperController:
|
||||
# Sort the channels so that the least recently scraped come first; channels with no archive date yet (never scraped) sort ahead of all others.
|
||||
most_recently_archived = session.query(func.max(RawChannelInfo.date_archived).label("date"), RawChannelInfo.channel.label("channel")).group_by(RawChannelInfo.channel).subquery()
|
||||
channels = session.query(Channel).\
|
||||
filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).\
|
||||
filter((Channel.source=='researcher')|(Channel.source=='snowball_it')|(Channel.source=='snowball_complete')).\
|
||||
outerjoin(most_recently_archived, Channel.id == most_recently_archived.c.channel).\
|
||||
order_by(nullsfirst(most_recently_archived.c.date.asc())).all()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user