diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index e8d2d7d..a5030ba 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -342,7 +342,7 @@ class ScraperController: session = self.session() - channels = session.query(Channel).where(Channel.source=='researcher').all() + channels = session.query(Channel).filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).all() session.close() @@ -359,7 +359,7 @@ class ScraperController: # This will sort the channels by the least recently scraped. most_recently_archived = session.query(func.max(RawChannelInfo.date_archived).label("date"), RawChannelInfo.channel.label("channel")).group_by(RawChannelInfo.channel).subquery() channels = session.query(Channel).\ - where(Channel.source=='researcher').\ + filter((Channel.source=='researcher')|(Channel.source=='snowball_it')).\ outerjoin(most_recently_archived, Channel.id == most_recently_archived.c.channel).\ order_by(nullsfirst(most_recently_archived.c.date.asc())).all() @@ -460,7 +460,7 @@ class ScraperController: for scraper in self.scrapers: # compare major versions - if scraper.__version__.split('.')[0] == post.scraper.split('.')[0]: + if post.scraper is not None and scraper.__version__.split('.')[0] == post.scraper.split('.')[0]: handled = True logger.debug(f"{scraper} is archiving media for ID {post.id}") post = scraper.archive_files(post) diff --git a/sync_with_gsheet.py b/sync_with_gsheet.py index 72eee59..1c15fbe 100644 --- a/sync_with_gsheet.py +++ b/sync_with_gsheet.py @@ -34,6 +34,7 @@ def sync_channels(args, session): # add new channel if c["id"] == "" or c["id"] is None: del c["id"] + del c["normalized_url"] # check to see if this already exists, platform_id = None @@ -55,7 +56,7 @@ def sync_channels(args, session): channel = session.query(Channel).filter_by(platform=str(c["platform"]), screenname=str(c["screenname"])).first() if not channel: - channel = Channel(**c, source="researcher") + channel = Channel(**c) logger.debug(f"{channel} does not exist, adding") session.add(channel) session.flush() @@ -77,7 +78,7 @@ def sync_channels(args, session): channel.public = c["public"] channel.chat = c["chat"] channel.notes = c["notes"] - channel.source = "researcher" + channel.source = c["source"] session.flush() session.commit() @@ -118,7 +119,7 @@ def sync_channels(args, session): channel.public = c["public"] channel.chat = c["chat"] channel.notes = c["notes"] - channel.source = "researcher" + channel.source = c["source"] if channel_info and channel.screenname != channel_info.screenname: channel.screenname = channel_info.screenname