diff --git a/cisticola/base.py b/cisticola/base.py index 01df6f4..4e0b77f 100644 --- a/cisticola/base.py +++ b/cisticola/base.py @@ -7,6 +7,7 @@ import io from sqlalchemy.orm import registry from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey, Boolean, Index +from sqlalchemy.dialects.postgresql import JSONB import pytesseract import PIL import exiftool @@ -475,7 +476,7 @@ channel_table = Table('channels', mapper_registry.metadata, Column('platform', String), Column('url', String), Column('screenname', String), - Column('country', String), + Column('country', JSONB), Column('influencer', String), Column('public', Boolean), Column('chat', Boolean), @@ -511,7 +512,7 @@ post_table = Table('posts', mapper_registry.metadata, Column('views', Integer), Column('video_title', String), Column('video_duration', Integer), - Column('detected_language', String), + Column('detected_language', String, index = True), Column('normalized_content', String) ) diff --git a/sync_with_gsheet.py b/sync_with_gsheet.py index 1c15fbe..fcd848a 100644 --- a/sync_with_gsheet.py +++ b/sync_with_gsheet.py @@ -4,6 +4,14 @@ from loguru import logger from cisticola.base import Channel, ChannelInfo +def standardize_country(s): + _s = s.split('(')[0].split('?')[0] + if _s == 'AUS': + return 'AU' + else: + return _s.strip() + + def sync_channels(args, session): logger.info("Synchronizing channels") @@ -73,7 +81,7 @@ def sync_channels(args, session): channel.platform = c["platform"] channel.url = c["url"] channel.screenname = c["screenname"] - channel.country = c["country"] + channel.country = list(map(standardize_country, c["country"].split('/'))) channel.influencer = c["influencer"] channel.public = c["public"] channel.chat = c["chat"] @@ -114,7 +122,7 @@ def sync_channels(args, session): channel.platform = c["platform"] channel.url = c["url"] channel.screenname = c["screenname"] - channel.country = c["country"] + channel.country = list(map(standardize_country, c["country"].split('/'))) channel.influencer = c["influencer"] channel.public = c["public"] channel.chat = c["chat"]