Store channel countries as a JSONB list and index language/country lookups.

Review notes:
  * The line breaks and indentation of this patch were lost in transit. The
    hunk structure below is reconstructed; the indentation of context lines
    and the position of blank context lines are assumptions. Apply with
    `git apply --ignore-whitespace` (or `patch -l`) and confirm against the
    target files.
  * The `index` header lines are kept from the original patch; their
    post-image hashes no longer match because the added lines were revised.
  * `channels.country` gets a GIN index instead of `index = True`: the default
    B-tree index on a JSONB column only helps whole-document comparisons, not
    containment/membership queries on the country list.
  * Country parsing is centralised in `parse_countries`, which returns an
    empty list for empty/missing cells and drops blank segments instead of
    storing '' as a country.

diff --git a/cisticola/base.py b/cisticola/base.py
index 01df6f4..6825569 100644
--- a/cisticola/base.py
+++ b/cisticola/base.py
@@ -7,6 +7,7 @@ import io
 
 from sqlalchemy.orm import registry
 from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey, Boolean, Index
+from sqlalchemy.dialects.postgresql import JSONB
 import pytesseract
 import PIL
 import exiftool
@@ -475,7 +476,8 @@ channel_table = Table('channels', mapper_registry.metadata,
     Column('platform', String),
     Column('url', String),
     Column('screenname', String),
-    Column('country', String),
+    Column('country', JSONB),
+    Index('ix_channels_country', 'country', postgresql_using='gin'),
     Column('influencer', String),
     Column('public', Boolean),
     Column('chat', Boolean),
@@ -511,7 +513,7 @@ post_table = Table('posts', mapper_registry.metadata,
     Column('views', Integer),
     Column('video_title', String),
     Column('video_duration', Integer),
-    Column('detected_language', String),
+    Column('detected_language', String, index = True),
     Column('normalized_content', String)
 )
 
diff --git a/sync_with_gsheet.py b/sync_with_gsheet.py
index 1c15fbe..5f939f1 100644
--- a/sync_with_gsheet.py
+++ b/sync_with_gsheet.py
@@ -4,6 +4,27 @@ from loguru import logger
 
 from cisticola.base import Channel, ChannelInfo
 
+def standardize_country(s):
+    """Return the bare country name from one spreadsheet entry.
+
+    Everything from the first '(' or '?' onwards is dropped and the
+    surrounding whitespace is stripped, e.g. 'Canada (Quebec)' -> 'Canada'.
+    """
+    return s.split('(')[0].split('?')[0].strip()
+
+
+def parse_countries(raw):
+    """Split a '/'-separated country cell into a list of standardized names.
+
+    Empty or missing cells yield an empty list, and blank segments
+    (e.g. from a trailing '/') are dropped rather than stored as ''.
+    """
+    if not raw:
+        return []
+    names = (standardize_country(part) for part in raw.split('/'))
+    return [name for name in names if name]
+
+
 def sync_channels(args, session):
     logger.info("Synchronizing channels")
 
@@ -73,7 +94,7 @@ def sync_channels(args, session):
             channel.platform = c["platform"]
             channel.url = c["url"]
             channel.screenname = c["screenname"]
-            channel.country = c["country"]
+            channel.country = parse_countries(c["country"])
             channel.influencer = c["influencer"]
             channel.public = c["public"]
             channel.chat = c["chat"]
@@ -114,7 +135,7 @@ def sync_channels(args, session):
             channel.platform = c["platform"]
             channel.url = c["url"]
             channel.screenname = c["screenname"]
-            channel.country = c["country"]
+            channel.country = parse_countries(c["country"])
             channel.influencer = c["influencer"]
             channel.public = c["public"]
             channel.chat = c["chat"]