Changed the ORM and Google Sheet sync to reflect the conversion of channels.country to a JSONB array; added an index on detected_language

This commit is contained in:
Tristan Lee
2022-10-26 08:16:49 -05:00
parent f000c6246e
commit 3bb5af11e6
2 changed files with 13 additions and 4 deletions

View File

@@ -7,6 +7,7 @@ import io
from sqlalchemy.orm import registry
from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey, Boolean, Index
from sqlalchemy.dialects.postgresql import JSONB
import pytesseract
import PIL
import exiftool
@@ -475,7 +476,7 @@ channel_table = Table('channels', mapper_registry.metadata,
Column('platform', String),
Column('url', String),
Column('screenname', String),
Column('country', String),
Column('country', JSONB),
Column('influencer', String),
Column('public', Boolean),
Column('chat', Boolean),
@@ -511,7 +512,7 @@ post_table = Table('posts', mapper_registry.metadata,
Column('views', Integer),
Column('video_title', String),
Column('video_duration', Integer),
Column('detected_language', String),
Column('detected_language', String, index = True),
Column('normalized_content', String)
)

View File

@@ -4,6 +4,14 @@ from loguru import logger
from cisticola.base import Channel, ChannelInfo
def standardize_country(s):
    """Return a cleaned-up country code extracted from the string *s*.

    Everything from the first ``(`` or ``?`` onward is discarded and
    surrounding whitespace is removed. The three-letter code ``AUS`` is
    mapped to the two-letter ``AU``; any other value is returned as-is
    after cleaning.

    Args:
        s: A single country entry as a string, e.g. ``"US"``,
            ``" AUS"`` or ``"DE (unconfirmed)"``.

    Returns:
        The stripped country code as a string.
    """
    # Strip *before* comparing: entries such as "AUS (note)" or the
    # " AUS" piece of "US / AUS" carry whitespace that previously made
    # the 'AUS' check miss, leaving the code un-normalized.
    code = s.split('(')[0].split('?')[0].strip()
    if code == 'AUS':
        return 'AU'
    return code
def sync_channels(args, session):
logger.info("Synchronizing channels")
@@ -73,7 +81,7 @@ def sync_channels(args, session):
channel.platform = c["platform"]
channel.url = c["url"]
channel.screenname = c["screenname"]
channel.country = c["country"]
channel.country = list(map(standardize_country, c["country"].split('/')))
channel.influencer = c["influencer"]
channel.public = c["public"]
channel.chat = c["chat"]
@@ -114,7 +122,7 @@ def sync_channels(args, session):
channel.platform = c["platform"]
channel.url = c["url"]
channel.screenname = c["screenname"]
channel.country = c["country"]
channel.country = list(map(standardize_country, c["country"].split('/')))
channel.influencer = c["influencer"]
channel.public = c["public"]
channel.chat = c["chat"]