mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-08 03:18:34 +03:00
Changed the ORM and Google Sheet sync to reflect converting channels.country to a JSONB array; added an index for detected_language.
This commit is contained in:
@@ -7,6 +7,7 @@ import io
|
||||
|
||||
from sqlalchemy.orm import registry
|
||||
from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey, Boolean, Index
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
import pytesseract
|
||||
import PIL
|
||||
import exiftool
|
||||
@@ -475,7 +476,7 @@ channel_table = Table('channels', mapper_registry.metadata,
|
||||
Column('platform', String),
|
||||
Column('url', String),
|
||||
Column('screenname', String),
|
||||
Column('country', String),
|
||||
Column('country', JSONB),
|
||||
Column('influencer', String),
|
||||
Column('public', Boolean),
|
||||
Column('chat', Boolean),
|
||||
@@ -511,7 +512,7 @@ post_table = Table('posts', mapper_registry.metadata,
|
||||
Column('views', Integer),
|
||||
Column('video_title', String),
|
||||
Column('video_duration', Integer),
|
||||
Column('detected_language', String),
|
||||
Column('detected_language', String, index = True),
|
||||
Column('normalized_content', String)
|
||||
)
|
||||
|
||||
|
||||
@@ -4,6 +4,14 @@ from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ChannelInfo
|
||||
|
||||
def standardize_country(s):
    """Normalize a single country entry into a bare country code.

    Everything from the first ``(`` or ``?`` onward is discarded,
    surrounding whitespace is removed, and the code ``AUS`` is rewritten
    to ``AU``. Any other value is returned as-is after trimming.

    Args:
        s: One country entry as a string, e.g. ``"AUS (?)"`` or ``" US"``.

    Returns:
        The trimmed country code, with ``"AUS"`` mapped to ``"AU"``.
    """
    # Strip *before* comparing: cutting at "(" or "?" usually leaves
    # trailing whitespace ("AUS (?)" -> "AUS "), and pieces of a
    # "/"-separated list may carry leading whitespace (" AUS"). Comparing
    # the unstripped text would silently skip the AUS -> AU mapping.
    code = s.split('(')[0].split('?')[0].strip()
    return 'AU' if code == 'AUS' else code
|
||||
|
||||
|
||||
def sync_channels(args, session):
|
||||
logger.info("Synchronizing channels")
|
||||
|
||||
@@ -73,7 +81,7 @@ def sync_channels(args, session):
|
||||
channel.platform = c["platform"]
|
||||
channel.url = c["url"]
|
||||
channel.screenname = c["screenname"]
|
||||
channel.country = c["country"]
|
||||
channel.country = list(map(standardize_country, c["country"].split('/')))
|
||||
channel.influencer = c["influencer"]
|
||||
channel.public = c["public"]
|
||||
channel.chat = c["chat"]
|
||||
@@ -114,7 +122,7 @@ def sync_channels(args, session):
|
||||
channel.platform = c["platform"]
|
||||
channel.url = c["url"]
|
||||
channel.screenname = c["screenname"]
|
||||
channel.country = c["country"]
|
||||
channel.country = list(map(standardize_country, c["country"].split('/')))
|
||||
channel.influencer = c["influencer"]
|
||||
channel.public = c["public"]
|
||||
channel.chat = c["chat"]
|
||||
|
||||
Reference in New Issue
Block a user