Merge pull request #66 from bellingcat/country-language-searching

Updated ORM and sync to improve filtering by language and country
This commit is contained in:
Tristan Lee
2022-10-26 12:25:59 -05:00
committed by GitHub
2 changed files with 10 additions and 4 deletions

View File

@@ -7,6 +7,7 @@ import io
from sqlalchemy.orm import registry
from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey, Boolean, Index
from sqlalchemy.dialects.postgresql import JSONB
import pytesseract
import PIL
import exiftool
@@ -475,7 +476,7 @@ channel_table = Table('channels', mapper_registry.metadata,
Column('platform', String),
Column('url', String),
Column('screenname', String),
Column('country', String),
Column('country', JSONB, index = True),
Column('influencer', String),
Column('public', Boolean),
Column('chat', Boolean),
@@ -511,7 +512,7 @@ post_table = Table('posts', mapper_registry.metadata,
Column('views', Integer),
Column('video_title', String),
Column('video_duration', Integer),
Column('detected_language', String),
Column('detected_language', String, index = True),
Column('normalized_content', String)
)

View File

@@ -4,6 +4,11 @@ from loguru import logger
from cisticola.base import Channel, ChannelInfo
def standardize_country(s):
    """Return the leading country name in *s* with trailing annotations removed.

    Everything from the first ``(`` onward is discarded, then everything
    from the first ``?`` onward, and finally surrounding whitespace is
    stripped from what remains.

    Args:
        s: A single country entry as a string.

    Returns:
        The cleaned string; empty if nothing precedes the first ``(`` or
        ``?`` (or if only whitespace does).
    """
    # Cut at '(' first, then at '?', so either marker ends the name.
    name = s.split('(')[0].split('?')[0]
    return name.strip()
def sync_channels(args, session):
logger.info("Synchronizing channels")
@@ -73,7 +78,7 @@ def sync_channels(args, session):
channel.platform = c["platform"]
channel.url = c["url"]
channel.screenname = c["screenname"]
channel.country = c["country"]
channel.country = list(map(standardize_country, c["country"].split('/')))
channel.influencer = c["influencer"]
channel.public = c["public"]
channel.chat = c["chat"]
@@ -114,7 +119,7 @@ def sync_channels(args, session):
channel.platform = c["platform"]
channel.url = c["url"]
channel.screenname = c["screenname"]
channel.country = c["country"]
channel.country = list(map(standardize_country, c["country"].split('/')))
channel.influencer = c["influencer"]
channel.public = c["public"]
channel.chat = c["chat"]