mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-08 03:18:34 +03:00
Merge pull request #66 from bellingcat/country-language-searching
Updated ORM and sync to improve filtering by language and country
This commit is contained in:
@@ -7,6 +7,7 @@ import io
|
||||
|
||||
from sqlalchemy.orm import registry
|
||||
from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey, Boolean, Index
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
import pytesseract
|
||||
import PIL
|
||||
import exiftool
|
||||
@@ -475,7 +476,7 @@ channel_table = Table('channels', mapper_registry.metadata,
|
||||
Column('platform', String),
|
||||
Column('url', String),
|
||||
Column('screenname', String),
|
||||
Column('country', String),
|
||||
Column('country', JSONB, index = True),
|
||||
Column('influencer', String),
|
||||
Column('public', Boolean),
|
||||
Column('chat', Boolean),
|
||||
@@ -511,7 +512,7 @@ post_table = Table('posts', mapper_registry.metadata,
|
||||
Column('views', Integer),
|
||||
Column('video_title', String),
|
||||
Column('video_duration', Integer),
|
||||
Column('detected_language', String),
|
||||
Column('detected_language', String, index = True),
|
||||
Column('normalized_content', String)
|
||||
)
|
||||
|
||||
|
||||
@@ -4,6 +4,11 @@ from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ChannelInfo
|
||||
|
||||
def standardize_country(s):
    """Return the leading part of a raw country string, trimmed.

    Everything from the first ``(`` onwards is discarded, then everything
    from the first ``?`` onwards in what remains, and finally surrounding
    whitespace is removed.

    Args:
        s: Raw country text (a ``str``).

    Returns:
        The cleaned string; empty if nothing precedes the first ``(``/``?``.
    """
    # partition() yields the text before the first separator, or the whole
    # string when the separator is absent -- the same as split(sep)[0].
    before_paren, _, _ = s.partition('(')
    before_question, _, _ = before_paren.partition('?')
    return before_question.strip()
|
||||
|
||||
|
||||
def sync_channels(args, session):
|
||||
logger.info("Synchronizing channels")
|
||||
|
||||
@@ -73,7 +78,7 @@ def sync_channels(args, session):
|
||||
channel.platform = c["platform"]
|
||||
channel.url = c["url"]
|
||||
channel.screenname = c["screenname"]
|
||||
channel.country = c["country"]
|
||||
channel.country = list(map(standardize_country, c["country"].split('/')))
|
||||
channel.influencer = c["influencer"]
|
||||
channel.public = c["public"]
|
||||
channel.chat = c["chat"]
|
||||
@@ -114,7 +119,7 @@ def sync_channels(args, session):
|
||||
channel.platform = c["platform"]
|
||||
channel.url = c["url"]
|
||||
channel.screenname = c["screenname"]
|
||||
channel.country = c["country"]
|
||||
channel.country = list(map(standardize_country, c["country"].split('/')))
|
||||
channel.influencer = c["influencer"]
|
||||
channel.public = c["public"]
|
||||
channel.chat = c["chat"]
|
||||
|
||||
Reference in New Issue
Block a user