mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-11 21:08:34 +03:00
Implemented minor fixes recommended by pylint (unused imports, f-strings without placeholders, etc.)
This commit is contained in:
15
app.py
15
app.py
@@ -1,5 +1,4 @@
|
||||
import argparse
|
||||
from asyncio import streams
|
||||
from loguru import logger
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
@@ -83,28 +82,28 @@ def get_transformer_controller(args):
|
||||
|
||||
|
||||
def scrape_channels(args):
|
||||
logger.info(f"Scraping channels")
|
||||
logger.info("Scraping channels")
|
||||
|
||||
controller = get_scraper_controller(args)
|
||||
controller.scrape_all_channels()
|
||||
|
||||
|
||||
def scrape_channels_old(args):
|
||||
logger.info(f"Scraping old posts from channels")
|
||||
logger.info("Scraping old posts from channels")
|
||||
|
||||
controller = get_scraper_controller(args)
|
||||
controller.scrape_all_channels(fetch_old=True)
|
||||
|
||||
|
||||
def scrape_channel_info(args):
|
||||
logger.info(f"Scraping channel info")
|
||||
logger.info("Scraping channel info")
|
||||
|
||||
controller = get_scraper_controller(args)
|
||||
controller.scrape_all_channel_info()
|
||||
|
||||
|
||||
def archive_media(args):
|
||||
logger.info(f"Archiving unarchived media")
|
||||
logger.info("Archiving unarchived media")
|
||||
|
||||
controller = get_scraper_controller(args)
|
||||
|
||||
@@ -115,7 +114,7 @@ def archive_media(args):
|
||||
|
||||
|
||||
def transform(args):
|
||||
logger.info(f"Transforming untransformed posts")
|
||||
logger.info("Transforming untransformed posts")
|
||||
|
||||
controller = get_transformer_controller(args)
|
||||
|
||||
@@ -128,7 +127,7 @@ def transform(args):
|
||||
|
||||
|
||||
def transform_info(args):
|
||||
logger.info(f"Transforming untransformed channel info")
|
||||
logger.info("Transforming untransformed channel info")
|
||||
|
||||
controller = get_transformer_controller(args)
|
||||
controller.transform_all_untransformed_info()
|
||||
@@ -137,7 +136,7 @@ def transform_info(args):
|
||||
|
||||
|
||||
def transform_media(args):
|
||||
logger.info(f"Transforming untransformed channel media")
|
||||
logger.info("Transforming untransformed channel media")
|
||||
|
||||
controller = get_transformer_controller(args)
|
||||
controller.transform_all_untransformed_media()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import List, Optional
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
import tempfile
|
||||
@@ -22,7 +22,7 @@ import pytesseract
|
||||
import PIL
|
||||
import exiftool
|
||||
import re
|
||||
from langdetect import detect, DetectorFactory
|
||||
from langdetect import detect
|
||||
from langdetect.lang_detect_exception import LangDetectException
|
||||
from loguru import logger
|
||||
import spacy
|
||||
|
||||
@@ -5,7 +5,6 @@ from html.parser import HTMLParser
|
||||
import dateparser
|
||||
import json
|
||||
from typing import Generator, Optional
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
from typing import List, Generator, Union, Callable
|
||||
from typing import List, Callable
|
||||
from loguru import logger
|
||||
from sqlalchemy import cast, String
|
||||
from sqlalchemy.orm import sessionmaker, make_transient, Session
|
||||
from sqlalchemy.orm import sessionmaker, Session
|
||||
from sqlalchemy.engine.base import Engine
|
||||
from sqlalchemy.sql.expression import func
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from cisticola.base import (
|
||||
@@ -295,7 +293,7 @@ class ETLController:
|
||||
and instance.source != "researcher"
|
||||
and (instance.source is None or instance.source[:4] != "snow")
|
||||
):
|
||||
logger.info(f"Updating source to linked channel")
|
||||
logger.info("Updating source to linked channel")
|
||||
instance.source = obj.source
|
||||
instance.notes = obj.notes
|
||||
instance.category = obj.category
|
||||
@@ -364,7 +362,7 @@ class ETLController:
|
||||
|
||||
break
|
||||
|
||||
if handled == False:
|
||||
if not handled:
|
||||
logger.warning(
|
||||
f"No Transformer could handle ID {result.id} with platform {result.platform} ({result.date})"
|
||||
)
|
||||
@@ -465,7 +463,7 @@ class ETLController:
|
||||
session.commit()
|
||||
break
|
||||
|
||||
if handled == False:
|
||||
if not handled:
|
||||
logger.warning(
|
||||
f"No Transformer could handle raw channel info ID {result.id} with platform {result.platform} ({result.date_archived})"
|
||||
)
|
||||
@@ -547,7 +545,7 @@ class ETLController:
|
||||
session.commit()
|
||||
break
|
||||
|
||||
if handled == False:
|
||||
if not handled:
|
||||
logger.warning(
|
||||
f"No Transformer could handle ID {result.id} with platform {result.platform} ({result.date})"
|
||||
)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
from loguru import logger
|
||||
from typing import Generator, Union, Callable
|
||||
from typing import Callable
|
||||
from datetime import datetime, timezone
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
@@ -12,10 +11,7 @@ from cisticola.base import (
|
||||
RawChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Image,
|
||||
Video,
|
||||
Media,
|
||||
Channel,
|
||||
ChannelInfo,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import json
|
||||
from loguru import logger
|
||||
from typing import Generator, Union, Callable
|
||||
import dateutil.parser
|
||||
from typing import Callable
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -15,9 +13,6 @@ from cisticola.base import (
|
||||
ChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Image,
|
||||
Video,
|
||||
Media,
|
||||
Channel,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import json
|
||||
from loguru import logger
|
||||
from typing import Generator, Union, Callable, Optional
|
||||
from typing import Callable, Optional
|
||||
import dateutil.parser
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import func, JSON, String, cast, text
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from cisticola.transformer.base import Transformer
|
||||
@@ -12,10 +11,6 @@ from cisticola.base import (
|
||||
ChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Image,
|
||||
Video,
|
||||
Media,
|
||||
Channel,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
import json
|
||||
from loguru import logger
|
||||
from typing import Generator, Union, Callable
|
||||
from typing import Callable
|
||||
import dateutil.parser
|
||||
from bs4 import BeautifulSoup
|
||||
from psycopg2 import DatabaseError
|
||||
import requests
|
||||
import time
|
||||
from telethon.sync import TelegramClient
|
||||
from telethon.errors.rpcerrorlist import ChannelPrivateError, ChannelInvalidError
|
||||
from telethon.tl import types
|
||||
@@ -24,10 +22,6 @@ from cisticola.base import (
|
||||
ChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Image,
|
||||
Video,
|
||||
Audio,
|
||||
Media,
|
||||
Channel,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user