From 1ec1d6190a4b7b8002864a4fd77c0a99a4a43ef4 Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Mon, 7 Aug 2023 19:39:03 -0500 Subject: [PATCH] implemented minor fixes recommended by pylint (unused imports, f-strings without patterns, etc.) --- app.py | 15 +++++++-------- cisticola/base.py | 4 ++-- cisticola/scraper/bitchute.py | 1 - cisticola/transformer/base.py | 14 ++++++-------- cisticola/transformer/bitchute.py | 6 +----- cisticola/transformer/gettr.py | 7 +------ cisticola/transformer/rumble.py | 9 ++------- cisticola/transformer/telegram_telethon.py | 8 +------- 8 files changed, 20 insertions(+), 44 deletions(-) diff --git a/app.py b/app.py index 02a45b2..ecea78b 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,4 @@ import argparse -from asyncio import streams from loguru import logger from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker @@ -83,28 +82,28 @@ def get_transformer_controller(args): def scrape_channels(args): - logger.info(f"Scraping channels") + logger.info("Scraping channels") controller = get_scraper_controller(args) controller.scrape_all_channels() def scrape_channels_old(args): - logger.info(f"Scraping old posts from channels") + logger.info("Scraping old posts from channels") controller = get_scraper_controller(args) controller.scrape_all_channels(fetch_old=True) def scrape_channel_info(args): - logger.info(f"Scraping channel info") + logger.info("Scraping channel info") controller = get_scraper_controller(args) controller.scrape_all_channel_info() def archive_media(args): - logger.info(f"Archiving unarchived media") + logger.info("Archiving unarchived media") controller = get_scraper_controller(args) @@ -115,7 +114,7 @@ def archive_media(args): def transform(args): - logger.info(f"Transforming untransformed posts") + logger.info("Transforming untransformed posts") controller = get_transformer_controller(args) @@ -128,7 +127,7 @@ def transform(args): def transform_info(args): - logger.info(f"Transforming untransformed 
channel info") + logger.info("Transforming untransformed channel info") controller = get_transformer_controller(args) controller.transform_all_untransformed_info() @@ -137,7 +136,7 @@ def transform_info(args): def transform_media(args): - logger.info(f"Transforming untransformed channel media") + logger.info("Transforming untransformed channel media") controller = get_transformer_controller(args) controller.transform_all_untransformed_media() diff --git a/cisticola/base.py b/cisticola/base.py index 665762d..a477a26 100644 --- a/cisticola/base.py +++ b/cisticola/base.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional from dataclasses import dataclass, field from datetime import datetime import tempfile @@ -22,7 +22,7 @@ import pytesseract import PIL import exiftool import re -from langdetect import detect, DetectorFactory +from langdetect import detect from langdetect.lang_detect_exception import LangDetectException from loguru import logger import spacy diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index e624e69..1143963 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -5,7 +5,6 @@ from html.parser import HTMLParser import dateparser import json from typing import Generator, Optional -from dateutil.relativedelta import relativedelta import requests from bs4 import BeautifulSoup diff --git a/cisticola/transformer/base.py b/cisticola/transformer/base.py index 7910159..50e3b40 100644 --- a/cisticola/transformer/base.py +++ b/cisticola/transformer/base.py @@ -1,10 +1,8 @@ -from typing import List, Generator, Union, Callable +from typing import List, Callable from loguru import logger from sqlalchemy import cast, String -from sqlalchemy.orm import sessionmaker, make_transient, Session +from sqlalchemy.orm import sessionmaker, Session from sqlalchemy.engine.base import Engine -from sqlalchemy.sql.expression import func -from collections import defaultdict from datetime import 
datetime, timezone from cisticola.base import ( @@ -295,7 +293,7 @@ class ETLController: and instance.source != "researcher" and (instance.source is None or instance.source[:4] != "snow") ): - logger.info(f"Updating source to linked channel") + logger.info("Updating source to linked channel") instance.source = obj.source instance.notes = obj.notes instance.category = obj.category @@ -364,7 +362,7 @@ class ETLController: break - if handled == False: + if not handled: logger.warning( f"No Transformer could handle ID {result.id} with platform {result.platform} ({result.date})" ) @@ -465,7 +463,7 @@ class ETLController: session.commit() break - if handled == False: + if not handled: logger.warning( f"No Transformer could handle raw channel info ID {result.id} with platform {result.platform} ({result.date_archived})" ) @@ -547,7 +545,7 @@ class ETLController: session.commit() break - if handled == False: + if not handled: logger.warning( f"No Transformer could handle ID {result.id} with platform {result.platform} ({result.date})" ) diff --git a/cisticola/transformer/bitchute.py b/cisticola/transformer/bitchute.py index 617130a..ca3ec40 100644 --- a/cisticola/transformer/bitchute.py +++ b/cisticola/transformer/bitchute.py @@ -1,6 +1,5 @@ import json -from loguru import logger -from typing import Generator, Union, Callable +from typing import Callable from datetime import datetime, timezone from dateutil.relativedelta import relativedelta @@ -12,10 +11,7 @@ from cisticola.base import ( RawChannelInfo, ScraperResult, Post, - Image, Video, - Media, - Channel, ChannelInfo, ) diff --git a/cisticola/transformer/gettr.py b/cisticola/transformer/gettr.py index 53d7c80..da6e0ce 100644 --- a/cisticola/transformer/gettr.py +++ b/cisticola/transformer/gettr.py @@ -1,7 +1,5 @@ import json -from loguru import logger -from typing import Generator, Union, Callable -import dateutil.parser +from typing import Callable from datetime import datetime, timezone from sqlalchemy import func 
from sqlalchemy.orm import Session @@ -15,9 +13,6 @@ from cisticola.base import ( ChannelInfo, ScraperResult, Post, - Image, - Video, - Media, Channel, ) diff --git a/cisticola/transformer/rumble.py b/cisticola/transformer/rumble.py index ad6fdd5..4464d5d 100644 --- a/cisticola/transformer/rumble.py +++ b/cisticola/transformer/rumble.py @@ -1,9 +1,8 @@ import json -from loguru import logger -from typing import Generator, Union, Callable, Optional +from typing import Callable, Optional import dateutil.parser from datetime import datetime, timezone -from sqlalchemy import func, JSON, String, cast, text +from sqlalchemy import text from sqlalchemy.orm import Session from cisticola.transformer.base import Transformer @@ -12,10 +11,6 @@ from cisticola.base import ( ChannelInfo, ScraperResult, Post, - Image, - Video, - Media, - Channel, ) diff --git a/cisticola/transformer/telegram_telethon.py b/cisticola/transformer/telegram_telethon.py index f350de5..11183fb 100644 --- a/cisticola/transformer/telegram_telethon.py +++ b/cisticola/transformer/telegram_telethon.py @@ -1,11 +1,9 @@ import json from loguru import logger -from typing import Generator, Union, Callable +from typing import Callable import dateutil.parser from bs4 import BeautifulSoup -from psycopg2 import DatabaseError import requests -import time from telethon.sync import TelegramClient from telethon.errors.rpcerrorlist import ChannelPrivateError, ChannelInvalidError from telethon.tl import types @@ -24,10 +22,6 @@ from cisticola.base import ( ChannelInfo, ScraperResult, Post, - Image, - Video, - Audio, - Media, Channel, )