From 1eb82c5f3e567db9de550c82aac929768631583d Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Mon, 7 Aug 2023 20:04:16 -0500 Subject: [PATCH] sorted imports using isort and tried to add pre-commit hook for isort --- .github/workflows/black.yml | 10 ------ .github/workflows/lint.yml | 14 ++++++++ .pre-commit-config.yaml | 6 ++++ app.py | 18 +++++----- cisticola/__init__.py | 4 +-- cisticola/base.py | 42 +++++++++++----------- cisticola/scraper/__init__.py | 3 +- cisticola/scraper/base.py | 16 ++++----- cisticola/scraper/bitchute.py | 12 +++---- cisticola/scraper/gettr.py | 6 ++-- cisticola/scraper/rumble.py | 8 ++--- cisticola/scraper/telegram_telethon.py | 12 +++---- cisticola/transformer/__init__.py | 4 +-- cisticola/transformer/base.py | 29 +++++++-------- cisticola/transformer/bitchute.py | 12 ++----- cisticola/transformer/gettr.py | 14 +++----- cisticola/transformer/rumble.py | 12 +++---- cisticola/transformer/telegram_telethon.py | 31 +++++++--------- cisticola/utils.py | 3 +- sync_with_gsheet.py | 7 ++-- telethon_session_init.py | 3 +- tests/base.py | 8 ++--- tests/conftest.py | 2 +- 23 files changed, 133 insertions(+), 143 deletions(-) delete mode 100644 .github/workflows/black.yml create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml deleted file mode 100644 index f61ad14..0000000 --- a/.github/workflows/black.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: Lint - -on: [push] - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: psf/black@stable \ No newline at end of file diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..7722037 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,14 @@ +name: Lint + +on: [push] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Run isort + - uses: actions/checkout@v3 + - uses: isort/isort-action@v1 + - name: Run black + - uses: actions/checkout@v3 + - uses: psf/black@stable \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 29fca09..3f080d4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,10 @@ repos: + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + name: isort (python) + args: ["--profile", "black"] - repo: https://github.com/psf/black rev: 22.3.0 hooks: diff --git a/app.py b/app.py index ecea78b..2622637 100644 --- a/app.py +++ b/app.py @@ -1,27 +1,27 @@ import argparse -from loguru import logger -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker import datetime import os import sys +from loguru import logger +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + from cisticola.base import mapper_registry from cisticola.scraper import ( + BitchuteScraper, + GettrScraper, + RumbleScraper, ScraperController, TelegramTelethonScraper, - GettrScraper, - BitchuteScraper, - RumbleScraper, ) from cisticola.transformer import ( + BitchuteTransformer, ETLController, - TelegramTelethonTransformer, GettrTransformer, RumbleTransformer, - BitchuteTransformer, + TelegramTelethonTransformer, ) - from sync_with_gsheet import sync_channels diff --git a/cisticola/__init__.py b/cisticola/__init__.py index f553d9a..2e2b975 100644 --- a/cisticola/__init__.py +++ b/cisticola/__init__.py @@ -1,3 +1 @@ -from . import base -from . import scraper -from . import transformer +from . import base, scraper, transformer diff --git a/cisticola/base.py b/cisticola/base.py index a477a26..4846b89 100644 --- a/cisticola/base.py +++ b/cisticola/base.py @@ -1,31 +1,31 @@ -from typing import Optional +import io +import json +import re +import tempfile from dataclasses import dataclass, field from datetime import datetime -import tempfile -import json -import io +from typing import Optional -from sqlalchemy.orm import registry -from sqlalchemy import ( - Table, - Column, - Integer, - String, - JSON, - DateTime, - ForeignKey, - Boolean, - Index, -) -from sqlalchemy.dialects.postgresql import JSONB -import pytesseract -import PIL import exiftool -import re +import PIL +import pytesseract +import spacy from langdetect import detect from langdetect.lang_detect_exception import LangDetectException from loguru import logger -import spacy +from sqlalchemy import ( + JSON, + Boolean, + Column, + DateTime, + ForeignKey, + Index, + Integer, + String, + Table, +) +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import registry from .utils import make_request diff --git a/cisticola/scraper/__init__.py b/cisticola/scraper/__init__.py index 826de0a..6a7167d 100644 --- a/cisticola/scraper/__init__.py +++ b/cisticola/scraper/__init__.py @@ -1,5 +1,6 @@ from cisticola.utils import make_request -from .base import Scraper, ScraperController, ChannelDoesNotExistError + +from .base import ChannelDoesNotExistError, Scraper, ScraperController from .bitchute import BitchuteScraper from .gettr import GettrScraper from .rumble import RumbleScraper diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index d784dce..ab576e3 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -1,19 +1,19 @@ -from typing import Generator, Tuple, List, Optional import os -from io import BytesIO -from urllib.parse import urlparse import tempfile from datetime import datetime, timezone +from io import BytesIO +from pathlib import Path +from typing import Generator, List, Optional, Tuple +from urllib.parse import urlparse import boto3 -from loguru import logger import ffmpeg -from sqlalchemy.orm import sessionmaker import yt_dlp -from sqlalchemy.sql.expression import func -from sqlalchemy.orm.session import close_all_sessions -from pathlib import Path +from loguru import logger from sqlalchemy import nullsfirst +from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm.session import close_all_sessions +from sqlalchemy.sql.expression import func from cisticola.base import Channel, RawChannelInfo, ScraperResult, mapper_registry from cisticola.utils import make_request diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index 1143963..c24390a 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -1,16 +1,16 @@ -from datetime import datetime, timezone -import time -import re -from html.parser import HTMLParser -import dateparser import json +import re +import time +from datetime import datetime, timezone +from html.parser import HTMLParser from typing import Generator, Optional +import dateparser import requests from bs4 import BeautifulSoup from loguru import logger -from cisticola.base import Channel, ScraperResult, RawChannelInfo +from cisticola.base import Channel, RawChannelInfo, ScraperResult from cisticola.scraper.base import Scraper diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py index 56927bc..af26c68 100644 --- a/cisticola/scraper/gettr.py +++ b/cisticola/scraper/gettr.py @@ -1,12 +1,12 @@ -from datetime import datetime, timezone import json +from datetime import datetime, timezone from typing import Generator, Optional from urllib.parse import urlparse -from loguru import logger from gogettr import PublicClient +from loguru import logger -from cisticola.base import Channel, ScraperResult, RawChannelInfo +from cisticola.base import Channel, RawChannelInfo, ScraperResult from cisticola.scraper.base import Scraper diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index aeb7a45..7381c3d 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -1,13 +1,13 @@ -from datetime import datetime, timezone import json +import os +from datetime import datetime, timezone from typing import Generator, Optional from urllib.parse import urlparse -from loguru import logger from bs4 import BeautifulSoup -import os +from loguru import logger -from cisticola.base import Channel, ScraperResult, RawChannelInfo +from cisticola.base import Channel, RawChannelInfo, ScraperResult from cisticola.scraper import Scraper, make_request BASE_URL = "https://rumble.com" diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py index 8791227..3ff7d7a 100644 --- a/cisticola/scraper/telegram_telethon.py +++ b/cisticola/scraper/telegram_telethon.py @@ -1,17 +1,17 @@ -from typing import Generator, Optional -from datetime import datetime, timezone -import os import json +import os import tempfile -from pathlib import Path import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Generator, Optional from loguru import logger from telethon.sync import TelegramClient -from telethon.tl.functions.channels import GetFullChannelRequest from telethon.tl import types +from telethon.tl.functions.channels import GetFullChannelRequest -from cisticola.base import Channel, ScraperResult, RawChannelInfo +from cisticola.base import Channel, RawChannelInfo, ScraperResult from cisticola.scraper.base import Scraper MEDIA_TYPES = ["photo", "video", "document", "webpage"] diff --git a/cisticola/transformer/__init__.py b/cisticola/transformer/__init__.py index 0c54752..4e90918 100644 --- a/cisticola/transformer/__init__.py +++ b/cisticola/transformer/__init__.py @@ -1,5 +1,5 @@ from .base import ETLController from .bitchute import BitchuteTransformer -from .telegram_telethon import TelegramTelethonTransformer -from .rumble import RumbleTransformer from .gettr import GettrTransformer +from .rumble import RumbleTransformer +from .telegram_telethon import TelegramTelethonTransformer diff --git a/cisticola/transformer/base.py b/cisticola/transformer/base.py index 50e3b40..e1edc02 100644 --- a/cisticola/transformer/base.py +++ b/cisticola/transformer/base.py @@ -1,21 +1,22 @@ -from typing import List, Callable -from loguru import logger -from sqlalchemy import cast, String -from sqlalchemy.orm import sessionmaker, Session -from sqlalchemy.engine.base import Engine from datetime import datetime, timezone +from typing import Callable, List + +from loguru import logger +from sqlalchemy import String, cast +from sqlalchemy.engine.base import Engine +from sqlalchemy.orm import Session, sessionmaker from cisticola.base import ( - RawChannelInfo, - ChannelInfo, - ScraperResult, - Post, - Media, - Channel, - mapper_registry, - Image, - Video, Audio, + Channel, + ChannelInfo, + Image, + Media, + Post, + RawChannelInfo, + ScraperResult, + Video, + mapper_registry, ) diff --git a/cisticola/transformer/bitchute.py b/cisticola/transformer/bitchute.py index ca3ec40..889e7cb 100644 --- a/cisticola/transformer/bitchute.py +++ b/cisticola/transformer/bitchute.py @@ -1,19 +1,13 @@ import json -from typing import Callable from datetime import datetime, timezone -from dateutil.relativedelta import relativedelta +from typing import Callable from bs4 import BeautifulSoup +from dateutil.relativedelta import relativedelta from sqlalchemy.orm import Session +from cisticola.base import ChannelInfo, Post, RawChannelInfo, ScraperResult, Video from cisticola.transformer.base import Transformer -from cisticola.base import ( - RawChannelInfo, - ScraperResult, - Post, - Video, - ChannelInfo, -) class BitchuteTransformer(Transformer): diff --git a/cisticola/transformer/gettr.py b/cisticola/transformer/gettr.py index da6e0ce..dc2fb1c 100644 --- a/cisticola/transformer/gettr.py +++ b/cisticola/transformer/gettr.py @@ -1,20 +1,14 @@ import json -from typing import Callable from datetime import datetime, timezone -from sqlalchemy import func -from sqlalchemy.orm import Session +from typing import Callable from gogettr import PublicClient from gogettr.api import GettrApiError +from sqlalchemy import func +from sqlalchemy.orm import Session +from cisticola.base import Channel, ChannelInfo, Post, RawChannelInfo, ScraperResult from cisticola.transformer.base import Transformer -from cisticola.base import ( - RawChannelInfo, - ChannelInfo, - ScraperResult, - Post, - Channel, -) class GettrTransformer(Transformer): diff --git a/cisticola/transformer/rumble.py b/cisticola/transformer/rumble.py index 4464d5d..4847f48 100644 --- a/cisticola/transformer/rumble.py +++ b/cisticola/transformer/rumble.py @@ -1,17 +1,13 @@ import json -from typing import Callable, Optional -import dateutil.parser from datetime import datetime, timezone +from typing import Callable, Optional + +import dateutil.parser from sqlalchemy import text from sqlalchemy.orm import Session +from cisticola.base import ChannelInfo, Post, RawChannelInfo, ScraperResult from cisticola.transformer.base import Transformer -from cisticola.base import ( - RawChannelInfo, - ChannelInfo, - ScraperResult, - Post, -) class RumbleTransformer(Transformer): diff --git a/cisticola/transformer/telegram_telethon.py b/cisticola/transformer/telegram_telethon.py index 11183fb..8585cc3 100644 --- a/cisticola/transformer/telegram_telethon.py +++ b/cisticola/transformer/telegram_telethon.py @@ -1,29 +1,22 @@ import json -from loguru import logger -from typing import Callable -import dateutil.parser -from bs4 import BeautifulSoup -import requests -from telethon.sync import TelegramClient -from telethon.errors.rpcerrorlist import ChannelPrivateError, ChannelInvalidError -from telethon.tl import types -from telethon.helpers import add_surrogate, del_surrogate -from itertools import takewhile - import os from datetime import datetime, timezone +from itertools import takewhile +from typing import Callable + +import dateutil.parser +import requests +from bs4 import BeautifulSoup +from loguru import logger from sqlalchemy import func from sqlalchemy.orm import Session +from telethon.errors.rpcerrorlist import ChannelInvalidError, ChannelPrivateError +from telethon.helpers import add_surrogate, del_surrogate +from telethon.sync import TelegramClient +from telethon.tl import types - +from cisticola.base import Channel, ChannelInfo, Post, RawChannelInfo, ScraperResult from cisticola.transformer.base import Transformer -from cisticola.base import ( - RawChannelInfo, - ChannelInfo, - ScraperResult, - Post, - Channel, -) class TelegramTelethonTransformer(Transformer): diff --git a/cisticola/utils.py b/cisticola/utils.py index 815d927..f4d7465 100644 --- a/cisticola/utils.py +++ b/cisticola/utils.py @@ -1,6 +1,7 @@ +import time + import requests from loguru import logger -import time def make_request(url, headers=None, max_retries=5, break_codes=None): diff --git a/sync_with_gsheet.py b/sync_with_gsheet.py index 0efd56c..9b40f1c 100644 --- a/sync_with_gsheet.py +++ b/sync_with_gsheet.py @@ -1,7 +1,8 @@ -import gspread -import time -from loguru import logger import os +import time + +import gspread +from loguru import logger from cisticola.base import Channel, ChannelInfo diff --git a/telethon_session_init.py b/telethon_session_init.py index 950c35c..895ba21 100644 --- a/telethon_session_init.py +++ b/telethon_session_init.py @@ -1,7 +1,8 @@ import argparse -from telethon.sync import TelegramClient import os +from telethon.sync import TelegramClient + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Cisticola command line tools") parser.add_argument("--telethon_session", type=str) diff --git a/tests/base.py b/tests/base.py index 16259bb..3c6e39c 100644 --- a/tests/base.py +++ b/tests/base.py @@ -2,24 +2,24 @@ import pytest from sqlalchemy.sql import text from cisticola.base import ( - Post, Channel, ChannelInfo, Media, - ScraperResult, + Post, RawChannelInfo, + ScraperResult, ) from cisticola.scraper import ( - TelegramTelethonScraper, BitchuteScraper, GettrScraper, RumbleScraper, + TelegramTelethonScraper, ) from cisticola.transformer import ( - TelegramTelethonTransformer, BitchuteTransformer, GettrTransformer, RumbleTransformer, + TelegramTelethonTransformer, ) CONTROLLERS = { diff --git a/tests/conftest.py b/tests/conftest.py index c890d00..da07ae6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import os -import pytest +import pytest from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker