mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-07 19:08:35 +03:00
Sorted imports using isort and tried to add a pre-commit hook for isort
This commit is contained in:
10
.github/workflows/black.yml
vendored
10
.github/workflows/black.yml
vendored
@@ -1,10 +0,0 @@
|
||||
name: Lint
|
||||
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: psf/black@stable
|
||||
14
.github/workflows/lint.yml
vendored
Normal file
14
.github/workflows/lint.yml
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
name: Lint
|
||||
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Run isort
|
||||
- uses: actions/checkout@v3
|
||||
- uses: isort/isort-action@v1
|
||||
- name: Run black
|
||||
- uses: actions/checkout@v3
|
||||
- uses: psf/black@stable
|
||||
@@ -1,4 +1,10 @@
|
||||
repos:
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.12.0
|
||||
hooks:
|
||||
- id: isort
|
||||
name: isort (python)
|
||||
args: ["--profile", "black"]
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 22.3.0
|
||||
hooks:
|
||||
|
||||
18
app.py
18
app.py
@@ -1,27 +1,27 @@
|
||||
import argparse
|
||||
from loguru import logger
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import datetime
|
||||
import os
|
||||
import sys
|
||||
|
||||
from loguru import logger
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from cisticola.base import mapper_registry
|
||||
from cisticola.scraper import (
|
||||
BitchuteScraper,
|
||||
GettrScraper,
|
||||
RumbleScraper,
|
||||
ScraperController,
|
||||
TelegramTelethonScraper,
|
||||
GettrScraper,
|
||||
BitchuteScraper,
|
||||
RumbleScraper,
|
||||
)
|
||||
from cisticola.transformer import (
|
||||
BitchuteTransformer,
|
||||
ETLController,
|
||||
TelegramTelethonTransformer,
|
||||
GettrTransformer,
|
||||
RumbleTransformer,
|
||||
BitchuteTransformer,
|
||||
TelegramTelethonTransformer,
|
||||
)
|
||||
|
||||
from sync_with_gsheet import sync_channels
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1 @@
|
||||
from . import base
|
||||
from . import scraper
|
||||
from . import transformer
|
||||
from . import base, scraper, transformer
|
||||
|
||||
@@ -1,31 +1,31 @@
|
||||
from typing import Optional
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
import tempfile
|
||||
import json
|
||||
import io
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy.orm import registry
|
||||
from sqlalchemy import (
|
||||
Table,
|
||||
Column,
|
||||
Integer,
|
||||
String,
|
||||
JSON,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
Boolean,
|
||||
Index,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
import pytesseract
|
||||
import PIL
|
||||
import exiftool
|
||||
import re
|
||||
import PIL
|
||||
import pytesseract
|
||||
import spacy
|
||||
from langdetect import detect
|
||||
from langdetect.lang_detect_exception import LangDetectException
|
||||
from loguru import logger
|
||||
import spacy
|
||||
from sqlalchemy import (
|
||||
JSON,
|
||||
Boolean,
|
||||
Column,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
Index,
|
||||
Integer,
|
||||
String,
|
||||
Table,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import registry
|
||||
|
||||
from .utils import make_request
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from cisticola.utils import make_request
|
||||
from .base import Scraper, ScraperController, ChannelDoesNotExistError
|
||||
|
||||
from .base import ChannelDoesNotExistError, Scraper, ScraperController
|
||||
from .bitchute import BitchuteScraper
|
||||
from .gettr import GettrScraper
|
||||
from .rumble import RumbleScraper
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
from typing import Generator, Tuple, List, Optional
|
||||
import os
|
||||
from io import BytesIO
|
||||
from urllib.parse import urlparse
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Generator, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import boto3
|
||||
from loguru import logger
|
||||
import ffmpeg
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import yt_dlp
|
||||
from sqlalchemy.sql.expression import func
|
||||
from sqlalchemy.orm.session import close_all_sessions
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from sqlalchemy import nullsfirst
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.orm.session import close_all_sessions
|
||||
from sqlalchemy.sql.expression import func
|
||||
|
||||
from cisticola.base import Channel, RawChannelInfo, ScraperResult, mapper_registry
|
||||
from cisticola.utils import make_request
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
from datetime import datetime, timezone
|
||||
import time
|
||||
import re
|
||||
from html.parser import HTMLParser
|
||||
import dateparser
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from html.parser import HTMLParser
|
||||
from typing import Generator, Optional
|
||||
|
||||
import dateparser
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ScraperResult, RawChannelInfo
|
||||
from cisticola.base import Channel, RawChannelInfo, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Generator, Optional
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
from gogettr import PublicClient
|
||||
from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ScraperResult, RawChannelInfo
|
||||
from cisticola.base import Channel, RawChannelInfo, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Generator, Optional
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ScraperResult, RawChannelInfo
|
||||
from cisticola.base import Channel, RawChannelInfo, ScraperResult
|
||||
from cisticola.scraper import Scraper, make_request
|
||||
|
||||
BASE_URL = "https://rumble.com"
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
from typing import Generator, Optional
|
||||
from datetime import datetime, timezone
|
||||
import os
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Generator, Optional
|
||||
|
||||
from loguru import logger
|
||||
from telethon.sync import TelegramClient
|
||||
from telethon.tl.functions.channels import GetFullChannelRequest
|
||||
from telethon.tl import types
|
||||
from telethon.tl.functions.channels import GetFullChannelRequest
|
||||
|
||||
from cisticola.base import Channel, ScraperResult, RawChannelInfo
|
||||
from cisticola.base import Channel, RawChannelInfo, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
MEDIA_TYPES = ["photo", "video", "document", "webpage"]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .base import ETLController
|
||||
from .bitchute import BitchuteTransformer
|
||||
from .telegram_telethon import TelegramTelethonTransformer
|
||||
from .rumble import RumbleTransformer
|
||||
from .gettr import GettrTransformer
|
||||
from .rumble import RumbleTransformer
|
||||
from .telegram_telethon import TelegramTelethonTransformer
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
from typing import List, Callable
|
||||
from loguru import logger
|
||||
from sqlalchemy import cast, String
|
||||
from sqlalchemy.orm import sessionmaker, Session
|
||||
from sqlalchemy.engine.base import Engine
|
||||
from datetime import datetime, timezone
|
||||
from typing import Callable, List
|
||||
|
||||
from loguru import logger
|
||||
from sqlalchemy import String, cast
|
||||
from sqlalchemy.engine.base import Engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from cisticola.base import (
|
||||
RawChannelInfo,
|
||||
ChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Media,
|
||||
Channel,
|
||||
mapper_registry,
|
||||
Image,
|
||||
Video,
|
||||
Audio,
|
||||
Channel,
|
||||
ChannelInfo,
|
||||
Image,
|
||||
Media,
|
||||
Post,
|
||||
RawChannelInfo,
|
||||
ScraperResult,
|
||||
Video,
|
||||
mapper_registry,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,19 +1,13 @@
|
||||
import json
|
||||
from typing import Callable
|
||||
from datetime import datetime, timezone
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from typing import Callable
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from cisticola.base import ChannelInfo, Post, RawChannelInfo, ScraperResult, Video
|
||||
from cisticola.transformer.base import Transformer
|
||||
from cisticola.base import (
|
||||
RawChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Video,
|
||||
ChannelInfo,
|
||||
)
|
||||
|
||||
|
||||
class BitchuteTransformer(Transformer):
|
||||
|
||||
@@ -1,20 +1,14 @@
|
||||
import json
|
||||
from typing import Callable
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Callable
|
||||
|
||||
from gogettr import PublicClient
|
||||
from gogettr.api import GettrApiError
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from cisticola.base import Channel, ChannelInfo, Post, RawChannelInfo, ScraperResult
|
||||
from cisticola.transformer.base import Transformer
|
||||
from cisticola.base import (
|
||||
RawChannelInfo,
|
||||
ChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Channel,
|
||||
)
|
||||
|
||||
|
||||
class GettrTransformer(Transformer):
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
import json
|
||||
from typing import Callable, Optional
|
||||
import dateutil.parser
|
||||
from datetime import datetime, timezone
|
||||
from typing import Callable, Optional
|
||||
|
||||
import dateutil.parser
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from cisticola.base import ChannelInfo, Post, RawChannelInfo, ScraperResult
|
||||
from cisticola.transformer.base import Transformer
|
||||
from cisticola.base import (
|
||||
RawChannelInfo,
|
||||
ChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
)
|
||||
|
||||
|
||||
class RumbleTransformer(Transformer):
|
||||
|
||||
@@ -1,29 +1,22 @@
|
||||
import json
|
||||
from loguru import logger
|
||||
from typing import Callable
|
||||
import dateutil.parser
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from telethon.sync import TelegramClient
|
||||
from telethon.errors.rpcerrorlist import ChannelPrivateError, ChannelInvalidError
|
||||
from telethon.tl import types
|
||||
from telethon.helpers import add_surrogate, del_surrogate
|
||||
from itertools import takewhile
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from itertools import takewhile
|
||||
from typing import Callable
|
||||
|
||||
import dateutil.parser
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
from telethon.errors.rpcerrorlist import ChannelInvalidError, ChannelPrivateError
|
||||
from telethon.helpers import add_surrogate, del_surrogate
|
||||
from telethon.sync import TelegramClient
|
||||
from telethon.tl import types
|
||||
|
||||
|
||||
from cisticola.base import Channel, ChannelInfo, Post, RawChannelInfo, ScraperResult
|
||||
from cisticola.transformer.base import Transformer
|
||||
from cisticola.base import (
|
||||
RawChannelInfo,
|
||||
ChannelInfo,
|
||||
ScraperResult,
|
||||
Post,
|
||||
Channel,
|
||||
)
|
||||
|
||||
|
||||
class TelegramTelethonTransformer(Transformer):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import time
|
||||
|
||||
import requests
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
|
||||
def make_request(url, headers=None, max_retries=5, break_codes=None):
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import gspread
|
||||
import time
|
||||
from loguru import logger
|
||||
import os
|
||||
import time
|
||||
|
||||
import gspread
|
||||
from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ChannelInfo
|
||||
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import argparse
|
||||
from telethon.sync import TelegramClient
|
||||
import os
|
||||
|
||||
from telethon.sync import TelegramClient
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Cisticola command line tools")
|
||||
parser.add_argument("--telethon_session", type=str)
|
||||
|
||||
@@ -2,24 +2,24 @@ import pytest
|
||||
from sqlalchemy.sql import text
|
||||
|
||||
from cisticola.base import (
|
||||
Post,
|
||||
Channel,
|
||||
ChannelInfo,
|
||||
Media,
|
||||
ScraperResult,
|
||||
Post,
|
||||
RawChannelInfo,
|
||||
ScraperResult,
|
||||
)
|
||||
from cisticola.scraper import (
|
||||
TelegramTelethonScraper,
|
||||
BitchuteScraper,
|
||||
GettrScraper,
|
||||
RumbleScraper,
|
||||
TelegramTelethonScraper,
|
||||
)
|
||||
from cisticola.transformer import (
|
||||
TelegramTelethonTransformer,
|
||||
BitchuteTransformer,
|
||||
GettrTransformer,
|
||||
RumbleTransformer,
|
||||
TelegramTelethonTransformer,
|
||||
)
|
||||
|
||||
CONTROLLERS = {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
|
||||
Reference in New Issue
Block a user