Load channels from Google Sheet in test.py

This commit is contained in:
Logan Williams
2022-03-22 11:37:47 +01:00
parent 571b019137
commit c0a094eefa
3 changed files with 40 additions and 21 deletions

4
.gitignore vendored
View File

@@ -9,10 +9,12 @@ docs/source/_*
*.db
.env
*.session
service_account.json
.vscode/
# Unit test / coverage reports
reports
.coverage
.cache
.pytest_cache/
cover/
cover/

View File

@@ -21,12 +21,17 @@ telethon = "*"
pytesseract = "*"
pyexiftool = {git = "https://github.com/smarnach/pyexiftool.git"}
instaloader = "*"
gspread = "*"
[dev-packages]
pytest = "*"
pytest-cov = "*"
pytest-html = "*"
pytest-metadata = "*"
black = "*"
[requires]
python_version = "3.9"
[pipenv]
allow_prereleases = true

52
test.py
View File

@@ -1,7 +1,10 @@
from sqlalchemy import create_engine
from loguru import logger
import gspread
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from cisticola.base import Channel, TransformedResult, ScraperResult
from cisticola.base import Channel, TransformedResult, ScraperResult, mapper_registry
from cisticola.scraper import (
ScraperController,
BitchuteScraper,
@@ -14,26 +17,9 @@ from cisticola.scraper import (
TwitterScraper)
from cisticola.transformer import ETLController
from cisticola.transformer.twitter import TwitterTransformer
from sqlalchemy.orm import sessionmaker
logger.add("../test.log")
test_channels = [
Channel(
id=0,
name="L Weber (test)",
platform_id=1424979017749442595,
category="test",
followers=None,
platform="Twitter",
url="https://twitter.com/LWeber33662141",
screenname="LWeber33662141",
country="US",
influencer=None,
public=True,
chat=False,
notes="")]
controller = ScraperController()
scrapers = [
@@ -49,9 +35,35 @@ scrapers = [
controller.register_scrapers(scrapers)
engine = create_engine('sqlite:///test.db')
controller.connect_to_db(engine)
mapper_registry.metadata.create_all(bind=engine)
session_generator = sessionmaker()
session_generator.configure(bind=engine)
session = session_generator()
controller.scrape_channels(test_channels, archive_media = True)
gc = gspread.service_account(filename='service_account.json')
# Open a sheet from a spreadsheet in one go
wks = gc.open_by_url("https://docs.google.com/spreadsheets/d/1yxd6-2Mp0jZ8r9XJklb39WE-iIMrKRyA2kymJcIfGis/edit#gid=0")
channels = wks.worksheet("channels").get_all_records()
for c in channels:
del c['followers']
for k in c.keys():
if c[k] == 'TRUE': c[k] = True
if c[k] == 'FALSE': c[k] = False
# Check whether this channel already exists; only add it if it does not.
channel = session.query(Channel).filter_by(platform_id=c['platform_id'], platform=c['platform']).first()
if not channel:
channel = Channel(**c, source='researcher')
session.add(channel)
session.commit()
controller.connect_to_db(engine)
controller.scrape_all_channels(archive_media = True)
transformer = TwitterTransformer()