From 5783206ad87f20c1ed09da77bac0c0081ba9a579 Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Thu, 10 Mar 2022 10:20:49 -0600 Subject: [PATCH] implemented method to reset database, to enable the 'controller' fixture scope to be shared across the whole package, which will enable the transformer tests to be run without re-running the scrapers --- Pipfile.lock | 67 ++++++++++++++++++++++++++---- cisticola/scraper/base.py | 9 +++- tests/conftest.py | 2 +- tests/scraper/bitchute.py | 2 + tests/scraper/gab.py | 2 + tests/scraper/gettr.py | 2 + tests/scraper/odysee.py | 2 + tests/scraper/rumble.py | 2 + tests/scraper/telegram_snscrape.py | 2 + tests/scraper/telegram_telethon.py | 2 + tests/scraper/twitter.py | 2 + 11 files changed, 85 insertions(+), 9 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 0ca0eda..5a75176 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "3d293e1f3802d64ae7a8fbfc4c1d742cc33cd4c520a6263f93e566f89faa7013" + "sha256": "afacc6dd45c110f235861c54db45f5546fb0095f4e68a1084e85fd0e902db21c" }, "pipfile-spec": 6, "requires": { @@ -49,19 +49,19 @@ }, "boto3": { "hashes": [ - "sha256:30394729b38d5ce2f845440428a55161c6d45478044e553a12ca1acf56d7278a", - "sha256:895489900eb882777124c3b64a13df49785cf77f7bd1504e783464fb3b4c8163" + "sha256:15fa6d1acac422d2d34f7811e02acfc7ac222cea24db3f463d5c52f2f87baa52", + "sha256:c974a7fa781c500b7067441f9883ed939cf8c80bcd74c88b11965b336cabb4b6" ], "index": "pypi", - "version": "==1.21.15" + "version": "==1.21.16" }, "botocore": { "hashes": [ - "sha256:405082f92a9e524e1aee96cbc90134668026d7da3c12f86990c91a12620ca28b", - "sha256:fa4816e94e72111a9341204061e760bcbde74ca5d900d3f2206c2c2e8e4b56e4" + "sha256:0a809efb821d81dc29f2e6c404ed123176b8d2eb43103758f31d89b291af2a8b", + "sha256:dcff7f9b5fea98701d0b520eba99385c538825f10e6d1cab1e7da213293d141e" ], "markers": "python_version >= '3.6'", - "version": "==1.24.15" + "version": "==1.24.16" }, "bs4": { "hashes": [ @@ -436,6 +436,47 
@@ "markers": "python_version >= '3.8'", "version": "==1.4.1" }, + "pillow": { + "hashes": [ + "sha256:011233e0c42a4a7836498e98c1acf5e744c96a67dd5032a6f666cc1fb97eab97", + "sha256:0f29d831e2151e0b7b39981756d201f7108d3d215896212ffe2e992d06bfe049", + "sha256:12875d118f21cf35604176872447cdb57b07126750a33748bac15e77f90f1f9c", + "sha256:14d4b1341ac07ae07eb2cc682f459bec932a380c3b122f5540432d8977e64eae", + "sha256:1c3c33ac69cf059bbb9d1a71eeaba76781b450bc307e2291f8a4764d779a6b28", + "sha256:1d19397351f73a88904ad1aee421e800fe4bbcd1aeee6435fb62d0a05ccd1030", + "sha256:253e8a302a96df6927310a9d44e6103055e8fb96a6822f8b7f514bb7ef77de56", + "sha256:2632d0f846b7c7600edf53c48f8f9f1e13e62f66a6dbc15191029d950bfed976", + "sha256:335ace1a22325395c4ea88e00ba3dc89ca029bd66bd5a3c382d53e44f0ccd77e", + "sha256:413ce0bbf9fc6278b2d63309dfeefe452835e1c78398efb431bab0672fe9274e", + "sha256:5100b45a4638e3c00e4d2320d3193bdabb2d75e79793af7c3eb139e4f569f16f", + "sha256:514ceac913076feefbeaf89771fd6febde78b0c4c1b23aaeab082c41c694e81b", + "sha256:528a2a692c65dd5cafc130de286030af251d2ee0483a5bf50c9348aefe834e8a", + "sha256:6295f6763749b89c994fcb6d8a7f7ce03c3992e695f89f00b741b4580b199b7e", + "sha256:6c8bc8238a7dfdaf7a75f5ec5a663f4173f8c367e5a39f87e720495e1eed75fa", + "sha256:718856856ba31f14f13ba885ff13874be7fefc53984d2832458f12c38205f7f7", + "sha256:7f7609a718b177bf171ac93cea9fd2ddc0e03e84d8fa4e887bdfc39671d46b00", + "sha256:80ca33961ced9c63358056bd08403ff866512038883e74f3a4bf88ad3eb66838", + "sha256:80fe64a6deb6fcfdf7b8386f2cf216d329be6f2781f7d90304351811fb591360", + "sha256:81c4b81611e3a3cb30e59b0cf05b888c675f97e3adb2c8672c3154047980726b", + "sha256:855c583f268edde09474b081e3ddcd5cf3b20c12f26e0d434e1386cc5d318e7a", + "sha256:9bfdb82cdfeccec50aad441afc332faf8606dfa5e8efd18a6692b5d6e79f00fd", + "sha256:a5d24e1d674dd9d72c66ad3ea9131322819ff86250b30dc5821cbafcfa0b96b4", + "sha256:a9f44cd7e162ac6191491d7249cceb02b8116b0f7e847ee33f739d7cb1ea1f70", + 
"sha256:b5b3f092fe345c03bca1e0b687dfbb39364b21ebb8ba90e3fa707374b7915204", + "sha256:b9618823bd237c0d2575283f2939655f54d51b4527ec3972907a927acbcc5bfc", + "sha256:cef9c85ccbe9bee00909758936ea841ef12035296c748aaceee535969e27d31b", + "sha256:d21237d0cd37acded35154e29aec853e945950321dd2ffd1a7d86fe686814669", + "sha256:d3c5c79ab7dfce6d88f1ba639b77e77a17ea33a01b07b99840d6ed08031cb2a7", + "sha256:d9d7942b624b04b895cb95af03a23407f17646815495ce4547f0e60e0b06f58e", + "sha256:db6d9fac65bd08cea7f3540b899977c6dee9edad959fa4eaf305940d9cbd861c", + "sha256:ede5af4a2702444a832a800b8eb7f0a7a1c0eed55b644642e049c98d589e5092", + "sha256:effb7749713d5317478bb3acb3f81d9d7c7f86726d41c1facca068a04cf5bb4c", + "sha256:f154d173286a5d1863637a7dcd8c3437bb557520b01bddb0be0258dcb72696b5", + "sha256:f25ed6e28ddf50de7e7ea99d7a976d6a9c415f03adcaac9c41ff6ff41b6d86ac" + ], + "markers": "python_version >= '3.7'", + "version": "==9.0.1" + }, "pluggy": { "hashes": [ "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", @@ -480,6 +521,10 @@ ], "version": "==0.4.8" }, + "pyexiftool": { + "git": "https://github.com/smarnach/pyexiftool.git", + "ref": "3db3764895e687d75b42d3ae4e554ca8664a7f6f" + }, "pygments": { "hashes": [ "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65", @@ -504,6 +549,14 @@ ], "version": "==1.7.1" }, + "pytesseract": { + "hashes": [ + "sha256:7e2bafc7f48d1bb71443ce4633a56f5e21925a98f220a36c336297edcd1956d0", + "sha256:fecda37d1e4eaf744c657cd03a5daab4eb97c61506ac5550274322c8ae32eca2" + ], + "index": "pypi", + "version": "==0.3.9" + }, "pytest": { "hashes": [ "sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db", diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 11016e6..f2eae25 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -146,5 +146,12 @@ class ScraperController: mapper_registry.metadata.create_all(bind=engine) self.session = sessionmaker() - 
self.session.configure(bind=engine) + self.engine = engine + self.session.configure(bind=self.engine) + + def reset_db(self): + + mapper_registry.metadata.drop_all(bind=self.engine) + self.connect_to_db(self.engine) + diff --git a/tests/conftest.py b/tests/conftest.py index 0608903..161439d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -113,7 +113,7 @@ TWITTER_CHANNEL_KWARGS = { #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# -@pytest.fixture(scope='function') +@pytest.fixture(scope='package') def controller(tmpdir_factory): """Initialize ScraperController and SQLite database file to be used for all diff --git a/tests/scraper/bitchute.py b/tests/scraper/bitchute.py index bc64c4b..c32e840 100644 --- a/tests/scraper/bitchute.py +++ b/tests/scraper/bitchute.py @@ -9,6 +9,8 @@ def test_scrape_bitchute_channel_no_media(controller, channel_kwargs): def test_scrape_bitchute_channel(controller, channel_kwargs): + controller.reset_db() + channels = [Channel(**channel_kwargs['bitchute'])] controller.register_scraper(scraper = BitchuteScraper()) controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/gab.py b/tests/scraper/gab.py index 29fa34a..c864c37 100644 --- a/tests/scraper/gab.py +++ b/tests/scraper/gab.py @@ -8,6 +8,8 @@ def test_scrape_gab_channel_no_media(controller, channel_kwargs): controller.scrape_channels(channels = channels, archive_media = False) def test_scrape_gab_channel(controller, channel_kwargs): + + controller.reset_db() channels = [Channel(**channel_kwargs['gab'])] controller.register_scraper(scraper = GabScraper()) diff --git a/tests/scraper/gettr.py b/tests/scraper/gettr.py index 186b74c..7dd2f24 100644 --- a/tests/scraper/gettr.py +++ b/tests/scraper/gettr.py @@ -9,6 +9,8 @@ def test_scrape_gettr_channel_no_media(controller, channel_kwargs): def test_scrape_gettr_channel(controller, channel_kwargs): + controller.reset_db() + channels = 
[Channel(**channel_kwargs['gettr'])] controller.register_scraper(scraper = GettrScraper()) controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/odysee.py b/tests/scraper/odysee.py index 8b9f89a..f97700e 100644 --- a/tests/scraper/odysee.py +++ b/tests/scraper/odysee.py @@ -9,6 +9,8 @@ def test_scrape_odysee_channel_no_media(controller, channel_kwargs): def test_scrape_odysee_channel(controller, channel_kwargs): + controller.reset_db() + channels = [Channel(**channel_kwargs['odysee'])] controller.register_scraper(scraper = OdyseeScraper()) controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/rumble.py b/tests/scraper/rumble.py index daf59f6..5f640e5 100644 --- a/tests/scraper/rumble.py +++ b/tests/scraper/rumble.py @@ -9,6 +9,8 @@ def test_scrape_rumble_channel_no_media(controller, channel_kwargs): def test_scrape_rumble_channel(controller, channel_kwargs): + controller.reset_db() + channels = [Channel(**channel_kwargs['rumble'])] controller.register_scraper(scraper = RumbleScraper()) controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/telegram_snscrape.py b/tests/scraper/telegram_snscrape.py index af25ed7..3848780 100644 --- a/tests/scraper/telegram_snscrape.py +++ b/tests/scraper/telegram_snscrape.py @@ -9,6 +9,8 @@ def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs): def test_scrape_telegram_snscrape_channel(controller, channel_kwargs): + controller.reset_db() + channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramSnscrapeScraper()) controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/telegram_telethon.py b/tests/scraper/telegram_telethon.py index 1cfc529..c015631 100644 --- a/tests/scraper/telegram_telethon.py +++ b/tests/scraper/telegram_telethon.py @@ -9,6 +9,8 @@ def 
test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs): def test_scrape_telegram_telethon_channel(controller, channel_kwargs): + controller.reset_db() + channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramTelethonScraper()) controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/twitter.py b/tests/scraper/twitter.py index ef375b7..bd79a6a 100644 --- a/tests/scraper/twitter.py +++ b/tests/scraper/twitter.py @@ -9,6 +9,8 @@ def test_scrape_twitter_channel_no_media(controller, channel_kwargs): def test_scrape_twitter_channel(controller, channel_kwargs): + controller.reset_db() + channels = [Channel(**channel_kwargs['twitter'])] controller.register_scraper(scraper = TwitterScraper()) controller.scrape_channels(channels = channels, archive_media = True)