Implemented a method to reset the database so that the 'controller' fixture scope can be shared across the whole package, which will allow the transformer tests to be run without re-running the scrapers

This commit is contained in:
Tristan Lee
2022-03-10 10:20:49 -06:00
parent fa5037d67c
commit 5783206ad8
11 changed files with 85 additions and 9 deletions

67
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "3d293e1f3802d64ae7a8fbfc4c1d742cc33cd4c520a6263f93e566f89faa7013"
"sha256": "afacc6dd45c110f235861c54db45f5546fb0095f4e68a1084e85fd0e902db21c"
},
"pipfile-spec": 6,
"requires": {
@@ -49,19 +49,19 @@
},
"boto3": {
"hashes": [
"sha256:30394729b38d5ce2f845440428a55161c6d45478044e553a12ca1acf56d7278a",
"sha256:895489900eb882777124c3b64a13df49785cf77f7bd1504e783464fb3b4c8163"
"sha256:15fa6d1acac422d2d34f7811e02acfc7ac222cea24db3f463d5c52f2f87baa52",
"sha256:c974a7fa781c500b7067441f9883ed939cf8c80bcd74c88b11965b336cabb4b6"
],
"index": "pypi",
"version": "==1.21.15"
"version": "==1.21.16"
},
"botocore": {
"hashes": [
"sha256:405082f92a9e524e1aee96cbc90134668026d7da3c12f86990c91a12620ca28b",
"sha256:fa4816e94e72111a9341204061e760bcbde74ca5d900d3f2206c2c2e8e4b56e4"
"sha256:0a809efb821d81dc29f2e6c404ed123176b8d2eb43103758f31d89b291af2a8b",
"sha256:dcff7f9b5fea98701d0b520eba99385c538825f10e6d1cab1e7da213293d141e"
],
"markers": "python_version >= '3.6'",
"version": "==1.24.15"
"version": "==1.24.16"
},
"bs4": {
"hashes": [
@@ -436,6 +436,47 @@
"markers": "python_version >= '3.8'",
"version": "==1.4.1"
},
"pillow": {
"hashes": [
"sha256:011233e0c42a4a7836498e98c1acf5e744c96a67dd5032a6f666cc1fb97eab97",
"sha256:0f29d831e2151e0b7b39981756d201f7108d3d215896212ffe2e992d06bfe049",
"sha256:12875d118f21cf35604176872447cdb57b07126750a33748bac15e77f90f1f9c",
"sha256:14d4b1341ac07ae07eb2cc682f459bec932a380c3b122f5540432d8977e64eae",
"sha256:1c3c33ac69cf059bbb9d1a71eeaba76781b450bc307e2291f8a4764d779a6b28",
"sha256:1d19397351f73a88904ad1aee421e800fe4bbcd1aeee6435fb62d0a05ccd1030",
"sha256:253e8a302a96df6927310a9d44e6103055e8fb96a6822f8b7f514bb7ef77de56",
"sha256:2632d0f846b7c7600edf53c48f8f9f1e13e62f66a6dbc15191029d950bfed976",
"sha256:335ace1a22325395c4ea88e00ba3dc89ca029bd66bd5a3c382d53e44f0ccd77e",
"sha256:413ce0bbf9fc6278b2d63309dfeefe452835e1c78398efb431bab0672fe9274e",
"sha256:5100b45a4638e3c00e4d2320d3193bdabb2d75e79793af7c3eb139e4f569f16f",
"sha256:514ceac913076feefbeaf89771fd6febde78b0c4c1b23aaeab082c41c694e81b",
"sha256:528a2a692c65dd5cafc130de286030af251d2ee0483a5bf50c9348aefe834e8a",
"sha256:6295f6763749b89c994fcb6d8a7f7ce03c3992e695f89f00b741b4580b199b7e",
"sha256:6c8bc8238a7dfdaf7a75f5ec5a663f4173f8c367e5a39f87e720495e1eed75fa",
"sha256:718856856ba31f14f13ba885ff13874be7fefc53984d2832458f12c38205f7f7",
"sha256:7f7609a718b177bf171ac93cea9fd2ddc0e03e84d8fa4e887bdfc39671d46b00",
"sha256:80ca33961ced9c63358056bd08403ff866512038883e74f3a4bf88ad3eb66838",
"sha256:80fe64a6deb6fcfdf7b8386f2cf216d329be6f2781f7d90304351811fb591360",
"sha256:81c4b81611e3a3cb30e59b0cf05b888c675f97e3adb2c8672c3154047980726b",
"sha256:855c583f268edde09474b081e3ddcd5cf3b20c12f26e0d434e1386cc5d318e7a",
"sha256:9bfdb82cdfeccec50aad441afc332faf8606dfa5e8efd18a6692b5d6e79f00fd",
"sha256:a5d24e1d674dd9d72c66ad3ea9131322819ff86250b30dc5821cbafcfa0b96b4",
"sha256:a9f44cd7e162ac6191491d7249cceb02b8116b0f7e847ee33f739d7cb1ea1f70",
"sha256:b5b3f092fe345c03bca1e0b687dfbb39364b21ebb8ba90e3fa707374b7915204",
"sha256:b9618823bd237c0d2575283f2939655f54d51b4527ec3972907a927acbcc5bfc",
"sha256:cef9c85ccbe9bee00909758936ea841ef12035296c748aaceee535969e27d31b",
"sha256:d21237d0cd37acded35154e29aec853e945950321dd2ffd1a7d86fe686814669",
"sha256:d3c5c79ab7dfce6d88f1ba639b77e77a17ea33a01b07b99840d6ed08031cb2a7",
"sha256:d9d7942b624b04b895cb95af03a23407f17646815495ce4547f0e60e0b06f58e",
"sha256:db6d9fac65bd08cea7f3540b899977c6dee9edad959fa4eaf305940d9cbd861c",
"sha256:ede5af4a2702444a832a800b8eb7f0a7a1c0eed55b644642e049c98d589e5092",
"sha256:effb7749713d5317478bb3acb3f81d9d7c7f86726d41c1facca068a04cf5bb4c",
"sha256:f154d173286a5d1863637a7dcd8c3437bb557520b01bddb0be0258dcb72696b5",
"sha256:f25ed6e28ddf50de7e7ea99d7a976d6a9c415f03adcaac9c41ff6ff41b6d86ac"
],
"markers": "python_version >= '3.7'",
"version": "==9.0.1"
},
"pluggy": {
"hashes": [
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
@@ -480,6 +521,10 @@
],
"version": "==0.4.8"
},
"pyexiftool": {
"git": "https://github.com/smarnach/pyexiftool.git",
"ref": "3db3764895e687d75b42d3ae4e554ca8664a7f6f"
},
"pygments": {
"hashes": [
"sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65",
@@ -504,6 +549,14 @@
],
"version": "==1.7.1"
},
"pytesseract": {
"hashes": [
"sha256:7e2bafc7f48d1bb71443ce4633a56f5e21925a98f220a36c336297edcd1956d0",
"sha256:fecda37d1e4eaf744c657cd03a5daab4eb97c61506ac5550274322c8ae32eca2"
],
"index": "pypi",
"version": "==0.3.9"
},
"pytest": {
"hashes": [
"sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db",

View File

@@ -146,5 +146,12 @@ class ScraperController:
mapper_registry.metadata.create_all(bind=engine)
self.session = sessionmaker()
self.session.configure(bind=engine)
self.engine = engine
self.session.configure(bind=self.engine)
def reset_db(self):
    """Drop the tables registered on ``mapper_registry.metadata`` and reconnect.

    Issues ``drop_all`` against ``self.engine`` and then hands that same
    engine back to ``connect_to_db``. ``self.engine`` must already be set
    before this method is called.
    """
    mapper_registry.metadata.drop_all(bind=self.engine)
    # NOTE(review): presumably connect_to_db re-creates the (now empty) tables
    # and rebinds the session factory on this engine -- confirm against its
    # full body. Nothing here closes sessions that were opened before the reset.
    self.connect_to_db(self.engine)

View File

@@ -113,7 +113,7 @@ TWITTER_CHANNEL_KWARGS = {
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
@pytest.fixture(scope='function')
@pytest.fixture(scope='package')
def controller(tmpdir_factory):
"""Initialize ScraperController and SQLite database file to be used for all

View File

@@ -9,6 +9,8 @@ def test_scrape_bitchute_channel_no_media(controller, channel_kwargs):
def test_scrape_bitchute_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['bitchute'])]
controller.register_scraper(scraper = BitchuteScraper())
controller.scrape_channels(channels = channels, archive_media = True)

View File

@@ -8,6 +8,8 @@ def test_scrape_gab_channel_no_media(controller, channel_kwargs):
controller.scrape_channels(channels = channels, archive_media = False)
def test_scrape_gab_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['gab'])]
controller.register_scraper(scraper = GabScraper())

View File

@@ -9,6 +9,8 @@ def test_scrape_gettr_channel_no_media(controller, channel_kwargs):
def test_scrape_gettr_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['gettr'])]
controller.register_scraper(scraper = GettrScraper())
controller.scrape_channels(channels = channels, archive_media = True)

View File

@@ -9,6 +9,8 @@ def test_scrape_odysee_channel_no_media(controller, channel_kwargs):
def test_scrape_odysee_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['odysee'])]
controller.register_scraper(scraper = OdyseeScraper())
controller.scrape_channels(channels = channels, archive_media = True)

View File

@@ -9,6 +9,8 @@ def test_scrape_rumble_channel_no_media(controller, channel_kwargs):
def test_scrape_rumble_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['rumble'])]
controller.register_scraper(scraper = RumbleScraper())
controller.scrape_channels(channels = channels, archive_media = True)

View File

@@ -9,6 +9,8 @@ def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['telegram'])]
controller.register_scraper(scraper = TelegramSnscrapeScraper())
controller.scrape_channels(channels = channels, archive_media = True)

View File

@@ -9,6 +9,8 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['telegram'])]
controller.register_scraper(scraper = TelegramTelethonScraper())
controller.scrape_channels(channels = channels, archive_media = True)

View File

@@ -9,6 +9,8 @@ def test_scrape_twitter_channel_no_media(controller, channel_kwargs):
def test_scrape_twitter_channel(controller, channel_kwargs):
controller.reset_db()
channels = [Channel(**channel_kwargs['twitter'])]
controller.register_scraper(scraper = TwitterScraper())
controller.scrape_channels(channels = channels, archive_media = True)