Simplify transform method signature

This commit is contained in:
Logan Williams
2023-08-07 10:08:13 +02:00
parent 1f0197200e
commit 3aec25f74c
5 changed files with 11 additions and 18 deletions

View File

@@ -205,7 +205,6 @@ class ETLController:
def insert_or_select(self, obj, session, hydrate: bool = True):
"""Insert an object into the database or return an existing object from the database.
Regardless, the resulting object has an `id` attribute that can be referenced later.
Parameters
----------
@@ -253,6 +252,7 @@ class ETLController:
)
elif type(obj) == Post:
# attempt to add to current batch
return self.insert_post(obj, session, hydrate)
# instance = session.query(Post).filter_by(platform=obj.platform, platform_id=obj.platform_id).first()
@@ -351,9 +351,6 @@ class ETLController:
result,
lambda obj: self.insert_or_select(obj, session, hydrate),
session,
lambda obj: self.insert_post(
obj, session, hydrate, flush=False
),
lambda: self.flush_posts(session),
)

View File

@@ -82,7 +82,7 @@ class BitchuteTransformer(Transformer):
transformed = insert(transformed)
def transform(
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
self, data: ScraperResult, insert: Callable, session, flush_posts
) -> Generator[Union[Post, Channel, Media], None, None]:
raw = json.loads(data.raw_data)
@@ -146,8 +146,7 @@ class BitchuteTransformer(Transformer):
video_duration=_parse_duration_str(raw["length"]),
)
# insert_post
transformed = insert_post(transformed)
transformed = insert(transformed)
def parse_created(created: str, date_archived: datetime) -> datetime:

View File

@@ -100,7 +100,7 @@ class GettrTransformer(Transformer):
return channel.id
def transform(
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
self, data: ScraperResult, insert: Callable, session, flush_posts
) -> Generator[Union[Post, Channel, Media], None, None]:
raw = json.loads(data.raw_data)
@@ -147,8 +147,7 @@ class GettrTransformer(Transformer):
views=raw.get("vfpst"),
)
# insert_post
insert_post(transformed)
insert(transformed)
# media = self.process_media(raw, transformed.id, data)
# for m in media:

View File

@@ -78,7 +78,7 @@ class RumbleTransformer(Transformer):
transformed = insert(transformed)
def transform(
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
self, data: ScraperResult, insert: Callable, session, flush_posts
) -> Generator[Union[Post, Channel, Media], None, None]:
raw = json.loads(data.raw_data)
@@ -102,8 +102,7 @@ class RumbleTransformer(Transformer):
video_duration=_parse_duration_str(raw["duration"]),
)
# insert_post
insert_post(transformed)
insert(transformed)
def _process_number(s):

View File

@@ -206,9 +206,8 @@ class TelegramTelethonTransformer(Transformer):
insert(new_chat)
# TODO this method API is chaotic and could be cleaned up
def transform(
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
self, data: ScraperResult, insert: Callable, session, flush_posts
) -> Generator[Union[Post, Channel, Media], None, None]:
raw = json.loads(data.raw_data)
@@ -283,7 +282,8 @@ class TelegramTelethonTransformer(Transformer):
# use cache to find post ID instead of a DB request, if possible
if (data.channel, reply_to_id) not in self.posts_cache:
session.commit()
flush_posts() # TODO this is necessary because the post we are looking for might have been added in the same session
# Flush the batched posts before querying: the post we are looking for may be batched but not yet committed to the DB.
flush_posts()
post = (
session.query(Post)
.filter_by(channel=data.channel, platform_id=reply_to_id)
@@ -385,8 +385,7 @@ class TelegramTelethonTransformer(Transformer):
views=raw.get("views"),
)
# insert_post
insert_post(transformed)
insert(transformed)
def stripped(s):