mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-07 19:08:35 +03:00
Simplify transform method signature
This commit is contained in:
@@ -205,7 +205,6 @@ class ETLController:
|
||||
|
||||
def insert_or_select(self, obj, session, hydrate: bool = True):
|
||||
"""Insert an object into the database or return an existing object from the database.
|
||||
Regardless, the resulting object has an `id` attribute that can be referenced later.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -253,6 +252,7 @@ class ETLController:
|
||||
)
|
||||
|
||||
elif type(obj) == Post:
|
||||
# attempt to add to current batch
|
||||
return self.insert_post(obj, session, hydrate)
|
||||
# instance = session.query(Post).filter_by(platform=obj.platform, platform_id=obj.platform_id).first()
|
||||
|
||||
@@ -351,9 +351,6 @@ class ETLController:
|
||||
result,
|
||||
lambda obj: self.insert_or_select(obj, session, hydrate),
|
||||
session,
|
||||
lambda obj: self.insert_post(
|
||||
obj, session, hydrate, flush=False
|
||||
),
|
||||
lambda: self.flush_posts(session),
|
||||
)
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ class BitchuteTransformer(Transformer):
|
||||
transformed = insert(transformed)
|
||||
|
||||
def transform(
|
||||
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
|
||||
self, data: ScraperResult, insert: Callable, session, flush_posts
|
||||
) -> Generator[Union[Post, Channel, Media], None, None]:
|
||||
raw = json.loads(data.raw_data)
|
||||
|
||||
@@ -146,8 +146,7 @@ class BitchuteTransformer(Transformer):
|
||||
video_duration=_parse_duration_str(raw["length"]),
|
||||
)
|
||||
|
||||
# insert_post
|
||||
transformed = insert_post(transformed)
|
||||
transformed = insert(transformed)
|
||||
|
||||
|
||||
def parse_created(created: str, date_archived: datetime) -> datetime:
|
||||
|
||||
@@ -100,7 +100,7 @@ class GettrTransformer(Transformer):
|
||||
return channel.id
|
||||
|
||||
def transform(
|
||||
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
|
||||
self, data: ScraperResult, insert: Callable, session, flush_posts
|
||||
) -> Generator[Union[Post, Channel, Media], None, None]:
|
||||
raw = json.loads(data.raw_data)
|
||||
|
||||
@@ -147,8 +147,7 @@ class GettrTransformer(Transformer):
|
||||
views=raw.get("vfpst"),
|
||||
)
|
||||
|
||||
# insert_post
|
||||
insert_post(transformed)
|
||||
insert(transformed)
|
||||
|
||||
# media = self.process_media(raw, transformed.id, data)
|
||||
# for m in media:
|
||||
|
||||
@@ -78,7 +78,7 @@ class RumbleTransformer(Transformer):
|
||||
transformed = insert(transformed)
|
||||
|
||||
def transform(
|
||||
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
|
||||
self, data: ScraperResult, insert: Callable, session, flush_posts
|
||||
) -> Generator[Union[Post, Channel, Media], None, None]:
|
||||
raw = json.loads(data.raw_data)
|
||||
|
||||
@@ -102,8 +102,7 @@ class RumbleTransformer(Transformer):
|
||||
video_duration=_parse_duration_str(raw["duration"]),
|
||||
)
|
||||
|
||||
# insert_post
|
||||
insert_post(transformed)
|
||||
insert(transformed)
|
||||
|
||||
|
||||
def _process_number(s):
|
||||
|
||||
@@ -206,9 +206,8 @@ class TelegramTelethonTransformer(Transformer):
|
||||
|
||||
insert(new_chat)
|
||||
|
||||
# TODO this method API is chaotic and could be cleaned up
|
||||
def transform(
|
||||
self, data: ScraperResult, insert: Callable, session, insert_post, flush_posts
|
||||
self, data: ScraperResult, insert: Callable, session, flush_posts
|
||||
) -> Generator[Union[Post, Channel, Media], None, None]:
|
||||
raw = json.loads(data.raw_data)
|
||||
|
||||
@@ -283,7 +282,8 @@ class TelegramTelethonTransformer(Transformer):
|
||||
# use cache to find post ID instead of a DB request, if possible
|
||||
if (data.channel, reply_to_id) not in self.posts_cache:
|
||||
session.commit()
|
||||
flush_posts() # TODO this is necessary because the post we are looking for might have been added in the same session
|
||||
# this is necessary because the post we are looking for could be batched but not yet committed to the DB
|
||||
flush_posts()
|
||||
post = (
|
||||
session.query(Post)
|
||||
.filter_by(channel=data.channel, platform_id=reply_to_id)
|
||||
@@ -385,8 +385,7 @@ class TelegramTelethonTransformer(Transformer):
|
||||
views=raw.get("views"),
|
||||
)
|
||||
|
||||
# insert_post
|
||||
insert_post(transformed)
|
||||
insert(transformed)
|
||||
|
||||
|
||||
def stripped(s):
|
||||
|
||||
Reference in New Issue
Block a user