From 763a5a0a01069d6f028ea21a8301f444042f9198 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Thu, 13 Nov 2025 03:33:57 +0300 Subject: [PATCH] better parsing --- src/rss_generator.py | 3 ++- src/transcript_cleaner.py | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rss_generator.py b/src/rss_generator.py index ae4ba17..03ee045 100644 --- a/src/rss_generator.py +++ b/src/rss_generator.py @@ -60,8 +60,9 @@ class RSSGenerator: fe.description(video.get('description', '')[:200]) # Content (tam transcript) + # feedgen HTML içeriği için otomatik escaping yapıyor if video.get('transcript_clean'): - fe.content(content=video['transcript_clean']) + fe.content(content=video['transcript_clean'], type='html') def generate_rss(self, output_path: str): """RSS feed'i dosyaya yaz""" diff --git a/src/transcript_cleaner.py b/src/transcript_cleaner.py index dfd5e97..5e4899e 100644 --- a/src/transcript_cleaner.py +++ b/src/transcript_cleaner.py @@ -127,10 +127,8 @@ class TranscriptCleaner: paragraphs = self.create_paragraphs(sentences, sentences_per_paragraph) # HTML'e sar - html_content = self.wrap_html(paragraphs) - - # XML entity escaping - clean_html = self.escape_xml_entities(html_content) + # feedgen zaten XML escaping yapıyor, bu yüzden escape_xml_entities çağrısını kaldırdık + clean_html = self.wrap_html(paragraphs) return raw_text, clean_html