better parsing

This commit is contained in:
salvacybersec
2025-11-13 03:33:57 +03:00
parent abe170a1f8
commit 763a5a0a01
2 changed files with 4 additions and 5 deletions

View File

@@ -60,8 +60,9 @@ class RSSGenerator:
  fe.description(video.get('description', '')[:200])
  # Content (tam transcript)
+ # feedgen HTML içeriği için otomatik escaping yapıyor
  if video.get('transcript_clean'):
- fe.content(content=video['transcript_clean'])
+ fe.content(content=video['transcript_clean'], type='html')
  def generate_rss(self, output_path: str):
  """RSS feed'i dosyaya yaz"""

View File

@@ -127,10 +127,8 @@ class TranscriptCleaner:
  paragraphs = self.create_paragraphs(sentences, sentences_per_paragraph)
  # HTML'e sar
- html_content = self.wrap_html(paragraphs)
+ # feedgen zaten XML escaping yapıyor, bu yüzden escape_xml_entities çağrısını kaldırdık
+ clean_html = self.wrap_html(paragraphs)
- # XML entity escaping
- clean_html = self.escape_xml_entities(html_content)
  return raw_text, clean_html