#!/usr/bin/env python3
"""
YouTube Transcript RSS Feed Generator - Main Pipeline
"""

import os
import sys
from pathlib import Path

import yaml

# Add the project root to sys.path so the `src` package resolves when this
# file is run directly as a script.
sys.path.insert(0, str(Path(__file__).parent))

from src.database import Database
from src.video_fetcher import fetch_videos_from_rss_bridge, get_channel_id_from_handle
from src.transcript_extractor import TranscriptExtractor
from src.transcript_cleaner import TranscriptCleaner
from src.rss_generator import RSSGenerator

# Upper bound on pending videos handled in a single run (the original code
# hard-coded this as a "first 10 videos, for testing" slice).
MAX_VIDEOS_PER_RUN = 10


def load_config(config_path: str = "config/config.yaml") -> dict:
    """Load the YAML configuration file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed configuration; an empty dict when the file is empty.
    """
    with open(config_path, 'r', encoding='utf-8') as f:
        # safe_load yields None for an empty document; normalise to a dict
        # so callers can use .get() unconditionally.
        return yaml.safe_load(f) or {}


def _section(config: dict, name: str) -> dict:
    """Return config[name], treating a missing or null section as empty."""
    return config.get(name) or {}


def get_channel_id(config: dict) -> str:
    """Resolve the channel ID from the config.

    Lookup order within the ``channel`` section: an explicit ``id``, then
    ``handle_url``, then ``handle`` (turned into a youtube.com URL). The two
    handle forms are resolved through ``get_channel_id_from_handle``.

    Args:
        config: Parsed configuration mapping.

    Returns:
        The channel ID.

    Raises:
        ValueError: If none of the keys produced a channel ID.
    """
    channel_config = _section(config, 'channel')

    # An explicit channel ID wins.
    if channel_config.get('id'):
        return channel_config['id']

    # Otherwise try the full handle URL.
    if channel_config.get('handle_url'):
        channel_id = get_channel_id_from_handle(channel_config['handle_url'])
        if channel_id:
            return channel_id

    # Finally try a bare handle by building its URL.
    if channel_config.get('handle'):
        handle_url = f"https://www.youtube.com/{channel_config['handle']}"
        channel_id = get_channel_id_from_handle(handle_url)
        if channel_id:
            return channel_id

    raise ValueError("Channel ID bulunamadı! Config'de id, handle veya handle_url belirtin.")


def _store_new_videos(db, videos, channel_id: str) -> int:
    """Tag each video with the channel ID and add the ones the database
    does not report as processed; return how many were added."""
    new_count = 0
    for video in videos:
        video['channel_id'] = channel_id
        if not db.is_video_processed(video['video_id']):
            db.add_video(video)
            new_count += 1
    return new_count


def _process_pending_videos(db, transcript_config: dict) -> None:
    """Fetch, clean and store transcripts for up to MAX_VIDEOS_PER_RUN
    pending videos, marking a video as failed when no transcript is found."""
    pending_videos = db.get_pending_videos()
    print(f"{len(pending_videos)} video işlenmeyi bekliyor")
    if not pending_videos:
        return

    extractor = TranscriptExtractor()
    cleaner = TranscriptCleaner()

    # Resolve once; fall back to English when the key is missing or the list
    # is empty so the [0] lookup below cannot raise IndexError.
    languages = transcript_config.get('languages') or ['en']
    sentences_per_paragraph = transcript_config.get('paragraph_length', 3)

    for video in pending_videos[:MAX_VIDEOS_PER_RUN]:
        print(f"İşleniyor: {video['video_title']}")

        # Extract the transcript.
        transcript = extractor.fetch_transcript(
            video['video_id'],
            languages=languages
        )

        if transcript:
            # Clean the transcript.
            raw, clean = cleaner.clean_transcript(
                transcript,
                sentences_per_paragraph=sentences_per_paragraph
            )

            # Save to the database.
            # NOTE(review): the stored language is the first *requested*
            # language, which may differ from the language actually
            # returned by the extractor - confirm.
            db.update_video_transcript(
                video['video_id'],
                raw,
                clean,
                status=1,  # success
                language=languages[0]
            )
            print(f"✓ Tamamlandı: {video['video_title']}")
        else:
            # Mark as failed.
            db.mark_video_failed(video['video_id'], "Transcript bulunamadı")
            print(f"✗ Başarısız: {video['video_title']}")


def _write_feed(db, config: dict, channel_id: str) -> None:
    """Generate the RSS file under output/ from the processed videos;
    does nothing when there are none."""
    processed_videos = db.get_processed_videos(
        limit=_section(config, 'automation').get('max_items', 100),
        channel_id=channel_id
    )
    if not processed_videos:
        return

    rss_config = _section(config, 'rss')
    channel_config = _section(config, 'channel')
    channel_info = {
        'id': channel_id,
        'title': rss_config.get('title', 'Transcript Feed'),
        'link': channel_config.get('url', ''),
        'description': rss_config.get('description', ''),
        'language': channel_config.get('language', 'en')
    }

    generator = RSSGenerator(channel_info)
    for video in processed_videos:
        generator.add_video_entry(video)

    output_file = rss_config.get('output_file', 'transcript_feed.xml')
    output_path = f"output/{output_file}"
    os.makedirs('output', exist_ok=True)

    generator.generate_rss(output_path)
    print(f"RSS feed oluşturuldu: {output_path}")


def main():
    """Run the main pipeline.

    Steps: load the config, resolve the channel ID, fetch the channel's
    videos through RSS-Bridge, add unseen videos to the database, process
    pending transcripts, and write the RSS feed. A failed RSS-Bridge fetch
    is reported and ends the run early. The database is closed on every
    exit path.
    """
    print("YouTube Transcript RSS Feed Generator başlatılıyor...")

    # Load the config.
    config = load_config()

    # Resolve the channel ID.
    channel_id = get_channel_id(config)
    print(f"Channel ID: {channel_id}")

    # Start the database.
    db = Database()
    try:
        db.init_database()

        # Fetch videos from RSS-Bridge.
        rss_bridge_config = _section(config, 'rss_bridge')
        print("RSS-Bridge'den videolar çekiliyor...")

        try:
            videos = fetch_videos_from_rss_bridge(
                base_url=rss_bridge_config.get('base_url', 'https://rss-bridge.org/bridge01'),
                channel_id=channel_id,
                format=rss_bridge_config.get('format', 'Atom'),
                max_items=rss_bridge_config.get('max_items', 100)
            )
            print(f"{len(videos)} video bulundu")
        except Exception as e:
            # Top-level boundary: report the failure and stop; the finally
            # clause below still closes the database.
            print(f"Hata: {e}")
            return

        # Add new videos to the database.
        new_count = _store_new_videos(db, videos, channel_id)
        print(f"{new_count} yeni video eklendi")

        # Process pending videos.
        _process_pending_videos(db, _section(config, 'transcript'))

        # Build the RSS feed.
        _write_feed(db, config, channel_id)
    finally:
        # Previously skipped on the early return above and on any exception.
        db.close()

    print("Tamamlandı!")


if __name__ == "__main__":
    main()