"""Video metadata extraction using RSS-Bridge."""

import re
from datetime import datetime
from typing import Dict, List, Optional
from urllib.parse import urlencode

import feedparser
import requests

# Keys under which a channel page embeds the canonical "UC..." channel ID,
# tried in order of preference.
_CHANNEL_ID_PATTERNS = tuple(
    re.compile(rf'"{key}":"(UC[a-zA-Z0-9_-]{{22}})"')
    for key in ("externalId", "channelId")
)

# URL shapes that carry an 11-character video ID, tried in order.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # "v" may be the first query parameter or follow other parameters.
        r'youtube\.com/watch\?(?:[^#]*&)?v=([a-zA-Z0-9_-]{11})',
        r'youtu\.be/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/live/([a-zA-Z0-9_-]{11})',
    )
)


def get_channel_id_from_handle(handle_url: str, timeout: float = 10.0) -> Optional[str]:
    """Find the channel ID for a channel handle URL by scraping its page.

    Example: https://www.youtube.com/@tavakfi -> UC...

    Args:
        handle_url: URL of the channel page to download.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The channel ID (``UC`` followed by 22 characters), or None when the
        page contains neither an ``externalId`` nor a ``channelId`` entry.

    Raises:
        Exception: If the page cannot be downloaded (connection error,
            timeout, or an HTTP error status).
    """
    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(handle_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise Exception(f"Error fetching channel page: {e}") from e

    html_content = response.text
    for pattern in _CHANNEL_ID_PATTERNS:
        match = pattern.search(html_content)
        if match:
            return match.group(1)
    return None


def extract_video_id(url: str) -> Optional[str]:
    """Extract the video ID from a YouTube URL.

    Recognizes watch URLs (with ``v`` anywhere in the query string),
    ``youtu.be`` short links, and ``/embed/``, ``/shorts/`` and ``/live/``
    paths.

    Args:
        url: The URL to inspect.

    Returns:
        The 11-character video ID, or None if ``url`` is empty or matches
        none of the known URL shapes.
    """
    if not url:
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None


def fetch_videos_from_rss_bridge(base_url: str, channel_id: str, format: str = "Atom", max_items: int = 100) -> List[Dict]:
    """Fetch the video list of a channel from RSS-Bridge.

    Args:
        base_url: RSS-Bridge base URL (a trailing slash is tolerated).
        channel_id: YouTube channel ID (UC...).
        format: Feed format (Atom or Rss). The name shadows the builtin but
            is kept so existing keyword callers keep working.
        max_items: Maximum number of feed entries to inspect; values below
            zero are treated as zero.

    Returns:
        A list of dicts with the keys ``video_id``, ``video_title``,
        ``video_url``, ``published_at_utc`` (ISO string with a ``Z`` suffix,
        or None when the entry has no parsed publication date) and
        ``description``. Entries whose link yields no video ID are skipped.

    Raises:
        Exception: If the feed cannot be retrieved or parsed, or an entry
            cannot be processed.
    """
    params = {
        'action': 'display',
        'bridge': 'YoutubeBridge',
        'context': 'By channel id',
        'c': channel_id,
        'format': format
    }
    # Strip a trailing slash so the URL does not end up containing "//?".
    feed_url = f"{base_url.rstrip('/')}/?{urlencode(params)}"
    # A negative slice bound would silently drop entries from the end.
    limit = max(max_items, 0)

    try:
        feed = feedparser.parse(feed_url)

        # feedparser signals download/parse failures through the "bozo"
        # flag instead of raising; surface them rather than returning [].
        if feed.get('bozo') and not feed.entries:
            cause = feed.get('bozo_exception')
            if not isinstance(cause, Exception):
                cause = ValueError("feed could not be retrieved or parsed")
            raise cause

        videos = []
        for entry in feed.entries[:limit]:
            link = getattr(entry, 'link', None)
            video_id = extract_video_id(link)
            if not video_id:
                continue

            # Date parsing: only the first six struct_time fields
            # (year..second) are valid datetime() arguments.
            published_date = None
            published_parsed = getattr(entry, 'published_parsed', None)
            if published_parsed:
                published_date = datetime(*published_parsed[:6]).isoformat() + 'Z'

            videos.append({
                'video_id': video_id,
                'video_title': getattr(entry, 'title', ''),
                'video_url': link,
                'published_at_utc': published_date,
                'description': getattr(entry, 'summary', '')
            })
        return videos
    except Exception as e:
        raise Exception(f"Error fetching RSS-Bridge feed: {e}") from e