Files
Youtube2Feed/src/video_fetcher.py
salvacybersec abe170a1f8 first commit
2025-11-13 03:25:21 +03:00

106 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
RSS-Bridge kullanarak video metadata çıkarımı
"""
import feedparser
import re
import requests
from urllib.parse import urlencode
from typing import List, Dict, Optional
from datetime import datetime
def get_channel_id_from_handle(handle_url: str, timeout: float = 10.0) -> Optional[str]:
    """
    Resolve a YouTube channel ID by scraping a channel handle page.

    Example: https://www.youtube.com/@tavakfi -> UC...

    Args:
        handle_url: URL of the channel page to download.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The channel ID ("UC" followed by 22 characters) taken from the first
        matching JSON field in the page, or None when no field matches.

    Raises:
        Exception: If the HTTP request fails or returns an error status; the
            underlying requests exception is chained as the cause.
    """
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(handle_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise Exception(f"Error fetching channel page: {e}") from e

    html_content = response.text
    # Try "externalId" first, then fall back to "channelId".
    id_patterns = (
        r'"externalId":"(UC[a-zA-Z0-9_-]{22})"',
        r'"channelId":"(UC[a-zA-Z0-9_-]{22})"',
    )
    for pattern in id_patterns:
        match = re.search(pattern, html_content)
        if match:
            return match.group(1)
    return None
# Compiled once at import time. The first three patterns are the original
# ones and are tried first, so URLs that matched before yield the same ID.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})',
        r'youtu\.be/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
        # "v" is not always the first query parameter (e.g. ?feature=share&v=...).
        r'youtube\.com/watch\?[^#\s]*?&v=([a-zA-Z0-9_-]{11})',
        r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/live/([a-zA-Z0-9_-]{11})',
    )
)


def extract_video_id(url: str) -> Optional[str]:
    """
    Extract the 11-character video ID from a YouTube URL.

    Recognized URL shapes: watch?v=ID (with "v" anywhere in the query
    string), youtu.be/ID, /embed/ID, /shorts/ID and /live/ID.

    Args:
        url: The URL to inspect.

    Returns:
        The video ID, or None if the URL is empty, not a string, or does not
        match any recognized shape.
    """
    # Guard against None/empty input instead of letting re raise TypeError.
    if not url or not isinstance(url, str):
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None
def fetch_videos_from_rss_bridge(base_url: str, channel_id: str,
                                 format: str = "Atom", max_items: int = 100) -> List[Dict]:
    """
    Fetch a channel's video list through an RSS-Bridge instance.

    Args:
        base_url: RSS-Bridge base URL (a trailing slash is tolerated).
        channel_id: YouTube channel ID (UC...).
        format: Feed format (Atom or Rss).
        max_items: Maximum number of feed entries to examine.

    Returns:
        A list of dicts with the keys video_id, video_title, video_url,
        published_at_utc and description. Entries whose link is missing or
        does not contain a recognizable video ID are skipped.

    Raises:
        Exception: If the feed cannot be fetched or parsed; the underlying
            error, when available, is chained as the cause.
    """
    params = {
        'action': 'display',
        'bridge': 'YoutubeBridge',
        'context': 'By channel id',
        'c': channel_id,
        'format': format
    }
    # Strip trailing slashes so "http://host/" does not become "http://host//?...".
    feed_url = f"{base_url.rstrip('/')}/?{urlencode(params)}"

    try:
        feed = feedparser.parse(feed_url)
    except Exception as e:
        raise Exception(f"Error fetching RSS-Bridge feed: {e}") from e

    # feedparser reports network/parse failures via the bozo flag instead of
    # raising; without this check a failed fetch would look like an empty channel.
    if getattr(feed, 'bozo', False) and not feed.entries:
        cause = getattr(feed, 'bozo_exception', None)
        if not isinstance(cause, BaseException):
            cause = None
        raise Exception(f"Error fetching RSS-Bridge feed: {cause}") from cause

    videos = []
    for entry in feed.entries[:max_items]:
        # One malformed entry should be skipped, not abort the whole feed.
        link = getattr(entry, 'link', None)
        video_id = extract_video_id(link) if link else None
        if not video_id:
            continue

        # Date parsing: build an ISO-8601 string from the parsed time tuple.
        published_date = None
        published_parsed = getattr(entry, 'published_parsed', None)
        if published_parsed:
            published_date = datetime(*published_parsed[:6]).isoformat() + 'Z'

        videos.append({
            'video_id': video_id,
            'video_title': getattr(entry, 'title', ''),
            'video_url': link,
            'published_at_utc': published_date,
            'description': getattr(entry, 'summary', '')
        })
    return videos