"""
|
||
RSS-Bridge kullanarak video metadata çıkarımı
|
||
"""
|
||
import feedparser
|
||
import re
|
||
import requests
|
||
from urllib.parse import urlencode
|
||
from typing import List, Dict, Optional
|
||
from datetime import datetime
|
||
|
||
|
||
def get_channel_id_from_handle(handle_url: str) -> Optional[str]:
|
||
"""
|
||
Channel handle URL'inden Channel ID'yi web scraping ile bulur.
|
||
Örnek: https://www.youtube.com/@tavakfi -> UC...
|
||
"""
|
||
try:
|
||
response = requests.get(handle_url)
|
||
response.raise_for_status()
|
||
|
||
html_content = response.text
|
||
|
||
# İlk pattern: "externalId":"UC..."
|
||
match = re.search(r'"externalId":"(UC[a-zA-Z0-9_-]{22})"', html_content)
|
||
if match:
|
||
return match.group(1)
|
||
|
||
# Alternatif pattern: "channelId":"UC..."
|
||
match_alt = re.search(r'"channelId":"(UC[a-zA-Z0-9_-]{22})"', html_content)
|
||
if match_alt:
|
||
return match_alt.group(1)
|
||
|
||
return None
|
||
|
||
except requests.exceptions.RequestException as e:
|
||
raise Exception(f"Error fetching channel page: {e}")
|
||
|
||
|
||
def extract_video_id(url: str) -> Optional[str]:
|
||
"""YouTube URL'den video ID çıkar"""
|
||
patterns = [
|
||
r'youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})',
|
||
r'youtu\.be/([a-zA-Z0-9_-]{11})',
|
||
r'youtube\.com/embed/([a-zA-Z0-9_-]{11})'
|
||
]
|
||
|
||
for pattern in patterns:
|
||
match = re.search(pattern, url)
|
||
if match:
|
||
return match.group(1)
|
||
|
||
return None
|
||
|
||
|
||
def fetch_videos_from_rss_bridge(base_url: str, channel_id: str,
|
||
format: str = "Atom", max_items: int = 100) -> List[Dict]:
|
||
"""
|
||
RSS-Bridge'den video listesini çek
|
||
|
||
Args:
|
||
base_url: RSS-Bridge base URL
|
||
channel_id: YouTube Channel ID (UC...)
|
||
format: Feed format (Atom veya Rss)
|
||
max_items: Maksimum video sayısı
|
||
|
||
Returns:
|
||
Video metadata listesi
|
||
"""
|
||
params = {
|
||
'action': 'display',
|
||
'bridge': 'YoutubeBridge',
|
||
'context': 'By channel id',
|
||
'c': channel_id,
|
||
'format': format
|
||
}
|
||
|
||
feed_url = f"{base_url}/?{urlencode(params)}"
|
||
|
||
try:
|
||
feed = feedparser.parse(feed_url)
|
||
|
||
videos = []
|
||
for entry in feed.entries[:max_items]:
|
||
video_id = extract_video_id(entry.link)
|
||
if not video_id:
|
||
continue
|
||
|
||
# Tarih parsing
|
||
published_date = None
|
||
if hasattr(entry, 'published_parsed') and entry.published_parsed:
|
||
published_date = datetime(*entry.published_parsed[:6]).isoformat() + 'Z'
|
||
|
||
videos.append({
|
||
'video_id': video_id,
|
||
'video_title': entry.title,
|
||
'video_url': entry.link,
|
||
'published_at_utc': published_date,
|
||
'description': getattr(entry, 'summary', '')
|
||
})
|
||
|
||
return videos
|
||
|
||
except Exception as e:
|
||
raise Exception(f"Error fetching RSS-Bridge feed: {e}")
|
||
|
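# A minimal usage sketch, not part of the original module: it assumes a
# self-hosted RSS-Bridge instance reachable at http://localhost:3000 and reuses
# the @tavakfi handle from the docstring above; adjust both to your own setup.
if __name__ == "__main__":
    RSS_BRIDGE_BASE_URL = "http://localhost:3000"  # assumed RSS-Bridge instance URL
    handle_url = "https://www.youtube.com/@tavakfi"

    channel_id = get_channel_id_from_handle(handle_url)
    if channel_id is None:
        raise SystemExit(f"Could not resolve a channel ID for {handle_url}")

    videos = fetch_videos_from_rss_bridge(RSS_BRIDGE_BASE_URL, channel_id, max_items=5)
    for video in videos:
        print(f"{video['published_at_utc']}  {video['video_title']}  ({video['video_url']})")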