From 01f9cfc8b22f16fee4df86add7997a5f90e4622f Mon Sep 17 00:00:00 2001
From: salvacybersec <salva@opsecti.local>
Date: Thu, 13 Nov 2025 05:16:12 +0300
Subject: [PATCH] Replace print() calls with structured logging

---
 app.py                      | 21 ++++++++++++
 src/database.py             |  6 +++-
 src/transcript_extractor.py | 26 ++++++++++++---
 src/web_server.py           | 66 ++++++++++++++++++++++++++++++++-----
 4 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/app.py b/app.py
index 72e675b..8ac7cab 100644
--- a/app.py
+++ b/app.py
@@ -2,8 +2,29 @@
 """
 Flask Web Server - RSS-Bridge benzeri URL template sistemi
 """
+import logging
+import sys
+from datetime import datetime
+
+# Logging konfigürasyonu
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
+    handlers=[
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+
+# Flask ve werkzeug loglarını azalt
+logging.getLogger('werkzeug').setLevel(logging.WARNING)
+logging.getLogger('flask').setLevel(logging.WARNING)
+
 from src.web_server import app
 
 if __name__ == '__main__':
+    logging.info("=" * 60)
+    logging.info("YouTube Transcript RSS Feed Generator başlatılıyor...")
+    logging.info("=" * 60)
     app.run(host='0.0.0.0', port=5000, debug=False)
 
diff --git a/src/database.py b/src/database.py
index afa9812..22d57f2 100644
--- a/src/database.py
+++ b/src/database.py
@@ -5,9 +5,13 @@ import sqlite3
 import os
 import re
 import threading
+import logging
 from datetime import datetime, timezone
 from typing import Optional, List, Dict
 
+# Logger oluştur
+logger = logging.getLogger(__name__)
+
 
 class Database:
     """SQLite veritabanı yönetim sınıfı (thread-safe)"""
@@ -91,7 +95,7 @@ class Database:
         """)
         
         conn.commit()
-        print("Database initialized successfully")
+        logger.info("[DATABASE] ✅ Veritabanı başarıyla initialize edildi")
     
     def close(self):
         """Veritabanı bağlantısını kapat (thread-safe)"""
diff --git a/src/transcript_extractor.py b/src/transcript_extractor.py
index f6bea7a..f3399fa 100644
--- a/src/transcript_extractor.py
+++ b/src/transcript_extractor.py
@@ -4,6 +4,10 @@ YouTube transcript çıkarımı modülü
 from youtube_transcript_api import YouTubeTranscriptApi
 from typing import List, Dict, Optional
 import time
+import logging
+
+# Logger oluştur
+logger = logging.getLogger(__name__)
 
 
 class TranscriptExtractor:
@@ -27,7 +31,7 @@ class TranscriptExtractor:
         # Eğer son 5 dakikada IP blocking hatası aldıysak, daha uzun bekle
         if self.last_blocked_time > 0 and (now - self.last_blocked_time) < 300:
             wait_time = 60  # 1 dakika bekle
-            print(f"IP blocking sonrası bekleme: {wait_time} saniye")
+            logger.warning(f"[RATE_LIMIT] IP blocking sonrası bekleme: {wait_time} saniye (Son blocking: {int(now - self.last_blocked_time)} saniye önce)")
             time.sleep(wait_time)
             self.last_blocked_time = 0  # Reset
         
@@ -38,7 +42,7 @@ class TranscriptExtractor:
         if len(self.request_times) >= self.rate_limit:
             sleep_time = self.time_window - (now - self.request_times[0])
             if sleep_time > 0:
-                print(f"Rate limit: {sleep_time:.1f} saniye bekleniyor...")
+                logger.info(f"[RATE_LIMIT] Rate limit aşıldı ({len(self.request_times)}/{self.rate_limit} istek), {sleep_time:.1f} saniye bekleniyor...")
                 time.sleep(sleep_time)
                 # Tekrar filtrele
                 now = time.time()
@@ -50,10 +54,12 @@ class TranscriptExtractor:
             min_interval = 3  # Minimum 3 saniye
             if time_since_last < min_interval:
                 sleep_time = min_interval - time_since_last
+                logger.debug(f"[RATE_LIMIT] Minimum interval bekleme: {sleep_time:.2f} saniye (Son istek: {time_since_last:.2f} saniye önce)")
                 time.sleep(sleep_time)
         
         # İstek zamanını kaydet
         self.request_times.append(time.time())
+        logger.debug(f"[RATE_LIMIT] İstek kaydedildi (Toplam aktif istek: {len(self.request_times)})")
     
     def fetch_transcript(self, video_id: str, 
                         languages: List[str] = ['en']) -> Optional[List[Dict]]:
@@ -67,10 +73,14 @@ class TranscriptExtractor:
         Returns:
             Transcript listesi veya None
         """
+        logger.info(f"[TRANSCRIPT] Video {video_id} için transcript çıkarımı başlatılıyor (Diller: {languages})")
+        
         # Rate limiting kontrolü
         self._check_rate_limit()
         
         try:
+            logger.debug(f"[TRANSCRIPT] YouTube Transcript API çağrısı yapılıyor: video_id={video_id}")
+            
             # YouTube Transcript API kullanımı (yeni versiyon)
             # API instance oluştur ve fetch() metodunu kullan
             api = YouTubeTranscriptApi()
@@ -80,15 +90,21 @@ class TranscriptExtractor:
             # Format: [{'text': '...', 'start': 1.36, 'duration': 1.68}, ...]
             transcript = fetched_transcript.to_raw_data()
             
+            transcript_count = len(transcript) if transcript else 0
+            logger.info(f"[TRANSCRIPT] ✅ Video {video_id} transcript'i başarıyla çıkarıldı ({transcript_count} segment)")
+            
             return transcript
         except Exception as e:
             error_msg = str(e)
-            print(f"Error fetching transcript for {video_id}: {error_msg}")
+            error_type = type(e).__name__
+            
+            logger.error(f"[TRANSCRIPT] ❌ Video {video_id} transcript çıkarımı başarısız: {error_type} - {error_msg[:200]}")
             
             # IP blocking hatası tespit edilirse işaretle
-            if "blocking" in error_msg.lower() or "blocked" in error_msg.lower():
+            if error_type in ("IpBlocked", "RequestBlocked") or "blocking" in error_msg.lower() or "blocked" in error_msg.lower():  # match block errors by class name; a bare "IP" substring can hit unrelated text (e.g. a video ID in the message)
                 self.last_blocked_time = time.time()
-                print(f"IP blocking tespit edildi, sonraki isteklerde daha uzun bekleme yapılacak")
+                logger.warning(f"[TRANSCRIPT] 🚫 IP blocking tespit edildi! Video: {video_id}, Sonraki isteklerde 60 saniye bekleme yapılacak")
+                logger.warning(f"[TRANSCRIPT] IP blocking detayları: {error_msg[:500]}")
             
             return None
 
diff --git a/src/web_server.py b/src/web_server.py
index e161b3b..bacf309 100644
--- a/src/web_server.py
+++ b/src/web_server.py
@@ -7,8 +7,12 @@ import sys
 import os
 import yaml
 import time
+import logging
 from pathlib import Path
 
+# Logger oluştur
+logger = logging.getLogger(__name__)
+
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
 from src.database import Database
@@ -180,6 +184,8 @@ def process_channel(channel_id: str, max_items: int = 50) -> dict:
     # RSS-Bridge'den videoları çek (max_items'ın 2 katı kadar çek, böylece yeterli video olur)
     # RSS-Bridge'den daha fazla video çekiyoruz çünkü bazıları transcript'siz olabilir
     rss_bridge_limit = max(max_items * 2, 50)  # En az 50 video çek
+    logger.info(f"[PROCESS] Channel {channel_id} için RSS-Bridge'den video listesi çekiliyor (limit: {rss_bridge_limit})")
+    
     try:
         videos = fetch_videos_from_rss_bridge(
             base_url="https://rss-bridge.org/bridge01",
@@ -187,14 +193,23 @@ def process_channel(channel_id: str, max_items: int = 50) -> dict:
             format="Atom",
             max_items=rss_bridge_limit
         )
+        logger.info(f"[PROCESS] RSS-Bridge'den {len(videos)} video alındı")
     except Exception as e:
+        logger.error(f"[PROCESS] ❌ RSS-Bridge hatası: {type(e).__name__} - {str(e)}")
         raise Exception(f"RSS-Bridge hatası: {e}")
     
     # Yeni videoları veritabanına ekle
+    new_videos_count = 0
     for video in videos:
         video['channel_id'] = channel_id
         if not db.is_video_processed(video['video_id']):
             db.add_video(video)
+            new_videos_count += 1
+    
+    if new_videos_count > 0:
+        logger.info(f"[PROCESS] {new_videos_count} yeni video veritabanına eklendi")
+    else:
+        logger.debug(f"[PROCESS] Tüm videolar zaten veritabanında")
     
     # Bekleyen videoları işle (max_items kadar, 20'şer batch'ler halinde)
     # YouTube IP blocking'i önlemek için her batch'te 20 video işlenir
@@ -204,51 +219,82 @@ def process_channel(channel_id: str, max_items: int = 50) -> dict:
     
     # Tüm bekleyen videoları al (channel_id'ye göre filtrele)
     all_pending_videos = [v for v in db.get_pending_videos() if v['channel_id'] == channel_id]
+    logger.info(f"[PROCESS] Channel {channel_id} için {len(all_pending_videos)} bekleyen video bulundu (max_items: {max_items})")
     
     # max_items kadar transcript işlenene kadar batch'ler halinde işle
+    total_batches = (len(all_pending_videos) + batch_size - 1) // batch_size
+    current_batch = 0
+    
     for batch_start in range(0, len(all_pending_videos), batch_size):
         if processed_count >= max_items:
+            logger.info(f"[PROCESS] Maksimum transcript sayısına ulaşıldı ({processed_count}/{max_items})")
             break
-            
+        
+        current_batch += 1
         batch_videos = all_pending_videos[batch_start:batch_start + batch_size]
+        logger.info(f"[BATCH] Batch {current_batch}/{total_batches} başlatılıyor ({len(batch_videos)} video, Toplam işlenen: {processed_count}/{max_items})")
+        
+        batch_processed = 0
+        batch_cached = 0
+        batch_failed = 0
         
         for video in batch_videos:
             if processed_count >= max_items:
                 break
             
+            video_id = video['video_id']
+            video_title = (video.get('video_title') or 'N/A')[:50]  # title may be present but None (NULL); slicing None would raise outside the try block
+            
             # Cache kontrolü: 3 gün içinde işlenmiş transcript varsa atla
-            if db.is_transcript_cached(video['video_id'], cache_days=3):
-                print(f"Video {video['video_id']} transcript'i cache'de (3 gün içinde işlenmiş), atlanıyor")
+            if db.is_transcript_cached(video_id, cache_days=3):
+                logger.debug(f"[CACHE] Video {video_id} ({video_title}) transcript'i cache'de, atlanıyor")
+                batch_cached += 1
                 continue
                 
             try:
+                logger.info(f"[VIDEO] Video işleniyor: {video_id} - {video_title}")
+                
                 # Transcript çıkar
                 transcript = extractor.fetch_transcript(
-                    video['video_id'],
+                    video_id,
                     languages=['tr', 'en']
                 )
                 
                 if transcript:
                     # Transcript temizle
+                    logger.debug(f"[VIDEO] Video {video_id} transcript'i temizleniyor...")
                     raw, clean = cleaner.clean_transcript(transcript, sentences_per_paragraph=3)
                     
                     # Veritabanına kaydet (her batch hemen kaydedilir)
                     db.update_video_transcript(
-                        video['video_id'],
+                        video_id,
                         raw,
                         clean,
                         status=1,
                         language='tr'
                     )
                     processed_count += 1
-                    print(f"Video {video['video_id']} transcript'i işlendi ve veritabanına kaydedildi ({processed_count}/{max_items})")
+                    batch_processed += 1
+                    logger.info(f"[VIDEO] ✅ Video {video_id} başarıyla işlendi ve kaydedildi ({processed_count}/{max_items})")
+                else:
+                    logger.warning(f"[VIDEO] ⚠️ Video {video_id} transcript'i alınamadı (None döndü)")
+                    batch_failed += 1
+                    db.mark_video_failed(video_id, "Transcript None döndü")
             except Exception as e:
-                print(f"Transcript çıkarım hatası {video['video_id']}: {e}")
-                db.mark_video_failed(video['video_id'], str(e))
+                error_type = type(e).__name__
+                error_msg = str(e)[:200]
+                logger.error(f"[VIDEO] ❌ Video {video_id} işleme hatası: {error_type} - {error_msg}")
+                db.mark_video_failed(video_id, str(e))
+                batch_failed += 1
+        
+        # Batch özeti
+        logger.info(f"[BATCH] Batch {current_batch}/{total_batches} tamamlandı - İşlenen: {batch_processed}, Cache: {batch_cached}, Başarısız: {batch_failed}")
         
         # Batch tamamlandı, kısa bir bekleme (rate limiting için)
         if processed_count < max_items and batch_start + batch_size < len(all_pending_videos):
-            time.sleep(2)  # Batch'ler arası 2 saniye bekleme
+            wait_time = 2
+            logger.debug(f"[BATCH] Batch'ler arası bekleme: {wait_time} saniye")
+            time.sleep(wait_time)
     
     # İşlenmiş videoları getir
     processed_videos = db.get_processed_videos(
@@ -256,6 +302,8 @@ def process_channel(channel_id: str, max_items: int = 50) -> dict:
         channel_id=channel_id
     )
     
+    logger.info(f"[PROCESS] ✅ Channel {channel_id} işleme tamamlandı - {len(processed_videos)} işlenmiş video döndürülüyor")
+    
     return {
         'videos': processed_videos,
         'channel_id': channel_id,