"""
Update all activity categories in the database using the latest categorizer rules
This will move Gmail, YouTube, and other non-productive items out of the productive tab
"""

import os
import sys
from sqlalchemy import create_engine, text
from dotenv import load_dotenv

# Add backend to path so we can import activity_categorizer
sys.path.append(os.path.join(os.path.dirname(__file__), 'backend'))
from activity_categorizer import ActivityCategorizer

# Load environment variables
load_dotenv()

# Database connection
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://postgres:asdf1234@localhost:5432/timesheet")
engine = create_engine(DATABASE_URL)

def update_categories():
    """Update all activity categories using the latest categorizer rules"""
    
    # Initialize categorizer with your latest rules
    categorizer = ActivityCategorizer()
    
    print("🔄 Updating all activity categories with latest rules...")
    print("=" * 60)
    
    with engine.connect() as conn:
        # First, let's see current distribution
        print("\n📊 Current category distribution:")
        result = conn.execute(text("""
            SELECT 
                COALESCE(category, 'uncategorized') as category,
                COUNT(*) as count,
                SUM(duration)/3600.0 as hours
            FROM activity_records
            WHERE timestamp >= CURRENT_DATE - INTERVAL '30 days'
            GROUP BY category
            ORDER BY hours DESC
        """))
        
        for row in result:
            print(f"  {row.category}: {row.count} activities ({row.hours:.1f} hours)")
        
        # Get all activities to recategorize
        print("\n🔍 Fetching activities to recategorize...")
        activities = conn.execute(text("""
            SELECT 
                id,
                window_title,
                application_name,
                url
            FROM activity_records
            WHERE timestamp >= CURRENT_DATE - INTERVAL '30 days'
        """))
        
        updates = {
            'productive': 0,
            'browser': 0,
            'server': 0,
            'non-work': 0
        }
        
        # Process in batches
        batch_size = 1000
        batch = []
        
        for activity in activities:
            # Get new category using your categorizer
            category_info = categorizer.get_detailed_category(
                activity.window_title or "",
                activity.application_name or ""
            )
            
            batch.append({
                'id': activity.id,
                'category': category_info['category'],
                'subcategory': category_info['subcategory']
            })
            
            updates[category_info['category']] += 1
            
            # Process batch when it's full
            if len(batch) >= batch_size:
                process_batch(conn, batch)
                batch = []
        
        # Process remaining items
        if batch:
            process_batch(conn, batch)
        
        # Commit all changes
        conn.commit()
        
        print("\n✅ Update completed!")
        print(f"\nActivities updated:")
        for cat, count in updates.items():
            print(f"  {cat}: {count} activities")
        
        # Show new distribution
        print("\n📊 New category distribution:")
        result = conn.execute(text("""
            SELECT 
                category,
                COUNT(*) as count,
                SUM(duration)/3600.0 as hours
            FROM activity_records
            WHERE timestamp >= CURRENT_DATE - INTERVAL '30 days'
            GROUP BY category
            ORDER BY hours DESC
        """))
        
        for row in result:
            print(f"  {row.category}: {row.count} activities ({row.hours:.1f} hours)")
        
        # Show what's now in browser category
        print("\n🌐 Sample activities now in BROWSER category:")
        browser_samples = conn.execute(text("""
            SELECT DISTINCT
                window_title,
                application_name
            FROM activity_records
            WHERE category = 'browser'
            AND timestamp >= CURRENT_DATE - INTERVAL '7 days'
            AND (
                LOWER(window_title) LIKE '%gmail%'
                OR LOWER(window_title) LIKE '%youtube%'
                OR LOWER(window_title) LIKE '%facebook%'
                OR LOWER(window_title) LIKE '%instagram%'
            )
            LIMIT 10
        """))
        
        for row in browser_samples:
            print(f"  • {row.window_title} ({row.application_name})")

def process_batch(conn, batch):
    """Process a batch of updates"""
    for item in batch:
        conn.execute(text("""
            UPDATE activity_records
            SET category = :category,
                subcategory = :subcategory
            WHERE id = :id
        """), item)


if __name__ == "__main__":
    update_categories()
    print("\n🎉 Done! Gmail, YouTube, and other non-productive activities have been moved out of the productive category.")
    print("Restart your dashboard to see the changes.")
