"""
Verify that Gmail, YouTube, and other non-productive items are NOT in the productive category
"""

import os
from sqlalchemy import create_engine, text
from dotenv import load_dotenv

load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://postgres:asdf1234@localhost:5432/timesheet")
engine = create_engine(DATABASE_URL)

def check_productive_category():
    print("🔍 Checking PRODUCTIVE category for non-productive items...")
    print("=" * 70)
    
    with engine.connect() as conn:
        # Check for Gmail in productive
        print("\n📧 Checking for Gmail in productive category:")
        gmail_in_productive = conn.execute(text("""
            SELECT COUNT(*) as count, SUM(duration)/3600.0 as hours
            FROM activity_records
            WHERE category = 'productive'
            AND (
                LOWER(window_title) LIKE '%gmail%'
                OR LOWER(url) LIKE '%mail.google.com%'
            )
            AND timestamp >= CURRENT_DATE - INTERVAL '30 days'
        """)).fetchone()
        
        if gmail_in_productive.count > 0:
            print(f"  ❌ Found {gmail_in_productive.count} Gmail activities in productive ({gmail_in_productive.hours:.1f} hours)")
            print("     These should be in BROWSER category!")
        else:
            print("  ✅ No Gmail found in productive category")
        
        # Check for YouTube in productive
        print("\n📺 Checking for YouTube in productive category:")
        youtube_in_productive = conn.execute(text("""
            SELECT COUNT(*) as count, SUM(duration)/3600.0 as hours
            FROM activity_records
            WHERE category = 'productive'
            AND (
                LOWER(window_title) LIKE '%youtube%'
                OR LOWER(url) LIKE '%youtube.com%'
                OR LOWER(url) LIKE '%youtu.be%'
            )
            AND timestamp >= CURRENT_DATE - INTERVAL '30 days'
        """)).fetchone()
        
        if youtube_in_productive.count > 0:
            print(f"  ❌ Found {youtube_in_productive.count} YouTube activities in productive ({youtube_in_productive.hours:.1f} hours)")
            print("     These should be in BROWSER category!")
        else:
            print("  ✅ No YouTube found in productive category")
        
        # Check for social media in productive
        print("\n💬 Checking for social media in productive category:")
        social_in_productive = conn.execute(text("""
            SELECT COUNT(*) as count, SUM(duration)/3600.0 as hours
            FROM activity_records
            WHERE category = 'productive'
            AND (
                LOWER(window_title) LIKE '%facebook%'
                OR LOWER(window_title) LIKE '%instagram%'
                OR LOWER(window_title) LIKE '%twitter%'
                OR LOWER(window_title) LIKE '%reddit%'
                OR LOWER(window_title) LIKE '%whatsapp%'
            )
            AND timestamp >= CURRENT_DATE - INTERVAL '30 days'
        """)).fetchone()
        
        if social_in_productive.count > 0:
            print(f"  ❌ Found {social_in_productive.count} social media activities in productive ({social_in_productive.hours:.1f} hours)")
            print("     These should be in BROWSER category!")
        else:
            print("  ✅ No social media found in productive category")
        
        # Show what IS in productive category
        print("\n✅ Top 20 activities correctly in PRODUCTIVE category:")
        productive_activities = conn.execute(text("""
            SELECT 
                window_title,
                application_name,
                COUNT(*) as count,
                SUM(duration)/3600.0 as hours
            FROM activity_records
            WHERE category = 'productive'
            AND timestamp >= CURRENT_DATE - INTERVAL '7 days'
            GROUP BY window_title, application_name
            ORDER BY hours DESC
            LIMIT 20
        """))
        
        for i, row in enumerate(productive_activities, 1):
            print(f"  {i}. {row.window_title[:60]} ({row.application_name}) - {row.hours:.1f}h")
        
        # Show what's in browser category
        print("\n🌐 Top 10 activities in BROWSER category (should include Gmail/YouTube):")
        browser_activities = conn.execute(text("""
            SELECT 
                window_title,
                application_name,
                COUNT(*) as count,
                SUM(duration)/3600.0 as hours
            FROM activity_records
            WHERE category = 'browser'
            AND timestamp >= CURRENT_DATE - INTERVAL '7 days'
            GROUP BY window_title, application_name
            ORDER BY hours DESC
            LIMIT 10
        """))
        
        for i, row in enumerate(browser_activities, 1):
            print(f"  {i}. {row.window_title[:60]} ({row.application_name}) - {row.hours:.1f}h")
        
        # Summary
        print("\n📊 Category distribution (last 7 days):")
        distribution = conn.execute(text("""
            SELECT 
                category,
                COUNT(*) as activities,
                COUNT(DISTINCT DATE(timestamp)) as days,
                SUM(duration)/3600.0 as total_hours,
                ROUND(100.0 * SUM(duration) / SUM(SUM(duration)) OVER (), 1) as percentage
            FROM activity_records
            WHERE timestamp >= CURRENT_DATE - INTERVAL '7 days'
            GROUP BY category
            ORDER BY total_hours DESC
        """))
        
        for row in distribution:
            print(f"  {row.category}: {row.total_hours:.1f}h ({row.percentage}%) - {row.activities} activities")


if __name__ == "__main__":
    check_productive_category()
    print("\n💡 If you found non-productive items in the productive category, run:")
    print("   python update_all_categories.py")
