# activity_categorizer.py
"""
Four Categories:
1) PRODUCTIVE  → All work-related activities
2) BROWSER     → YouTube, Gmail, social, entertainment, search, ALL MAIL
3) SERVER      → AWS, GCP, Azure, SSH, Docker, monitoring tools
4) NON-WORK    → Untitled, system lock / idle / AFK / screensaver (ActivityWatch)
"""

import re
from typing import Dict, List, Optional, Tuple


class ActivityCategorizer:
    """Classify a window title / application name into one of four categories.

    The categories are ``"non-work"``, ``"browser"``, ``"server"`` and
    ``"productive"``. Classification is a case-insensitive *substring* search
    over ``"<window_title> <app_name>"``; categories are tried in a fixed
    order and anything that matches nothing is treated as productive.

    NOTE: because matching is substring-based, short keywords (for example
    ``"s3"``, ``"iam"`` or ``"x.com"``) also match inside longer, unrelated
    words.

    The keyword lists are plain per-instance lists, so callers can extend
    them after construction.
    """

    def __init__(self):
        """Build the per-instance keyword lists used by the classifier."""

        # 🟥 NON-WORK (Untitled + ActivityWatch Lock / Idle / AFK / Screensaver)
        self.non_work_keywords = [
            # Untitled / empty windows
            "untitled", "new tab", "blank", "empty",

            # Windows lock screen
            "lockapp.exe", "lockapp",
            "lock screen", "sessionlock", "session locked",
            "windows default lock screen",

            # Idle / AFK states
            "idle", "idle-time", "afk", "away",
            "not active", "userinactive",
            "no active window",

            # Screensaver
            "screensaver", "screen saver",
        ]

        # 🟧 BROWSER (non-productive browsing + ALL EMAIL)
        self.browser_keywords = [
            # Email - all variations
            "inbox", "compose", "mail", "email",
            "@firsteconomy.com", "@gmail.com", "@yahoo.com",
            "@outlook.com", "@hotmail.com",
            "first economy mail", "gmail", "yahoo mail", "outlook",
            "protonmail", "thunderbird", "webmail",

            # Video / entertainment
            "youtube", "youtu.be", "netflix", "amazon prime",
            "primevideo", "hotstar", "spotify", "twitch",

            # Social media
            "facebook", "instagram", "snapchat", "tiktok",
            "pinterest", "reddit", "twitter", "x.com",
            "whatsapp", "telegram",

            # Search engines: URL fragments plus the results-page title text
            "google.com/search", "bing.com/search",
            "duckduckgo", "?q=", "&q=", "search?q=",
            "google search",
        ]

        # 🟦 SERVER / DEVOPS
        self.server_keywords = [
            "aws", "ec2", "s3", "lambda", "iam", "cloudwatch",
            "azure", "microsoft azure",
            "gcp", "google cloud", "firebase",
            "digitalocean", "droplet",
            "linode", "vultr",
            "vercel", "netlify",
            "cloudflare", "godaddy", "namecheap",

            # DevOps tools
            "jenkins", "github actions", "gitlab ci", "circleci",
            "docker", "kubernetes", "k8s", "pods", "cluster",

            # Monitoring
            "grafana", "prometheus", "datadog", "new relic", "sentry",

            # Remote access
            "ssh", "rdp", "vnc", "teamviewer", "anydesk", "openvpn",
        ]

        # 🟩 PRODUCTIVE (also the default when nothing else matches).
        # These keywords are only used to stop a bare "@" from being read
        # as an email address (see categorize_activity).
        self.productive_keywords = [
            # IDEs
            "visual studio", "vs code", "vscode", "cursor",
            "intellij", "pycharm", "phpstorm", "webstorm",

            # Coding files
            ".py", ".js", ".ts", ".php", ".jsx", ".tsx",
            ".html", ".css", ".json", ".sql",

            # Local development servers
            "localhost", "127.0.0.1",
            ":3000", ":8000", ":5000", ":4200",

            # Developer tools
            "postman", "insomnia",
            "github.com", "gitlab.com", "bitbucket",
            "stack overflow",

            # AI tools
            "chatgpt", "claude", "bard", "perplexity", "phind",

            # PM tools
            "jira", "notion", "trello", "asana",
            "confluence", "clickup",

            # Design
            "figma", "adobe xd", "photoshop", "illustrator",

            # Dev server tools
            "cpanel", "phpmyadmin", "filezilla",
        ]

    @staticmethod
    def _contains_any(text: str, keywords: List[str]) -> bool:
        """Return True if any of ``keywords`` occurs as a substring of ``text``."""
        return any(keyword in text for keyword in keywords)

    def categorize_activity(
        self,
        window_title: Optional[str],
        app_name: Optional[str] = "",
    ) -> Tuple[str, float]:
        """Return ``(category, confidence)`` for one window.

        Args:
            window_title: Title of the active window. ``None`` is treated
                like an empty title.
            app_name: Name of the owning application. ``None`` is treated
                like an empty string.

        Returns:
            A tuple of the category name (``"non-work"``, ``"browser"``,
            ``"server"`` or ``"productive"``) and a confidence value.
        """
        title = window_title or ""
        app = app_name or ""
        text = f"{title} {app}".lower()

        # An empty or placeholder title is non-work whatever the app is.
        if title.lower().strip() in ("untitled", "new tab", "blank", ""):
            return ("non-work", 1.0)

        # 1) NON-WORK (system lock / idle / screensaver / untitled)
        if self._contains_any(text, self.non_work_keywords):
            return ("non-work", 1.0)

        # 2) BROWSER, explicit mail wording first: always email.
        if "inbox" in text or "mail" in text:
            return ("browser", 1.0)

        # A bare "@" is a weaker hint: it also appears in user@host titles
        # of FTP clients and similar tools. Only read it as email when no
        # productive keyword is present.
        if "@" in text and not self._contains_any(text, self.productive_keywords):
            return ("browser", 1.0)

        if self._contains_any(text, self.browser_keywords):
            return ("browser", 0.95)

        # 3) SERVER
        if self._contains_any(text, self.server_keywords):
            return ("server", 0.95)

        # Remaining mail indicator not covered by the checks above.
        if "first economy" in text:
            return ("browser", 0.95)

        # 4) PRODUCTIVE (default for everything else)
        return ("productive", 0.90)

    def get_detailed_category(
        self,
        window_title: Optional[str],
        app_name: Optional[str] = "",
    ) -> Dict:
        """Return the category plus a finer-grained subcategory for one window.

        Args:
            window_title: Title of the active window (``None`` is treated as
                empty for matching).
            app_name: Name of the owning application (``None`` is treated as
                empty for matching).

        Returns:
            A dict with the keys ``"category"``, ``"subcategory"``,
            ``"confidence"``, ``"window_title"`` and ``"app_name"``. The last
            two echo the arguments exactly as they were passed in.
        """
        category, confidence = self.categorize_activity(window_title, app_name)
        text = f"{window_title or ''} {app_name or ''}".lower()

        if category == "non-work":
            if "untitled" in text:
                sub = "untitled"
            elif "lock" in text:
                sub = "system-lock"
            elif "idle" in text or "afk" in text:
                sub = "idle"
            else:
                sub = "non-work"
        elif category == "browser":
            if "@" in text or "inbox" in text or "mail" in text:
                sub = "email"
            elif "youtube" in text:
                sub = "entertainment"
            elif "?q=" in text or "search" in text:
                sub = "search"
            else:
                sub = "general-browsing"
        elif category == "server":
            if "aws" in text:
                sub = "aws"
            elif "azure" in text:
                sub = "azure"
            elif "gcp" in text:
                sub = "gcp"
            else:
                sub = "server-tools"
        else:  # productive
            if "vscode" in text or "code.exe" in text:
                sub = "coding"
            elif "localhost" in text:
                sub = "dev-server"
            elif "postman" in text:
                sub = "api-testing"
            elif "figma" in text:
                sub = "design"
            else:
                sub = "productive-general"

        return {
            "category": category,
            "subcategory": sub,
            "confidence": confidence,
            "window_title": window_title,
            "app_name": app_name,
        }

    def categorize_batch(self, activities: List[Dict]) -> List[Dict]:
        """Categorize every activity dict and return them in the same order.

        Each dict is read through its ``"window_title"`` and
        ``"application_name"`` keys (missing keys count as empty) and is then
        updated *in place* with the keys produced by
        ``get_detailed_category``. The returned list holds the same dict
        objects that were passed in.
        """
        categorized = []
        for activity in activities:
            info = self.get_detailed_category(
                activity.get("window_title", ""),
                activity.get("application_name", ""),
            )
            # In-place update: callers holding the original dicts see the
            # added category fields.
            activity.update(info)
            categorized.append(activity)

        return categorized


# Demo: run a handful of sample windows through the categorizer and print
# the result. The trailing comments state the category each sample is
# expected to get; nothing is asserted, the output is for manual inspection.
if __name__ == "__main__":
    categorizer = ActivityCategorizer()

    # (window title, application name) pairs
    test_cases = [
        ("Untitled", "Unknown"),  # Should be non-work
        ("Inbox (11,928) - ankita@firsteconomy.com - First Economy Mail", "chrome.exe"),  # Should be browser
        ("YouTube - Google Chrome", "chrome.exe"),  # Should be browser
        ("Gmail - Inbox - Google Chrome", "chrome.exe"),  # Should be browser
        ("claude.ai - Claude", "chrome.exe"),  # Should be productive
        ("Dashboard - WAAREE Admin", "chrome.exe"),  # Should be productive
        ("Timesheet App", "chrome.exe"),  # Should be productive
        ("ChatGPT", "chrome.exe"),  # Should be productive
        ("Stack Overflow - Python question", "chrome.exe"),  # Should be productive
        ("cPanel - Web Hosting Control Panel", "chrome.exe"),  # Should be productive
        ("FileZilla - FTP Client", "filezilla.exe"),  # Should be productive
        ("Google Search: python tutorial", "chrome.exe"),  # Should be browser
        ("Facebook", "chrome.exe"),  # Should be browser
        ("main.py - Visual Studio Code", "code.exe"),  # Should be productive
        ("localhost:3000 - React App", "chrome.exe"),  # Should be productive
        ("Untitled - Notepad", "notepad.exe"),  # Should be non-work
        ("waaree_main - live@waaree.com@103.174.103.109 - FileZilla", "filezilla.exe"),  # Should be productive
    ]

    separator = "-" * 60

    print("Testing categorization:")
    print(separator)
    for title, app in test_cases:
        category_info = categorizer.get_detailed_category(title, app)
        # One print call per sample; sep="\n" puts each field on its own line.
        print(
            f"Title: {title}",
            f"App: {app}",
            f"Category: {category_info['category']} ({category_info['subcategory']})",
            f"Confidence: {category_info['confidence']}",
            separator,
            sep="\n",
        )
