#!/usr/bin/env python3
"""
Debug User Data Script
This script helps debug user data inconsistencies between the desktop app and the website.
"""

import os
import sys
from datetime import datetime
from pymongo import MongoClient
from bson import ObjectId

def get_mongodb_connection():
    """Open a MongoDB connection described by environment variables.

    Reads ``MONGODB_URI`` and ``MONGODB_DB`` from the environment, creates a
    client, and verifies the server is reachable with ``server_info()``.

    Returns:
        A ``(client, db)`` tuple on success, or ``(None, None)`` when either
        variable is missing/empty or the connection attempt fails. Failures
        are reported on stdout rather than raised.
    """
    mongo_uri = os.getenv('MONGODB_URI')
    mongo_db = os.getenv('MONGODB_DB')

    if not mongo_uri or not mongo_db:
        print("❌ MongoDB configuration not found in environment variables")
        print("Please set MONGODB_URI and MONGODB_DB environment variables")
        return None, None

    client = None
    try:
        client = MongoClient(mongo_uri)
        db = client[mongo_db]

        # Test connection
        client.server_info()
        print(f"✅ Connected to MongoDB: {mongo_db}")
        return client, db

    except Exception as e:
        print(f"❌ Failed to connect to MongoDB: {e}")
        # The client may already exist when the connectivity check fails;
        # close it so it is not leaked when we report failure to the caller.
        if client is not None:
            client.close()
        return None, None

def debug_codegen_recordings(db):
    """Print recent codegen recordings and bucket them by user ID.

    Fetches up to 20 documents from ``db.codegen_recordings`` ordered by
    ``date`` descending, prints the three user-ID fields of each, then prints
    a per-user tally with up to three sample recordings per user.

    Returns:
        ``(recordings, groups)``: the fetched documents as a list, and a dict
        mapping each user ID to its recordings. The ID is the first truthy
        value among ``userId``, ``currentJWTUserId`` and ``user_id``;
        documents with none of them are omitted from the dict.
    """
    print("\n🔍 Debugging Codegen Recordings...")

    # Only the fields needed for the report are requested.
    projection = {
        "_id": 1, "name": 1, "userId": 1, "currentJWTUserId": 1, "user_id": 1, "date": 1
    }
    cursor = db.codegen_recordings.find({}, projection).sort("date", -1).limit(20)
    all_recordings = list(cursor)

    print(f"📊 Found {len(all_recordings)} recent recordings:")

    for position, doc in enumerate(all_recordings, 1):
        print(f"\n{position}. Recording: {doc.get('name', 'Unknown')}")
        print(f"   ID: {doc['_id']}")
        print(f"   Date: {doc.get('date', 'Unknown')}")
        for field in ('userId', 'currentJWTUserId', 'user_id'):
            print(f"   {field}: {doc.get(field, 'None')}")

    # Bucket the documents by whichever user-ID field is populated first.
    user_groups = {}
    for doc in all_recordings:
        owner = doc.get('userId') or doc.get('currentJWTUserId') or doc.get('user_id')
        if not owner:
            continue
        user_groups.setdefault(owner, []).append(doc)

    print(f"\n📊 Recordings grouped by user ID:")
    for owner, owned in user_groups.items():
        print(f"   User ID: {owner} - {len(owned)} recordings")
        for sample in owned[:3]:  # Show first 3
            print(f"     - {sample.get('name', 'Unknown')} ({sample.get('date', 'Unknown')})")

    return all_recordings, user_groups

def debug_test_cases(db):
    """Print recent test cases and bucket them by user ID.

    Fetches up to 20 documents from ``db.test_cases`` ordered by
    ``created_at`` descending, prints the three user-ID fields of each, then
    prints how many test cases each user ID owns.

    Returns:
        ``(test_cases, groups)``: the fetched documents as a list, and a dict
        mapping each user ID to its test cases. The ID is the first truthy
        value among ``user_id``, ``userId`` and ``currentJWTUserId``;
        documents with none of them are omitted from the dict.
    """
    print("\n🔍 Debugging Test Cases...")

    # Only the fields needed for the report are requested.
    projection = {
        "_id": 1, "source_type": 1, "user_id": 1, "userId": 1, "currentJWTUserId": 1, "created_at": 1
    }
    cursor = db.test_cases.find({}, projection).sort("created_at", -1).limit(20)
    all_test_cases = list(cursor)

    print(f"📊 Found {len(all_test_cases)} recent test cases:")

    for position, doc in enumerate(all_test_cases, 1):
        print(f"\n{position}. Test Case: {doc.get('_id', 'Unknown')}")
        print(f"   Source Type: {doc.get('source_type', 'Unknown')}")
        print(f"   Created: {doc.get('created_at', 'Unknown')}")
        for field in ('user_id', 'userId', 'currentJWTUserId'):
            print(f"   {field}: {doc.get(field, 'None')}")

    # Bucket the documents by whichever user-ID field is populated first.
    user_groups = {}
    for doc in all_test_cases:
        owner = doc.get('user_id') or doc.get('userId') or doc.get('currentJWTUserId')
        if not owner:
            continue
        user_groups.setdefault(owner, []).append(doc)

    print(f"\n📊 Test cases grouped by user ID:")
    for owner, owned in user_groups.items():
        print(f"   User ID: {owner} - {len(owned)} test cases")

    return all_test_cases, user_groups

def debug_generated_scripts(db):
    """Print recent generated scripts and bucket them by user ID.

    Fetches up to 20 documents from ``db.generated_scripts`` ordered by
    ``created_at`` descending, prints the three user-ID fields of each, then
    prints how many scripts each user ID owns.

    Returns:
        ``(scripts, groups)``: the fetched documents as a list, and a dict
        mapping each user ID to its scripts. The ID is the first truthy value
        among ``user_id``, ``userId`` and ``currentJWTUserId``; documents
        with none of them are omitted from the dict.
    """
    print("\n🔍 Debugging Generated Scripts...")

    # Only the fields needed for the report are requested.
    projection = {
        "_id": 1, "source_type": 1, "user_id": 1, "userId": 1, "currentJWTUserId": 1, "created_at": 1
    }
    cursor = db.generated_scripts.find({}, projection).sort("created_at", -1).limit(20)
    all_scripts = list(cursor)

    print(f"📊 Found {len(all_scripts)} recent generated scripts:")

    for position, doc in enumerate(all_scripts, 1):
        print(f"\n{position}. Script: {doc.get('_id', 'Unknown')}")
        print(f"   Source Type: {doc.get('source_type', 'Unknown')}")
        print(f"   Created: {doc.get('created_at', 'Unknown')}")
        for field in ('user_id', 'userId', 'currentJWTUserId'):
            print(f"   {field}: {doc.get(field, 'None')}")

    # Bucket the documents by whichever user-ID field is populated first.
    user_groups = {}
    for doc in all_scripts:
        owner = doc.get('user_id') or doc.get('userId') or doc.get('currentJWTUserId')
        if not owner:
            continue
        user_groups.setdefault(owner, []).append(doc)

    print(f"\n📊 Generated scripts grouped by user ID:")
    for owner, owned in user_groups.items():
        print(f"   User ID: {owner} - {len(owned)} scripts")

    return all_scripts, user_groups

def main():
    """Run every collection report and print a cross-collection summary.

    Obtains a connection from ``get_mongodb_connection()`` and returns
    immediately if none is available. Otherwise runs the three ``debug_*``
    reports, prints per-user counts across all collections followed by a
    fixed list of recommendations, and closes the client whether or not the
    reports succeeded. Errors raised while reporting are printed, not raised.
    """
    print("🚀 Starting User Data Debug")
    print("=" * 50)

    # Get MongoDB connection
    client, db = get_mongodb_connection()
    if client is None or db is None:
        return

    try:
        # Debug each collection; only the per-user groupings are needed here.
        _, recording_groups = debug_codegen_recordings(db)
        _, test_case_groups = debug_test_cases(db)
        _, script_groups = debug_generated_scripts(db)

        # Summary
        print("\n" + "=" * 50)
        print("📋 SUMMARY")
        print("=" * 50)

        all_user_ids = set(recording_groups) | set(test_case_groups) | set(script_groups)

        print(f"🔍 Found {len(all_user_ids)} unique user IDs across all collections:")
        # The IDs are whatever values the documents stored. If their types
        # differ (e.g. str vs. bson ObjectId), a plain sorted() raises
        # TypeError and the summary is lost, so order by string form instead.
        for user_id in sorted(all_user_ids, key=str):
            recordings_count = len(recording_groups.get(user_id, []))
            test_cases_count = len(test_case_groups.get(user_id, []))
            scripts_count = len(script_groups.get(user_id, []))

            print(f"   User ID: {user_id}")
            print(f"     - Recordings: {recordings_count}")
            print(f"     - Test Cases: {test_cases_count}")
            print(f"     - Scripts: {scripts_count}")

        print("\n💡 RECOMMENDATIONS:")
        print("1. If you see multiple user IDs, you may need to migrate data between them")
        print("2. Run the fix_user_data_consistency.py script to standardize user IDs")
        print("3. Check if the website and desktop app are using different user IDs")
        print("4. Ensure both applications are using the same authentication system")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fall through
        # to cleanup rather than crash with a traceback.
        print(f"❌ Error during debug: {e}")
    finally:
        # client is guaranteed non-None here (checked before the try).
        client.close()

# Run the debug report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
