#!/usr/bin/env python3
"""
Data Consistency Fix Script
This script helps fix data inconsistency between desktop app and website
by migrating data to use consistent user IDs.
"""

import os
import sys
from datetime import datetime
from pymongo import MongoClient
from bson import ObjectId

def get_mongodb_connection():
    """Open a MongoDB connection configured through environment variables.

    Reads ``MONGODB_URI`` and ``MONGODB_DB`` from the environment, creates a
    client, and verifies the server is reachable with ``server_info()``.

    Returns:
        A ``(client, db)`` tuple on success, or ``(None, None)`` when the
        configuration is missing or the connection attempt fails. On failure
        any client that was already created is closed before returning.
    """
    mongo_uri = os.getenv('MONGODB_URI')
    mongo_db = os.getenv('MONGODB_DB')

    if not mongo_uri or not mongo_db:
        print("❌ MongoDB configuration not found in environment variables")
        print("Please set MONGODB_URI and MONGODB_DB environment variables")
        return None, None

    # Tracked outside the try so the failure path can release it.
    client = None
    try:
        client = MongoClient(mongo_uri)
        db = client[mongo_db]

        # Test connection: forces a round trip so a bad URI fails here
        # instead of on the first real query.
        client.server_info()
        print(f"✅ Connected to MongoDB: {mongo_db}")
        return client, db

    except Exception as e:
        # Top-level boundary: report any failure and fall back to the
        # (None, None) sentinel that callers check for.
        print(f"❌ Failed to connect to MongoDB: {e}")
        if client is not None:
            # Do not leak the half-initialised client when the ping failed.
            client.close()
        return None, None

def analyze_user_data(db):
    """Count, per collection, how many documents carry each user-ID field.

    Inspects ``test_cases``, ``generated_scripts`` and ``codegen_recordings``,
    prints a short report for each, and returns the same numbers.

    Args:
        db: Database handle exposing the three collections as attributes.

    Returns:
        A dict keyed by collection name; each value maps ``'total'`` and the
        inspected field names to document counts.
    """
    def tally(collection, fields):
        # Total first, then one "$exists" count per field, in the given order.
        stats = {'total': collection.count_documents({})}
        for field in fields:
            stats[field] = collection.count_documents({field: {"$exists": True}})
        return stats

    def report(heading, total_label, stats):
        print(heading)
        print(f"   {total_label}: {stats['total']}")
        for field, count in stats.items():
            if field != 'total':
                print(f"   With '{field}' field: {count}")

    print("\n🔍 Analyzing user data...")

    all_id_fields = ("user_id", "userId", "currentJWTUserId")
    # codegen_recordings is only inspected for the two camelCase fields.
    recording_id_fields = ("userId", "currentJWTUserId")

    test_case_stats = tally(db.test_cases, all_id_fields)
    report("📊 Test Cases Analysis:", "Total test cases", test_case_stats)

    script_stats = tally(db.generated_scripts, all_id_fields)
    report("\n📊 Generated Scripts Analysis:", "Total scripts", script_stats)

    recording_stats = tally(db.codegen_recordings, recording_id_fields)
    report("\n📊 Codegen Recordings Analysis:", "Total recordings", recording_stats)

    return {
        'test_cases': test_case_stats,
        'generated_scripts': script_stats,
        'codegen_recordings': recording_stats,
    }

def find_user_id_mappings(db):
    """Sample test cases that carry two or more of the user-ID fields.

    Queries ``test_cases`` for documents where at least two of ``user_id``,
    ``userId`` and ``currentJWTUserId`` exist, fetching at most 10, and prints
    the ID fields of the first five.

    Args:
        db: Database handle exposing the ``test_cases`` collection.

    Returns:
        The list of fetched documents (at most 10).
    """
    print("\n🔍 Finding user ID mappings...")

    id_fields = ("user_id", "userId", "currentJWTUserId")
    # Every two-field combination; a document matching any pair qualifies.
    field_pairs = (
        ("user_id", "userId"),
        ("user_id", "currentJWTUserId"),
        ("userId", "currentJWTUserId"),
    )
    query = {
        "$or": [
            {first: {"$exists": True}, second: {"$exists": True}}
            for first, second in field_pairs
        ]
    }

    cursor = db.test_cases.find(query).limit(10)
    multi_user_docs = list(cursor)

    print(f"📋 Found {len(multi_user_docs)} documents with multiple user ID fields:")
    # Only the first five are echoed; the full sample is still returned.
    for doc in multi_user_docs[:5]:
        print(f"   Document {doc['_id']}:")
        for field in id_fields:
            if field in doc:
                print(f"     {field}: {doc[field]}")

    return multi_user_docs

def _migrate_collection(collection, id_fields, source_user_id, target_user_id):
    """Point every field in *id_fields* at the target ID on matching documents.

    A document matches when any of *id_fields* equals *source_user_id*.
    Returns the number of documents the server reports as modified.
    """
    result = collection.update_many(
        {"$or": [{field: source_user_id} for field in id_fields]},
        {"$set": {field: target_user_id for field in id_fields}},
    )
    return result.modified_count


def migrate_user_data(db, source_user_id, target_user_id):
    """Migrate data from source user ID to target user ID.

    For ``test_cases`` and ``generated_scripts`` a document is migrated when
    any of ``user_id``, ``userId`` or ``currentJWTUserId`` equals the source
    ID, and all three fields are set to the target ID. For
    ``codegen_recordings`` only ``userId`` and ``currentJWTUserId`` are
    matched and rewritten.

    Args:
        db: Database handle exposing the three collections as attributes.
        source_user_id: ID value to look for. Compared as given.
            NOTE(review): if IDs are stored as ObjectId, a plain string
            will not match — confirm the stored type.
        target_user_id: ID value written to the matched documents.

    Returns:
        A dict with the modified-document counts under the keys
        ``'test_cases'``, ``'scripts'`` and ``'recordings'``.
    """
    print(f"\n🔄 Migrating data from {source_user_id} to {target_user_id}...")

    # "user_id" is part of the match as well as the update: otherwise
    # documents that hold the source ID only in "user_id" are skipped.
    all_id_fields = ("user_id", "userId", "currentJWTUserId")
    # NOTE(review): recordings never get "user_id" written here; if another
    # step adds that field to recordings it will not be migrated — confirm.
    recording_id_fields = ("userId", "currentJWTUserId")

    test_cases_modified = _migrate_collection(
        db.test_cases, all_id_fields, source_user_id, target_user_id
    )
    print(f"✅ Migrated {test_cases_modified} test cases")

    scripts_modified = _migrate_collection(
        db.generated_scripts, all_id_fields, source_user_id, target_user_id
    )
    print(f"✅ Migrated {scripts_modified} generated scripts")

    recordings_modified = _migrate_collection(
        db.codegen_recordings, recording_id_fields, source_user_id, target_user_id
    )
    print(f"✅ Migrated {recordings_modified} codegen recordings")

    return {
        'test_cases': test_cases_modified,
        'scripts': scripts_modified,
        'recordings': recordings_modified
    }

def standardize_user_ids(db):
    """Copy ``userId`` into ``user_id`` wherever ``user_id`` is missing.

    Applies to ``test_cases``, ``generated_scripts`` and
    ``codegen_recordings``. The existing ``userId`` field is left in place,
    so every touched document ends up with both field names.

    The update is an aggregation-pipeline update (a list of stages). In a
    plain update document ``"$userId"`` would be stored as that literal
    string; only in pipeline form is it a reference to the document's own
    ``userId`` value. Pipeline updates need MongoDB 4.2 or newer.

    Args:
        db: Database handle exposing the three collections as attributes.

    Returns:
        A dict with the modified-document counts under the keys
        ``'test_cases'``, ``'scripts'`` and ``'recordings'``.
    """
    print("\n🔧 Standardizing user ID fields...")

    needs_user_id = {"userId": {"$exists": True}, "user_id": {"$exists": False}}
    # List form => pipeline update, so "$userId" is a field reference.
    copy_user_id = [{"$set": {"user_id": "$userId"}}]

    targets = (
        ('test_cases', db.test_cases, "test cases"),
        ('scripts', db.generated_scripts, "generated scripts"),
        ('recordings', db.codegen_recordings, "codegen recordings"),
    )

    modified = {}
    for key, collection, label in targets:
        result = collection.update_many(needs_user_id, copy_user_id)
        modified[key] = result.modified_count
        print(f"✅ Standardized {result.modified_count} {label}")

    return modified

def main():
    """Run the interactive data consistency fix.

    Connects to MongoDB, prints the analysis and a sample of documents with
    multiple ID fields, then asks on stdin whether to standardize the ID
    field names and whether to migrate data between two specific user IDs.

    Returns:
        ``0`` when the run finishes, ``1`` when the connection could not be
        established or an error occurred along the way.
    """
    print("🚀 Starting Data Consistency Fix")
    print("=" * 50)

    # Get MongoDB connection
    client, db = get_mongodb_connection()
    if client is None or db is None:
        return 1

    try:
        # Both calls print their own report; the return values are not needed.
        analyze_user_data(db)
        find_user_id_mappings(db)

        # Ask user if they want to proceed with standardization
        print("\n❓ Do you want to standardize user ID fields? (y/n): ", end="")
        response = input().lower().strip()

        if response == 'y':
            standardize_user_ids(db)
            print("\n✅ User ID standardization completed!")
        else:
            print("⏭️ Skipping user ID standardization")

        # Ask if user wants to migrate specific user IDs
        print("\n❓ Do you want to migrate data between specific user IDs? (y/n): ", end="")
        response = input().lower().strip()

        if response == 'y':
            print("Enter source user ID: ", end="")
            source_id = input().strip()
            print("Enter target user ID: ", end="")
            target_id = input().strip()

            if not source_id or not target_id:
                print("⏭️ Skipping data migration")
            elif source_id == target_id:
                # Nothing would change; avoid a pointless write pass.
                print("⏭️ Source and target user IDs are identical; nothing to migrate")
            else:
                migrate_user_data(db, source_id, target_id)
                print("\n✅ Data migration completed!")
        else:
            print("⏭️ Skipping data migration")

        print("\n🎉 Data consistency fix completed!")
        print("\nNext steps:")
        print("1. Restart your website application")
        print("2. Restart your desktop application")
        print("3. Log in again to trigger data migration")
        print("4. Check if data is now consistent between platforms")
        return 0

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit status.
        print(f"❌ Error during data consistency fix: {e}")
        return 1
    finally:
        # client is guaranteed non-None here (early return above), so close
        # unconditionally instead of relying on object truthiness.
        client.close()

if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect a failed run.
    sys.exit(main())
