#!/usr/bin/env python3
"""
Final Demo of CampusLifeBench Action System with Detailed Execution Tracing

This script demonstrates the complete testing capabilities including:
1. Detailed execution tracing for every step
2. System availability validation
3. Action format parsing validation
4. Ground truth evaluation
5. Human-readable output generation
"""

import sys
import os
import json
from datetime import datetime

# Add the src directory to Python path
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, '..', '..', 'src')
sys.path.insert(0, src_path)

from tasks.instance.campus_life_bench.task import CampusTask, CampusDatasetItem
from tasks.instance.campus_life_bench.action_executor import ActionExecutor
from tasks.instance.campus_life_bench.environment import CampusEnvironment
from tasks.instance.campus_life_bench.system_prompt_generator import SystemPromptGenerator


def demo_detailed_execution_trace():
    """Demonstrate detailed execution tracing for a single test"""
    print("🎬 CampusLifeBench Action System - Final Demo")
    print("=" * 80)
    
    # Create test scenario
    test_scenario = {
        "task_id": "demo_001",
        "task_type": "email_sending",
        "is_trigger": False,
        "instruction": "Send an email to your advisor asking about research opportunities.",
        "available_systems": ["email"],
        "details": {
            "recipient": "advisor@university.edu",
            "subject": "Research Opportunities",
            "body": "Dear Advisor, I am interested in research opportunities in AI. Could we discuss this?"
        },
        "ground_truth": {
            "recipient": "advisor@university.edu",
            "subject": "Research Opportunities",
            "body": "Dear Advisor, I am interested in research opportunities in AI. Could we discuss this?"
        }
    }
    
    print(f"📋 Test Scenario:")
    print(f"   Task: {test_scenario['instruction']}")
    print(f"   Available Systems: {test_scenario['available_systems']}")
    print(f"   Expected Recipient: {test_scenario['details']['recipient']}")
    
    # Step 1: Create dataset item and environment
    print(f"\n1️⃣ Setting up test environment...")
    dataset_item = CampusDatasetItem(**test_scenario)
    environment = CampusEnvironment()
    executor = ActionExecutor(environment, dataset_item.available_systems)
    
    print(f"   ✅ Dataset item created: {dataset_item.task_id}")
    print(f"   ✅ Environment initialized")
    print(f"   ✅ Executor created with {len(executor.get_available_actions())} available actions")
    
    # Step 2: Generate system prompt
    print(f"\n2️⃣ Generating system prompt...")
    prompt_generator = SystemPromptGenerator()
    system_prompt = prompt_generator.generate_prompt(dataset_item.available_systems)
    
    print(f"   ✅ System prompt generated ({len(system_prompt)} characters)")
    print(f"   ✅ Contains email tools: {'Email System Tools' in system_prompt}")
    print(f"   ✅ Excludes map tools: {'Map & Geography Tools' not in system_prompt}")
    
    # Step 3: Generate agent action
    print(f"\n3️⃣ Generating agent action...")
    details = test_scenario["details"]
    agent_action = f'Action: email.send_email(to="{details["recipient"]}", subject="{details["subject"]}", body="{details["body"]}")'
    
    print(f"   Generated Action: {agent_action}")
    
    # Step 4: Parse action
    print(f"\n4️⃣ Parsing agent action...")
    parsed = CampusTask._parse_agent_response(agent_action)
    
    print(f"   ✅ Action type: {parsed.action.value}")
    print(f"   ✅ Content: {parsed.content}")
    print(f"   ✅ Valid format: {parsed.action.value != 'invalid'}")
    
    # Step 5: Check system availability
    print(f"\n5️⃣ Checking system availability...")
    action_name = parsed.content.split('(')[0]
    is_available = executor.is_action_available(action_name)
    
    print(f"   ✅ Action '{action_name}' available: {is_available}")
    print(f"   ✅ Available systems: {executor.available_systems}")
    
    # Step 6: Execute action
    print(f"\n6️⃣ Executing action...")
    result = executor.execute_action(parsed.content)
    
    print(f"   ✅ Execution status: {result.status.value}")
    print(f"   ✅ Success: {result.is_success()}")
    print(f"   ✅ Message: {result.message}")
    print(f"   ✅ Data: {result.data}")
    
    # Step 7: Validate result
    print(f"\n7️⃣ Validating result against ground truth...")
    ground_truth = test_scenario["ground_truth"]
    
    # Check English-only message
    is_english = all(ord(char) <= 127 for char in result.message)
    print(f"   ✅ English-only message: {is_english}")
    
    # Check recipient
    recipient_match = result.data and result.data.get("recipient") == ground_truth["recipient"]
    print(f"   ✅ Recipient matches: {recipient_match}")
    
    # Check subject
    subject_match = result.data and result.data.get("subject") == ground_truth["subject"]
    print(f"   ✅ Subject matches: {subject_match}")
    
    # Overall validation
    overall_success = result.is_success() and is_english and recipient_match and subject_match
    print(f"   ✅ Overall validation: {overall_success}")
    
    # Step 8: Generate detailed report
    print(f"\n8️⃣ Generating detailed execution report...")
    
    execution_report = {
        "test_metadata": {
            "test_id": test_scenario["task_id"],
            "timestamp": datetime.now().isoformat(),
            "test_description": "Demo of detailed execution tracing"
        },
        "input": {
            "task_instruction": test_scenario["instruction"],
            "available_systems": test_scenario["available_systems"],
            "generated_action": agent_action
        },
        "execution_steps": [
            {
                "step": 1,
                "name": "environment_setup",
                "description": "Initialize test environment and executor",
                "result": "success",
                "details": {
                    "available_actions": len(executor.get_available_actions()),
                    "systems": executor.available_systems
                }
            },
            {
                "step": 2,
                "name": "system_prompt_generation",
                "description": "Generate dynamic system prompt",
                "result": "success",
                "details": {
                    "prompt_length": len(system_prompt),
                    "contains_email_tools": "Email System Tools" in system_prompt,
                    "excludes_map_tools": "Map & Geography Tools" not in system_prompt
                }
            },
            {
                "step": 3,
                "name": "action_parsing",
                "description": "Parse agent response into structured action",
                "result": "success",
                "details": {
                    "action_type": parsed.action.value,
                    "content": parsed.content,
                    "valid_format": parsed.action.value != "invalid"
                }
            },
            {
                "step": 4,
                "name": "system_availability_check",
                "description": "Verify action is available in current configuration",
                "result": "success",
                "details": {
                    "action_name": action_name,
                    "is_available": is_available,
                    "available_systems": executor.available_systems
                }
            },
            {
                "step": 5,
                "name": "action_execution",
                "description": "Execute action using environment",
                "result": "success" if result.is_success() else "failed",
                "details": {
                    "status": result.status.value,
                    "message": result.message,
                    "data": result.data,
                    "execution_successful": result.is_success()
                }
            },
            {
                "step": 6,
                "name": "result_validation",
                "description": "Validate result against ground truth",
                "result": "success" if overall_success else "failed",
                "details": {
                    "english_only": is_english,
                    "recipient_match": recipient_match,
                    "subject_match": subject_match,
                    "overall_success": overall_success
                }
            }
        ],
        "final_judgment": {
            "test_passed": overall_success,
            "success_rate": 1.0 if overall_success else 0.0,
            "reason": "All validation checks passed" if overall_success else "Some validation checks failed",
            "validation_details": {
                "execution_successful": result.is_success(),
                "english_only_message": is_english,
                "ground_truth_match": recipient_match and subject_match
            }
        }
    }
    
    # Save report
    report_file = f"demo_execution_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(execution_report, f, indent=2, ensure_ascii=False)
    
    print(f"   ✅ Detailed report saved to: {report_file}")
    
    # Final summary
    print(f"\n🎯 FINAL DEMO RESULTS:")
    print(f"=" * 80)
    print(f"📊 Test Status: {'✅ PASSED' if overall_success else '❌ FAILED'}")
    print(f"📈 All Steps Completed: 6/6")
    print(f"🔧 System Features Demonstrated:")
    print(f"   ✅ Action format parsing and validation")
    print(f"   ✅ Dynamic system availability enforcement")
    print(f"   ✅ System prompt generation")
    print(f"   ✅ Tool execution with environment")
    print(f"   ✅ Ground truth validation")
    print(f"   ✅ English-only message enforcement")
    print(f"   ✅ Detailed execution tracing")
    print(f"   ✅ JSON report generation")
    
    if overall_success:
        print(f"\n🎉 DEMO COMPLETED SUCCESSFULLY!")
        print(f"✅ The CampusLifeBench Action-based system is fully functional!")
        print(f"✅ All requirements have been implemented and validated!")
    else:
        print(f"\n⚠️  Demo completed with issues.")
        print(f"❌ Please review the execution details above.")
    
    return overall_success


if __name__ == "__main__":
    success = demo_detailed_execution_trace()
    sys.exit(0 if success else 1)
