
import json
from typing import Any, Dict

# --- Schema Definition ---

# The agent output is expected to be a JSON string containing simulation results.
# Expected structure:
# {
#   "simulation_id": "run_123",
#   "total_simulation_time": 3600.0,  # seconds
#   "cars_processed": 500,
#   "average_wait_time_seconds": 45.2,
#   "wait_times_per_car": [30.5, 55.1, 40.0, ...] # Optional detailed list
# }

EVALUATION_SCHEMA = {
    "type": "object",
    "properties": {
        "simulation_id": {"type": "string"},
        "total_simulation_time": {"type": "number", "description": "Total time elapsed in seconds."},
        "cars_processed": {"type": "integer", "description": "Total number of vehicles that successfully passed through the intersection(s)."},
        "average_wait_time_seconds": {"type": "number", "description": "The calculated mean waiting time across all processed cars."},
        "wait_times_per_car": {
            "type": "array",
            "items": {"type": "number"},
            "description": "A list containing the individual wait time for every processed car."
        }
    },
    "required": [
        "simulation_id",
        "total_simulation_time",
        "cars_processed",
        "average_wait_time_seconds"
    ]
}
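
# Full enforcement of EVALUATION_SCHEMA can be delegated to the third-party
# `jsonschema` package (optional; `pip install jsonschema`). A minimal sketch,
# assuming that package is available; the harness below does not depend on it:
def validate_against_schema(data: Dict[str, Any]) -> None:
    """Strictly validate `data` against EVALUATION_SCHEMA (requires jsonschema)."""
    try:
        import jsonschema
    except ImportError as e:
        raise RuntimeError(
            "Strict validation requires the optional 'jsonschema' package."
        ) from e
    try:
        jsonschema.validate(instance=data, schema=EVALUATION_SCHEMA)
    except jsonschema.exceptions.ValidationError as e:
        raise ValueError(f"Schema validation failed: {e.message}") from e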

# --- Scoring Logic ---

# Weighting Factors (These are examples; adjust based on specific scenario goals)
WEIGHT_AVG_WAIT_TIME = 0.6
WEIGHT_THROUGHPUT = 0.4
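
# Sanity guard: the combined score below assumes these weights sum to 1.0 so
# that it stays in [0, 1]. If a scenario deliberately uses other weights,
# drop or adjust this check.
assert abs(WEIGHT_AVG_WAIT_TIME + WEIGHT_THROUGHPUT - 1.0) < 1e-9, "weights must sum to 1.0"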

# Benchmarks (These should ideally be provided externally or derived from a baseline)
# For this harness, we define placeholder reference values.
REFERENCE_MAX_CARS = 600  # Target throughput for the simulation duration
REFERENCE_MAX_AVG_WAIT = 60.0  # Acceptable maximum average wait time (seconds)
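
# The reference values can also be supplied externally instead of hard-coding
# them, e.g. via environment variables. A commented sketch (the variable names
# are illustrative, not an established interface):
#
#   import os
#   REFERENCE_MAX_CARS = int(os.environ.get("TRAFFIC_REF_MAX_CARS", "600"))
#   REFERENCE_MAX_AVG_WAIT = float(os.environ.get("TRAFFIC_REF_MAX_AVG_WAIT", "60.0"))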

def parse_agent_output(output: str) -> Dict[str, Any]:
    """Parses the JSON string output from the agent into a Python dictionary."""
    try:
        data = json.loads(output)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to decode JSON output: {e}") from e

    # Basic structural validation. Full enforcement of EVALUATION_SCHEMA can
    # be delegated to a JSON Schema validator (see the optional sketch above);
    # here we perform only the essential checks.
    if not isinstance(data, dict):
        raise ValueError("Output is not a JSON object.")

    for key in EVALUATION_SCHEMA["required"]:
        if key not in data:
            raise KeyError(f"Missing required key in output: {key}")

    # Type-check the fields the scorer consumes, so calculate_score() fails
    # here with a clear message rather than a TypeError deeper in the stack.
    if not isinstance(data["cars_processed"], int):
        raise ValueError("'cars_processed' must be an integer.")
    if not isinstance(data["average_wait_time_seconds"], (int, float)):
        raise ValueError("'average_wait_time_seconds' must be a number.")

    return data

def calculate_score(parsed_data: Dict[str, Any]) -> Dict[str, float]:
    """Calculates normalized scores based on simulation metrics."""
    
    cars_processed = parsed_data['cars_processed']
    avg_wait_time = parsed_data['average_wait_time_seconds']
    
    # 1. Throughput score (maximize cars processed).
    # 0.0 at zero cars, 1.0 once the reference throughput is met or exceeded.
    throughput_score = max(0.0, min(1.0, cars_processed / REFERENCE_MAX_CARS))

    # 2. Wait-time score (minimize average wait time).
    # Linear scaling: 1.0 at 0 s wait, falling to 0.0 at REFERENCE_MAX_AVG_WAIT.
    if avg_wait_time >= REFERENCE_MAX_AVG_WAIT:
        wait_time_score = 0.0
    else:
        wait_time_score = 1.0 - (avg_wait_time / REFERENCE_MAX_AVG_WAIT)
        # Clamp to [0, 1]: a malformed negative wait time would otherwise
        # push the score above 1.0.
        wait_time_score = min(1.0, max(0.0, wait_time_score))

    # 3. Combined Weighted Score (Primary objective measure)
    total_score = (
        (throughput_score * WEIGHT_THROUGHPUT) + 
        (wait_time_score * WEIGHT_AVG_WAIT_TIME)
    )
    
    # Defensive cap: with weights summing to 1.0 and both sub-scores already
    # clamped to [0, 1], the total cannot exceed 1.0, but guard anyway in
    # case the weights are retuned.
    final_score = min(1.0, total_score)
    
    return {
        "throughput_score": throughput_score,
        "wait_time_score": wait_time_score,
        "total_score": final_score,
        "raw_cars_processed": float(cars_processed),
        "raw_average_wait_time": avg_wait_time
    }
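
# The optional `wait_times_per_car` field is not scored directly, but when the
# agent reports one entry per processed car it can serve as a consistency
# check on the reported average. A minimal sketch (the 0.5 s tolerance is an
# illustrative choice):
def check_wait_time_consistency(parsed_data: Dict[str, Any],
                                tolerance: float = 0.5) -> bool:
    """Return True unless a complete per-car list contradicts the reported average."""
    wait_times = parsed_data.get("wait_times_per_car")
    if not wait_times or len(wait_times) != parsed_data["cars_processed"]:
        return True  # no complete per-car list to cross-check against
    recomputed = sum(wait_times) / len(wait_times)
    return abs(recomputed - parsed_data["average_wait_time_seconds"]) <= tolerance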


def score_output(output: str) -> Dict[str, Any]:
    """
    Main function to evaluate the agent's output based on Traffic Control objectives.

    Args:
        output: The string output generated by the agent (expected to be JSON).

    Returns:
        A dictionary containing the parsed metrics, validation status, and final scores.
    """
    
    result: Dict[str, Any] = {
        "status": "ERROR",  # overwritten with "SUCCESS" if scoring completes
        "error_message": None,
        "metrics": {},
        "score": 0.0
    }

    try:
        # 1. Parse and validate structure
        parsed_data = parse_agent_output(output)
        
        # 2. Calculate scores
        scores = calculate_score(parsed_data)
        
        result["status"] = "SUCCESS"
        result["metrics"] = parsed_data
        result["score"] = scores["total_score"]
        result["detailed_scores"] = scores
        
    except (ValueError, KeyError) as e:
        # status stays "ERROR" and score stays 0.0 from the initial dict
        result["error_message"] = str(e)
        
    return result

if __name__ == '__main__':
    # --- Example Usage ---

    # Example 1: Good performance
    good_output = json.dumps({
        "simulation_id": "sim_001",
        "total_simulation_time": 3600.0,
        "cars_processed": 550,
        "average_wait_time_seconds": 25.0,
        "wait_times_per_car": [10.1, 30.5, 45.0]
    })
    
    print("--- Scoring Good Output ---")
    good_result = score_output(good_output)
    print(json.dumps(good_result, indent=2))
    print(f"Final Score: {good_result['score']:.4f}\n")

    # Example 2: Poor performance (High wait time)
    poor_output = json.dumps({
        "simulation_id": "sim_002",
        "total_simulation_time": 3600.0,
        "cars_processed": 400,
        "average_wait_time_seconds": 80.0,  # Exceeds REFERENCE_MAX_AVG_WAIT (60s)
    })
    
    print("--- Scoring Poor Output ---")
    poor_result = score_output(poor_output)
    print(json.dumps(poor_result, indent=2))
    print(f"Final Score: {poor_result['score']:.4f}\n")

    # Example 3: Malformed output
    bad_output = '{"simulation_id": "malformed", "cars_processed": 100'
    
    print("--- Scoring Bad Output ---")
    bad_result = score_output(bad_output)
    print(json.dumps(bad_result, indent=2))
    print(f"Status: {bad_result['status']}")
