import os
import sys
from typing import Any, Dict, List
from datasets import load_dataset
import json

# Add parent directory to path to allow imports from `evaluation`
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from evaluation.commonsense_constraint import evaluation as commonsense_eval


class Evaluator:
    """Thin wrapper that runs the commonsense-constraint check on a plan."""

    def __init__(self) -> None:
        """Create an evaluator; there is no state to set up."""

    def evaluate_plan(
        self, task_data: Dict[str, Any], plan: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Run the commonsense-constraint evaluation for one plan.

        Progress messages are printed to stdout. Any exception raised while
        evaluating is caught, reported on stdout and converted into an error
        dictionary instead of propagating to the caller.

        Args:
            task_data: The query data for the task the plan is meant to solve.
            plan: The proposed plan, one dictionary per day.

        Returns:
            Whatever ``commonsense_eval`` returns for ``(task_data, plan)``, or
            ``{"error": <message>}`` when the evaluation raised.
        """
        print("  Running live evaluation on the proposed plan...")
        try:
            # Delegate to the evaluation routine imported from the benchmark.
            outcome = commonsense_eval(task_data, plan)
            print("  Evaluation complete.")
        except Exception as exc:
            failure = str(exc)
            print(f"  Error during evaluation: {failure}")
            return {"error": failure}
        return outcome


def _run_self_test() -> None:
    """Smoke-test ``Evaluator`` against the first TravelPlanner training task.

    Downloads the ``osunlp/TravelPlanner`` dataset from Hugging Face (network
    access required), evaluates a hard-coded three-day sample plan against the
    first task of the ``train`` split, and prints one line per constraint.
    """
    print("Running evaluator unit test...")
    evaluator = Evaluator()

    # Load the "train" configuration/split of the dataset from Hugging Face.
    # NOTE(review): trust_remote_code=True allows the dataset repository's own
    # loading script to execute on this machine; only keep it for repositories
    # whose code has been reviewed.
    print("Loading task data from osunlp/TravelPlanner...")
    query_data_list = load_dataset(
        "osunlp/TravelPlanner", "train", trust_remote_code=True
    )["train"]
    task_data = query_data_list[0]
    print("Task data loaded successfully.")

    # Hand-written sample plan (St. Petersburg -> Rockford, three days).
    # Every entry uses the same "days" key for the day index; the original
    # literal mixed "days" and "day".
    # NOTE(review): presumably this plan corresponds to the first training
    # task loaded above - confirm against the dataset.
    plan = [
        {
            "days": 1,
            "current_city": "from St. Petersburg to Rockford",
            "transportation": "Flight Number: F3573659, from St. Petersburg to Rockford, Departure Time: 15:40, Arrival Time: 17:04",
            "breakfast": "-",
            "attraction": "-",
            "lunch": "-",
            "dinner": "Flying Mango, Rockford",
            "accommodation": "Private Room in a two bedroom apt., Rockford",
        },
        {
            "days": 2,
            "current_city": "Rockford",
            "transportation": "-",
            "breakfast": "Nutri Punch, Rockford",
            "attraction": "Anderson Japanese Gardens, Rockford; Burpee Museum of Natural History, Rockford",
            "lunch": "Cafe Southall, Rockford",
            "dinner": "Coco Bambu, Rockford",
            "accommodation": "Private Room in a two bedroom apt., Rockford",
        },
        {
            "days": 3,
            "current_city": "Rockford",
            "transportation": "-",
            "breakfast": "Dunkin' Donuts, Rockford",
            "attraction": "Discovery Center Museum, Rockford; Nicholas Conservatory & Gardens, Rockford",
            "lunch": "Aroma Rest O Bar, Rockford",
            "dinner": "Gajalee Sea Food, Rockford",
            "accommodation": "-",
        },
    ]

    print("\n--- Evaluating Plan ---")
    print(f"Task: Travel from {task_data['org']} to {task_data['dest']} for {task_data['days']} days.")

    results = evaluator.evaluate_plan(task_data, plan)

    print("\n--- Evaluation Results ---")
    if "error" in results:
        # evaluate_plan reports failures as {"error": <message>}.
        print(f"An error occurred: {results['error']}")
    else:
        # Each value is unpacked as a (passed, reason) pair; the reason is only
        # shown for constraints that did not pass.
        for constraint, (passed, reason) in results.items():
            status = "✅ PASSED" if passed else "❌ FAILED"
            message = f"- {constraint}: {status}"
            if not passed and reason:
                message += f"\n  Reason: {reason}"
            print(message)
    print("--------------------------")


if __name__ == "__main__":
    _run_self_test()
    