#!/usr/bin/env python3
"""
Simple script to run Android GUI Control Task evaluation
Compatible with test.py output format
"""

import json
import os
import sys

# Add the parent directory to the path to allow imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from evaluation.evaluate import AndroidControlEvaluator

def _load_json(path):
    """Read the file at *path* as UTF-8 text and return the parsed JSON."""
    # Pin the encoding: without it open() falls back to the locale default,
    # which is not UTF-8 on every platform and breaks on non-ASCII content.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def main(gt_path: str = '',
         pred_path: str = 'eval_results/batch_evaluation_results_20250909_134931.json',
         output_path: str = 'eval_results/evaluation_results.json') -> int:
    """Evaluate test.py predictions against ground truth and save the metrics.

    The defaults are the values that used to be hard-coded in this function,
    so calling ``main()`` with no arguments behaves as before; pass the paths
    explicitly instead of editing the file.

    Args:
        gt_path: Ground truth JSON file (e.g. 'data/gui_odyssey.json').
            The default is empty and must be set before the script can run.
        pred_path: Prediction JSON file written by test.py. The default
            contains a timestamp; update it to the run being evaluated.
        output_path: Destination handed to ``evaluator.save_results``.

    Returns:
        0 when the evaluation ran, 1 when an input file is missing.

    Raises:
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # isfile() rather than exists(): a directory would pass exists() and then
    # fail inside open() with a far less helpful error.
    if not os.path.isfile(gt_path):
        print(f"Error: Ground truth file not found: {gt_path}")
        if not gt_path:
            print("Please set gt_path to the ground truth JSON file")
        return 1

    if not os.path.isfile(pred_path):
        print(f"Error: Prediction file not found: {pred_path}")
        print("Please run test.py first to generate prediction results, or update the pred_path")
        return 1

    # Load data
    print(f"Loading ground truth data from: {gt_path}")
    gt_data = _load_json(gt_path)

    print(f"Loading prediction data from: {pred_path}")
    pred_data = _load_json(pred_path)

    print(f"Loaded {len(gt_data)} ground truth episodes and {len(pred_data)} prediction episodes")

    # Initialize evaluator
    evaluator = AndroidControlEvaluator()

    # Run evaluation using test.py format
    print("\nStarting evaluation...")
    metrics = evaluator.evaluate_test_results(gt_data, pred_data)

    # save_results is defined elsewhere; make sure the destination directory
    # is present first (a no-op when it already exists).
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Print and save results
    evaluator.print_results(metrics)
    evaluator.save_results(metrics, output_path)

    print("\nEvaluation completed!")
    return 0

if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status so a
    # failed run is visible to shells and CI; a None result still exits 0.
    sys.exit(main())