#!/usr/bin/env python3
"""
Judge Model Base

This script evaluates base model responses using various judge models.
It loads prompt-response pairs from JSON and evaluates them according to specified standards.
"""

import argparse
import os
from judge_util import (
    JudgeModelManager, 
    EvaluationStandard, 
    load_json_data, 
    save_results_to_json
)


def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default), argparse
            reads ``sys.argv[1:]``, which is the original behavior.

    Returns:
        The populated ``argparse.Namespace`` with the attributes
        ``prompt_response_json_path``, ``output_file``, ``profile_name``,
        ``judge_model_id``, ``standard``, ``use_reasoning`` and ``verbose``.

    Note:
        On invalid input argparse prints a usage message and raises
        ``SystemExit``.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate base model responses using judge models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
            Examples:
            %(prog)s --json responses.json --output evaluations.json
            %(prog)s --json responses.json --output evaluations.json --judge-model gpt4 --standard CODE_COMPLETION
            %(prog)s --json responses.json --output evaluations.json --no-reasoning --verbose
        """
    )

    # Required arguments
    parser.add_argument(
        "--json", "--prompt-response-json",
        dest="prompt_response_json_path",
        required=True,
        help="Path to JSON file containing prompt-response pairs"
    )

    parser.add_argument(
        "--output", "--output-file",
        dest="output_file",
        required=True,
        help="Path for output JSON file with evaluation results"
    )

    # Optional arguments
    parser.add_argument(
        "--profile-name",
        dest="profile_name",
        default="default",
        help="Profile name for the aws profile (default: default)"
    )

    parser.add_argument(
        "--judge-model", "--model",
        dest="judge_model_id",
        default="o3",
        help="Judge model ID to use for evaluation (default: o3)"
    )

    parser.add_argument(
        "--standard", "--eval-standard",
        dest="standard",
        default="TEXT_TO_CODE",
        choices=["TEXT_TO_CODE", "CODE_COMPLETION", "CODE_TRANSLATION", "REFUSAL"],
        help="Evaluation standard to use (default: TEXT_TO_CODE)"
    )

    # --reasoning and --no-reasoning share one destination. Reasoning is on
    # by default, so --reasoning is only an explicit way to restate it.
    parser.add_argument(
        "--reasoning", "--use-reasoning",
        dest="use_reasoning",
        action="store_true",
        default=True,
        help="Enable reasoning in evaluation (default: True)"
    )

    parser.add_argument(
        "--no-reasoning",
        dest="use_reasoning",
        action="store_false",
        help="Disable reasoning in evaluation"
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output"
    )

    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)


def get_evaluation_standard(standard_name: str) -> EvaluationStandard:
    """Look up the EvaluationStandard member for a standard name.

    The name is compared case-insensitively (it is upper-cased first).
    Names that match none of the four known standards fall back to
    ``EvaluationStandard.TEXT_TO_CODE``.
    """
    known_standards = (
        ("TEXT_TO_CODE", EvaluationStandard.TEXT_TO_CODE),
        ("CODE_COMPLETION", EvaluationStandard.CODE_COMPLETION),
        ("CODE_TRANSLATION", EvaluationStandard.CODE_TRANSLATION),
        ("REFUSAL", EvaluationStandard.REFUSAL),
    )

    wanted = standard_name.upper()
    for label, member in known_standards:
        if label == wanted:
            return member

    # Unknown name: use the same default the lookup always had.
    return EvaluationStandard.TEXT_TO_CODE


def validate_arguments(args):
    """Validate the filesystem paths given on the command line.

    Checks that the input JSON path exists and is a regular file, that the
    output path is not itself an existing directory, and, when the output
    path has a directory component, that this component exists, is a
    directory and is writable. All problems found are printed, not just
    the first one.

    Args:
        args: Object exposing ``prompt_response_json_path`` and
            ``output_file`` attributes (e.g. the parsed argparse namespace).

    Returns:
        True when every check passes, False otherwise.
    """
    errors = []

    # The input must be an actual file; a directory also "exists" but
    # cannot be read as JSON.
    json_path = args.prompt_response_json_path
    if not os.path.exists(json_path):
        errors.append(f"JSON file not found: {json_path}")
    elif not os.path.isfile(json_path):
        errors.append(f"JSON path is not a file: {json_path}")

    # The output target must not be an existing directory.
    if os.path.isdir(args.output_file):
        errors.append(f"Output path is a directory: {args.output_file}")

    # An empty dirname means the file goes into the current directory,
    # which is not checked here.
    output_dir = os.path.dirname(args.output_file)
    if output_dir:
        if not os.path.exists(output_dir):
            errors.append(f"Output directory does not exist: {output_dir}")
        elif not os.path.isdir(output_dir):
            errors.append(f"Output path is not a directory: {output_dir}")
        elif not os.access(output_dir, os.W_OK):
            errors.append(f"Output directory is not writable: {output_dir}")

    if errors:
        print("Validation errors:")
        for error in errors:
            print(f"  - {error}")
        return False

    return True


def main():
    """Run the evaluation pipeline and return a process exit code.

    Steps: parse and validate the command line, load the prompt-response
    data, build the judge model manager, run the batch evaluation and save
    the results. Returns 0 on success and 1 as soon as any step fails.
    """
    args = parse_arguments()

    if not validate_arguments(args):
        return 1

    # Echo the effective settings when verbose output was requested.
    if args.verbose:
        settings = (
            ("JSON file", args.prompt_response_json_path),
            ("Profile name", args.profile_name),
            ("Judge model", args.judge_model_id),
            ("Evaluation standard", args.standard),
            ("Output file", args.output_file),
            ("Use reasoning", args.use_reasoning),
        )
        print("Configuration:")
        for label, value in settings:
            print(f"  {label}: {value}")
        print()

    # Step 1: read the prompt-response pairs.
    source_path = args.prompt_response_json_path
    try:
        pairs = load_json_data(source_path)
        if not pairs:
            print(f"ERROR: No data found in {source_path}")
            return 1
        print(f"Loaded {len(pairs)} prompt-response pairs from {source_path}")
    except Exception as exc:
        print(f"Error loading data: {exc}")
        return 1

    # Step 2: construct the judge.
    model_id = args.judge_model_id
    try:
        manager = JudgeModelManager(model_id, profile_name=args.profile_name)
        print(f"Initialized judge model: {model_id}")
    except Exception as exc:
        print(f"Error initializing judge model: {exc}")
        return 1

    # Step 3: map the standard name to its enum member and evaluate.
    standard = get_evaluation_standard(args.standard)
    try:
        evaluations = manager.batch_evaluate(
            data=pairs,
            standard=standard,
            use_reasoning=args.use_reasoning,
        )
    except Exception as exc:
        print(f"Error during evaluation: {exc}")
        return 1

    # Step 4: write the results out.
    destination = args.output_file
    try:
        save_results_to_json(evaluations, destination)
        print(f"Results saved to {destination}")
    except Exception as exc:
        print(f"Error saving results: {exc}")
        return 1

    return 0


if __name__ == "__main__":
    # Script entry point: run the evaluation, report the outcome, and
    # hand main()'s return value to the shell as the exit status.
    print("Running base model evaluation...")
    exit_code = main()
    if exit_code == 0:
        print("Evaluation completed successfully.")
    else:
        print("Evaluation failed.")
    # Raise SystemExit directly rather than calling the exit() helper,
    # which is installed by the site module and is missing when site is
    # not loaded (e.g. `python -S`).
    raise SystemExit(exit_code)