#!/usr/bin/env python3
"""
Judge Model Agent

This script evaluates agent-generated responses using various judge models.
It loads prompts from CSV, reads corresponding workspace directories, and evaluates them.
"""

import argparse
import pandas as pd
import os
import json
from judge_util import (
    JudgeModelManager, 
    EvaluationStandard, 
    save_results_to_json
)


class WorkspaceResponseReader:
    """Read a workspace directory and package its contents as a JSON string.

    Attributes:
        max_file_size_mb: Per-file size limit in megabytes, as given by the caller.
        max_size_bytes: The same limit converted to bytes; files larger than
            this are listed as skipped instead of being read.
    """

    def __init__(self, max_file_size_mb: int = 10):
        self.max_file_size_mb = max_file_size_mb
        self.max_size_bytes = max_file_size_mb * 1024 * 1024

    def get_directory_tree(self, directory: str, prefix: str = "", is_last: bool = True) -> str:
        """Generate a visual tree representation of the directory structure.

        Args:
            directory: Path of the directory to render.
            prefix: Text prepended to the root line; children are indented
                one level further.
            is_last: Whether the root is the last entry among its siblings;
                selects the "└── " or "├── " connector on the root line.

        Returns:
            The tree as newline-joined lines (sub-directories first, then
            files, each group sorted by name), or "Directory not found" when
            the path does not exist.
        """
        if not os.path.exists(directory):
            return "Directory not found"

        # normpath drops a trailing separator, so "ws/" is labelled "ws"
        # instead of falling back to the raw path.
        base_name = os.path.basename(os.path.normpath(directory)) or directory
        return "\n".join(self._render_tree(directory, base_name, prefix, is_last))

    def _render_tree(self, directory: str, label: str, prefix: str, is_last: bool) -> list:
        """Return the tree lines for *directory*, with its own line showing *label*."""
        lines = [f"{prefix}{'└── ' if is_last else '├── '}{label}/"]

        # Children sit one level below this directory. A vertical bar is kept
        # only while this directory still has siblings following it.
        child_prefix = prefix + ("    " if is_last else "│   ")

        try:
            names = os.listdir(directory)
        except PermissionError:
            lines.append(f"{child_prefix}[Permission Denied]")
            return lines

        subdirs = sorted(n for n in names if os.path.isdir(os.path.join(directory, n)))
        files = sorted(n for n in names if os.path.isfile(os.path.join(directory, n)))

        # Directories first, then files.
        entries = [(n, True) for n in subdirs] + [(n, False) for n in files]

        for position, (name, is_dir) in enumerate(entries):
            last_entry = position == len(entries) - 1
            path = os.path.join(directory, name)

            if is_dir and not os.path.islink(path):
                lines.extend(self._render_tree(path, name, child_prefix, last_entry))
            else:
                # Symlinked directories are listed but not descended into:
                # os.walk (used to read file contents) does not follow them
                # by default, and a cyclic link would recurse without bound.
                suffix = "/" if is_dir else ""
                lines.append(f"{child_prefix}{'└── ' if last_entry else '├── '}{name}{suffix}")

        return lines

    def read_workspace_response(self, directory: str) -> str:
        """
        Read all files in a workspace directory and return as structured JSON string.

        Args:
            directory: Path to the workspace directory

        Returns:
            JSON string with keys "directory_tree" (root shown as
            "workspace/"), "files" (relative path -> text content),
            "summary" (counts and the size limit) and, only when at least one
            file was skipped, "skipped_files". Files over the size limit and
            files that raise while being read as UTF-8 text are recorded as
            skipped rather than aborting the whole read.
        """
        # Label the root directly instead of string-replacing the directory
        # name afterwards: a replace would also rewrite any nested entry whose
        # name happens to end with the same characters.
        if os.path.exists(directory):
            tree_structure = "\n".join(self._render_tree(directory, "workspace", "", True))
        else:
            tree_structure = "Directory not found"

        # Read all files
        response_dict = {}
        skipped_files = []

        for root, dirs, files in os.walk(directory):
            # Sorting in place makes os.walk visit sub-directories in a
            # stable order, so the resulting JSON is reproducible.
            dirs.sort()
            for file in sorted(files):
                file_path = os.path.join(root, file)
                relative_path = os.path.relpath(file_path, directory)

                try:
                    # Check file size first
                    file_size = os.path.getsize(file_path)

                    if file_size > self.max_size_bytes:
                        size_mb = file_size / (1024 * 1024)
                        skipped_files.append({
                            "path": relative_path,
                            "size_mb": round(size_mb, 2),
                            "reason": f"File too large ({size_mb:.2f}MB > {self.max_file_size_mb}MB)"
                        })
                        print(f"Skipping large file {relative_path}: {size_mb:.2f}MB")
                        continue

                    # Read the file if it's within size limit
                    with open(file_path, 'r', encoding='utf-8') as f:
                        response_dict[relative_path] = f.read()

                except Exception as e:
                    # Best effort: one unreadable file must not discard the
                    # rest of the workspace.
                    print(f"Error reading file {file_path}: {e}")
                    skipped_files.append({
                        "path": relative_path,
                        "reason": f"Error: {str(e)}"
                    })
                    continue

        # Create final response with tree structure, file contents, and skipped files info
        final_response = {
            "directory_tree": tree_structure,
            "files": response_dict,
            "summary": {
                "total_files_read": len(response_dict),
                "files_skipped": len(skipped_files),
                "max_file_size_mb": self.max_file_size_mb
            }
        }

        # Add skipped files info if any files were skipped
        if skipped_files:
            final_response["skipped_files"] = skipped_files

        return json.dumps(final_response, indent=4, ensure_ascii=False)


def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads the process command line, which is the
            behaviour existing callers rely on.

    Returns:
        argparse.Namespace with the attributes prompt_csv_path,
        workspace_base_dir, profile_name, output_file, judge_model_id,
        standard, max_file_size_mb, use_reasoning and verbose.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate agent-generated responses using judge models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
            Examples:
            %(prog)s --csv data.csv --workspace-dir ./workspaces/ --output results.json
            %(prog)s --csv data.csv --workspace-dir ./workspaces/ --judge-model gpt4 --standard TEXT_TO_CODE
            %(prog)s --csv data.csv --workspace-dir ./workspaces/ --max-file-size 20 --no-reasoning
        """
    )

    # Required arguments
    parser.add_argument(
        "--csv", "--prompt-csv",
        dest="prompt_csv_path",
        required=True,
        help="Path to CSV file containing prompts and PIDs"
    )

    parser.add_argument(
        "--workspace-dir", "--workspace-base-dir",
        dest="workspace_base_dir",
        required=True,
        help="Base directory containing workspace subdirectories"
    )

    parser.add_argument(
        "--output", "--output-file",
        dest="output_file",
        required=True,
        help="Path for output JSON file with evaluation results"
    )

    # Optional arguments
    parser.add_argument(
        "--profile-name",
        dest="profile_name",
        default="default",
        help="Profile name for the aws profile (default: default)"
    )

    parser.add_argument(
        "--judge-model", "--model",
        dest="judge_model_id",
        default="o3",
        help="Judge model ID to use for evaluation (default: o3)"
    )

    parser.add_argument(
        "--standard", "--eval-standard",
        dest="standard",
        default="TEXT_TO_CODE",
        choices=["TEXT_TO_CODE", "CODE_COMPLETION", "CODE_TRANSLATION", "REFUSAL"],
        help="Evaluation standard to use (default: TEXT_TO_CODE)"
    )

    parser.add_argument(
        "--max-file-size",
        dest="max_file_size_mb",
        type=int,
        default=10,
        help="Maximum file size in MB to read (default: 10)"
    )

    # --reasoning and --no-reasoning share one destination; reasoning is on
    # unless --no-reasoning is given.
    parser.add_argument(
        "--reasoning", "--use-reasoning",
        dest="use_reasoning",
        action="store_true",
        default=True,
        help="Enable reasoning in evaluation (default: True)"
    )

    parser.add_argument(
        "--no-reasoning",
        dest="use_reasoning",
        action="store_false",
        help="Disable reasoning in evaluation"
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output"
    )

    return parser.parse_args(argv)


def get_evaluation_standard(standard_name: str) -> EvaluationStandard:
    """Resolve a standard's name (case-insensitive) to its EvaluationStandard member.

    Names other than the four known standards resolve to
    EvaluationStandard.TEXT_TO_CODE.
    """
    known_names = ("TEXT_TO_CODE", "CODE_COMPLETION", "CODE_TRANSLATION", "REFUSAL")
    by_name = {label: getattr(EvaluationStandard, label) for label in known_names}

    key = standard_name.upper()
    if key in by_name:
        return by_name[key]
    return EvaluationStandard.TEXT_TO_CODE


def validate_arguments(args):
    """Validate command line arguments.

    Checks that the prompt CSV is an existing file, the workspace base is an
    existing directory, the output file's directory exists and is writable,
    and the maximum file size is positive. Every problem found is printed.

    Args:
        args: Object exposing prompt_csv_path, workspace_base_dir,
            output_file and max_file_size_mb attributes.

    Returns:
        True when every check passes, False otherwise.
    """
    errors = []

    # The CSV must be a regular file; a directory of the same name would
    # pass a plain existence check and only fail later when it is read.
    if not os.path.exists(args.prompt_csv_path):
        errors.append(f"CSV file not found: {args.prompt_csv_path}")
    elif not os.path.isfile(args.prompt_csv_path):
        errors.append(f"CSV path is not a file: {args.prompt_csv_path}")

    # Likewise the workspace base must be a directory.
    if not os.path.exists(args.workspace_base_dir):
        errors.append(f"Workspace base directory not found: {args.workspace_base_dir}")
    elif not os.path.isdir(args.workspace_base_dir):
        errors.append(f"Workspace base path is not a directory: {args.workspace_base_dir}")

    # A bare file name has no directory component and is written to the
    # current directory, so that is the directory whose access is checked.
    output_dir = os.path.dirname(args.output_file) or os.curdir
    if not os.path.isdir(output_dir):
        errors.append(f"Output directory does not exist: {output_dir}")
    elif not os.access(output_dir, os.W_OK):
        errors.append(f"Output directory is not writable: {output_dir}")

    # Validate max file size
    if args.max_file_size_mb <= 0:
        errors.append("Maximum file size must be greater than 0")

    if errors:
        print("Validation errors:")
        for error in errors:
            print(f"  - {error}")
        return False

    return True


def _is_missing_value(value) -> bool:
    """Return True for values that represent an absent CSV cell (None, NaN/NA, or "")."""
    if isinstance(value, str):
        return not value
    return bool(pd.isna(value))


def main():
    """Main function to run agent model evaluation.

    Parses and validates the command line, loads the prompt CSV, reads the
    workspace directory named after each row's pid, passes the collected
    prompt/response pairs to the judge model, and saves the results as JSON.

    Returns:
        0 on success, 1 when any stage fails or when no workspace could be
        loaded for evaluation.
    """
    # Parse command line arguments
    args = parse_arguments()

    # Validate arguments
    if not validate_arguments(args):
        return 1

    # Print configuration if verbose
    if args.verbose:
        print("Configuration:")
        print(f"  CSV file: {args.prompt_csv_path}")
        print(f"  Workspace directory: {args.workspace_base_dir}")
        print(f"  Profile name: {args.profile_name}")
        print(f"  Judge model: {args.judge_model_id}")
        print(f"  Evaluation standard: {args.standard}")
        print(f"  Output file: {args.output_file}")
        print(f"  Max file size: {args.max_file_size_mb}MB")
        print(f"  Use reasoning: {args.use_reasoning}")
        print()

    # Load prompts from CSV
    try:
        df = pd.read_csv(args.prompt_csv_path)
        if df.empty:
            raise ValueError("CSV file is empty")
        # Fail once with a clear message instead of reporting every row as
        # "Missing data" when a required column is absent.
        missing_columns = [name for name in ("pid", "prompt") if name not in df.columns]
        if missing_columns:
            raise ValueError(
                f"CSV file is missing required column(s): {', '.join(missing_columns)}"
            )
        print(f"Loaded {len(df)} prompt-response pairs from {args.prompt_csv_path}")
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return 1

    # Initialize workspace reader and judge model manager
    workspace_reader = WorkspaceResponseReader(args.max_file_size_mb)

    try:
        judge_manager = JudgeModelManager(args.judge_model_id, profile_name=args.profile_name, is_agent_model=True)
        print(f"Initialized judge model: {args.judge_model_id}")
    except Exception as e:
        print(f"Error initializing judge model: {e}")
        return 1

    # Process each prompt and its corresponding workspace
    evaluation_data = []

    for index, row in df.iterrows():
        pid = row["pid"]
        prompt = row["prompt"]

        # Empty CSV cells arrive as NaN, which is truthy, and a pid of 0 is
        # falsy although valid, so a plain truthiness test misjudges both.
        if _is_missing_value(pid) or _is_missing_value(prompt):
            print(f"ERROR: Missing data in row {index}")
            continue

        # A single missing pid makes pandas load the whole column as float,
        # so 12 shows up as 12.0; restore the integer so the workspace
        # directory name matches.
        if isinstance(pid, float) and pid.is_integer():
            pid = int(pid)

        # Read workspace response
        workspace_dir = os.path.join(args.workspace_base_dir, str(pid))

        if not os.path.exists(workspace_dir):
            print(f"ERROR: Workspace directory not found: {workspace_dir}")
            continue

        try:
            response = workspace_reader.read_workspace_response(workspace_dir)
        except Exception as e:
            print(f"ERROR: Failed to read workspace {workspace_dir}: {e}")
            continue

        evaluation_data.append({
            "pid": pid,
            "prompt": prompt,
            "response": response
        })

    # Evaluating nothing and reporting success would hide a broken run.
    if not evaluation_data:
        print("ERROR: No workspaces could be loaded for evaluation")
        return 1

    # Convert evaluation standard
    evaluation_standard = get_evaluation_standard(args.standard)

    # Evaluate responses using the judge model
    try:
        results = judge_manager.batch_evaluate(
            data=evaluation_data,
            standard=evaluation_standard,
            use_reasoning=args.use_reasoning
        )
    except Exception as e:
        print(f"Error during evaluation: {e}")
        return 1

    # Save results to JSON file
    try:
        save_results_to_json(results, args.output_file)
        print(f"Results saved to {args.output_file}")
    except Exception as e:
        print(f"Error saving results: {e}")
        return 1

    return 0


# Script entry point: run the evaluation and report the outcome via the
# process exit status (0 on success, non-zero on failure).
if __name__ == "__main__":
    print("Running agent model evaluation...")
    exit_code = main()
    if exit_code == 0:
        print("Evaluation completed successfully.")
    else:
        print("Evaluation failed.")
    # The exit() builtin is added by the site module for interactive use and
    # is missing when site is not loaded (e.g. python -S); raising SystemExit
    # always works and needs no import.
    raise SystemExit(exit_code)