import os
import json

from .lm_evaluation import evaluate_instruction_following, evaluate_mathematics, evaluate_multilingual
from .code_evaluation import evaluate_coding
from .safety_evaluation import evaluate_safety

def evaluate_model(model_path, task_type):
    """
    Evaluate a model on one task type and print a summary of the outcome.

    The task type selects one of the imported evaluation functions, which is
    called with the model path and an output directory. Per the original
    author's note, each evaluation function is expected to handle conda
    environment switching internally; nothing in this function does so.

    The output directory is ``eval_<task_type>`` inside
    ``os.path.dirname(model_path)``. It is created only once the task type has
    been recognised, so a mistyped task type leaves nothing behind on disk.
    Note that ``os.path.dirname`` of a path with a trailing separator is the
    path itself, so in that case the directory ends up inside the model
    directory rather than next to it.

    Args:
        model_path: Model path
        task_type: Task type (instruction, math, coding, safety, multilingual)

    Returns:
        dict: Whatever the selected evaluation function returned, or
        ``{"error": "Unknown task type: <task_type>"}`` when the task type is
        not one of the supported values.
    """
    print(f"\n===== Evaluating model performance on {task_type} task =====")

    # Resolve the evaluator first so that an unsupported task type is
    # rejected before any directory is created.
    if task_type == "instruction":
        evaluator = evaluate_instruction_following
    elif task_type == "math":
        evaluator = evaluate_mathematics
    elif task_type == "multilingual":
        evaluator = evaluate_multilingual
    elif task_type == "coding":
        evaluator = evaluate_coding
    elif task_type == "safety":
        evaluator = evaluate_safety
    else:
        results = {"error": f"Unknown task type: {task_type}"}
        print(f"Evaluation error: {results['error']}")
        return results

    # Create output directory
    output_dir = os.path.join(os.path.dirname(model_path), f"eval_{task_type}")
    os.makedirs(output_dir, exist_ok=True)

    results = evaluator(model_path, output_dir)

    # Print the results part of evaluation results
    if "results" in results:
        print(f"\n{task_type} task evaluation results:")
        try:
            print(json.dumps(results["results"], indent=2, ensure_ascii=False))
        except (TypeError, ValueError):
            # The summary is purely diagnostic: a value json cannot serialize
            # must not discard the results of an evaluation that already ran.
            print(results["results"])
    elif "error" in results:
        print(f"Evaluation error: {results['error']}")
    else:
        print("Evaluation completed, but results field not found")

    return results