import glob
import json
import os
import shlex
import subprocess
import tempfile
import time
import traceback

def _build_lm_eval_script(model_path, tasks, output_dir, output_file,
                          batch_size, gpu_id, cache_dir):
    """Return the bash script text that runs lm_eval inside the ``lmeval`` conda env.

    Every caller-supplied value is shell-quoted so paths or task strings that
    contain spaces or shell metacharacters cannot split or inject commands.
    The HuggingFace token is deliberately NOT embedded: the script reads it
    from the inherited ``HF_TOKEN`` environment variable at run time.
    """
    model_args = shlex.quote(f"pretrained={model_path},cache_dir={cache_dir}")
    return f"""#!/bin/bash
# Activate conda environment
source $(conda info --base)/etc/profile.d/conda.sh
conda deactivate
conda activate lmeval

# Set environment variables (HF_TOKEN is inherited from the parent process)
export CUDA_VISIBLE_DEVICES={shlex.quote(str(gpu_id))}
export HF_HOME={shlex.quote(str(cache_dir))}
export PYTHONIOENCODING=utf-8
export TOKENIZERS_PARALLELISM=false

# Print environment information for verification
echo "Python path: $(which python)"
echo "Current conda environment: $(conda info --envs | grep '*')"
echo "lm_eval path: $(which lm_eval)"

# Only pass a token when one is actually configured
model_args={model_args}
if [ -n "$HF_TOKEN" ]; then
    model_args="$model_args,token=$HF_TOKEN"
fi

# Run evaluation. Exactly one GPU is visible after the export above,
# so inside this process it is always ordinal 0.
lm_eval --model hf \\
    --model_args "$model_args" \\
    --tasks {shlex.quote(str(tasks))} \\
    --device cuda:0 \\
    --batch_size {shlex.quote(str(batch_size))} \\
    --output_path {shlex.quote(str(output_file))}
# Capture the status of lm_eval itself, before any other command runs
exit_code=$?

# List generated files
echo "======= List evaluation result directory contents ======="
ls -la {shlex.quote(str(output_dir))}/

# Print execution status
if [ $exit_code -eq 0 ]; then
    echo "Evaluation completed successfully"
else
    echo "Evaluation failed, exit code: $exit_code"
fi
exit $exit_code
"""


def _json_mtimes(output_dir):
    """Map the absolute path of each ``*.json`` file in output_dir to its mtime in ns."""
    mtimes = {}
    for path in glob.glob(os.path.join(glob.escape(output_dir), "*.json")):
        try:
            mtimes[os.path.abspath(path)] = os.stat(path).st_mtime_ns
        except OSError:
            # The file vanished between listing and stat; nothing to remember.
            continue
    return mtimes


def _written_since(path, before):
    """Return True if path is missing from the snapshot or its mtime differs from it."""
    try:
        return before.get(os.path.abspath(path)) != os.stat(path).st_mtime_ns
    except OSError:
        return False


def _load_json(path):
    """Parse and return the JSON document stored at path."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def run_lm_eval(model_path, tasks, output_dir, batch_size=8):
    """
    Run lm-evaluation-harness evaluation

    A temporary bash script activates the ``lmeval`` conda environment and
    invokes ``lm_eval`` on the first GPU listed in ``CUDA_VISIBLE_DEVICES``
    (``0`` when unset). Script output goes straight to the current terminal.
    ``HF_HOME`` (default ``/tmp/huggingface``) is used as the cache directory
    and ``HF_TOKEN``, when set, is forwarded through the environment.

    Args:
        model_path: Model path
        tasks: Task list string
        output_dir: Output directory (created if missing)
        batch_size: Batch size

    Returns:
        dict: The parsed JSON results written by this run, or
        ``{"error": <message>}`` when the script exits non-zero, when no
        result file was written by this run, or when an exception occurs.
    """
    print(f"Evaluating model using lm-evaluation-harness: {model_path}")
    print(f"Tasks: {tasks}")

    output_dir = os.fspath(output_dir)

    # Only the first visible GPU is used
    gpu_id = os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0].strip()

    # Cache directory
    cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface")

    # Set result file to a fixed name, do not use lm_eval's automatic naming
    output_file = os.path.join(output_dir, "fixed_results.json")

    script_text = _build_lm_eval_script(
        model_path, tasks, output_dir, output_file, batch_size, gpu_id, cache_dir
    )

    # mkstemp gives an unpredictable name and owner-only permissions
    fd, script_path = tempfile.mkstemp(prefix="run_lm_eval_", suffix=".sh")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(script_text)

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Remember what was already there so leftovers from earlier runs
        # are never mistaken for this run's results.
        before = _json_mtimes(output_dir)

        # No shell and no captured output: the script prints directly
        # to the current terminal.
        completed = subprocess.run(["/bin/bash", script_path], check=False)

        if completed.returncode != 0:
            print(f"Evaluation failed, return code: {completed.returncode}")
            return {"error": f"Evaluation failed, return code: {completed.returncode}"}

        # First check fixed named result file
        if os.path.exists(output_file) and _written_since(output_file, before):
            return _load_json(output_file)

        # Then timestamped result files, then any JSON file; newest wins
        for pattern, label in (("results_*.json", "result file"),
                               ("*.json", "JSON file")):
            candidates = [
                path
                for path in glob.glob(os.path.join(glob.escape(output_dir), pattern))
                if _written_since(path, before)
            ]
            if candidates:
                latest_file = max(candidates, key=os.path.getmtime)
                print(f"Found {label}: {latest_file}")
                return _load_json(latest_file)

        return {"error": "Evaluation completed but result file does not exist"}

    except Exception as e:
        # Top-level boundary: callers expect an error dict, not an exception
        print(f"Evaluation process error: {e}")
        traceback.print_exc()
        return {"error": str(e)}
    finally:
        # Clean up temporary files
        if os.path.exists(script_path):
            os.remove(script_path)

def evaluate_instruction_following(model_path, output_dir):
    """Run the ``ifeval`` task on the model via ``run_lm_eval`` (batch size 8).

    Returns whatever ``run_lm_eval`` returns for that task.
    """
    print("Evaluating instruction following capability...")
    task_name = "ifeval"
    outcome = run_lm_eval(model_path, task_name, output_dir, batch_size=8)
    return outcome

def evaluate_mathematics(model_path, output_dir):
    """Run the ``gsm8k_cot`` task on the model via ``run_lm_eval`` (batch size 8).

    Returns whatever ``run_lm_eval`` returns for that task.
    """
    print("Evaluating mathematics reasoning capability...")
    task_name = "gsm8k_cot"
    outcome = run_lm_eval(model_path, task_name, output_dir, batch_size=8)
    return outcome

def evaluate_multilingual(model_path, output_dir):
    """Run the French, Spanish, German and Russian ``m_mmlu_*`` tasks (batch size 4).

    The task names are joined into the comma-separated string that
    ``run_lm_eval`` expects; its return value is passed back unchanged.
    """
    print("Evaluating multilingual capability...")
    task_names = ("m_mmlu_fr", "m_mmlu_es", "m_mmlu_de", "m_mmlu_ru")
    task_spec = ",".join(task_names)
    return run_lm_eval(model_path, task_spec, output_dir, batch_size=4)