import glob
import json
import os
import shlex
import subprocess
import tempfile
import time

def _build_code_eval_script(model_path, output_file, output_dir, gpu_id, cache_dir):
    """
    Build the bash script that runs bigcode-evaluation-harness.

    Every interpolated value is shell-quoted, so paths containing spaces or
    shell metacharacters cannot break (or inject into) the script.

    Args:
        model_path: Absolute path of the model passed to ``--model``
        output_file: Absolute path passed to ``--metric_output_path``
        output_dir: Absolute path of the directory listed after the run
        gpu_id: Value exported as ``CUDA_VISIBLE_DEVICES``
        cache_dir: Value exported as ``HF_HOME``

    Returns:
        str: Script text
    """
    quote = shlex.quote
    return f"""#!/bin/bash
# Activate conda environment
source "$(conda info --base)/etc/profile.d/conda.sh"
conda deactivate
conda activate bigcode

# Set environment variables
export CUDA_VISIBLE_DEVICES={quote(gpu_id)}
# HF_TOKEN is inherited from the parent environment so the secret is never
# written to disk; re-exporting keeps it defined (possibly empty).
export HF_TOKEN="$HF_TOKEN"
export HF_HOME={quote(cache_dir)}
export TOKENIZERS_PARALLELISM=false

# Print environment information for verification
echo "Python path: $(which python)"
echo "Current conda environment: $(conda info --envs | grep '*')"

# Check if bigcode-evaluation-harness exists
if [ ! -d "bigcode-evaluation-harness" ]; then
    echo "Clone bigcode-evaluation-harness repository"
    git clone https://github.com/bigcode-project/bigcode-evaluation-harness.git
fi

# Abort instead of launching from the wrong directory
cd bigcode-evaluation-harness || exit 1

# Use accelerate for code evaluation
accelerate launch main.py \\
  --model {quote(model_path)} \\
  --trust_remote_code \\
  --max_length_generation 512 \\
  --precision bf16 \\
  --tasks humanevalplus,mbppplus \\
  --temperature 0.2 \\
  --n_samples 10 \\
  --batch_size 10 \\
  --allow_code_execution \\
  --metric_output_path {quote(output_file)} \\
  --use_auth_token
# Capture the harness status immediately, before any other command
# overwrites $?
exit_code=$?

# List generated files
echo "======= List evaluation result directory contents ======="
ls -la {quote(output_dir)}/

# Print execution status
if [ $exit_code -eq 0 ]; then
    echo "Code evaluation completed successfully"
else
    echo "Code evaluation failed, exit code: $exit_code"
fi
exit $exit_code
"""


def evaluate_coding(model_path, output_dir):
    """
    Evaluate coding capabilities (HumanEval+ and MBPP+)

    Generates a temporary bash script that activates the ``bigcode`` conda
    environment, clones bigcode-evaluation-harness into the current working
    directory if it is missing, and runs it through ``accelerate launch``.
    The script's output goes straight to the current terminal.

    Environment variables read: ``CUDA_VISIBLE_DEVICES`` (only the first ID
    is used, default "0"), ``HF_HOME`` (default "/tmp/huggingface") and
    ``HF_TOKEN`` (inherited by the child process).

    Args:
        model_path: Model path (treated as a local filesystem path and
            resolved against the current working directory)
        output_dir: Output directory (created if it does not exist)

    Returns:
        dict: Parsed JSON evaluation results, or ``{"error": <message>}`` if
            the harness fails, no result file is found, or an exception
            occurs
    """
    print("Evaluating coding capabilities...")

    # Get GPU ID
    gpu_id = os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0]

    # Cache directory
    cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface")

    # Result file fixed name
    output_file = os.path.join(output_dir, "code_eval.json")

    # The script changes directory before launching the harness, so every
    # path handed to it must be absolute.
    abs_output_dir = os.path.abspath(output_dir)

    script_path = None
    try:
        # The harness needs the directory to exist to write its metrics
        os.makedirs(abs_output_dir, exist_ok=True)

        script_text = _build_code_eval_script(
            model_path=os.path.abspath(model_path),
            output_file=os.path.abspath(output_file),
            output_dir=abs_output_dir,
            gpu_id=gpu_id,
            cache_dir=cache_dir,
        )

        # mkstemp gives a unique, owner-only (0600) file
        fd, script_path = tempfile.mkstemp(prefix="run_code_eval_", suffix=".sh")
        with os.fdopen(fd, "w") as f:
            f.write(script_text)

        # Execute evaluation script and display output directly in current terminal
        print("\n" + "=" * 50)
        print("Starting code capability evaluation, output will be displayed directly...")
        print("=" * 50 + "\n")

        # stdout/stderr are not captured; running via an argv list avoids
        # shell=True and the need for an executable bit on the script
        completed = subprocess.run(["bash", script_path])

        if completed.returncode != 0:
            print(f"Code evaluation failed, return code: {completed.returncode}")
            return {"error": f"Code evaluation failed, return code: {completed.returncode}"}

        # Check fixed named result file
        if os.path.exists(output_file):
            print(f"Found result file: {output_file}")
            with open(output_file, "r", encoding="utf-8") as f:
                return json.load(f)

        # If fixed named file does not exist, look for any json file.
        # NOTE(review): this may pick up a JSON file left by an earlier run.
        json_files = glob.glob(os.path.join(output_dir, "*.json"))
        if json_files:
            # Sort by modification time, take the latest
            latest_file = max(json_files, key=os.path.getmtime)
            print(f"Found JSON file: {latest_file}")
            with open(latest_file, "r", encoding="utf-8") as f:
                return json.load(f)

        return {"error": "Evaluation completed but result file does not exist"}

    except Exception as e:
        # Top-level boundary: report the failure as an error dict instead of
        # propagating, so callers always receive a dict
        print(f"Code evaluation process error: {e}")
        import traceback
        traceback.print_exc()
        return {"error": str(e)}
    finally:
        # Clean up temporary files
        if script_path is not None and os.path.exists(script_path):
            os.remove(script_path)