import os
import json
import subprocess
import tempfile
import time
import glob

# Driver script for the safety evaluation, executed with ``bash -c``.
#
# The script is intentionally static: every caller-supplied value (paths, GPU
# id, HuggingFace token) reaches it through SAFETY_EVAL_* environment
# variables instead of being interpolated into the text. This keeps the token
# out of any file on disk and makes paths with spaces or shell metacharacters
# safe.
#
# NOTE(review): the clone URL is kept exactly as it was; confirm it points at
# the intended safety-eval fork.
_SAFETY_EVAL_SCRIPT = """
# Activate conda environment
source "$(conda info --base)/etc/profile.d/conda.sh"
conda deactivate
conda activate safety-eval || echo "WARNING: could not activate conda environment 'safety-eval', continuing with the current environment" >&2

# Set environment variables (after activation so activation cannot override them)
export CUDA_VISIBLE_DEVICES="$SAFETY_EVAL_GPU_ID"
export HF_TOKEN="$SAFETY_EVAL_HF_TOKEN"
export HF_HOME="$SAFETY_EVAL_HF_HOME"
export OPENAI_API_KEY='' # Virtual OpenAI key, not actually used

# Print environment information for verification
echo "Python path: $(which python)"
echo "Current conda environment: $(conda info --envs | grep '*')"

# Check if safety-eval-fork exists
if [ ! -d "safety-eval-fork" ]; then
    echo "Clone safety-eval repository"
    # Abort on failure: continuing would run pip and the evaluation from the
    # wrong directory.
    git clone https://github.com/lm-sys/safety-eval safety-eval-fork || exit 1
    (cd safety-eval-fork && pip install -e .) || exit 1
else
    echo "safety-eval-fork directory already exists"
fi

cd safety-eval-fork || exit 1

# Execute safety evaluation
python evaluation/eval.py generators \\
  --model_name_or_path "$SAFETY_EVAL_MODEL_PATH" \\
  --use_vllm \\
  --model_input_template_path_or_name llama3 \\
  --tasks wildguardtest,harmbench,do_anything_now,xstest \\
  --report_output_path "$SAFETY_EVAL_OUTPUT_FILE" \\
  --save_individual_results_path "$SAFETY_EVAL_GENERATIONS_FILE" \\
  --batch_size 8
# Capture the evaluation's status immediately, before any other command
# (such as the directory listing below) overwrites $?.
exit_code=$?

# List generated files
echo "======= List evaluation result directory contents ======="
ls -la "$SAFETY_EVAL_OUTPUT_DIR"/

# Print execution status
if [ $exit_code -eq 0 ]; then
    echo "Safety evaluation completed successfully"
else
    echo "Safety evaluation failed, exit code: $exit_code"
fi
exit $exit_code
"""


def _load_safety_results(output_file, output_dir):
    """Load the evaluation report, falling back to the newest JSON file.

    Args:
        output_file: Expected path of the report written by the evaluation
        output_dir: Directory searched for ``*.json`` when the report is missing

    Returns:
        The parsed JSON content, or an ``{"error": ...}`` dict when no JSON
        file is found.
    """
    if os.path.exists(output_file):
        print(f"Found result file: {output_file}")
        with open(output_file, "r", encoding="utf-8") as f:
            return json.load(f)

    # Fallback: take the most recently modified JSON file in the directory.
    # NOTE(review): this can also pick up the per-sample generations file;
    # confirm that is acceptable for callers.
    json_files = glob.glob(os.path.join(output_dir, "*.json"))
    if json_files:
        latest_file = max(json_files, key=os.path.getmtime)
        print(f"Found JSON file: {latest_file}")
        with open(latest_file, "r", encoding="utf-8") as f:
            return json.load(f)

    return {"error": "Evaluation completed but result file does not exist"}


def evaluate_safety(model_path, output_dir):
    """
    Evaluate safety capabilities (WildGuardTest, HarmBench, etc.)

    Runs the safety-eval harness in a bash subprocess (cloning and installing
    it into ``./safety-eval-fork`` on first use) and loads the JSON report it
    writes to ``<output_dir>/safety_eval.json``. Subprocess output goes
    straight to the current terminal.

    Reads ``CUDA_VISIBLE_DEVICES`` (first entry, default ``"0"``),
    ``HF_TOKEN`` (default empty) and ``HF_HOME`` (default
    ``/tmp/huggingface``) from the environment.

    Args:
        model_path: Model path (relative paths are resolved against the
            current working directory)
        output_dir: Output directory (created if it does not exist)

    Returns:
        dict: Evaluation results, or ``{"error": <message>}`` when the output
        directory cannot be created, the evaluation exits non-zero, no result
        file is found, or an unexpected exception occurs.
    """
    print("Evaluating safety capabilities...")

    # The script changes into safety-eval-fork before running, so every path
    # handed to it must be absolute to keep pointing at the same location.
    abs_output_dir = os.path.abspath(output_dir)

    # Fail fast: do not start a long GPU job whose report cannot be written.
    try:
        os.makedirs(abs_output_dir, exist_ok=True)
    except OSError as e:
        message = f"Cannot create output directory {output_dir}: {e}"
        print(message)
        return {"error": message}

    # Result file fixed name
    output_file = os.path.join(output_dir, "safety_eval.json")

    # Pass all dynamic values through the child's environment rather than
    # writing them (including the HuggingFace token) into a script file.
    env = dict(os.environ)
    env.update({
        "SAFETY_EVAL_GPU_ID": os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0],
        "SAFETY_EVAL_HF_TOKEN": os.environ.get("HF_TOKEN", ""),
        "SAFETY_EVAL_HF_HOME": os.environ.get("HF_HOME", "/tmp/huggingface"),
        "SAFETY_EVAL_MODEL_PATH": os.path.abspath(model_path),
        "SAFETY_EVAL_OUTPUT_DIR": abs_output_dir,
        "SAFETY_EVAL_OUTPUT_FILE": os.path.join(abs_output_dir, "safety_eval.json"),
        "SAFETY_EVAL_GENERATIONS_FILE": os.path.join(abs_output_dir, "safety_generation.json"),
    })

    try:
        # Execute evaluation script and display output directly in current terminal
        print("\n" + "=" * 50)
        print("Starting safety capability evaluation, output will be displayed directly...")
        print("=" * 50 + "\n")

        # stdout/stderr are left unset so the child inherits the terminal.
        completed = subprocess.run(
            ["/bin/bash", "-c", _SAFETY_EVAL_SCRIPT],
            env=env,
            check=False,
        )

        if completed.returncode != 0:
            print(f"Safety evaluation failed, return code: {completed.returncode}")
            return {"error": f"Safety evaluation failed, return code: {completed.returncode}"}

        return _load_safety_results(output_file, output_dir)

    except Exception as e:
        # Top-level boundary: report any failure as an error dict, as callers expect.
        print(f"Safety evaluation process error: {e}")
        import traceback
        traceback.print_exc()
        return {"error": str(e)}