#!/usr/bin/env bash
#  [August 30, 2025]

# ==========================================================================================
# This script evaluates the quality of a model by running each prompt and the model's response through a cost model and a reward model.
#
# Sample Usage:
# bash scripts/test/arena-evaluation2.sh \
#   --response_dir results
#
# Add the --redo flag to repeat evaluations whose results already exist.
# 
# By default, the reward/cost models are loaded as below. 
#   --reward_model_name_or_path PKU-Alignment/beaver-7b-v1.0-reward \
#   --cost_model_name_or_path PKU-Alignment/beaver-7b-v1.0-cost 
# ==========================================================================================

if [ -z "${BASH_VERSION}" ]; then
	echo "Please use bash to run this script." >&2
	exit 1
fi

set -x
set -e # Exit immediately if a command exits with a non-zero status.

SCRIPT_DIR="$(cd "$(dirname "$0")" &>/dev/null && pwd)"
ROOT_DIR="$(dirname "$(dirname "${SCRIPT_DIR}")")"
export PYTHONPATH="${ROOT_DIR}${PYTHONPATH:+:${PYTHONPATH}}"

# --- Configuration ---
# Defaults for every tunable; the flags handled by parse_args below override them.
RESPONSE_DIR=""
REWARD_MODEL_NAME_OR_PATH="PKU-Alignment/beaver-7b-v1.0-reward"
COST_MODEL_NAME_OR_PATH="PKU-Alignment/beaver-7b-v1.0-cost"
REDO_EVALUATIONS="false"
# Drop any HOSTFILE inherited from the environment so that only an explicit
# --hostfile flag leaves the variable set.
unset HOSTFILE
ZERO_STAGE=0

# --- Argument Parsing ---

#######################################
# Abort with a clear message when a value-taking flag has no value after it.
# Without this check the following `shift` fails and, under `set -e`, the
# script exits without telling the user what was wrong.
# Arguments: $1 - the flag being parsed
#            $2 - number of arguments remaining after the flag
# Outputs:   an error message on stderr when the value is missing
# Returns:   0 when a value is available; otherwise exits with status 1
#######################################
require_value() {
	if [[ "$2" -eq 0 ]]; then
		echo "Error: '$1' requires a value." >&2
		exit 1
	fi
}

#######################################
# Parse the command-line flags into the configuration globals.
# Every value-taking flag is accepted as `--flag value` and as `--flag=value`.
# Globals:   RESPONSE_DIR, REWARD_MODEL_NAME_OR_PATH, COST_MODEL_NAME_OR_PATH,
#            HOSTFILE, REDO_EVALUATIONS, ZERO_STAGE (written)
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr for an unknown flag or a missing value
# Returns:   0 on success; exits with status 1 on any parse error
#######################################
parse_args() {
	local arg
	while [[ "$#" -gt 0 ]]; do
		arg="$1"
		shift
		case "${arg}" in
			--response_dir) require_value "${arg}" "$#"; RESPONSE_DIR="$1"; shift ;;
			--response_dir=*) RESPONSE_DIR="${arg#*=}" ;;
			--reward_model_name_or_path) require_value "${arg}" "$#"; REWARD_MODEL_NAME_OR_PATH="$1"; shift ;;
			--reward_model_name_or_path=*) REWARD_MODEL_NAME_OR_PATH="${arg#*=}" ;;
			--cost_model_name_or_path) require_value "${arg}" "$#"; COST_MODEL_NAME_OR_PATH="$1"; shift ;;
			--cost_model_name_or_path=*) COST_MODEL_NAME_OR_PATH="${arg#*=}" ;;
			--hostfile) require_value "${arg}" "$#"; HOSTFILE="$1"; shift ;;
			--hostfile=*) HOSTFILE="${arg#*=}" ;;
			--redo) REDO_EVALUATIONS="true" ;; # If specified, re-do all evaluations even if results exist
			--zero_stage) require_value "${arg}" "$#"; ZERO_STAGE="$1"; shift ;;
			--zero_stage=*) ZERO_STAGE="${arg#*=}" ;;
			*) echo "Unknown parameter passed: '${arg}'" >&2; exit 1 ;;
		esac
	done
}

parse_args "$@"

# --- Validate Arguments and Find Files ---
if [[ -z "${RESPONSE_DIR}" ]]; then
    echo "Error: --response_dir is required." >&2
    exit 1
fi

# Fail early with a precise message. Otherwise `find` would fail inside the
# process substitution below, where its exit status is not checked, and the
# user would only see the generic "could not find" error.
if [[ ! -d "${RESPONSE_DIR}" ]]; then
    echo "Error: --response_dir '${RESPONSE_DIR}' is not a directory." >&2
    exit 1
fi

# Safety check: warn (but do not abort) when the directory is not named
# `generated_responses`, and give the user five seconds to cancel.
if [[ "$(basename "${RESPONSE_DIR}")" != "generated_responses" ]]; then
    echo "Warning: For safety, this script is designed to read from a directory named 'generated_responses'." >&2
    echo "You provided: ${RESPONSE_DIR}" >&2
    echo "Continuing in 5 seconds... (Press Ctrl+C to cancel)" >&2
    sleep 5
fi

# Collect every 'response.jsonl' that sits exactly one directory below
# RESPONSE_DIR (depth 2 counted from RESPONSE_DIR itself). The list is sorted
# so that the evaluation order does not depend on filesystem enumeration order.
mapfile -t RESPONSE_FILES < <(
    find "${RESPONSE_DIR}" -mindepth 2 -maxdepth 2 -type f -name "response.jsonl" | sort
)

if [[ "${#RESPONSE_FILES[@]}" -lt 2 ]]; then
    echo "Error: Could not find at least two 'response.jsonl' files in the subdirectories of ${RESPONSE_DIR}." >&2
    exit 1
fi

echo "Found ${#RESPONSE_FILES[@]} response files. Starting evaluations..."
printf " - %s\n" "${RESPONSE_FILES[@]}"

# --- Determine Output Directory ---
# Results go into an `arena_tournament` directory directly inside the response directory.
EVALUATION_ROOT_DIR="${RESPONSE_DIR}/arena_tournament"
printf '%s\n' "Evaluation results will be saved in subdirectories under: ${EVALUATION_ROOT_DIR}"

# --- Main Evaluation Loop ---
# Range of TCP ports from which the DeepSpeed master port is drawn.
MASTER_PORT_START=10000
MASTER_PORT_END=65535

#######################################
# Print one randomly chosen port from [MASTER_PORT_START, MASTER_PORT_END]
# that does not appear as the port part of the fourth column of `ss -Htan`.
# Globals:   MASTER_PORT_START, MASTER_PORT_END (read)
# Outputs:   the chosen port on stdout (nothing if no candidate remains)
#######################################
pick_master_port() {
    # `comm` needs both inputs in the same lexical order, hence the plain
    # `sort` applied to the numeric sequence.
    comm -23 \
        <(seq "${MASTER_PORT_START}" "${MASTER_PORT_END}" | sort) \
        <(ss -Htan | awk '{ print $4 }' | awk -F ':' '{ print $NF }' | sort -u) |
        shuf | head -n 1
}

#######################################
# Evaluate a single response file with the reward and cost models.
# The model name is the name of the directory containing the response file,
# and results are written to ${EVALUATION_ROOT_DIR}/<model name>.
# Globals:   EVALUATION_ROOT_DIR, REDO_EVALUATIONS, REWARD_MODEL_NAME_OR_PATH,
#            COST_MODEL_NAME_OR_PATH, ZERO_STAGE, HOSTFILE (read)
# Arguments: $1 - path to a response.jsonl file
# Outputs:   progress messages on stdout; deepspeed stdout/stderr are passed
#            through and also copied to stdout.log / stderr.log in the
#            output directory
# Returns:   0 when the model was evaluated or skipped
#######################################
evaluate_response_file() {
    local response_file="$1"
    local model_name output_dir master_port
    local -a deepspeed_args

    model_name="$(basename "$(dirname "${response_file}")")"
    output_dir="${EVALUATION_ROOT_DIR}/${model_name}"

    echo "------------------------------------------------------------"
    echo "Arena Evaluation"
    echo "------------------------------------------------------------"

    # Skip models whose output directory already exists unless --redo was
    # given. The directory is created before deepspeed runs, so a run that
    # failed part-way also counts as "existing" and needs --redo.
    if [[ -d "${output_dir}" ]]; then
        if [[ "${REDO_EVALUATIONS}" = "true" ]]; then
            echo "Re-running evaluation for ${model_name} as requested by --redo flag."
        else
            echo "Skipping ${model_name}: Results already exist. Use --redo to re-run."
            echo
            return 0
        fi
    fi

    mkdir -p "${output_dir}"

    # Setup DeepSpeed arguments. The port is chosen per run because the set of
    # ports in use can change between runs.
    master_port="$(pick_master_port)"
    if [[ -z "${master_port}" ]]; then
        echo "Error: could not determine a free master port in ${MASTER_PORT_START}-${MASTER_PORT_END}." >&2
        exit 1
    fi

    deepspeed_args=("--master_port" "${master_port}")
    # HOSTFILE is unset unless --hostfile was passed; `+x` tests "is set".
    if [[ -n "${HOSTFILE+x}" ]]; then
        deepspeed_args+=("--hostfile" "${HOSTFILE}")
    fi

    # Run the deepspeed command and log output to the model's directory.
    (
        deepspeed "${deepspeed_args[@]}" \
            --module safe_rlhf.evaluate.arena2 \
            --response_file "${response_file}" \
            --model_name "${model_name}" \
            --reward_model_name_or_path "${REWARD_MODEL_NAME_OR_PATH}" \
            --cost_model_name_or_path "${COST_MODEL_NAME_OR_PATH}" \
            --output_dir "${output_dir}" \
            --zero_stage "${ZERO_STAGE}" \
            --trust_remote_code True
    ) > >(tee "${output_dir}/stdout.log") 2> >(tee "${output_dir}/stderr.log" >&2)

    echo "Evaluation complete for ${model_name}."
    echo "Results saved in ${output_dir}"
    echo
}

#######################################
# Evaluate every entry of RESPONSE_FILES exactly once.
# The earlier nested pair loop used only its outer index, so it re-ran each
# model once per later file and never evaluated the last file.
# Globals:   RESPONSE_FILES (read)
#######################################
run_all_evaluations() {
    local response_file
    for response_file in "${RESPONSE_FILES[@]}"; do
        evaluate_response_file "${response_file}"
    done
}

run_all_evaluations

echo "All evaluations are complete."