#!/usr/bin/env bash
# Iterate over the aime_Qwen3*.jsonl dataset files in sorted order and call run_t1.sh
# once per file in single-command mode, retrying a file while its output contains "Response status: 500".

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
DATA_DIR="$ROOT_DIR/envs/MATH/dataset"
RUN_SCRIPT="$ROOT_DIR/scripts/run_t1.sh"

#######################################
# Gather policy-model paths from POLICY_MODEL_1_PATH, POLICY_MODEL_2_PATH, ...
# The scan starts at index 1 and stops at the first index whose variable is
# unset or empty, so any higher-numbered variables after a gap are ignored.
# Outputs: the collected paths joined with commas on stdout; nothing at all
#          when POLICY_MODEL_1_PATH is unset or empty.
# Returns: 0
#######################################
collect_policy_models() {
  local -a found=()
  local n name path

  for (( n = 1; ; n++ )); do
    name="POLICY_MODEL_${n}_PATH"
    # Indirect expansion: read the variable whose name is stored in $name.
    path="${!name:-}"
    [[ -n "$path" ]] || break
    found+=("$path")
  done

  # Nothing defined in the environment: emit nothing.
  (( ${#found[@]} > 0 )) || return 0

  # "${found[*]}" joins elements with the first character of IFS.
  local IFS=,
  echo "${found[*]}"
}

# Dataset files to process. nullglob is enabled before the glob is expanded so
# that an unmatched pattern yields an empty array instead of the literal
# pattern string; DATA_DIR is quoted so a path containing spaces is not split.
shopt -s nullglob
files=("$DATA_DIR"/aime_Qwen3*.jsonl)

# Built-in LM list, used only when no POLICY_MODEL_<n>_PATH variables are set.
FALLBACK_LM_LIST="/your_policy_model_path/policy_models/Qwen3-0.6B,/your_policy_model_path/policy_models/Qwen3-1.7B,/your_policy_model_path/policy_models/Qwen3-4B,/your_policy_model_path/policy_models/Qwen3-8B,/your_policy_model_path/policy_models/Qwen3-14B"
# Built-in RM path, used only when VALUE_MODEL_PATH is unset or empty.
FALLBACK_RM="/your_prm_model_path/Skywork-o1-Open-PRM-Qwen-2.5-1.5B"

# Comma-separated policy models taken from the environment (empty when none).
ENV_LM_LIST="$(collect_policy_models)"

# Environment-provided values win over the built-in fallbacks.
DEFAULT_LM_LIST="${ENV_LM_LIST:-$FALLBACK_LM_LIST}"
DEFAULT_RM="${VALUE_MODEL_PATH:-$FALLBACK_RM}"

# Effective settings; the --lm / --rm command-line options override these.
LM_LIST="$DEFAULT_LM_LIST"
RM="$DEFAULT_RM"

if [[ -z "$ENV_LM_LIST" ]]; then
  echo "[run_all_test150_variants] Warning: POLICY_MODEL_*_PATH environment variables not detected, using built-in default LM list." >&2
fi

if [[ -z "${VALUE_MODEL_PATH:-}" ]]; then
  echo "[run_all_test150_variants] Warning: VALUE_MODEL_PATH not set, using default RM: $DEFAULT_RM" >&2
fi

#######################################
# Print the command-line help text and terminate the script.
# Outputs: three help lines on stdout.
# Returns: never returns; always exits with status 1.
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 [--lm <LM_COMMA_SEP>] [--rm <RM_PATH>]" \
    "  --lm   LM list, comma-separated, passed to --LM parameter of run_t1.sh" \
    "  --rm   RM model path, passed to --RM parameter of run_t1.sh"
  exit 1
}

# Parse command-line overrides for the LM list and the RM path.
# Any unrecognised argument prints the usage text and exits.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --lm)
      # Under `set -u`, reading a missing "$2" aborts with an opaque
      # "unbound variable" error, so check for the value explicitly.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        usage
      fi
      LM_LIST="$2"
      shift 2
      ;;
    --rm)
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        usage
      fi
      RM="$2"
      shift 2
      ;;
    -h|--help)
      usage
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      ;;
  esac
done

# Abort early when the dataset directory is missing.
if [[ ! -d "$DATA_DIR" ]]; then
  echo "Data directory does not exist: $DATA_DIR" >&2
  exit 1
fi

shopt -s nullglob
# The file list is globbed earlier in the script. If that glob matched nothing
# while nullglob was off, the array holds the literal pattern as its single
# element, so besides the element count also verify that the first entry
# actually exists on disk.
if (( ${#files[@]} == 0 )) || [[ ! -e "${files[0]}" ]]; then
  echo "No aime_Qwen3*.jsonl files found in $DATA_DIR" >&2
  exit 1
fi

echo "Found ${#files[@]} files, sorting by filename and running sequentially (LM=$LM_LIST, RM=$RM)..."

# Build the sorted list by reading `sort` output line by line. This avoids the
# `IFS=$'\n' arr=($(...))` form, which (having no command after the
# assignments) would leave IFS changed for the rest of the script and would
# also glob-expand every path.
sorted=()
while IFS= read -r sorted_path; do
  sorted+=("$sorted_path")
done < <(printf '%s\n' "${files[@]}" | sort)

# Per-task logs are written below this directory.
LOG_DIR="$ROOT_DIR/output/logs_run_all_variants"
mkdir -p "$LOG_DIR"

# Literal text searched for in each attempt's output; a match triggers a retry.
ERROR_PATTERN="Response status: 500"
# Number of retries performed across all tasks (reported at the end).
TOTAL_RETRIES=0

# Scratch file holding the output of the attempt currently in progress.
# It is removed by an EXIT trap so that an early exit does not leave it behind.
tmp_log=""
cleanup_tmp_log() {
  if [[ -n "$tmp_log" ]]; then
    rm -f -- "$tmp_log"
  fi
}
trap cleanup_tmp_log EXIT

# Run every dataset file in order. Each file is re-run for as long as its
# output contains ERROR_PATTERN (retries are unbounded); any other non-zero
# exit from run_t1.sh stops the whole script with that exit code.
for f in "${sorted[@]}"; do
  fname="$(basename -- "$f")"
  stem="${fname%.jsonl}"
  echo "---- Start: $fname -> task_name=$stem ----"

  # Start each task with an empty log; attempts are appended to it below.
  LOG_FILE="$LOG_DIR/${stem}.log"
  : > "$LOG_FILE"

  while true; do
    # Create the scratch log in the temp directory (not the current working
    # directory) with the X's at the end of the template, which both GNU and
    # BSD mktemp accept.
    tmp_log="$(mktemp "${TMPDIR:-/tmp}/${stem}_retry_XXXXXX")"

    # Use single-command mode: pass LM list and RM to run_t1.sh.
    # errexit is suspended so a failing run can be inspected instead of
    # terminating the script; PIPESTATUS[0] is run_t1.sh's status, not tee's.
    set +e
    bash "$RUN_SCRIPT" --method beam_search --LM "$LM_LIST" --RM "$RM" --task_name "$stem" \
      2>&1 | tee "$tmp_log"
    exit_code=${PIPESTATUS[0]}
    set -e

    cat "$tmp_log" >> "$LOG_FILE"

    # -F: ERROR_PATTERN is a fixed string, not a regular expression.
    saw_500=0
    if grep -qF -- "$ERROR_PATTERN" "$tmp_log"; then
      saw_500=1
    fi
    rm -f -- "$tmp_log"
    tmp_log=""

    if (( saw_500 )); then
      TOTAL_RETRIES=$((TOTAL_RETRIES + 1))
      echo "Detected 500 error, preparing to retry (task=$stem, total retries=$TOTAL_RETRIES)" | tee -a "$LOG_FILE"
      LOCK_ROOT="$ROOT_DIR/output/${stem}_beam_search"
      if [[ -d "$LOCK_ROOT" ]]; then
        # Remove every lock_dir under the task's output directory before
        # retrying. -prune stops find from descending into a lock_dir that
        # this loop may already have deleted.
        while IFS= read -r -d '' lock_path; do
          echo "Cleaning lock directory: $lock_path" | tee -a "$LOG_FILE"
          rm -rf -- "${lock_path:?}"
        done < <(find "$LOCK_ROOT" -type d -name "lock_dir" -prune -print0)
      fi
      sleep 2
      continue
    fi

    if [[ $exit_code -ne 0 ]]; then
      echo "Task $stem failed (exit=$exit_code), exiting script." | tee -a "$LOG_FILE"
      exit "$exit_code"
    fi

    break
  done

  echo "---- Completed: $fname (log: $LOG_FILE) ----"
  sleep 1
done

echo "All tasks completed."
echo "Total retries: $TOTAL_RETRIES"