#!/bin/bash

# 脚本用于生成、测试和评估 Dyck Language Errors 游戏数据

# 默认参数
NUM_OF_DATA=100
N_TYPES=3
TOTAL_LENGTH=20
MAX_ATTEMPTS=1000
GENERATE_ALL=false

# 解析命令行参数
while [[ $# -gt 0 ]]; do
  case "$1" in
    --num_of_data)
      NUM_OF_DATA="$2"
      shift 2
      ;;
    --n_types)
      N_TYPES="$2"
      shift 2
      ;;
    --total_length)
      TOTAL_LENGTH="$2"
      shift 2
      ;;
    --max_attempts)
      MAX_ATTEMPTS="$2"
      shift 2
      ;;
    --generate_all)
      GENERATE_ALL=true
      shift
      ;;
    *)
      echo "未知参数: $1"
      exit 1
      ;;
  esac
done

# 设置工作目录为项目根目录
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
PROJECT_ROOT="$(readlink -f "${SCRIPT_DIR}/../../..")"
cd "${PROJECT_ROOT}"

echo "Running Dyck Language Errors game generation and testing..."

# 创建输出目录
OUTPUT_DIR="${SCRIPT_DIR}/data"
mkdir -p "${OUTPUT_DIR}"

# 运行单元测试
echo "=== Running unit tests ==="
echo "1. Testing generate method..."
python -m games.tasks.dyck_language_errors.tests.test_generate
echo "2. Testing verify method..."
python -m games.tasks.dyck_language_errors.tests.test_verify
echo "3. Testing extract answer method..."
python -m games.tasks.dyck_language_errors.tests.test_extract_answer

# 函数：生成指定参数的数据
generate_data() {
  local num=$1
  local types=$2
  local length=$3
  local attempts=$4
  
  echo "=== Generating game data ==="
  echo "Generating ${num} game data, number of types: ${types}, sequence length: ${length}..."
  
  python -m games.tasks.dyck_language_errors.scripts.dyck_language_errors \
    --num_of_data "${num}" \
    --n_types "${types}" \
    --total_length "${length}" \
    --max_attempts "${attempts}"
    
  local nested_dir="${OUTPUT_DIR}/num_of_data_${num}/n_types_${types}/total_length_${length}"
  local nested_file="${nested_dir}/dyck_language_errors_${num}.jsonl"
  
  if [ -f "${nested_file}" ]; then
    echo "Game data generated: ${nested_file}"
  else
    echo "Error: Game data file not generated (n_types=${types}, total_length=${length})"
  fi
}

if [ "$GENERATE_ALL" = true ]; then
  # 生成指定的多组超参数数据，每组1000条
  echo "Generating dataset with multiple sets of hyperparameters, each set contains 1000 data..."
  
  # 组合1: n_types=3, total_length=10
  generate_data 1000 3 10 "${MAX_ATTEMPTS}"
  
  # 组合2: n_types=3, total_length=15
  generate_data 1000 3 15 "${MAX_ATTEMPTS}"
  
  # 组合3: n_types=4, total_length=20
  generate_data 1000 4 20 "${MAX_ATTEMPTS}"
  
  # 组合4: n_types=4, total_length=30
  generate_data 1000 4 30 "${MAX_ATTEMPTS}"
  
  echo "All dataset generation completed"
else
  # 生成单组参数的数据
  echo "Parameters: num_of_data=${NUM_OF_DATA}, n_types=${N_TYPES}, total_length=${TOTAL_LENGTH}, max_attempts=${MAX_ATTEMPTS}"
  generate_data "${NUM_OF_DATA}" "${N_TYPES}" "${TOTAL_LENGTH}" "${MAX_ATTEMPTS}"
  
  # 输出文件路径
  DATA_FILE="${OUTPUT_DIR}/dyck_language_errors_${NUM_OF_DATA}.jsonl"
  
  # 检查文件是否存在并输出提示
  if [ -f "${DATA_FILE}" ]; then
    echo "Game data generated: ${DATA_FILE}"
    echo "To check the pass rate, please use the following command:"
    echo "python -m games.tasks.dyck_language_errors.eval.check_passrate --game_data_path ${DATA_FILE}"
  fi
fi

echo "=== Completed ===" 