#!/usr/bin/env bash
#
# Launches the `train` subcommand of src/train_grpo_mvp.py on the sampled
# training set, with the verifier mode set to pisa_http.
#
# Required environment variables (the script aborts if either is unset or
# empty):
#   MODEL_NAME_OR_PATH  local model path or Hugging Face model id
#   PISA_ENDPOINT       local verifier wrapper endpoint
#
# Every path handed to the trainer is anchored at the parent directory of the
# directory holding this script, so the caller's working directory is
# irrelevant.
set -euo pipefail

# Absolute path of the directory one level above this script's directory.
ROOT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd
)"

# `:?` prints the message and exits when the variable is unset or empty.
: "${MODEL_NAME_OR_PATH:?set MODEL_NAME_OR_PATH to a local model path or Hugging Face model id}"
: "${PISA_ENDPOINT:?set PISA_ENDPOINT to the local verifier wrapper endpoint}"

# Trainer options, kept in an array so each option/value pair stays a separate
# word without relying on line continuations.
train_args=(
  # Input data, model, and output location.
  --train_jsonl "${ROOT_DIR}/dataset/sampled_train/grpo_train_sample.jsonl"
  --model_name_or_path "${MODEL_NAME_OR_PATH}"
  --output_dir "${ROOT_DIR}/outputs/grpo_mvp"

  # Verifier selection and where to reach it.
  --verifier_mode pisa_http
  --pisa_endpoint "${PISA_ENDPOINT}"

  # Value-less switch; its exact effect is defined by the trainer script.
  --difficulty_order

  # Reward values passed through to the trainer.
  --invalid_structure_reward -0.1
  --placeholder_reward -0.1

  # Generation and optimisation settings.
  --num_generations 4
  --per_device_train_batch_size 2
  --gradient_accumulation_steps 4
  --learning_rate 5e-6
)

# Last command in the script, so its exit status becomes the script's status.
python "${ROOT_DIR}/src/train_grpo_mvp.py" train "${train_args[@]}"
