#!/usr/bin/env bash
# GRPO + LoRA fine-tuning: SmolLM2-1.7B-Instruct as listener (RLVR future work)
# Requires: pip install transformers peft accelerate
#
# Usage: <this script> [extra run_grpo.py arguments...]
#   Any arguments given are appended verbatim after the fixed flags below.
#   How run_grpo.py treats a repeated flag depends on its own argument parser.
#
# Environment:
#   GRPO_VENV_DIR  Virtualenv directory to activate.
#                  Default: <repo root>/../p311TheRockLM_venv
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"

# Run from the repo root so the relative run_grpo.py and config paths resolve.
cd "$REPO_ROOT"

VENV_DIR="${GRPO_VENV_DIR:-$REPO_ROOT/../p311TheRockLM_venv}"
VENV_ACTIVATE="$VENV_DIR/bin/activate"

# Fail early with an actionable message instead of a bare "No such file" error.
if [[ ! -f "$VENV_ACTIVATE" ]]; then
  printf 'error: virtualenv activate script not found: %s\n' "$VENV_ACTIVATE" >&2
  printf 'hint: set GRPO_VENV_DIR to the virtualenv directory\n' >&2
  exit 1
fi

# Activate scripts from some virtualenv versions may read unset variables,
# which would abort under nounset; relax it only while sourcing.
set +u
# shellcheck disable=SC1090  # path is only known at runtime
source "$VENV_ACTIVATE"
set -u

python run_grpo.py \
    --config configs/grpo/smollm.yaml \
    --role listener \
    --counterpart rule-based \
    --seed 0 \
    "$@"
