#!/bin/bash
# Reproduce TMPC HH-RLHF results (Table 1 in the paper).
# All 1024 prompts, 3 TMPC iterations, buffer size 3.
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Experiment settings. Each value is defined exactly once so that the
# generation and evaluation invocations below cannot drift apart.
readonly OUTPUT_DIR="results/hhrlhf"
readonly GENERATION_DIR="${OUTPUT_DIR}/generation"
# Passed as --end to generation and as --eval_range to evaluation.
readonly NUM_PROMPTS=1024
# Passed as --max_iterations to generation and as --eval_it to evaluation.
# NOTE(review): assumes evaluation should target the last generation
# iteration -- confirm against run_hhrlhf.py.
readonly MAX_ITERATIONS=3

mkdir -p "$OUTPUT_DIR"

# NOTE: run_hhrlhf.py and hhrlhf.csv are relative paths, so this script must
# be launched from the directory that contains them.

echo "=============================="
echo " HH-RLHF: TMPC Generation"
echo "=============================="
# Step 1: generation. Output goes to $GENERATION_DIR.
python run_hhrlhf.py \
    --input_file hhrlhf.csv \
    --output_folder "$GENERATION_DIR" \
    --rm_path rl-bandits-lab/hhrlhf_rm \
    --max_iterations "$MAX_ITERATIONS" \
    --buffer_size 3 \
    --threshold 4 \
    --cuda_num 0 \
    --start 0 \
    --end "$NUM_PROMPTS"

echo "=============================="
echo " HH-RLHF: Evaluation"
echo "=============================="
# Step 2: evaluation. Reads the generation output folder and writes a CSV.
python run_hhrlhf.py \
    --evaluate \
    --eval_input_folder "$GENERATION_DIR" \
    --eval_it "$MAX_ITERATIONS" \
    --eval_range "$NUM_PROMPTS" \
    --eval_output_file "$OUTPUT_DIR/eval_results.csv"

echo "HH-RLHF experiment complete. Results saved to $OUTPUT_DIR/"
