#!/bin/bash

# Example: run code generation evaluation on BigCodeBench test
# (make sure BigCodeBench is registered via examples/bugs/data_processing/prepare_bigcodebench_data.py)

set -euo pipefail

# Arguments for the examples.bugs.run_codegen_flow module, collected in an
# array so each flag sits next to its value and every element reaches python3
# as its own word (no backslash line continuations to keep in sync).
codegen_args=(
  # Dataset selection.
  --dataset bigcodebench
  --split all

  # Concurrency, model name, and the endpoint URL handed to the flow.
  --n_parallel 64
  --model Qwen/Qwen3-4B
  --base_url http://localhost:30000/v1

  # Prompt/response length limits and sampling values.
  --max_prompt_length 8192
  --max_response_length 8192
  --temperature 0.6
  --top_p 0.95

  # Result saving and sample printing.
  --save_results
  --output_dir logs
  --print_samples 3
)

# Run the flow as a module; its exit status becomes the script's exit status.
python3 -m examples.bugs.run_codegen_flow "${codegen_args[@]}"
