#!/usr/bin/env bash
#
# Launches verl.trainer.main_generation for candidate-conditioned generation.
#
# Usage: <script> MODEL_PATH OUTPUT_DIR DATA_TYPE N_CANDIDATES
#   MODEL_PATH    value passed as model.path
#   OUTPUT_DIR    value passed as data.output_path; the candidate file is also
#                 looked up under ${OUTPUT_DIR}/${DATA_TYPE}/
#   DATA_TYPE     dataset name; input is ./hdfs_data/${DATA_TYPE}.parquet
#   N_CANDIDATES  value passed as rollout.n_candidates (consumed further below)
#
# -e/-u: stop on the first failing command or unset variable instead of
# launching a long generation job with half-empty overrides.
# -x: trace every command so the resolved settings appear in the job log.
set -eux

# export VLLM_ATTENTION_BACKEND=XFORMERS

# Extra arguments beyond the fourth were always ignored, so only a shortfall
# is treated as an error.
if [ "$#" -lt 4 ]; then
  echo "Usage: $0 MODEL_PATH OUTPUT_DIR DATA_TYPE N_CANDIDATES" >&2
  exit 2
fi

MODEL_PATH=$1
OUTPUT_DIR=$2
DATA_TYPE=$3

# An empty value would silently produce paths such as "./hdfs_data/.parquet".
if [ -z "$MODEL_PATH" ] || [ -z "$OUTPUT_DIR" ] || [ -z "$DATA_TYPE" ]; then
  echo "error: MODEL_PATH, OUTPUT_DIR and DATA_TYPE must be non-empty" >&2
  exit 2
fi

# N and temperature are only used to build the candidate file name
# (n${N}_t${temperature}.jsonl) passed as data.candidate_path.
N=32
temperature=0.6

# Echo the values for verification
printf '%s\n' "Model Path: ${MODEL_PATH}"
printf '%s\n' "Output Directory: ${OUTPUT_DIR}"
printf '%s\n' "Dataset: ${DATA_TYPE}"

# Sampling temperature passed to the rollout as rollout.temperature.
can_temperature=0.6

# Fourth positional argument: number of candidates (rollout.n_candidates).
# It is used as a divisor below, so it must be a plain decimal integer in
# 1..16:
#   - empty / non-digit input would be a syntax error or be evaluated as a
#     variable name inside $(( ));
#   - a leading zero would be read as octal (and rejects 0 itself);
#   - anything above 16 makes candidate_turn 0, and the next division fails.
# The "???*" arm rejects 3+ digit values before the numeric comparison so the
# test below never sees an out-of-range integer.
n_candidates=${4-}
case "$n_candidates" in
  '' | 0* | *[!0-9]* | ???*)
    echo "error: N_CANDIDATES must be an integer between 1 and 16, got '${n_candidates}'" >&2
    exit 2
    ;;
esac
if [ "$n_candidates" -gt 16 ]; then
  echo "error: N_CANDIDATES must be an integer between 1 and 16, got '${n_candidates}'" >&2
  exit 2
fi

# Both divisions are integer (truncating) divisions of the fixed base 16
# (presumably the overall candidate budget -- TODO confirm).
candidate_turn=$((16 / n_candidates))    # passed as rollout.candidate_turn
can_N=$((16 / candidate_turn))           # passed as data.n_samples
# Prompt budget grows linearly: 1536 per candidate.
prompt_length=$((1536 * n_candidates))

# Run the generation entry point on a single node with 4 GPUs.
# Every override that embeds a variable is quoted so that a model path, output
# directory or dataset name containing whitespace or glob characters is passed
# as one argument instead of being split into several.
# Keys prefixed with "+" are presumably Hydra-style additions of options that
# are absent from the base config -- confirm against verl's generation config.
python3 -m verl.trainer.main_generation \
    trainer.nnodes=1 \
    trainer.n_gpus_per_node=4 \
    "data.path=./hdfs_data/${DATA_TYPE}.parquet" \
    "data.output_path=${OUTPUT_DIR}" \
    "+data.candidate_path=${OUTPUT_DIR}/${DATA_TYPE}/n${N}_t${temperature}.jsonl" \
    "data.n_samples=${can_N}" \
    data.batch_size=102400 \
    +data.do_metrics=True \
    "model.path=${MODEL_PATH}" \
    "rollout.temperature=${can_temperature}" \
    "rollout.prompt_length=${prompt_length}" \
    rollout.response_length=32768 \
    "+rollout.n_candidates=${n_candidates}" \
    "+rollout.candidate_turn=${candidate_turn}" \
    +rollout.no_solution_discard=True \
    rollout.top_k=-1 \
    rollout.top_p=0.95 \
    rollout.gpu_memory_utilization=0.9 \
    rollout.tensor_model_parallel_size=1
    
