#!/usr/bin/env bash
# Launches `python3 -m verl.trainer.cot_synthesize_reward` for one dataset.
#
# Usage: <script> MODEL_PATH OUTPUT_DIR DATA_TYPE
#   MODEL_PATH  forwarded as model.path
#   OUTPUT_DIR  forwarded as data.output_path; also the root directory of the
#               candidate file path (see +data.candidate_path below)
#   DATA_TYPE   dataset name; the input file is ./hdfs_data/<DATA_TYPE>.parquet,
#               resolved relative to the current working directory
#
# Every key=value override is double-quoted so that paths containing spaces or
# glob characters reach the trainer as a single argument.

# Trace each command as it runs so the exact invocation shows up in the logs.
set -x

# Fail fast instead of starting a run with empty paths.
if [ "$#" -lt 3 ]; then
    echo "Usage: $0 MODEL_PATH OUTPUT_DIR DATA_TYPE" >&2
    exit 2
fi

# export VLLM_ATTENTION_BACKEND=XFORMERS
MODEL_PATH=$1
OUTPUT_DIR=$2
DATA_TYPE=$3

# N and temperature are only used to build the candidate file name
# (n<N>_t<temperature>.jsonl); presumably they must match the settings of the
# run that produced that file -- confirm against the generation script.
N=16
# N=32
temperature=0.6

# Echo the values for verification
echo "Model Path: ${MODEL_PATH}"
echo "Output Directory: ${OUTPUT_DIR}"
echo "Dataset: ${DATA_TYPE}"

# Settings forwarded to this run: data.n_samples, rollout.temperature,
# +rollout.n_candidates and +rollout.candidate_turn respectively.
can_N=4
can_temperature=0.6
n_candidates=4
# candidate_turn=8
candidate_turn=4

# NOTE(review): presumably needed because prompt_length + response_length
# (6172 + 4096) can exceed the model's configured context length -- confirm
# against the vLLM documentation for this variable.
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1

# Overrides prefixed with `+` look like config-append overrides (Hydra-style);
# verify against the trainer's config schema before renaming any key.
python3 -m verl.trainer.cot_synthesize_reward \
    trainer.nnodes=1 \
    trainer.n_gpus_per_node=2 \
    "data.path=./hdfs_data/${DATA_TYPE}.parquet" \
    "data.output_path=${OUTPUT_DIR}" \
    "+data.candidate_path=${OUTPUT_DIR}/${DATA_TYPE}/n${N}_t${temperature}.jsonl" \
    "data.n_samples=${can_N}" \
    data.batch_size=102400 \
    +data.do_metrics=True \
    "model.path=${MODEL_PATH}" \
    "rollout.temperature=${can_temperature}" \
    rollout.prompt_length=6172 \
    rollout.response_length=4096 \
    "+rollout.n_candidates=${n_candidates}" \
    "+rollout.candidate_turn=${candidate_turn}" \
    +rollout.no_solution_discard=True \
    rollout.top_k=-1 \
    rollout.top_p=0.95 \
    rollout.gpu_memory_utilization=0.9 \
    rollout.tensor_model_parallel_size=1