#!/usr/bin/env bash
#
# Runs verl.trainer.main_generation twice over one dataset: a first sampling
# pass, then a second pass that is additionally given a candidate file path
# built from the first pass's settings.
#
# Usage: <script> MODEL_PATH OUTPUT_DIR DATA_TYPE
#   MODEL_PATH  forwarded as model.path
#   OUTPUT_DIR  forwarded as data.output_path
#   DATA_TYPE   dataset name; input is ./hdfs_data/<DATA_TYPE>.parquet

# Trace every command so the fully expanded overrides appear in the job log.
set -x

# export VLLM_ATTENTION_BACKEND=XFORMERS

# Fail early on missing or empty arguments. Without this check an absent
# argument expands to nothing and produces overrides such as
# "data.path=./hdfs_data/.parquet" or "model.path=".
if [ "$#" -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "Usage: $0 MODEL_PATH OUTPUT_DIR DATA_TYPE" >&2
    exit 2
fi

MODEL_PATH=$1
OUTPUT_DIR=$2
DATA_TYPE=$3

# First-pass sampling settings: N is passed as data.n_samples and temperature
# as rollout.temperature. Both are also embedded in the candidate file name
# (n<N>_t<temperature>.jsonl) handed to the second pass, so changing them here
# changes which file the second pass looks for.
# N=32
N=16
temperature=0.6

# Echo the values for verification
echo "Model Path: ${MODEL_PATH}"
echo "Output Directory: ${OUTPUT_DIR}"
echo "Dataset: ${DATA_TYPE}"

# First pass: draw ${N} samples per prompt at ${temperature} from
# ./hdfs_data/${DATA_TYPE}.parquet, with data.output_path set to ${OUTPUT_DIR}.
# Overrides that embed caller-supplied values are quoted so that a path with
# whitespace or glob characters stays a single key=value argument.
# NOTE(review): the key=value / +key=value arguments look like Hydra-style
# overrides ("+" adding a key that the base config lacks) - confirm.
# The "|| exit" stops the script with this command's exit status on failure:
# the second pass is pointed at
# ${OUTPUT_DIR}/${DATA_TYPE}/n${N}_t${temperature}.jsonl, which is presumably
# written by this pass (verify against main_generation), so continuing after a
# failure would run it against a missing or stale file.
python3 -m verl.trainer.main_generation \
    trainer.nnodes=1 \
    trainer.n_gpus_per_node=8 \
    "data.path=./hdfs_data/${DATA_TYPE}.parquet" \
    "data.output_path=${OUTPUT_DIR}" \
    "data.n_samples=${N}" \
    data.batch_size=102400 \
    +data.do_metrics=True \
    "model.path=${MODEL_PATH}" \
    "rollout.temperature=${temperature}" \
    rollout.prompt_length=1024 \
    rollout.response_length=31744 \
    rollout.top_k=-1 \
    rollout.top_p=0.95 \
    rollout.gpu_memory_utilization=0.9 \
    rollout.tensor_model_parallel_size=1 || exit

# Second-pass settings.
#   can_N            -> data.n_samples
#   can_temperature  -> rollout.temperature
#   n_candidates     -> +rollout.n_candidates
#   candidate_turn   -> +rollout.candidate_turn
can_N=4
can_temperature=0.6
n_candidates=4
# candidate_turn=8
candidate_turn=4

# Second pass: same dataset, model and output directory as the first pass, but
# additionally given +data.candidate_path, whose file name is derived from the
# first pass's N and temperature. The prompt budget is larger here
# (rollout.prompt_length=6144 versus 1024 in the first pass).
# Overrides that embed caller-supplied values are quoted so that a path with
# whitespace or glob characters stays a single key=value argument.
# NOTE(review): the semantics of n_candidates, candidate_turn and
# no_solution_discard are defined by main_generation and are not visible
# here - confirm there before changing them.
python3 -m verl.trainer.main_generation \
    trainer.nnodes=1 \
    trainer.n_gpus_per_node=8 \
    "data.path=./hdfs_data/${DATA_TYPE}.parquet" \
    "data.output_path=${OUTPUT_DIR}" \
    "+data.candidate_path=${OUTPUT_DIR}/${DATA_TYPE}/n${N}_t${temperature}.jsonl" \
    "data.n_samples=${can_N}" \
    data.batch_size=102400 \
    +data.do_metrics=True \
    "model.path=${MODEL_PATH}" \
    "rollout.temperature=${can_temperature}" \
    rollout.prompt_length=6144 \
    rollout.response_length=32768 \
    "+rollout.n_candidates=${n_candidates}" \
    "+rollout.candidate_turn=${candidate_turn}" \
    +rollout.no_solution_discard=True \
    rollout.top_k=-1 \
    rollout.top_p=0.95 \
    rollout.gpu_memory_utilization=0.9 \
    rollout.tensor_model_parallel_size=1
    
