# Run-wide settings, exported so the child scripts and Python processes
# launched further down inherit them.
#   STORAGE_PATH               - root used below for rollout data and checkpoints
#   VLLM_DISABLE_COMPILE_CACHE - presumably read by vLLM to skip its compile
#                                cache (not consumed in this file; confirm)
STORAGE_PATH="test"
VLLM_DISABLE_COMPILE_CACHE=1
export STORAGE_PATH VLLM_DISABLE_COMPILE_CACHE

# ------------------iter3-------------------
# Per-iteration settings. The three paths are empty in this file;
# NOTE(review): they look like placeholders to fill in before running.
experiment_name="Qwen3-8B-Base_solver_v3"
questioner_model_path=''
solver_model_path=''
memory_path=''
export experiment_name questioner_model_path solver_model_path memory_path

# Stage 1 - produce the question set for this iteration:
#   1. question_generate_with_memory.bash is run with the questioner model,
#   2. evaluate.sh is run with the solver model,
#   3. upload_local.py is invoked with a 0.3-0.6 score window
#      (presumably a filter on solver scores; confirm in upload_local.py).
#
# The settings below are positional arguments of the child scripts. An empty
# value expanded without quotes disappears and shifts every later argument
# one slot to the left, so refuse to start until they are filled in.
: "${questioner_model_path:?set questioner_model_path before running}"
: "${solver_model_path:?set solver_model_path before running}"
: "${experiment_name:?set experiment_name before running}"

echo 'start generate question'
# 1000 is the generator's second positional argument (presumably the number
# of questions to generate; confirm in the callee). memory_path is forwarded
# only when non-empty, so an unset memory still yields three arguments.
bash question_generate/question_generate_with_memory.bash \
    "$questioner_model_path" 1000 "$experiment_name" ${memory_path:+"$memory_path"} \
    || { echo 'question generation failed' >&2; exit 1; }

echo 'start evaluate generated question'
bash question_evaluate/evaluate.sh "$solver_model_path" "$experiment_name" \
    || { echo 'question evaluation failed' >&2; exit 1; }

echo 'start upload'
python question_evaluate/upload_local.py \
    --repo_name "${experiment_name}" \
    --max_score 0.6 \
    --min_score 0.3 \
    --experiment_name "${experiment_name}" \
    || { echo 'upload failed' >&2; exit 1; }


# Stage 2 - train the solver with verl on GPUs 0-7.
# rollout_data_dir is created here and handed to the trainer as
# trainer.rollout_data_dir; checkpoints are directed to
# ${STORAGE_PATH}/models/${experiment_name}/.
export rollout_data_dir="${STORAGE_PATH}/${experiment_name}"
mkdir -p "${rollout_data_dir}" \
    || { echo "cannot create ${rollout_data_dir}" >&2; exit 1; }
export train_set='train.json'

# NOTE(review): trainer.val_freq used to be passed twice in this list (4, then
# -1). Assuming the trainer applies key=value overrides in order (OmegaConf
# dotlist behaviour), only the final -1 took effect, so the dead "4" entry was
# dropped. Confirm that -1 rather than 4 is the intended value.
#
# A failed run stops the script so the merge step below never operates on a
# missing or partial checkpoint.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.max_response_length=4096 \
    worker.actor.model.model_path="${solver_model_path}" \
    worker.actor.global_batch_size=32 \
    trainer.experiment_name="${experiment_name}" \
    trainer.save_checkpoint_path="${STORAGE_PATH}/models/${experiment_name}/" \
    data.train_files="${train_set}" \
    data.val_files=val.json \
    data.rollout_batch_size=32 \
    data.val_batch_size=2 \
    trainer.total_epochs=1 \
    trainer.rollout_data_dir="${rollout_data_dir}" \
    data.format_prompt=./examples/format_prompt/solver.jinja \
    worker.rollout.n=8 \
    trainer.n_gpus_per_node=8 \
    worker.actor.micro_batch_size_per_device_for_update=8 \
    worker.actor.micro_batch_size_per_device_for_experience=8 \
    trainer.val_freq=-1 \
    trainer.save_freq=10 \
    || { echo 'solver training failed' >&2; exit 1; }

# Stage 3 - merge the trained actor checkpoint with scripts/model_merger.py.
# merge_step picks the global_step_<n> directory. It defaults to 45, the step
# this script has always merged; set merge_step=<n> in the environment to
# merge a different checkpoint.
# NOTE(review): training in this file uses trainer.save_freq=10, so a
# global_step_45 directory exists only if the trainer also saves at its final
# step. Confirm against the trainer.
merge_step="${merge_step:-45}"
actor_ckpt_dir="${STORAGE_PATH}/models/${experiment_name}/global_step_${merge_step}/actor"

echo "merging model"
# Fail with a clear message instead of letting the merger hit a missing path.
if [ ! -d "${actor_ckpt_dir}" ]; then
    echo "checkpoint directory not found: ${actor_ckpt_dir}" >&2
    exit 1
fi
python scripts/model_merger.py --local_dir "${actor_ckpt_dir}"
