# TODO
set -x
export VLLM_USE_MODELSCOPE=0
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_ATTENTION_BACKEND=XFORMERS

task_name="searchqa"

cd AgentGym-RL
source activate
conda activate agentgym-rl
export VLLM_ATTENTION_BACKEND=XFORMERS

env_server_url="http://127.0.0.1:36005"

sample_num=1
max_rounds=30

ckpt_path="global_step_150/actor"
model_path=${ckpt_path}/huggingface

cd AgentGym-RL/scripts
python model_merger.py \
    --local_dir ${ckpt_path}

HYDRA_FULL_ERROR=1 python3 -m verl.agent_trainer.main_generation  \
    data.path=AgentEval/${task_name} \
    data.max_prompt_length=750 \
    data.max_response_length=14098 \
    data.n_samples=${sample_num} \
    data.batch_size=32 \
    agentgym.task_name=${task_name} \
    agentgym.env_addr=${env_server_url} \
    agentgym.max_rounds=${max_rounds} \
    agentgym.timeout=500 \
    model.path=${model_path} \
    rollout.gpu_memory_utilization=0.95 \
    rollout.temperature=1 \
    rollout.max_model_len=32768 \
    rollout.max_tokens=512 \
    rollout.tensor_model_parallel_size=1 \
    rollout.rollout_log_dir=executer_logs
status=$?
exit $status