# This repository is for DECS

## Prepare the Training Framework
Download and install veRL 0.2.0dev0.

## Train 
Run the following script to begin training:

```bash 
# Select the model to train: leave exactly one MODEL_NAME assignment uncommented.
# MODEL_NAME=qwen34b
# MODEL_NAME=llama3_8b_medical
# MODEL_NAME=r1_distill_qwen32b
# MODEL_NAME=phi4_mini
MODEL_NAME=deepscaler_1.5b
# MODEL_NAME=r1_distill_qwen1.5b
# MODEL_NAME=r1_distill_qwen7b
# MODEL_NAME=qwen2.5_math_7b_base
# MODEL_NAME=medsins

# Associative array mapping each supported MODEL_NAME to the name of its
# checkpoint directory.
declare -A model_mapping=(
    ["llama3.2"]="Llama-3.2-3B-Instruct"
    ["llama3_8b_medical"]="Meta-Llama-3-8B-Instruct"
    ["llama3.1_8b"]="Meta-Llama-3.1-8B-Instruct"
    ["qwen2.5_3b"]="Qwen2.5-3B-Instruct"
    ["qwen2.5_math_7b_base"]="Qwen2.5-Math-7B"
    ["qwen2.5_3b_base"]="Qwen2.5-3B"
    ["qwen2_7b"]="Qwen2-7B-Instruct"
    ["r1_distill_qwen1.5b"]="DeepSeek-R1-Distill-Qwen-1.5B"
    ["r1_distill_qwen7b"]="DeepSeek-R1-Distill-Qwen-7B"
    ["r1_distill_llama8b"]="DeepSeek-R1-Distill-Llama-8B"
    ["r1_distill_qwen32b"]="DeepSeek-R1-Distill-Qwen-32B"
    ["deepscaler_1.5b"]="DeepScaleR-1.5B-Preview"
    ["phi4_mini"]="Phi-4-mini-reasoning"
    ["medsins"]="MMedS-Llama3-3-8B"
    ["qwen34b"]="Qwen3-4B"
)

# Resolve the checkpoint name with an exact-match lookup. The empty-name guard
# avoids bash's "bad array subscript" error on an empty associative key.
ckpt=""
if [[ -n "${MODEL_NAME}" ]]; then
    ckpt=${model_mapping[${MODEL_NAME}]:-}
fi

# Fail fast on an unknown model instead of submitting a job whose model path
# is just the bare checkpoint root.
if [[ -z "${ckpt}" ]]; then
    printf 'error: unknown MODEL_NAME "%s"; valid names: %s\n' \
        "${MODEL_NAME}" "${!model_mapping[*]}" >&2
    exit 1
fi

# NOTE: /path/to/ckpt looks like a placeholder; point it at the directory
# that holds the checkpoints listed above.
MODEL_PATH=/path/to/ckpt/${ckpt}

# Dataset name(s) passed as the first argument to train_rl_chunk.sh.
# Several names may be given separated by single spaces.
data_name="deepscaler"

# Passed as the second argument to train_rl_chunk.sh
# (presumably the advantage-estimation variant; confirm against that script).
adv=grpo_proc_length

# Run identifier: the model name, the adv setting and a hyper-parameter tag.
save_name=${MODEL_NAME}_${adv}_a001_b001_c001_n16_nozeroadv_fm0_invert_dapo_adp_lr02


# Extra key=value overrides, forwarded to train_rl_chunk.sh as one argument.
other_configs="data.is_base=False trainer.total_epochs=3 actor_rollout_ref.actor.clip_ratio_high=0.2 trainer.val_before_train=False data.max_response_length=16384 data.prompt_type=default actor_rollout_ref.rollout.max_num_batched_tokens=32768 reward_model.reward_manager=chunk reward_model.format_score=0 "
# train 4card
# Build save_data_name, a path-friendly form of data_name in which the
# space-separated dataset names are joined with underscores. ADDR holds the
# individual dataset names.
if [[ "${data_name}" == *" "* ]]; then
    IFS=' ' read -ra ADDR <<<"${data_name}"
    # Join inside a subshell so the IFS change does not leak into the script.
    save_data_name=$(IFS=_; printf '%s' "${ADDR[*]}")
else
    save_data_name=${data_name}
    # Quoted so the single name is stored verbatim: no word splitting and no
    # glob expansion of characters such as '*' or '?'.
    ADDR=("${data_name}")
fi

# Per-run log directory: logs/<dataset name>/<run name>.
log_path=logs/${save_data_name}/${save_name}
# Quoted so a dataset or run name containing whitespace or glob characters
# still yields exactly one directory.
mkdir -p "${log_path}"


# Passed as the fifth argument to train_rl_chunk.sh
# (presumably the number of rollouts sampled per prompt; confirm).
rollout_n=16
# chunk_config.* overrides, including the judge model path and judge URL.
# NOTE: /path/to/judge/model looks like a placeholder and the URL is a fixed
# host:port; adjust both for your environment.
dynamic_configs="chunk_config.enable=True chunk_config.judge_model=/path/to/judge/model chunk_config.judge_url=10.140.54.16:10047 chunk_config.ori_adv_factor=0 chunk_config.only_entropy_token=False chunk_config.only_minus_entropy_token=False"


# Further chunk_config.* overrides (alpha/beta/gamma, entropy filtering,
# reward function), forwarded as one argument.
remote_split_configs=" chunk_config.use_entropy=False chunk_config.alpha=0.001 chunk_config.beta=0.001 chunk_config.gamma=0.001  chunk_config.filter_by_entropy=False chunk_config.only_right=True chunk_config.high_entropy_quantile=0.8 chunk_config.no_zero_adv=True chunk_config.as_return=False chunk_config.reward_func=invert"



# algorithm.filter_groups.* overrides, forwarded as one argument.
dapo_configs=" algorithm.filter_groups.enable=True algorithm.filter_groups.max_num_gen_batches=10 algorithm.filter_groups.enable_kappa=True algorithm.filter_groups.kappa_lr=0.2"
# Submit the training job. Every expansion is quoted so each value reaches
# train_rl_chunk.sh as exactly one positional argument, even when it is empty
# or contains spaces.
sbatch -o "${log_path}/train.log" train_rl_chunk.sh \
    "${data_name}" "${adv}" "${MODEL_PATH}" "${save_name}" "${rollout_n}" \
    "${other_configs}" "${dynamic_configs}" "${remote_split_configs}" "${dapo_configs}"
```