#!/bin/bash
#
# Launches finetune_qwen.py under torchrun with joint training enabled,
# starting one worker process per GPU listed in CUDA_VISIBLE_DEVICES.
#
# Usage: run with no arguments; edit the constants below to change the
# model, data files, learning rates or batch sizes.

set -euo pipefail

export TOKENIZERS_PARALLELISM=false
# Restrict the job to these GPU indices; the worker count is derived from
# this list below so the two can never drift apart.
export CUDA_VISIBLE_DEVICES=1,2,3

readonly MODEL_PATH="/qwen7B"
readonly TRAIN_DATA="/phase2_train_new.json"
readonly TEST_DATA="/phase2_test.new.json"

# Base learning rate, and the multiplier forwarded as
# --bias_expert_lr_multiplier.
readonly LEARNING_RATE=1e-4
readonly BIAS_EXPERT_LR_MULTIPLIER=3.0

# NOTE(review): 128 is not a multiple of PER_DEVICE_BATCH_SIZE * 3 GPUs (= 6).
# How finetune_qwen.py turns --total_batch_size into accumulation steps is
# not visible from this script -- TODO confirm the rounding is intended.
readonly TOTAL_BATCH_SIZE=128
readonly PER_DEVICE_BATCH_SIZE=2

# Split the comma-separated GPU list and count its entries (3 for "1,2,3").
IFS=',' read -r -a gpu_ids <<< "${CUDA_VISIBLE_DEVICES}"
readonly NUM_PROCS="${#gpu_ids[@]}"

# The last argument line deliberately has no trailing backslash, so text
# appended after the command is never absorbed into it.
torchrun --nproc_per_node="${NUM_PROCS}" finetune_qwen.py \
    --model_path "${MODEL_PATH}" \
    --train_data_path "${TRAIN_DATA}" \
    --test_data_path "${TEST_DATA}" \
    --enable_joint_training \
    --learning_rate "${LEARNING_RATE}" \
    --bias_expert_lr_multiplier "${BIAS_EXPERT_LR_MULTIPLIER}" \
    --total_batch_size "${TOTAL_BATCH_SIZE}" \
    --per_device_train_batch_size "${PER_DEVICE_BATCH_SIZE}" \
    --per_device_eval_batch_size 1 \
    --datasets "prm800k_joint" \
    --server "local"

