#!/bin/bash

# Abort on the first failing command, on use of an unset variable, and on a
# failure in any pipeline stage, so training is never launched after a failed
# setup step (e.g. the checkpoint directory could not be created).
set -euo pipefail

# Project name and training mode.
# MODE selects the trainer module (verl.trainer.fsdp_${MODE}_trainer); "dft"
# corresponds to the 'Dynamic Fine-Tuning' (DFT) described in the paper.
project_name="numina-cot"
MODE="dft"

# Base experiment name; also used as the checkpoint sub-directory name.
experiment_name="numina-cot-${MODE}_1.5B_style_aligned"

# Checkpoint output directory, relative to the current working directory.
save_path="checkpoints/${experiment_name}"

# Create the checkpoint directory if it doesn't exist. The expansion is quoted
# so the path stays a single argument, and `--` stops option parsing.
mkdir -p -- "$save_path"

printf 'Starting training for experiment: %s\n' "$experiment_name"
printf 'Saving checkpoints to: %s\n' "$save_path"

# key=value configuration overrides passed to the trainer module.
#
# They are kept in an array rather than as backslash-continued lines: a stray
# space after a continuation backslash escapes the space instead of the
# newline, which silently ends the command early and makes the shell try to
# run the remaining overrides as a separate command.
train_overrides=(
    # Dataset locations and the columns/dict keys holding prompt and response.
    data.train_files='data/train.parquet'
    data.val_files='data/math500/test.parquet'
    data.prompt_key='extra_info'
    data.response_key='extra_info'
    data.train_batch_size=256
    data.max_length=2048
    optim.lr=5e-5
    data.prompt_dict_keys="['question']"
    data.response_dict_keys="['answer']"
    data.micro_batch_size_per_gpu=2

    # Base model and model-side options.
    model.partial_pretrain='Qwen/Qwen2.5-Math-1.5B'
    model.use_liger=True
    model.fsdp_config.model_dtype='bf16'

    # Trainer output, naming, logging and schedule. The experiment name gets a
    # timestamp suffix so repeated launches are distinguishable.
    trainer.default_local_dir="$save_path"
    trainer.project_name="$project_name"
    trainer.experiment_name="$experiment_name-$(date +%Y%m%d-%H%M%S)"
    trainer.logger="['console']"
    trainer.default_hdfs_dir=null
    trainer.test_freq=5
    trainer.save_freq=1
    trainer.total_epochs=1

    ulysses_sequence_parallel_size=1
    use_remove_padding=true
)

# Single-node launch; --nproc_per_node=gpu starts one worker per available GPU.
torchrun --standalone --nnodes=1 --nproc_per_node=gpu \
    -m "verl.trainer.fsdp_${MODE}_trainer" \
    "${train_overrides[@]}"

# Note on logger: Add 'wandb' to trainer.logger list if Weights & Biases tracking is required.
# Ensure WANDB_API_KEY is set in the environment.