# ---------------------------------------------------------------------------
# Environment setup: enter the fastchat checkout, download the model
# directories and the dataset file from HDFS, and pin the trl version.
# ---------------------------------------------------------------------------

# Everything below (including the relative path fastchat/train/code.dpo.py)
# depends on the working directory, so stop right away if it is unavailable.
cd /.../.../.../fastchat/ || {
    echo "error: cannot cd to /.../.../.../fastchat/" >&2
    exit 1
}

# -p: do not fail when the directory already exists (re-runs) and create any
# missing parent directories.
mkdir -p /.../.../.../model/ || {
    echo "error: cannot create /.../.../.../model/" >&2
    exit 1
}

# The downloads are deliberately not fatal on their own: on a re-run the
# destination may already exist, in which case `-get` is expected to refuse to
# overwrite it (no -f is passed). What matters is checked just below.
hdfs dfs -get ....../home/.../.../user/.../debug/gemma-2b/TinyLlama-1.1B-Chat-v1.0 /.../.../.../model/
hdfs dfs -get ....../home/.../.../user/.../debug/gemma-2b/starcoder2-3b /.../.../.../model/
hdfs dfs -get ....../home/.../.../user/.../metadata/generate/.../filter_empty/..._final.parquet.jsonl /.../.../.../

# Fail fast if the inputs the training command reads are missing, instead of
# discovering it only after the distributed job has been launched.
# NOTE(review): this only checks existence, not completeness of the download.
for required_input in \
    /.../.../.../model/starcoder2-3b \
    /.../.../.../..._final.parquet.jsonl
do
    if [ ! -e "$required_input" ]; then
        echo "error: required input not found: $required_input" >&2
        exit 1
    fi
done

# Training is expected to run against this exact trl release; do not continue
# with whatever version happens to be installed if the install fails.
pip3 install trl==0.9.6 || {
    echo "error: pip3 install trl==0.9.6 failed" >&2
    exit 1
}

# ---------------------------------------------------------------------------
# Training launch: run fastchat/train/code.dpo.py under torchrun with two
# processes on this node, using the starcoder2-3b model directory and the
# downloaded dataset file, with PEFT/LoRA flags enabled (r=16, alpha=16).
# stdout and stderr are merged and copied to code.dpo.run.log via tee.
# ---------------------------------------------------------------------------

# Without pipefail the pipeline's status is tee's, so a crashed training run
# would make this script exit 0. The option is probed in a subshell first
# because a /bin/sh that does not know it would otherwise abort the script.
if (set -o pipefail) 2>/dev/null; then
    set -o pipefail
fi

torchrun --nproc_per_node=2 --master_port=20001 fastchat/train/code.dpo.py \
    --dataset_name=/.../.../.../..._final.parquet.jsonl \
    --my_task_name="starcoder2-3b-code-dpo" \
    --model_name_or_path=/.../.../.../model/starcoder2-3b \
    --per_device_train_batch_size 1 \
    --learning_rate 1e-3 \
    --gradient_accumulation_steps 8 \
    --logging_steps 10 \
    --eval_steps 500 \
    --output_dir="codedpo_epoch10" \
    --warmup_steps 150 \
    --report_to wandb \
    --logging_first_step \
    --no_remove_unused_columns \
    --bf16 \
    --num_train_epochs=10 \
    --max_prompt_length=512 \
    --use_peft \
    --lora_r=16 \
    --lora_alpha=16 \
    --max_length=2048 2>&1 | tee code.dpo.run.log




