#!/usr/bin/env bash
# Prepare the working directory for the training run below:
#   1. enter the fastchat checkout (the training script is referenced by a
#      path relative to it),
#   2. create the local model directory,
#   3. download the starcoder2-7b directory and the *_final.parquet.jsonl
#      file from HDFS,
#   4. install the pinned trl release.

# Abort early: every later step assumes this is the current directory.
cd /.../.../.../fastchat/ || { echo "ERROR: cannot cd into fastchat directory" >&2; exit 1; }

# -p keeps re-runs from failing when the directory already exists.
mkdir -p /.../.../.../model/

# Downloads are best-effort on purpose: `hdfs dfs -get` reports an error when
# the destination already exists, which is the normal case on a re-run.
hdfs dfs -get ....../home/.../.../user/.../debug/gemma-2b/starcoder2-7b /.../.../.../model/ \
  || echo "WARNING: hdfs get of starcoder2-7b failed (already downloaded?)" >&2
hdfs dfs -get ....../home/.../.../user/.../metadata/generate/.../filter_empty/..._final.parquet.jsonl /.../.../.../ \
  || echo "WARNING: hdfs get of dataset failed (already downloaded?)" >&2

pip3 install trl==0.9.6

# RUN_CMD is the launcher placed in front of the training script.
# When the caller leaves it unset or empty, fall back to plain python3.
[ -n "$RUN_CMD" ] || {
  echo "RUN_CMD is not defined. Setting default value."
  RUN_CMD="python3"
}

# Turn the optional OTHER environment variable (a whitespace-separated string
# of extra command-line flags) into the OTHER_ARGS array.
#   - OTHER unset          -> OTHER_ARGS is an empty array
#   - OTHER set (even "")  -> OTHER_ARGS holds its whitespace-split words
echo "Debug: OTHER value is: '${OTHER-}'"
if [ -n "${OTHER+x}" ]; then
    echo "OTHER is defined. Using its value."
    # read -a splits on IFS without glob expansion; -r keeps backslashes literal.
    read -ra OTHER_ARGS <<< "$OTHER"
else
    echo "OTHER is not defined. Initializing as empty array."
    OTHER_ARGS=()
fi

# Report what was parsed, using the array rather than the raw string.
if [ "${#OTHER_ARGS[@]}" -eq 0 ]; then
    echo "No additional arguments provided."
else
    echo "Additional arguments: ${OTHER_ARGS[*]}"
fi

# Launch fastchat/train/code.dpo.py with the fixed hyper-parameters below plus
# any extra flags collected in OTHER_ARGS. Combined stdout/stderr is shown on
# the console and copied to code.dpo.run.log.
echo "Running command: $RUN_CMD"

# RUN_CMD may contain a launcher plus its own options, so it is split on
# whitespace into an array; unlike an unquoted $RUN_CMD this does not glob.
read -ra run_cmd <<< "${RUN_CMD:-python3}"

# Without pipefail the pipeline's status is tee's, hiding a failed training run.
set -o pipefail

# NOTE(review): a learning rate of 1e-3 looks high for this kind of
# fine-tuning; confirm it is intentional.
"${run_cmd[@]}" fastchat/train/code.dpo.py \
    --dataset_name=/.../.../.../..._final.parquet.jsonl \
    --my_task_name="starcoder2-7b-code-dpo" \
    --model_name_or_path=/.../.../.../model/starcoder2-7b \
    --per_device_train_batch_size 1 \
    --learning_rate 1e-3 \
    --gradient_accumulation_steps 8 \
    --logging_steps 10 \
    --do_eval=True \
    --evaluation_strategy="steps" \
    --eval_steps 300 \
    --output_dir="codedpo_epoch10" \
    --warmup_steps 150 \
    --report_to wandb \
    --logging_first_step \
    --no_remove_unused_columns \
    --bf16 \
    --num_train_epochs=10 \
    --max_prompt_length=512 \
    --max_length=2048 "${OTHER_ARGS[@]}" 2>&1 | tee code.dpo.run.log





