#!/usr/bin/env bash
#
# Launch a multi-node DeepSpeed run of train.py, starting from the
# checkpoint in ../../models/Llama-2-7b-hf and reading data from
# ../../data/rico25-max25/html_format.
#
# Usage:
#   Run from the directory that contains train.py and configs/ — every
#   path below is relative to the current working directory, not to the
#   location of this script.
#
# The script takes no arguments; edit the settings below to change a run.
# Only POSIX sh constructs are used, so `sh <script>` keeps working too.

# Abort on a failed command or on a misspelled (unset) variable name.
set -eu

# ---- Launcher settings (consumed by the `deepspeed` CLI itself) ----------
# Per the DeepSpeed launcher docs, --num_gpus / --num_nodes cap the
# resources taken from the hostfile.
gpus_per_node=8
node_count=8
rendezvous_port=6429
hostfile_path="configs/hostfile"

# ---- Model, data and output locations ------------------------------------
base_model="../../models/Llama-2-7b-hf"
dataset_dir="../../data/rico25-max25/html_format"
# NOTE(review): presumably train.py resolves these two file names inside
# dataset_dir — confirm against train.py.
train_split="train.jsonl"
val_split="val.jsonl"
checkpoint_dir="../../models/rico25-max25"
ds_config="configs/deepspeed_config_2.json"

# ---- Optimisation ---------------------------------------------------------
epochs=10
max_seq_len=2048
train_micro_batch=6
eval_micro_batch=6
grad_accum=1
# NOTE(review): if all node_count * gpus_per_node ranks are data-parallel,
# the global batch is 6 * 1 * 64 = 384 sequences per step — confirm.
peak_lr=2e-5
warmup=2
scheduler="cosine"

# ---- Checkpointing and logging -------------------------------------------
# A checkpoint is requested every ckpt_every steps, keeping at most
# ckpt_keep of them; evaluation is set to "no" even though a validation
# file is passed.
ckpt_every=100
ckpt_keep=4
log_every=2
tracker="tensorboard"

# Argument order matches the original command exactly: launcher flags,
# then the training script, then the flags forwarded to train.py.
deepspeed \
    --num_gpus "$gpus_per_node" \
    --num_nodes "$node_count" \
    --master_port "$rendezvous_port" \
    --hostfile "$hostfile_path" \
    train.py \
    --model_name_or_path "$base_model" \
    --train_file "$train_split" \
    --val_file "$val_split" \
    --data_path "$dataset_dir" \
    --output_dir "$checkpoint_dir" \
    --num_train_epochs "$epochs" \
    --model_max_length "$max_seq_len" \
    --per_device_train_batch_size "$train_micro_batch" \
    --per_device_eval_batch_size "$eval_micro_batch" \
    --gradient_accumulation_steps "$grad_accum" \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps "$ckpt_every" \
    --save_total_limit "$ckpt_keep" \
    --learning_rate "$peak_lr" \
    --warmup_steps "$warmup" \
    --logging_steps "$log_every" \
    --lr_scheduler_type "$scheduler" \
    --report_to "$tracker" \
    --gradient_checkpointing True \
    --deepspeed "$ds_config" \
    --fp16 True \
    --remove_unused_columns False