#!/usr/bin/env bash
# Launch script for a LLaMA-Factory training run started through torchrun.
#
# Every relative path used further down in this script (hope/gpu_looker.py,
# src/train.py, saves/...) is resolved against the project directory entered
# here, so a failed cd must abort instead of continuing in the wrong place.
cd /mnt/dolphinfs/hdd_pool/docker/user/hadoop-basecv/jiangshu/code/LLaMA-Factory/ || {
  echo "ERROR: cannot cd into the LLaMA-Factory project directory" >&2
  exit 1
}

# DeepSpeed configuration file handed to the trainer via --deepspeed.
DS_CONFIG_PATH='/mnt/dolphinfs/hdd_pool/docker/user/hadoop-basecv/jiangshu/code/LLaMA-Factory/examples/deepspeed/ds_z3_config.json'

# Export the HTTP and HTTPS proxy settings (original note: "enable VPN").
# Both variables point at the same proxy endpoint.
export http_proxy='http://10.229.18.23:3128'
export https_proxy="${http_proxy}"

# Query the job topology from hope/gpu_looker.py. Its output is split on
# whitespace and the first five fields are consumed in this order:
#   NNODES GPUS_PER_NODE MASTER_ADDR MASTER_PORT NODE_RANK
job_args_raw="$(python hope/gpu_looker.py)" || {
  echo "ERROR: hope/gpu_looker.py failed; cannot determine job topology" >&2
  exit 1
}

# Split into an array without glob expansion. `read -d ''` consumes the whole
# input and returns non-zero at end of input, hence the `|| true`.
read -r -d '' -a JOB_ARGS <<<"${job_args_raw}" || true
echo "JOB_ARGS: ${JOB_ARGS[*]}"

# Refuse to continue with missing fields: empty values would otherwise be
# exported and only surface later as a confusing torchrun error.
if (( ${#JOB_ARGS[@]} < 5 )); then
  echo "ERROR: expected 5 values from hope/gpu_looker.py, got ${#JOB_ARGS[@]}" >&2
  exit 1
fi

# NOTE(review): FORCE_TORCHRUN is not read anywhere in this script; presumably
# it is consumed by LLaMA-Factory itself - confirm.
export FORCE_TORCHRUN=1
export NNODES="${JOB_ARGS[0]}"
export GPUS_PER_NODE="${JOB_ARGS[1]}"
export MASTER_ADDR="${JOB_ARGS[2]}"
export MASTER_PORT="${JOB_ARGS[3]}"
export NODE_RANK="${JOB_ARGS[4]}"


# Set up the Python environment: load conda's bash integration, then activate
# the "lfnew" environment.
conda_bin='/mnt/dolphinfs/hdd_pool/docker/user/hadoop-basecv/jiangshu/miniconda3/bin/conda'

# stderr of the hook stays silenced as before; its exit status is checked
# instead so a missing or broken conda install stops the script here.
conda_hook="$("${conda_bin}" 'shell.bash' 'hook' 2> /dev/null)" || {
  echo "ERROR: failed to obtain the conda shell hook from ${conda_bin}" >&2
  exit 1
}
eval "${conda_hook}"

# Without this check a failed activation would let the script continue with
# whatever python/torchrun happens to be on PATH.
conda activate lfnew || {
  echo "ERROR: failed to activate conda environment 'lfnew'" >&2
  exit 1
}

# Local snapshot directory of the base model, passed to --model_name_or_path.
MODEL_PATH="/mnt/dolphinfs/hdd_pool/docker/user/hadoop-basecv/cache/QwenVL25/models--Qwen--Qwen2.5-VL-7B-Instruct/snapshots/c6adf136ef57b4f6a6d7897c11bd3863bda237d7"

# Arguments for src/train.py. They are kept in an array so that every value
# reaches the trainer as exactly one word (no word splitting or globbing on
# the path variables) and so the groups can carry comments.
train_args=(
  # Model and DeepSpeed configuration.
  --deepspeed "${DS_CONFIG_PATH}"
  --model_name_or_path "${MODEL_PATH}"
  --trust_remote_code

  # Training stage and LoRA settings.
  --stage sft
  --do_train
  --finetuning_type lora
  --lora_rank 8
  --lora_target all

  # Data and preprocessing.
  --dataset identity,alpaca_en_demo
  --template qwen2_vl
  --cutoff_len 2048
  --max_samples 1000
  --overwrite_cache
  --preprocessing_num_workers 16
  --dataloader_num_workers 4

  # Output, checkpointing and logging (output_dir is relative to the
  # current working directory).
  --output_dir saves/qwen25vl-7b/lora/sft
  --logging_steps 10
  --save_steps 500
  --plot_loss
  --overwrite_output_dir
  --save_only_model false
  --report_to none

  # Optimization hyperparameters.
  --per_device_train_batch_size 1
  --gradient_accumulation_steps 8
  --learning_rate 1e-4
  --num_train_epochs 10.0
  --lr_scheduler_type cosine
  --warmup_ratio 0.1
  --bf16
  --ddp_timeout 180000000
)

# Single-process alternative kept from the original script for reference:
#   CUDA_VISIBLE_DEVICES=0 python src/train.py "${train_args[@]}"
#
# The launch below reads its topology from the NNODES, GPUS_PER_NODE,
# MASTER_ADDR, MASTER_PORT and NODE_RANK variables. It is the last command,
# so its exit status becomes the exit status of the script.
torchrun \
  --nproc_per_node="${GPUS_PER_NODE}" \
  --nnodes="${NNODES}" \
  --node_rank="${NODE_RANK}" \
  --master_addr="${MASTER_ADDR}" \
  --master_port="${MASTER_PORT}" \
  src/train.py "${train_args[@]}"
