"""

CUDA_VISIBLE_DEVICES=0 python -m src.llamafactory.train.mdpo.workflow --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 1 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 4 --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 1 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy online-dpo --test
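
# Hedged sanity check: effective batch size per optimizer step is
# per_device_train_batch_size * gradient_accumulation_steps * num visible GPUs
# (ignoring any packing). For the 4-GPU run below (2 * 2 * 4):
echo $((2 * 2 * 4))   # -> 16 samples per optimizer step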



CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 2 --gradient_accumulation_steps 2 --learning_rate 1e-6 --num_train_epochs 4 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 1 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy svco-textref

# svco-textref (anchor_ratio 0.0), then orm (neg_anchor_ratio 1.0), each followed by rule-based eval
CUDA_VISIBLE_DEVICES=4,5 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 2 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 1 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy svco-textref --anchor_ratio 0.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_svco-textref/llava-1.5-7b-hf_Exp6" --device=4 --rule-based-only; \
cd ../LLaMAFactory; \
CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 1 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy orm --neg_anchor_ratio 1.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp1" --device=4 --rule-based-only; \
cd ../LLaMAFactory

CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 1 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy orm --anchor_ratio 0.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp3/checkpoint-1124" --device=0 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp3/checkpoint-2248" --device=0 --rule-based-only; \
cd ../LLaMAFactory; \
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 2 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy orm --anchor_ratio 0.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp4/checkpoint-562" --device=0 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp4/checkpoint-1124" --device=0 --rule-based-only

CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy mdpo; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_mdpo/llava-1.5-7b-hf_Exp2/checkpoint-281" --device=0 --rule-based-only; \
cd ../LLaMAFactory; \
CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy mdpo --neg_anchor_ratio 1.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_mdpo/llava-1.5-7b-hf_Exp3/checkpoint-281" --device=0 --rule-based-only


bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_dpo/llava-1.5-7b-hf_Exp1/checkpoint-281" --device=1 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_dpo/llava-1.5-7b-hf_Exp2/checkpoint-281" --device=1 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_dpo/llava-1.5-7b-hf_Exp3/checkpoint-281" --device=1 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp5/checkpoint-281" --device=1 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp8/checkpoint-281" --device=1 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_svco-textref/llava-1.5-7b-hf_Exp6/checkpoint-281" --device=1 --rule-based-only; \
bash eval_all.sh --ckpt_path="/opt/tiger/vtcl/model/dpo-ckpt" --device=1 --rule-based-only; \
bash tmp_eval_base.sh --device=1 --rule-based-only
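
# Equivalent loop form of the sequential single-device evals above
# (a convenience sketch; extend the list with the remaining checkpoints):
for ckpt in \
    "/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_dpo/llava-1.5-7b-hf_Exp1/checkpoint-281" \
    "/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_dpo/llava-1.5-7b-hf_Exp2/checkpoint-281" \
    "/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_dpo/llava-1.5-7b-hf_Exp3/checkpoint-281"; do
  bash eval_all.sh --ckpt_path="$ckpt" --device=1 --rule-based-only
done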


CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 2 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy orm --not_half_dpo_loss --anchor_ratio 0.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_mdpo/llava-1.5-7b-hf_Exp5/checkpoint-281" --device=4 --rule-based-only; \
cd ../LLaMAFactory; \
CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy orm --not_half_dpo_loss --neg_anchor_ratio 1.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp5/checkpoint-562" --device=4 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp6/checkpoint-281" --device=4 --rule-based-only

CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1_single --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode difBatch --strategy orm --not_half_dpo_loss --neg_anchor_ratio 1.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp7/checkpoint-562" --device=4 --rule-based-only; \
cd ../LLaMAFactory; \
CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1_single --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 2 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode difBatch --strategy orm --not_half_dpo_loss --anchor_ratio 0.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp7/checkpoint-562" --device=4 --rule-based-only; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp7/checkpoint-1124" --device=4 --rule-based-only


CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1_single --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 2 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode difBatch --strategy orm --not_half_dpo_loss --neg_anchor_ratio 1.0; \
CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy svco-textref; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_svco/llava-1.5-7b-hf_Exp7/checkpoint-281" --device=4 --rule-based-only & \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp6/checkpoint-562" --device=5 --rule-based-only & \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_orm/llava-1.5-7b-hf_Exp6/checkpoint-1124" --device=6 --rule-based-only & wait
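
# Generic form of the fan-out-then-wait pattern above: one eval per GPU in
# the background, then block until all finish. Sketch only; the checkpoint
# paths here are illustrative placeholders, not real runs.
dev=4
for ckpt in \
    "/path/to/exp/checkpoint-281" \
    "/path/to/exp/checkpoint-562"; do
  bash eval_all.sh --ckpt_path="$ckpt" --device=$dev --rule-based-only &
  dev=$((dev + 1))
done
wait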


CUDA_VISIBLE_DEVICES=2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 8 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 2 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy sft; \
CUDA_VISIBLE_DEVICES=2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 8 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy sft --anchor_ratio 1.0; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_sft/llava-1.5-7b-hf_Exp1/checkpoint-281" --device=2 --rule-based-only & \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_sft/llava-1.5-7b-hf_Exp2/checkpoint-281" --device=3 --rule-based-only & wait; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_mdpo/llava-1.5-7b-hf_Exp6/checkpoint-1124" --device=6 --rule-based-only & wait

cd ../LLaMAFactory; \
CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_mdpo_r80 --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy ori-mdpo; \
CUDA_VISIBLE_DEVICES=4,5,6,7 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_mdpo_r50 --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 4 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy ori-mdpo; \
cd ../svco-eval; \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_ori-mdpo/llava-1.5-7b-hf_Exp1/checkpoint-281" --device=4 --rule-based-only & \
bash eval_all.sh --ckpt_path="/mnt/bn/lf-lsj/tasks/vtcl/saves/mdpo_ori-mdpo/llava-1.5-7b-hf_Exp2/checkpoint-281" --device=5 --rule-based-only & wait


CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --adapter_name_or_path "/mnt/bn/lf-lsj/model/vtcl/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 2 --gradient_accumulation_steps 1 --learning_rate 1e-6 --num_train_epochs 2 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy online-orm --not_half_dpo_loss

# step-level save/eval (every 70 steps) on the original opadpo dataset, online strategy; save_steps matches eval_steps, as --load_best_model_at_end requires
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opadpo_train --eval_dataset opadpo_val --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy steps --save_steps 70 --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 1 --gradient_accumulation_steps 2 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy steps --eval_steps 70 --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy online-orm --not_half_dpo_loss --anchor_ratio 0.0


# same run, but initialized from the OPA model (base model + dpo-ckpt adapter under /opt/tiger)
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/opt/tiger/vtcl/model/llava-1.5-7b-hf" --adapter_name_or_path "/opt/tiger/vtcl/model/dpo-ckpt" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opadpo_train --eval_dataset opadpo_val --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy steps --save_steps 70 --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 1 --gradient_accumulation_steps 2 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 4 --eval_strategy steps --eval_steps 70 --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy online-orm --not_half_dpo_loss --anchor_ratio 0.0


CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opadpo_train --eval_dataset opadpo_val --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 1 --gradient_accumulation_steps 2 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 2 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy sft; \
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opa_s16_geSTD_Cge6N2_le5L1 --val_size 0.05 --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 1 --gradient_accumulation_steps 2 --learning_rate 1e-6 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 2 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy dpo --anchor_ratio 0.0


CUDA_VISIBLE_DEVICES=4,5 llamafactory-cli train --model_name_or_path "/mnt/bn/lf-lsj/model/vtcl/llava-1.5-7b-hf" --trust_remote_code --stage mdpo --do_train --finetuning_type lora --lora_target all --pref_beta 0.1 --pref_loss sigmoid --save_only_model --dataset opadpo_train --eval_dataset opadpo_val --dataset_dir "/opt/tiger/vtcl/LLaMAFactory/data" --template llava_new --cutoff_len 2048 --overwrite_cache --preprocessing_num_workers 16 --output_dir "saves/test/llava-1.5/lora/dpo" --logging_steps 10 --save_strategy epoch --plot_loss --overwrite_output_dir --load_best_model_at_end --per_device_train_batch_size 2 --gradient_accumulation_steps 32 --learning_rate 2e-5 --num_train_epochs 1 --eval_on_start --lr_scheduler_type cosine --warmup_ratio 0.1 --ddp_timeout 180000000 --per_device_eval_batch_size 2 --eval_strategy epoch --lora_rank 256 --save_total_limit 100 --vtcl_mode single --strategy sft
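
# Hedged sketch: the SFT run above relies on gradient accumulation for a much
# larger effective batch (per_device 2 * accum 32 * 2 GPUs):
echo $((2 * 32 * 2))   # -> 128 samples per optimizer step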
"""