# Hard-coded, CLI-style argument vector (flag/value strings in command-line order).
# NOTE(review): presumably handed to the HfArgumentParser built further down
# instead of sys.argv — confirm against the code that consumes `args`.
# Paths are absolute and machine-specific; adjust before running elsewhere.
args = [
    # --- distributed runtime config ---
    '--deepspeed', 'scripts/zero2.json',

    # --- model checkpoint and training data ---
    '--model_name_or_path', '/home/kkallidromitis/llada/checkpoint-23000',
    '--version', 'llada',
    '--data_path', '/home/kkallidromitis/llava-llada/llava_next_raw_format_processed_200k.json',
    '--image_folder', '/home/kkallidromitis/LLaVA-NeXT-Data/llava_next_images',

    # --- multimodal (mm_*) / vision tower options ---
    '--mm_tunable_parts=mm_vision_tower,mm_mlp_adapter,mm_language_model',
    '--mm_vision_tower_lr=2e-6',
    '--vision_tower', '/home/kkallidromitis/vlm-llada-eval/siglip-so400m-patch14-384',
    '--mm_projector_type', 'mlp2x_gelu',
    '--mm_vision_select_layer', '-2',
    '--mm_use_im_start_end', 'False',
    '--mm_use_im_patch_token', 'False',
    '--group_by_modality_length', 'True',
    '--mm_patch_merge_type', 'spatial_unpad',
    '--bf16', 'True',
    '--load_vlm',  # bare flag: no value follows it

    # --- run identity; the output dir is './xvlms/' followed by the run name ---
    '--run_name', 'llavanext-_home_kkallidromitis_vlm-llada-eval_siglip-so400m-patch14-384-_home_kkallidromitis_vlm-llada-eval_LLaDA-8B-Instruct-mlp2x_gelu-pretrain_blip558_v4-cont-200k-700k-reproduced2-lr-2e-5-sigmoid',
    '--output_dir', './xvlms/llavanext-_home_kkallidromitis_vlm-llada-eval_siglip-so400m-patch14-384-_home_kkallidromitis_vlm-llada-eval_LLaDA-8B-Instruct-mlp2x_gelu-pretrain_blip558_v4-cont-200k-700k-reproduced2-lr-2e-5-sigmoid',

    # --- epochs, batch sizes, image tiling ---
    '--num_train_epochs', '1',
    '--per_device_train_batch_size', '3',
    '--per_device_eval_batch_size', '1',
    '--image_aspect_ratio', 'anyres',
    '--image_grid_pinpoints', '[(768, 768)]',
    '--gradient_accumulation_steps', '5',

    # --- evaluation / checkpoint cadence ---
    '--evaluation_strategy', 'steps',
    '--save_strategy', 'steps',
    '--eval_steps', '1',
    '--save_steps', '200',
    '--save_total_limit', '2',

    # --- optimizer and LR schedule ---
    '--learning_rate', '2e-5',
    '--weight_decay', '0.',
    '--warmup_ratio', '0.03',
    '--lr_scheduler_type', 'cosine_with_min_lr',

    # --- logging, precision, memory, data loading ---
    '--logging_steps', '1',
    '--tf32', 'True',
    '--model_max_length', '4096',
    '--gradient_checkpointing', 'True',
    '--dataloader_num_workers', '2',
    '--lazy_preprocess', 'True',
    '--report_to', 'wandb',
    '--dataloader_drop_last', 'True',
    '--attn_implementation', 'sdpa',
    '--resume_from_checkpoint', 'latest',

    # --- lmms-eval tasks, scheduler kwargs, policy settings (JSON strings) ---
    '--lmms_eval_generate_tasks=vqav2_val_lite,chartqa_lite,textvqa_val_lite,docvqa_val_lite,infovqa_val_lite',
    '--lr_scheduler_kwargs', '{"min_lr_rate":0.1}',
    '--policy', 'logit_normal',
    '--policy_args', '{"logit_mean":0.0,"logit_std":1.0}',
]
import transformers

# Build one argument parser over the three argument classes; they are passed
# as a single tuple, in model / data / training order.
# NOTE(review): ModelArguments, DataArguments and TrainingArguments are not
# defined in the visible part of this file — confirm they are imported or
# defined before this statement runs.
parser = transformers.HfArgumentParser(
    (
        ModelArguments,
        DataArguments,
        TrainingArguments,
    )
)