{
      "output_dir": "proj_1",
      "device": "cuda",
      "resume_from_checkpoint": null,
      "resume_from_checkpoint_reset_steps": false,
      "save_state": true,
      "train_batch_size": 16,
      "eval_batch_size": 16,
      "learning_rate": 1e-4,
      "max_grad_norm": 1.0,
      "weight_decay": 5e-6,
      "adam_beta1": 0.9,
      "adam_beta2": 0.999,
      "adam_epsilon": 1e-8,
      "epochs_l": [4608],
      "dataset_size_l": ["32"],
      "warmup_steps": 100,
      "scheduler": "cosine_warmup",
      "logging_steps": 40,
      "save_steps": 512,
      "eval_steps": 512,
      "generate_steps": 512,
      "eval_at_step_zero": false,
      "generate_at_step_zero": false,
      "gradient_accumulation_steps": 1,
      "pad_to_multiple_of": 8,
      "debug": false,
      "menc_names_or_paths": [
            "chendelong/RemoteCLIP-ViT-L-14"
      ],
      "mm_dim": 768,
      "load_extracted_features": [true],
      "mm_dtype": "float32",
      "lm_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",
      "lm_dtype": "bfloat16",
      "dataset_names_or_paths": ["sydney"],
      "proj_name_or_path": "proj_1",
      "proj_arch": "mlp",
      "proj_act": "quick_gelu",
      "proj_n_layers": 2,
      "proj_dropout": 0.1
}
  