{
    "peft_method": "ia3",
    "pretrained_model": "bigscience/T0_3B",
    "checkpoint_frequency": 1000,
    "inference_dataset": null,
    "inference_dataset_mixture": "T0_held_out",
    "length_normalization": false,
    "eval_batch_size": 16,
    "eval_template_idx": -2,
    "gradient_accumulation_factor": 16,
    "lr": 0.0001,
    "max_datapoints_per_dataset": 500000,
    "max_gen_len": 64,
    "max_seq_len": 512,
    "norm_to_clip_gradient": null,
    "num_batches": 10000,
    "optimizer": "adamw",
    "resume_checkpoint_filepath": null,
    "scheduler": null,
    "seed": 42,
    "should_eval_at_beginning": true,
    "should_eval_train": false,
    "should_eval_validation": true,
    "should_save_most_recent_state": true,
    "should_save_to_gcp": false,
    "train_batch_size": 8,
    "train_template_idx": -2,
    "train_dataset_mixture": "T0_held_out",
    "train_dataset": null,
    "use_bfloat16_during_training": true,
    "use_bfloat16_during_eval": true,
    "warmup_ratio": 0.0,
    "weight_decay": 0.0,
    "early_stopping_num_checkpoints_without_improvement": 5
}