{
    "output_dir": "PF_15XL/mtl_vqvae_single_out_seed11",
    "save_logs_flag": true,
    "print_logs": true,
    "log_interval": 1,
    "save_interval": 1000,
    "plot_interval": -1,
    
    "base_seed": 11,

    "language_model_ckpt_folder_path": "PF_15XL/LLMout/mtl_FINAL_15XL_block_size_128_num_samples_156254208_padding_avare_False",
    "LM_compile": false,

    "gradient_accumulation_steps": 1,
    "batch_size": 32,

    "dataset_dir": "PF_15XL/samples",
    "max_seq_length": 128,
    "max_nodes": 28,
    "max_new_tokens": 10,
    "split": "vqvae",
    "init_from": "scratch",

    "learning_rate": 0.0001,
    "max_iters": 25000,
    "weight_decay": 0.1,
    "beta1": 0.9,
    "beta2": 0.95,
    "grad_clip": 1.0,
    "decay_lr": true,
    "warmup_iters": 100,
    "lr_decay_iters": 1000,
    "min_lr": 1e-05,
    "vqvae_single_config": {
        "grow_beta": false,
        "betainit": 0.25,
        "betafinal": 1.0,
        "warmup_iters_beta": 1000,
        "codebook_size": 128,
        "codebook_reset_counter_multiplier": 64,
        "beta": 0.25,
        "d": 768,
        "hidden_dim": 512,
        "cosine_push_weight": 1,
        "entropy_loss_weight": 0.6,
        "entropy_temperature": 4,
        "mask_prob": 0.1,
        "usage_tracking_window": 2000
    },
    "wandb_flag": true,
    "wandb_project_name": "PF_15XL_Experiments",
    "wandb_run_name": "mtl_vqvae_single_training_seed11",
    "wandb_group": "path_finding_experiments",
    "wandb_entity": "llm_analysis",
    "backend": "nccl",
    "device": "cuda",
    "vq_compile": false,
    "dtype": "bfloat16"
}