{
    "train_data": ["data/train_data.bin"],
    "test_data": ["data/test_data.bin"],
    "pretrain_out_dir": "Result/Pretraining",
    "inference_out_dir": "Result/Inference",

    "max_epoch": 1,
    "eval_interval": 1,
    "log_interval": 1,
    "save_interval": 10000,
    "eval_iters": 200,
    "eval_only": false,

    "Original": [1792, 10, 12],
    "batch_num": 8,
    "batch_size": 32,
    "max_seq_len": 512,
    "dim": 2048,
    "n_layers": 10,
    "n_heads": 10,
    "multiple_of": 32,
    "device": "cuda:0",
    "init_from": "scratch",

    
    "dropout": 0.4,
    "bias": false,
    "learning_rate": 3e-3,
    "weight_decay": 1e-1,
    "beta1": 0.9,
    "beta2": 0.95,
    "grad_clip": 1.0,
    "decay_lr": false,
    "backend": "nccl",
    "dtype": "float16",
    "compile": false
}