# Hyperparameters of the diffusion model.
# NOTE(review): the key names suggest a transformer-based diffusion model that
# generates network weights for a given architecture -- confirm against the
# code that consumes this file.
diffusion:
    # Limits on the architecture/weight representation.
    # presumably: max layer count, architecture token vocabulary size, and the
    # largest weight dimension handled -- TODO confirm exact semantics.
    architecture_max_layer: 6
    architecture_n_vocab: 5
    weight_max_size: 64
    patch_size: 4

    # Transformer backbone size; hidden_size / num_heads = 1152 / 16 = 72.
    hidden_size: 1152
    depth: 12
    num_heads: 16
    mlp_ratio: 4

    # Feature switches, written as canonical lowercase YAML booleans so they
    # resolve to bool under YAML 1.1 and every YAML 1.2 schema.
    learn_sigma: true
    use_swiglu: true
    use_swiglu_large: true


# Dataset location and the environment it belongs to.
# NOTE(review): the path is relative; presumably resolved against the working
# directory of the launching script -- confirm.
data:
    path: 'dataset/pickcube/train_set.h5'
    env_name: 'PickCube-v1'


# Settings for the sampling run.
sample:
    # Canonical lowercase boolean. The architecture file below is named
    # "train_arch_4layer", consistent with this flag being false.
    unseen_arch: false
    a2w_architecture_json: 'configs/pickcube_config/config/train_arch_4layer.json'

    # Sampling volume and schedule.
    # NOTE(review): exact meaning of each count is defined by the sampling
    # script, which is not visible here -- confirm before changing.
    num_samples: 8
    num_weights_per_arch: 10
    num_sampling_steps: 1000
    seed: 1

    # Checkpoint to load and output directory; both share the run prefix
    # "003-NNiT_PickCube_500k", so update them together when switching runs.
    checkpoint: 'results/003-NNiT_PickCube_500k/checkpoints/final_checkpoint.pt'
    save_dir: 'samples/003-NNiT_PickCube_500k/samples_a2w_train'

    device: 'cuda'
    # presumably the number of environment steps used when evaluating sampled
    # weights -- TODO confirm.
    eval_steps: 50