# Model hyperparameters grouped under `diffusion`.
# NOTE(review): the meaning of each key is defined by the code that loads
# this file; the notes below are inferred from key names only — confirm.
diffusion:
    # presumably limits on the encoded architecture (layer count and
    # token vocabulary size) — TODO confirm against the loader
    architecture_max_layer: 6
    architecture_n_vocab: 5
    # presumably the largest weight dimension handled, split into patches
    # of `patch_size` — TODO confirm
    weight_max_size: 64
    patch_size: 4
    # 1152 / 16 = 72 per head, if hidden_size is divided evenly across
    # num_heads — TODO confirm
    hidden_size: 1152
    depth: 12
    num_heads: 16
    mlp_ratio: 4
    # Booleans use canonical lowercase `true`: it is read as a boolean
    # under every YAML schema, whereas capitalised `True` is not.
    learn_sigma: true
    use_swiglu: true
    use_swiglu_large: true


# Dataset file and the environment identifier that goes with it.
# The path is relative; presumably resolved against the process working
# directory — TODO confirm against the loader.
data:
    path: 'dataset/pickcube/train_set.h5'
    env_name: 'PickCube-v1'

# Settings grouped under `sample`.
# NOTE(review): key names suggest these drive sample generation from a
# trained checkpoint; confirm exact semantics against the consuming script.
sample:
    num_samples: 100
    num_weights_per_arch: 1
    num_sampling_steps: 1000
    # Fixed seed value; presumably used to make runs repeatable — confirm.
    seed: 1
    # `checkpoint` and `save_dir` both carry the run tag
    # `003-NNiT_PickCube_500k`; update both together when switching runs.
    checkpoint: 'results/003-NNiT_PickCube_500k/checkpoints/final_checkpoint.pt'
    save_dir: 'samples/003-NNiT_PickCube_500k/samples_joint'
    # presumably a compute-device string understood by the framework in
    # use — TODO confirm
    device: 'cuda'
    eval_steps: 50