# Hyperparameters for the diffusion model. The exact meaning of each key is
# defined by the consuming code, which is not visible from this file; notes
# below are hedged accordingly.
diffusion:
    architecture_max_layer: 6
    architecture_n_vocab: 5
    weight_max_size: 64
    patch_size: 4
    # 1152 is divisible by num_heads (16), i.e. 72 per head if the consumer
    # splits hidden_size evenly across heads -- TODO confirm.
    hidden_size: 1152
    depth: 12
    num_heads: 16
    mlp_ratio: 4
    # Flags use the canonical lowercase boolean form so that every YAML
    # loader and linter resolves them as booleans.
    learn_sigma: true
    use_swiglu: true
    use_swiglu_large: true


# Dataset file and environment name. Meanings are inferred from the key
# names; confirm against the data loader.
data:
    # Relative path -- presumably resolved against the working directory.
    path: 'dataset/pushcube/train_set.h5'
    env_name: 'PushCube-v1'


# Settings for the sampling run. Key meanings are inferred from their names;
# confirm against the script that reads this section.
sample:
    num_samples: 100
    num_weights_per_arch: 1
    num_sampling_steps: 1000
    seed: 42
    # Both paths are relative -- presumably resolved against the working
    # directory; verify before launching from another location.
    checkpoint: 'results/002-NNiT_PushCube/checkpoints/final_checkpoint.pt'
    save_dir: 'samples/002_samples_PushCube/samples_joint'
    # NOTE(review): looks like a torch-style device string -- confirm.
    device: 'cuda'
    eval_steps: 50