# Settings grouped under `diffusion`. This file does not define what each key
# means; NOTE(review): they look like model/architecture hyperparameters —
# confirm against the code that loads this config.
diffusion:
    architecture_max_layer: 6
    architecture_n_vocab: 5
    weight_max_size: 64
    patch_size: 4
    hidden_size: 1152
    depth: 12
    num_heads: 16
    mlp_ratio: 4
    # Booleans are written in canonical lowercase so every YAML loader
    # resolves them to real booleans rather than strings.
    learn_sigma: true
    use_swiglu: true
    use_swiglu_large: true

# Settings grouped under `data`.
data:
    # NOTE(review): presumably the environment identifier that the dataset
    # below was collected for — confirm against the loader.
    env_name: "PushCube-v1"
    # Relative path to an .h5 file; the base directory it is resolved against
    # is not specified in this file.
    path: "dataset/pushcube/train_set.h5"



# Settings grouped under `sample`. This file does not define what each key
# means; NOTE(review): they look like options for a sampling/evaluation run —
# confirm against the code that loads this config.
sample:
    # Canonical lowercase boolean so every YAML loader resolves it to a real
    # boolean rather than a string.
    unseen_arch: false
    # The JSON file name contains "train_arch" and save_dir ends in
    # "samples_a2w_train"; NOTE(review): presumably these should be switched
    # together with unseen_arch — verify.
    a2w_architecture_json: "configs/pushcube_config/config/train_arch_4layer.json"
    num_samples: 8
    num_weights_per_arch: 10
    num_sampling_steps: 1000
    seed: 42
    # checkpoint and save_dir both carry the "002" prefix; keep them in sync
    # when pointing this config at a different run.
    checkpoint: "results/002-NNiT_PushCube/checkpoints/final_checkpoint.pt"
    save_dir: "samples/002_samples_PushCube/samples_a2w_train"
    # NOTE(review): presumably a compute-device string understood by the
    # consumer — confirm accepted values there.
    device: "cuda"
    eval_steps: 50