# Model size settings.
# NOTE(review): names suggest embedding widths, an attention-head count and a
# feed-forward expansion multiplier — confirm against the consuming model code.
hidden_dim: 96
timestep_dim: 32
num_heads: 4
expansion: 2

# Depth (layer count) of each sub-network, one key per component.
# NOTE(review): which module each key configures is defined by the consuming
# code — confirm there.
num_detector_encoder_layers: 8
num_parton_encoder_layers: 6
num_parton_decoder_layers: 6
num_denoising_layers: 10

# VAE mode switches, written as canonical lowercase YAML booleans so every
# loader (YAML 1.1 and 1.2) resolves them to real booleans.
# NOTE(review): how these flags combine is defined by the consuming code —
# confirm there.
trivial_vae: false
conditional_vae: true
deterministic_vae: true
unconditional_vae_decoder: true

# Parton normalization switches.
# NOTE(review): presumably toggle input standardisation — verify in the
# data pipeline.
normalize_parton: true
normalize_parton_scale: true

# Training run settings.
seed: 0
batch_size: 4096
# Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
# resolve a bare `1e-4` as the string '1e-4' rather than a float.
learning_rate: 1.0e-4
gradient_clipping: 1.0

# Loss-term scale factors with non-zero values.
reconstruction_loss_scale: 1.0
self_mass_loss_scale: 0.01

# Loss-term scale factors currently set to 0.0.
# NOTE(review): presumably a zero scale disables the term — confirm in the
# training loop.
kl_loss_scale: 0.0
mass_loss_scale: 0.0
vae_prior_loss_scale: 0.0

# Noise schedule selection.
# NOTE(review): "nnet" presumably selects a learned (neural-network) schedule
# and the outputs key sizes it — confirm the accepted values in the code.
noise_schedule: "nnet"
noise_schedule_outputs: 1

# Loss weighting selection and its offset parameter.
# NOTE(review): the offset presumably applies only when weighting is
# "sigmoid" — verify against the consuming code.
weighting: "sigmoid"
sigmoid_weight_offset: 2.0

# Batch count for the run.
# NOTE(review): presumably the total number of training batches — confirm.
num_batches: 1000000