# Training and model hyperparameters.
# NOTE(review): the meanings below are inferred from the key names only; the
# code that reads this file is not visible here -- confirm before relying on them.

# presumably the optimizer learning rate
lr: 0.0001
# presumably selects the training loss by name; the set of accepted values is
# not visible in this file -- TODO confirm
loss_type: cosine

# presumably the model's hidden/embedding width
hidden_size: 3072
# presumably the number of stacked layers
layer_num: 1
# presumably the attention head count; if so, hidden_size (3072) is evenly
# divisible by it
num_heads: 2
# presumably a dropout probability
dropout: 0.1


# Diffusion
# NOTE(review): the per-key notes below are inferred from the key names; verify
# them against the code that consumes this section.

# NOTE(review): meaning is not evident from the name -- possibly a guidance
# weight; confirm with the consumer and consider a descriptive comment or name
w: 4
# presumably the number of diffusion steps used in training
timesteps: 1000
# presumably the noise (beta) schedule type; other accepted values are not
# visible here
beta_sche: linear
# presumably the first and last beta values of the schedule
beta_start: 0.0001
beta_end: 0.02
# presumably the number of DDIM sampling steps at inference -- TODO confirm
ddim_step: 100

# Loss weighting and negative sampling.
mu: 0.4             # Balances reconstruction and alignment loss
margin: 0.1           # Margin 'm' for the triplet alignment loss

# NOTE(review): presumably selects the negative-sampling strategy by name
# ("batch" may mean in-batch negatives); accepted values are not visible here
sample_func: batch

# presumably the number of negative samples drawn per positive -- TODO confirm
neg_samples: 64
