# Model hyperparameters, stored as a flat mapping of scalar values.
# The code that loads this file is not visible here: every description marked
# "presumably" or NOTE(review) is inferred from the key name alone and should
# be confirmed against the consumer.

# Presumably the feature dimensions of the inputs (x) and targets (y).
dim_x: 2
dim_y: 5

# NOTE(review): d_model, dim_feedforward, nhead, dropout and num_layers share
# their names with torch.nn.Transformer* arguments; presumably they are
# forwarded to such layers -- confirm.
d_model: 16
# Presumably the depth of the input embedding network -- confirm.
emb_depth: 3
dim_feedforward: 64
nhead: 1
# Written as a float literal; presumably 0.0 disables dropout -- confirm.
dropout: 0.0
num_layers: 4
# Presumably the layer count of a separate std/covariance branch -- confirm.
num_std_layers: 2

# Covariance parameterization: 'cholesky' or 'lowrank'.
cov_approx: 'lowrank'
# Presumably the width and depth of a projection network -- confirm.
prj_dim: 20
prj_depth: 4
# Only used with the 'lowrank' parameterization option.
diag_depth: 4

# NOTE(review): looks like a drop rate/probability applied to y -- confirm.
drop_y: 0.5