---
# Hyperparameter configuration (flat mapping of scalar settings).
# NOTE(review): the group headings below are inferred from key names only;
# confirm each against the code that loads this file.

# Data dimensions -- presumably the sizes of the x and y variables.
dim_x: 2
dim_y: 3

# Network sizes -- key names follow common transformer settings (TODO confirm).
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
num_std_layers: 2

# Covariance approximation: cholesky or lowrank parameterization.
cov_approx: 'cholesky'
prj_dim: 20
prj_depth: 4
# diag_depth is used only with the lowrank parameterization option.
diag_depth: 4