# Shared parameters. The anchor defined here is referenced by the model
# sections in this file through the `*latent_cond_len` alias, so changing the
# value here changes it for every section that uses the alias.
globals:
  latent_cond_len: &latent_cond_len 2

# Dataset definition, split proportions, batch sizes and evaluation settings.
dataset:
  nice_name: MuJoCo
  name: mujoco
  dataset_size: 10000
  dim: 14
  seq_length: 24
  pred_length: 12
  # 0.8 + 0.1 leaves 0.1 of the data unassigned in this file; presumably the
  # test split -- TODO confirm against the data-splitting code.
  train_proportion: 0.8
  val_proportion: 0.1
  seed: 42
  train_batch_size: 128
  # 128 * 2 = 256 effective training batch size.
  gradient_accumulation_batch_size_multiplier: 2
  val_batch_size: 128

  # Two sets of noise parameters are kept side by side.
  # NOTE(review): which set the code actually reads is not visible from this
  # file -- confirm before changing either.
  noise_std: 0.001
  latent_sigma: 0.0001

  noise_std_updated: 0.0001
  latent_sigma_updated: 0.1

  # Presumably a class path (`target`) and its constructor arguments
  # (`params`) -- verify against the loader code.
  # `num`, `dim` and `window` carry the same values as dataset_size, dim and
  # seq_length above and must be kept in sync by hand. Note that `seed` here
  # (123) differs from the dataset-level seed (42).
  old_dataloader:
    target: Utils.Data_utils.mujoco_dataset.MuJoCoDataset
    params:
      num: 10000
      dim: 14
      window: 24  # seq_length
      save2npy: true
      neg_one_to_one: true
      seed: 123
      period: train

  metric_to_compute:
    - crps
    - nll
    - nrmse

  evaluation_settings:
    - future_latent
    - future_observation


# Baseline diffusion model, trained with the flow_matching objective.
baseline_diffusion_model:
  objective: flow_matching
  model_type: MyBaselineDiffusionModel

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000


# Baseline autoregressive model, trained with the `ml` objective and
# covariance prediction enabled.
true_baseline_autoregressive:
  objective: ml
  model_type: MyBaselineAutoregressiveModel

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: true

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Same settings as `true_baseline_autoregressive` except for model_type:
# this section uses MyAutoregressiveModel with the `ml` objective.
baseline_autoregressive:
  objective: ml
  model_type: MyAutoregressiveModel

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: true

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000


# Autoregressive model trained with the `mse` objective, `std`
# parametrization, covariance prediction disabled.
my_autoregressive:
  objective: mse
  model_type: MyAutoregressiveModel

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: false

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Reparameterized autoregressive model: `mse` objective with the `mixed`
# parametrization, covariance prediction disabled.
my_autoregressive_reparam:
  objective: mse
  model_type: MyReparameterizedAutoregressiveModel

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: mixed
    predict_cov: false

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Non-probabilistic model trained with the `mse` objective, `std`
# parametrization, covariance prediction disabled.
my_non_probabilistic:
  objective: mse
  model_type: MyNonProbabilisticModel

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: false

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# MyNeuralSDE configured to predict the drift, trained with the
# drift_matching objective.
my_neural_sde:
  objective: drift_matching
  model_type: MyNeuralSDE

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    predict_flow_or_drift: drift

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Uses the same model_type as `my_neural_sde` (MyNeuralSDE); this section
# differs in the objective (flow_matching) and in predict_flow_or_drift (flow).
my_neural_ode:
  objective: flow_matching
  model_type: MyNeuralSDE

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    predict_flow_or_drift: flow

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000


# Diffusion model (MyDiffusionModel), trained with the flow_matching objective.
my_diffusion_model:
  objective: flow_matching
  model_type: MyDiffusionModel

  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# RNN autoregressive configuration trained with the `mse` objective.
# NOTE(review): every value in this section matches
# `my_autoregressive_reparam_rnn` (same model_type, `mixed` parametrization),
# whereas the non-RNN `my_autoregressive` section uses a non-reparameterized
# model_type with `std` parametrization. Confirm this is intentional and not
# a copy-paste leftover.
my_autoregressive_rnn:
  objective: mse
  model_type: MyReparameterizedAutoregressiveRNNModel

  model:
    hidden_size: 128
    parametrization: mixed
    predict_cov: false

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Reparameterized RNN autoregressive model: `mse` objective with the `mixed`
# parametrization, covariance prediction disabled.
my_autoregressive_reparam_rnn:
  objective: mse
  model_type: MyReparameterizedAutoregressiveRNNModel

  model:
    hidden_size: 128
    parametrization: mixed
    predict_cov: false

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Baseline RNN autoregressive model, trained with the `ml` objective and
# covariance prediction enabled.
true_baseline_autoregressive_rnn:
  objective: ml
  model_type: MyBaselineAutoregressiveRNNModel

  model:
    hidden_size: 128
    parametrization: std
    predict_cov: true

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# RNN autoregressive configuration trained with the `ml` objective and
# covariance prediction enabled.
# NOTE(review): every value in this section matches
# `true_baseline_autoregressive_rnn`, whereas the non-RNN pair
# (`true_baseline_autoregressive` / `baseline_autoregressive`) differs in
# model_type. Confirm this is intentional and not a copy-paste leftover.
baseline_autoregressive_rnn:
  objective: ml
  model_type: MyBaselineAutoregressiveRNNModel

  model:
    hidden_size: 128
    parametrization: std
    predict_cov: true

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# RNN diffusion model (MyDiffusionRNNModel), trained with the flow_matching
# objective.
my_diffusion_model_rnn:
  objective: flow_matching
  model_type: MyDiffusionRNNModel

  model:
    hidden_size: 128

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Non-probabilistic RNN model trained with the `mse` objective, covariance
# prediction disabled.
# NOTE(review): parametrization is `mixed` here while the non-RNN
# `my_non_probabilistic` section uses `std` -- confirm the difference is
# intentional.
my_non_probabilistic_rnn:
  objective: mse
  model_type: MyNonProbabilisticRNNModel

  model:
    hidden_size: 128
    parametrization: mixed
    predict_cov: false

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Baseline RNN diffusion model, trained with the flow_matching objective.
baseline_diffusion_model_rnn:
  objective: flow_matching
  model_type: MyBaselineDiffusionRNNModel

  model:
    hidden_size: 128

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# MyNeuralSDERNN configured to predict the drift, trained with the
# drift_matching objective.
my_neural_sde_rnn:
  objective: drift_matching
  model_type: MyNeuralSDERNN

  model:
    hidden_size: 128
    predict_flow_or_drift: drift

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

# Uses the same model_type as `my_neural_sde_rnn` (MyNeuralSDERNN); this
# section differs in the objective (flow_matching) and in
# predict_flow_or_drift (flow).
my_neural_ode_rnn:
  objective: flow_matching
  model_type: MyNeuralSDERNN

  model:
    hidden_size: 128
    predict_flow_or_drift: flow

  # Value comes from the `latent_cond_len` anchor under `globals`.
  latent_cond_len: *latent_cond_len

  optimizer:
    lr: 1.0e-4
    # Written without a digit separator so YAML 1.2 parsers also read an int.
    max_train_steps: 300000
    warmup_steps: 1000

