# Global parameters reused by the model sections below.
globals:
  latent_cond_len: &latent_cond_len 2  # anchor; model sections reference it as *latent_cond_len

# Dataset definition, train/val split and batch sizes.
dataset:
  # Dataset name and random seed.
  name: double_pendulum
  seed: 42

  # Dataset dimensions.
  dataset_size: 10000
  dim: 4
  seq_length: 64
  pred_length: 32

  # Split fractions; the remaining 0.1 is presumably the test split -- TODO confirm.
  train_proportion: 0.8
  val_proportion: 0.1

  # Batching: 64 x 4 = 256 effective training batch size.
  train_batch_size: 64
  gradient_accumulation_batch_size_multiplier: 4
  val_batch_size: 512



# MyBaselineDiffusionModel with the flow_matching objective.
# The section key is the name passed via --model_name.
baseline_diffusion_model:
  objective: flow_matching
  model_type: MyBaselineDiffusionModel

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyBaselineAutoregressiveModel with the ml objective and predict_cov enabled.
# The section key is the name passed via --model_name.
true_baseline_autoregressive:
  objective: ml
  model_type: MyBaselineAutoregressiveModel

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: true

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000


# MyAutoregressiveModel with the ml objective and predict_cov enabled.
# Shares its model_type with `my_autoregressive`; the two sections differ in
# `objective` and `predict_cov`.
baseline_autoregressive:
  objective: ml
  model_type: MyAutoregressiveModel

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: true

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000


# MyAutoregressiveModel with the mse objective and predict_cov disabled.
# Shares its model_type with `baseline_autoregressive`; the two sections
# differ in `objective` and `predict_cov`.
my_autoregressive:
  objective: mse
  model_type: MyAutoregressiveModel

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: false

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyNonProbabilisticModel with the mse objective and predict_cov disabled.
# The section key is the name passed via --model_name.
my_non_probabilistic:
  objective: mse
  model_type: MyNonProbabilisticModel

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    parametrization: std
    predict_cov: false

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyNeuralSDE with the drift_matching objective, predicting the drift.
# Shares its model_type with `my_neural_ode`; the two sections differ in
# `objective` and `predict_flow_or_drift`.
my_neural_sde:
  objective: drift_matching
  model_type: MyNeuralSDE

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    predict_flow_or_drift: drift

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyNeuralSDE with the flow_matching objective, predicting the flow.
# Shares its model_type with `my_neural_sde`; the two sections differ in
# `objective` and `predict_flow_or_drift`.
my_neural_ode:
  objective: flow_matching
  model_type: MyNeuralSDE

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32
    predict_flow_or_drift: flow

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000



# MyDiffusionModel with the flow_matching objective.
# The section key is the name passed via --model_name.
my_diffusion_model:
  objective: flow_matching
  model_type: MyDiffusionModel

  # Architecture hyperparameters.
  model:
    n_layers: 10
    filter_width: 4
    hidden_channel_size: 128
    num_transformer_heads: 32

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000



# Reduced-size MyNonProbabilisticModel (1 layer, 4 channels, 4 heads) with the
# mse objective. Uses the same model_type as `my_non_probabilistic`.
my_small_non_probabilistic:
  objective: mse
  model_type: MyNonProbabilisticModel

  # Architecture hyperparameters.
  model:
    n_layers: 1
    filter_width: 4
    hidden_channel_size: 4
    num_transformer_heads: 4
    parametrization: std
    predict_cov: false

  # Alias of the value anchored under `globals`.
  # NOTE(review): every other model section in this file sets this key, so it
  # is set here as well -- confirm the loader expects it for this section.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyReparameterizedAutoregressiveRNNModel with the mse objective.
# NOTE(review): every value in this section is identical to
# `my_autoregressive_reparam_rnn`, including model_type -- confirm whether a
# different model_type was intended for this entry.
my_autoregressive_rnn:
  objective: mse
  model_type: MyReparameterizedAutoregressiveRNNModel

  # Architecture hyperparameters.
  model:
    hidden_size: 128
    parametrization: mixed
    predict_cov: false

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyReparameterizedAutoregressiveRNNModel with the mse objective and
# predict_cov disabled. The section key is the name passed via --model_name.
my_autoregressive_reparam_rnn:
  objective: mse
  model_type: MyReparameterizedAutoregressiveRNNModel

  # Architecture hyperparameters.
  model:
    hidden_size: 128
    parametrization: mixed
    predict_cov: false

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyBaselineAutoregressiveRNNModel with the ml objective and predict_cov
# enabled. The section key is the name passed via --model_name.
true_baseline_autoregressive_rnn:
  objective: ml
  model_type: MyBaselineAutoregressiveRNNModel

  # Architecture hyperparameters.
  model:
    hidden_size: 128
    parametrization: std
    predict_cov: true

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyBaselineAutoregressiveRNNModel with the ml objective and predict_cov
# enabled.
# NOTE(review): every value in this section is identical to
# `true_baseline_autoregressive_rnn`, whereas the non-RNN pair
# (`baseline_autoregressive` / `true_baseline_autoregressive`) uses two
# different model_types -- confirm whether this duplication is intended.
baseline_autoregressive_rnn:
  objective: ml
  model_type: MyBaselineAutoregressiveRNNModel

  # Architecture hyperparameters.
  model:
    hidden_size: 128
    parametrization: std
    predict_cov: true

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyDiffusionRNNModel with the flow_matching objective.
# The section key is the name passed via --model_name.
my_diffusion_model_rnn:
  objective: flow_matching
  model_type: MyDiffusionRNNModel

  # Architecture hyperparameters.
  model:
    hidden_size: 128

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyNonProbabilisticRNNModel with the mse objective and predict_cov disabled.
# The section key is the name passed via --model_name.
my_non_probabilistic_rnn:
  objective: mse
  model_type: MyNonProbabilisticRNNModel

  # Architecture hyperparameters.
  model:
    hidden_size: 128
    parametrization: mixed
    predict_cov: false

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyBaselineDiffusionRNNModel with the flow_matching objective.
# The section key is the name passed via --model_name.
baseline_diffusion_model_rnn:
  objective: flow_matching
  model_type: MyBaselineDiffusionRNNModel

  # Architecture hyperparameters.
  model:
    hidden_size: 128

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyNeuralSDERNN with the drift_matching objective, predicting the drift.
# Shares its model_type with `my_neural_ode_rnn`; the two sections differ in
# `objective` and `predict_flow_or_drift`.
my_neural_sde_rnn:
  objective: drift_matching
  model_type: MyNeuralSDERNN

  # Architecture hyperparameters.
  model:
    hidden_size: 128
    predict_flow_or_drift: drift

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# MyNeuralSDERNN with the flow_matching objective, predicting the flow.
# Shares its model_type with `my_neural_sde_rnn`; the two sections differ in
# `objective` and `predict_flow_or_drift`.
my_neural_ode_rnn:
  objective: flow_matching
  model_type: MyNeuralSDERNN

  # Architecture hyperparameters.
  model:
    hidden_size: 128
    predict_flow_or_drift: flow

  # Alias of the value anchored under `globals`.
  latent_cond_len: *latent_cond_len

  # Optimisation schedule.
  optimizer:
    lr: 1.0e-4
    max_train_steps: 300000
    warmup_steps: 1000

# Example invocations; --model_name takes one of the top-level model section keys above.
# python main.py --config_file=Config/double_pendulum.yaml --freq=0 --sde_type=brownian --model_name=baseline_autoregressive --group=exp_april_7 --debug

# python main.py --config_file=Config/double_pendulum.yaml --freq=0 --sde_type=brownian --model_name=my_autoregressive --group=check_ar --retrain --train
# python main.py --config_file=Config/double_pendulum.yaml --freq=0 --sde_type=brownian --model_name=my_non_probabilistic --group=check_ar --retrain --train
# python main.py --config_file=Config/double_pendulum.yaml --freq=1 --sde_type=brownian --model_name=my_neural_sde --group=check_ar --retrain --train
# python main.py --config_file=Config/double_pendulum.yaml --freq=1 --sde_type=brownian --model_name=my_neural_ode --group=check_ar --retrain --train
# python main.py --config_file=Config/double_pendulum.yaml --freq=1 --sde_type=brownian --model_name=my_diffusion_model --group=check_ar --retrain --train


# python main.py --config_file=Config/double_pendulum.yaml --freq=1 --sde_type=brownian --model_name=my_small_non_probabilistic --group=asdf --retrain --train

