# Experiment-logging settings.
wandb_config:
  # Name of the project that runs are reported under.
  # NOTE(review): "Reccurent" is misspelled, but the value is read at runtime,
  # so it is intentionally left unchanged here.
  project_name: "Reccurent-Baselines-RATE-T-Maze"
  # Logging switch; presumably turns wandb reporting on/off (confirm in the loader).
  wwandb: true

# Dataset construction options.
data_config:
  # noise augmentation of the data
  multiplier: 1000
  # number of hint steps
  hint_steps: 1
  # whether to cut the previous data in half when a new segment is added
  # during training
  cut_dataset: false
  # reward for successful completion of an episode
  desired_reward: 1.0  # default: 1.0
  # whether to add only successful trajectories to the dataset
  win_only: true  # default: True

# Optimisation and training-loop settings.
# Trailing comments after a value (e.g. "# 1e-4") are kept from the original
# file; they look like alternative/previous values (confirm with the author).
training_config:
  learning_rate: 0.0003  # 1e-4
  lr_end_factor: 0.1
  beta_1: 0.9
  beta_2: 0.95
  weight_decay: 0.1
  batch_size: 64  # 64
  warmup_steps: 100  # 100
  grad_norm_clip: null  # 1.0
  epochs: 100  # 250
  ckpt_epoch: 10
  use_erl_stop: false
  # run inference during training
  online_inference: true
  # ! WARNING ! IMPORTANT INFO
  # * For DT the effective context is sections * context_length, so to train
  # * DT with a context of 90 (or RATE with 3 segments of context 30) set
  # * context_length=30 and sections=3.
  context_length: 10  # if RATE/GRATE: L = L, if DT: L = sections * L
  sections: 3         # if RATE/GRATE: S = S, if DT: S = 1
  

# Model architecture settings.
model_config:
  mode: "tmaze"
  # Dimensions are left unset in this file. Use `null` (YAML's null value):
  # a bare `None` is not a YAML keyword and is loaded as the string "None".
  # NOTE(review): presumably these are filled in by the training script based
  # on `mode` (confirm against the loader).
  state_dim: null
  act_dim: null
  num_layers: 1  # 3
  hidden_size: 128  # 256
  dropout: 0.1  # 0.2

# Settings for inference during training.
# Both values are intentionally empty in this file; they are written as an
# explicit `null` rather than a bare `key:` so the intent is unambiguous.
# NOTE(review): presumably populated by the calling script (confirm).
online_inference_config:
  episode_timeout: null
  corridor_length: null





