# Environment selection.
# NOTE(review): "gymma" presumably names an environment wrapper registered by
# the training framework — confirm against the env registry.
env: "gymma"

env_args:
  # Environment id. Quoted so the embedded ':' can never be mistaken for a
  # mapping separator.
  # NOTE(review): "ANY" looks like a placeholder meant to be overridden per
  # task — confirm.
  key: "lbforaging:ANY"
  time_limit: 50  # presumably the per-episode step limit — confirm
  pretrained_wrapper: null

# Evaluation settings.
test_greedy: true  # presumably greedy action selection at test time — confirm

test_nepisode: 40  # presumably episodes per evaluation round — confirm

# save_model_interval: 10000
# t_max: 50000 # 50000 training gradient steps

# Offline dataset settings; used only in the single-task offline setting.
offline_data_folder: "dataset"
# Explicitly empty string (not null).
# NOTE(review): confirm how the data loader treats an empty path.
offline_bottom_data_path: ""
offline_data_quality: "expert"
offline_max_buffer_size: 4000
offline_data_shuffle: false
offline_data_type: "h5"

# --- Agent parameters ---
# Include the agent's one-hot id in the observation.
# NOTE(review): the original author questioned whether this should be turned
# off for generalizable observations — still unresolved.
obs_agent_id: true
# Include the agent's last action (one-hot) in the observation.
obs_last_action: true
# id_length: 4 # 15 -> 2^4 in binary, also by default in mt-algs

# --- RL settings ---

gamma: 0.99  # presumably the discount factor — confirm
tune_all: false
standardise_returns: false
standardise_rewards: false

# NOTE(review): hard-codes a CUDA device; presumably needs overriding on
# CPU-only machines — confirm.
data_device: "cuda"

# --- Alg settings for COMAD ---
# NOTE(review): the meaning of the three values below is not visible in this
# file — confirm against the COMAD learner before tuning.
n_reuse_heads: 2
delta_m_thres: 0.95
ebm_noise_scale: 1

# Both step budgets are set to the same value (20000).
# NOTE(review): presumably they are meant to stay equal — confirm before
# changing only one of them.
cont_train_steps: 20000
t_max: 20000

# Evaluation and all logging intervals share one period (250).
# NOTE(review): the unit (env steps vs. gradient steps) is not stated here —
# confirm.
test_interval: 250
log_interval: 250
runner_log_interval: 250
learner_log_interval: 250