# Launcher metadata: experiment name, training entry script, worker count and
# the job-name template.
meta_data:
  exp_name: 'mad_smac'
  script_path: 'run_scripts/train.py'
  num_workers: 1
  # Every `{placeholder}` below matches a key defined under `variables` or
  # `constants` in this file.
  # NOTE(review): `seed` is not part of the template; if the job name decides
  # the output location, runs that differ only by seed would share a name —
  # confirm against the launcher.
  job_name: '{dataset}/h_{horizon}-hh_{history_horizon}-{model}-r_{returns_scale}-guidew_{condition_guidance_w}-ctde_{decentralized_execution}'

# Each key maps to a list of candidate values (currently one value per key).
# Presumably the launcher expands these into one job per combination — confirm.
# `seed` and `condition_guidance_w` are also defined under `constants`.
variables:
  seed:
    - 515

  horizon:
    - 8
  history_horizon:
    - 0
  returns_scale:
    - 20
  dataset:
    - '2m_vs_1z-expert-imb'
  condition_guidance_w:
    - 1.2

# Fixed settings for this experiment. Booleans are written in canonical
# lowercase form (`true` / `false`) so every YAML loader resolves them the
# same way.
constants:
  # misc
  # NOTE(review): `seed` is also listed under `variables` (515); which value
  # takes effect depends on the launcher — confirm.
  seed: 100
  env_type: "smac"
  n_agents: 2
  use_action: true
  discrete_action: true
  # NOTE(review): the original author marked this value as uncertain ("?") —
  # confirm it against the environment's action space.
  num_actions: 7
  residual_attn: true
  decentralized_execution: false
  use_zero_padding: false
  pred_future_padding: true
  use_ddim_sample: true
  n_ddim_steps: 15

  # model
  model: "models.SharedConvAttentionDeconv"
  diffusion: "models.GaussianDiffusionWrapped"
  share_inv: false
  joint_inv: true
  share_fwd: false
  joint_fwd: true
  n_diffusion_steps: 200
  action_weight: 10
  loss_weights: null
  loss_discount: 1
  use_return_to_go: true
  dim_mults: [1, 4, 8]
  returns_condition: true
  states_condition: true
  predict_epsilon: true
  calc_energy: false
  dim: 128
  hidden_dim: 256
  condition_dropout: 0.25
  # NOTE(review): also listed under `variables` with the same value (1.2);
  # precedence depends on the launcher — confirm.
  condition_guidance_w: 1.2
  train_only_inv: false
  clip_denoised: true
  test_ret: 1.0
  renderer: "utils.SMACRenderer"

  # dataset
  loader: "datasets.SequenceAugDataset"
  normalizer: "CDFNormalizer"
  max_n_episodes: 300000
  preprocess_fns: []
  use_padding: true
  discount: 0.99
  # NOTE(review): the original author flagged this value as "different"
  # (presumably relative to sibling configs) — confirm before copying it.
  max_path_length: 150
  termination_penalty: 0.0
  circular_shift: true
  shift_ratio: 0.1

  # training
  n_steps_per_epoch: 10000
  n_train_steps: 1000000
  batch_size: 32
  learning_rate: 0.0002
  gradient_accumulate_every: 2
  ema_decay: 0.995
  log_freq: 1000
  save_freq: 250000
  sample_freq: 10000
  n_saves: 5
  save_parallel: false
  n_reference: 3
  save_checkpoints: true

  # eval
  evaluator: "utils.MADEvaluator"
  # NOTE(review): the original author marked this value as uncertain ("?") —
  # confirm.
  num_envs: 10
  # The original inline note read "100" (presumably an alternative value) —
  # confirm which is intended.
  num_eval: 50
  eval_freq: 250000

  # load checkpoint
  continue_training: false
