# Experiment params (top-level keys, outside the "env" / "alg" groups).
# Presumably the number of seeds run per experiment -- confirm against the launcher.
"NUM_SEEDS": 2
# Presumably the base RNG seed -- confirm against the launcher.
"SEED": 5

"env":
  # "_" looks like a placeholder that is expected to be overridden at launch
  # time -- confirm. An example of a concrete name is kept commented out below.
  "ENV_NAME": "_"
  # "ENV_NAME": "MPE_simple_spread_v3"
  # No extra keyword arguments are configured for the environment by default.
  "ENV_KWARGS": {}

"alg":
  # Algorithm / training settings.
  #
  # NOTE: exponent literals in this group are deliberately written in the
  # `1.0e+4` form. YAML 1.1 resolvers such as PyYAML's default one read a bare
  # `1e4` as the *string* "1e4", whereas the dotted, signed-exponent form is a
  # float under both YAML 1.1 loaders and OmegaConf / YAML 1.2 loaders.
  "EPOCHS": 1.0e+4
  "TOTAL_TIMESTEPS": 5.0e+6
  "BUFFER_SIZE": 5000
  "BUFFER_BATCH_SIZE": 32
  "BATCH_SIZE": 128
  "AGENT_HIDDEN_DIM": 64
  "AGENT_INIT_SCALE": 2.0
  "PARAMETERS_SHARING": True
  "EPSILON": 0.05   # noise added to data collection
  # Start / end values and length of the epsilon schedule
  # (unit of the anneal time is presumably timesteps -- confirm against the trainer).
  "EPSILON_START": 1.0
  "EPSILON_FINISH": 0.05
  "EPSILON_ANNEAL_TIME": 1.0e+5
  "MAX_GRAD_NORM": 25
  "TARGET_UPDATE_INTERVAL": 3
  "LR": 0.001
  "LR_LINEAR_DECAY": False
  "EPS_ADAM": 0.001
  "TD_LAMBDA_LOSS": False
  "TD_LAMBDA": 0.6
  "GAMMA": 0.9
  "VERBOSE": True
  "MSE_LOSS_COEF": 30
  # Offline dataset directories, grouped by environment name. Every environment
  # lists five directories, the same length as NUM_BATCH further down
  # (presumably NUM_BATCH[i] applies to the i-th directory -- confirm).
  "OFFLINE_DATA_PATHS":
    "MPE_simple_reference_v3":
      - "data/reference/vdn104/"
      - "data/reference/vdn149/"
      - "data/reference/vdn199/"
      - "data/reference/vdn242/"
      - "data/reference/vdn298/"
    "MPE_simple_spread_v3":
      - "data/spread/vdn149/"
      - "data/spread/vdn198/"
      - "data/spread/vdn297/"
      - "data/spread/vdn348/"
      - "data/spread/vdn387/"
    "MPE_simple_spread_v3_na4":
      - "data/spread/na4/vdn99/"
      - "data/spread/na4/vdn118/"
      - "data/spread/na4/vdn136/"
      - "data/spread/na4/vdn150/"
      - "data/spread/na4/vdn175/"
    "MPE_simple_spread_v3_na5":
      - "data/spread/na5/vdn99/"
      - "data/spread/na5/vdn117/"
      - "data/spread/na5/vdn153/"
      - "data/spread/na5/vdn165/"
      - "data/spread/na5/vdn184/"
    "MPE_simple_spread_v3_na6":
      - "data/spread/na6/vdn137/"
      - "data/spread/na6/vdn157/"
      - "data/spread/na6/vdn176/"
      - "data/spread/na6/vdn190/"
      - "data/spread/na6/vdn200/"
    "MPE_simple_spread_v3_na7":
      - "data/spread/na7/vdn159/"
      - "data/spread/na7/vdn169/"
      - "data/spread/na7/vdn177/"
      - "data/spread/na7/vdn195/"
      - "data/spread/na7/vdn216/"
    "MPE_simple_tag_v3":
      - "data/tag/vdn621/"
      - "data/tag/vdn525/"
      - "data/tag/vdn392/"
      - "data/tag/vdn271/"
      - "data/tag/vdn187/"
  # "Unilateral" dataset directories, grouped by environment name. Every
  # environment lists three directories, the same length as
  # NUM_UNILATERAL_BATCH further down.
  "OFFLINE_UNILATERAL_DATA_PATHS":
    "MPE_simple_spread_v3":
      - "data/spread/vdn149|198unilateral/"
      - "data/spread/vdn149|297unilateral/"
      # NOTE(review): unlike its siblings this entry has no "a|b" pair -- confirm it is intended.
      - "data/spread/vdn149unilateral/"
    "MPE_simple_spread_v3_na4":
      - "data/spread/na4/vdn99|136unilateral/"
      - "data/spread/na4/vdn99|150unilateral/"
      - "data/spread/na4/vdn99|175unilateral/"
    "MPE_simple_spread_v3_na5":
      - "data/spread/na5/vdn99|153unilateral/"
      - "data/spread/na5/vdn99|165unilateral/"
      - "data/spread/na5/vdn99|184unilateral/"
    "MPE_simple_spread_v3_na6":
      - "data/spread/na6/vdn137|176unilateral/"
      - "data/spread/na6/vdn137|190unilateral/"
      - "data/spread/na6/vdn137|200unilateral/"
    "MPE_simple_spread_v3_na7":
      - "data/spread/na7/vdn159|177unilateral/"
      - "data/spread/na7/vdn159|195unilateral/"
      - "data/spread/na7/vdn159|216unilateral/"
    "MPE_simple_tag_v3":
      # NOTE(review): 480 does not appear among the tag directories listed under
      # OFFLINE_DATA_PATHS (621/525/392/271/187) -- confirm this path exists.
      - "data/tag/vdn621|480unilateral/"
      - "data/tag/vdn621|271unilateral/"
      - "data/tag/vdn621|187unilateral/"
    "MPE_simple_reference_v3":
      - "data/reference/vdn104|149unilateral/"
      - "data/reference/vdn104|199unilateral/"
      - "data/reference/vdn149|199unilateral/"
  # Five entries, one per directory in each OFFLINE_DATA_PATHS list
  # (presumably how many batches to draw from each -- confirm).
  "NUM_BATCH":
    - 1
    - 0
    - 0
    - 0
    - 0
  # Three entries, one per directory in each OFFLINE_UNILATERAL_DATA_PATHS list.
  "NUM_UNILATERAL_BATCH":
    - 1
    - 1
    - 1
  "USE_UNILATERAL": False

  # "WANDB_ONLINE_REPORT": True
  "NUM_TEST_EPISODES": 256
  "NUM_STEPS": 130
  "NUM_ENVS": 128
  "TEST_INTERVAL": 5000
  "REWARD_MODEL_TYPE": "FF"
  # "REWARD_MODEL_TYPE": "RNN" # "FF"
  #"REWARD_NETWORK_PARAM_PATH": "models/reward_model/medium/reference_rmff_mix2|1|1_mse3.msgpack" # path to the reward model
  "REWARD_NETWORK_PARAM_PATH": "results/mpe_rm_mse_3/reward_model_best.msgpack"
  "REFERENCE_NETWORK_PARAM_PATH": "models/actor_model/expert/reference_imitation.msgpack"
  "SAVE_PATH": "_"  # where to save the model params
  "FF_LAYER_DIM": 64
  "RM_HIDDEN_SIZE": 256
  "RE_HIDDEN_SIZE": 128
  "ADD_LOG_PROB": True
  "REF_LOG_COEF": 10

  "IDIOTIC_AGENT": False
  "IDIOTIC_AGENT_PATH": "_"
  "TRAJ_BATCH_PATH": "data/vdn_medium/"
  # NOTE(review): singular OFFLINE_DATA_PATH is a separate key from the
  # per-environment OFFLINE_DATA_PATHS mapping above -- confirm which one each
  # consumer reads. OFFLINE_DATA_NUMS has one entry per path listed here.
  "OFFLINE_DATA_PATH":
    - "data/vdn_expert/"
    - "data/vdn_medium/"
  "OFFLINE_DATA_NUMS": [20, 1]
  "EXPERT_PARAM_PATH": "_"

# wandb params
"WANDB_MODE": "online"  # enable this line in training
# Alternative value, kept commented out: turns wandb reporting off.
# "WANDB_MODE": "disabled"
"ENTITY": "comm_marl"
"PROJECT": "mpe_reference_vdn"
"PROJECT_PREFIX": ""
# NOTE(review): by name this is not a wandb setting; presumably it disables JIT
# compilation for debugging -- confirm against the training script.
"DISABLE_JIT": False
