# Experiment label; reused below to build the Hydra output directory name.
experiment_name: TRIRL

hydra:
  run:
    # Each run writes to outputs/<experiment_name>_<timestamp>. `now` is
    # Hydra's timestamp resolver; the format here has one-second resolution.
    dir: outputs/${experiment_name}_${now:%Y-%m-%d_%H-%M-%S}

env:
  # Icy Grid World
  # NOTE(review): the index/coordinate values below look tied to
  # world_size: 5 (e.g. 24 = 5 * 5 - 1) — presumably they must be updated
  # together with world_size; confirm against the environment code.
  world_size: 5
  r_default: -1
  r_abs: 0
  r_goal: 0
  # presumably the probability that a move slips on the ice — TODO confirm
  p_slip: 0.2
  initial_idx: [0]
  absorbing_idx: [24]
  terminal_idx: [] # Terminal and absorbing states are treated similarly
  # NOTE(review): presumably the cells adjacent to the corner (4, 4) — confirm
  penultimate_coords:
    - [4, 3]
    - [3, 4]
  rewarding_actions_idx: [0, 2]

# Expert settings (presumably for generating demonstration trajectories —
# TODO confirm against the consuming code).
expert:
  n_trajectories: 200
  # Same value as mce_irl.discount. NOTE(review): presumably the two are meant
  # to stay in sync — confirm before changing only one of them.
  discount: 0.99

# IRL solver settings (presumably Maximum Causal Entropy IRL — TODO confirm).
mce_irl:
  # Booleans are written in canonical lowercase form (`true`/`false`) so they
  # parse the same under YAML 1.1 and 1.2 loaders.
  trirl: true
  init_reward: 1.0
  # Same value as expert.discount. NOTE(review): presumably the two are meant
  # to stay in sync — confirm before changing only one of them.
  discount: 0.99
  max_iter: 300

  # tunable hyperparameters
  epsilon: 0.01
  beta: 50.0
  init_eta: 0.1
  eval_dual: true

# Figure appearance settings.
plotting:
  style:
    border:
      # Quoted on purpose: an unquoted leading `#` would start a YAML comment.
      color: "#bd103b"
      linewidth: 0.5
    cmap: "Blues"

  rc_params:
    font: "monospace"
    titlesize: 24
    labelsize: 24
    legendsize: 24
    fontsize: 24
    ticksize: 24
    figsize: [4, 4]
    cbar_off: true
    dpi: 200

    # Interpolated from env.world_size so the two cannot drift apart.
    # NOTE(review): this key is nested under rc_params, not directly under
    # plotting — confirm that is where the consuming code looks for it.
    world_size: ${env.world_size}

