# --- Defaults for stag hunt: 10x10 bounded grid, 8 agents, 8 stags, 2-agent stag capture ---

# Environment selected by this config; the env_args mapping below holds its settings.
env: "stag_hunt"

# Settings for the stag_hunt environment. Every key appears exactly once; commented-out
# lines are alternative values kept for reference.
env_args:
    n_agents: 8             # number of hunters, i.e., agents
    n_stags: 8              # number of stags in the environment
    n_hare: 0               # number of hares in the environment
    miscapture_punishment: 0    # punishment if less than capture_action_conditions agents executes capture_action
    agent_obs: [2, 2]       # (radius-1) of the agent's observation, e.g., [0, 0] observes only one pixel
    agent_move_block: [0,1,2]   # by which entities is an agent's move blocked (0=agents, 1=stags, 2=hare)
    capture_action: True   # whether capturing requires an extra action (True) or just capture_conditions (False)
    capture_conditions: [0, 1]  # number of free fields available to [stag, hare] to be captured
    capture_action_conditions: [2, 1]  # number of agents that have to simultaneously execute "catch" action
    # capture_conditions: [0, 0]  # number of free fields available to [stag, hare] to be captured
    # capture_action_conditions: [2, 2]  # number of agents that have to simultaneously execute "catch" action
    capture_freezes: True   # whether capturing any prey freezes the participating agents (True) or not (False)
    capture_terminal: False # whether capturing any prey ends the episode
    directed_observations: False   # Agents observe square around them (False) or a cone in the direction of the last action (True).
    directed_cone_narrow: True     # Whether the diagonal is excluded from the directed_observation cone (True)
    # NOTE(review): the "exta" spelling presumably matches the name the environment code reads; confirm before renaming.
    directed_exta_actions: True    # Whether the observation cone is controlled by movement (False) or actions (True)
    episode_limit: 200      # maximum number of time steps per episode
    # episode_limit: 50      # maximum number of time steps per episode
    intersection_global_view: False # intersection specific (MACKRL)
    intersection_unknown: False     # intersection specific (MACKRL)
    # miscapture_punishment: -1       # punishment if less than capture_action_conditions agents executes capture_action
    mountain_slope: 0.0     # probability that an "up" action will not be executed (stag_hunt = 0.0)
    mountain_spawn: False   # whether prey spawns in their preferred habitat (True) or randomly (False)
    mountain_agent_row: -1  # the row in which the agents are spawned (0 is top). Negative values spawn agents randomly.
    # n_agents: 2             # number of hunters, i.e., agents
    # n_hare: 2               # number of hares in the environment
    # n_stags: 1              # number of stags in the environment
    observe_state: False    # whether an observation is only partial (False) or central including agent position (True)
    observe_walls: False    # observe walls as an extra feature (only for state_as_list=False and toroidal=False)
    observe_ids: False      # observe agent ID, instead of agent presence (only for state_as_list=False)
    observe_one_hot: False  # observe agent ID as one-hot vector (only for observe_ids=True)
    p_stags_rest: 0.0       # probability that a stag will not move (at each time step)
    p_hare_rest: 0.0        # probability that a hare will not move (at each time step)
    prevent_cannibalism: True   # If set to False, prey can be captured by other prey (which is rewarding)
    # prevent_cannibalism: False   # If set to False, prey can be captured by other prey (which is rewarding)
    print_caught_prey: False    # debug messages about caught prey and finished episodes
    print_frozen_agents: False  # debug messages about frozen agents after some prey has been caught
    random_ghosts: False    # If True, prey turns into ghosts randomly (neg. reward), indicated by a corner-feature
    random_ghosts_prob: 0.5 # Probability that prey turns into ghost
    random_ghosts_mul: -1   # Catching ghosts incurs a reward/punishment of random_ghosts_mul*reward
    random_ghosts_indicator: False  # If True, the indicator for ghosts is in a different corner every episode
    remove_frozen: True     # whether frozen agents are removed (True) or still present in the world (False)
    reward_hare:  1         # reward for capturing a hare
    reward_stag:  10         # reward for capturing a stag
    reward_collision: 0     # reward (or punishment) for colliding with other agents
    reward_time: 0         # reward (or punishment) given at each time step
    state_as_graph: False   # whether the state is a list of entities (True) or the entire grid (False)
    toroidal: False         # whether the world is bounded (False) or toroidal (True)
    world_shape: [10, 10]   # the shape of the grid-world [height, width]

# Run-level settings (outside env_args).
# NOTE(review): presumably the total number of environment steps for the run; confirm against the consumer.
t_max: 1050000
# Number of episodes to test for
test_nepisode: 16
# NOTE(review): presumably how often (in steps) a test phase is triggered; confirm against the consumer.
test_interval: 10000