# Environment configuration presets.
#
# Every preset is a plain dict. The presets come in pairs: a base preset with
# "use_mi_shaping" and "use_mi_loss" switched on, and an "iql" counterpart that
# is identical except that both of those switches are off. The counterparts are
# derived from their base preset so the two can never drift apart.
#
# NOTE(review): "iql" presumably means independent Q-learning and "mi" mutual
# information -- confirm against the code that consumes these presets.

# Rewards shared by every preset (stored under "right_r" / "wrong_r").
correct_reward = 1
wrong_reward = -0.5
# Alternative reward scale, kept for reference:
# correct_reward = 10
# wrong_reward = -5


def _iql_variant(base_config):
    """Return a copy of ``base_config`` with both MI switches turned off.

    Key order is preserved. List values are copied so that the returned preset
    does not share a mutable container with ``base_config``; every other value
    used by the presets in this module is immutable (numbers, bools, tuples).
    """
    variant = {
        key: list(value) if isinstance(value, list) else value
        for key, value in base_config.items()
    }
    # Both keys already exist in every base preset, so assigning them here
    # changes the values without changing their position in the dict.
    variant["use_mi_shaping"] = False
    variant["use_mi_loss"] = False
    return variant


# --- "fixed sender" presets ------------------------------------------------

fixed_sender_config = {
    "lengths": (1, 3),
    "starts": (0, 1),
    "receiver_booth_loc": 0,
    "booth_loc": 0,
    "episode_limit": 10,
    "right_r": correct_reward,
    "wrong_r": wrong_reward,
    "num_sender_decoy_booths": 0,
    "decoy_booths_fixed": 1,
    "use_intermediate_reward": False,
    "use_mi_shaping": True,
    "use_mi_loss": True,
    "use_oh_token": True,
}

fixed_sender_iql_env_config = _iql_variant(fixed_sender_config)


# --- default presets (the only pair with "use_oh_token" disabled) -----------

env_config = {
    "lengths": (8, 4),
    "starts": (4, 2),
    "receiver_booth_loc": 0,
    "booth_loc": 7,
    "episode_limit": 20,
    "right_r": correct_reward,
    "wrong_r": wrong_reward,
    "num_sender_decoy_booths": 2,
    "decoy_booths_fixed": 1,
    "use_intermediate_reward": False,
    "use_mi_shaping": True,
    "use_mi_loss": True,
    "use_oh_token": False,
}

iql_env_config = _iql_variant(env_config)


# --- medium presets ---------------------------------------------------------

env_config_med = {
    "lengths": (5, 3),
    "starts": (2, 1),
    "receiver_booth_loc": 0,
    "booth_loc": 4,
    "episode_limit": 12,
    "right_r": correct_reward,
    "wrong_r": wrong_reward,
    "num_sender_decoy_booths": 2,
    "decoy_booths_fixed": 1,
    "use_intermediate_reward": False,
    "use_mi_shaping": True,
    "use_mi_loss": True,
    "use_oh_token": True,
}

iql_env_config_med = _iql_variant(env_config_med)


# --- small presets ----------------------------------------------------------

env_config_small = {
    "lengths": (2, 3),
    "starts": (0, 1),
    "receiver_booth_loc": 0,
    "booth_loc": 1,
    "episode_limit": 10,
    "right_r": correct_reward,
    "wrong_r": wrong_reward,
    "num_sender_decoy_booths": 2,
    "decoy_booths_fixed": 1,
    "use_intermediate_reward": False,
    "use_mi_shaping": True,
    "use_mi_loss": True,
    "use_oh_token": True,
}

iql_env_config_small = _iql_variant(env_config_small)


# --- multi-booth presets (use "booth_locs" instead of "booth_loc") ----------

multi_env_config = {
    "lengths": (5, 3),
    "starts": (3, 1),
    "receiver_booth_loc": 0,
    # Each entry holds four values. An earlier note here read "(x, y, cost)",
    # which names only three -- TODO confirm the meaning of the fourth value.
    # Alternatives that differ only in the last value of the third entry:
    # "booth_locs":[(4, 0, -0.4, 0), (1, 0, 0.0, 0), (3, -1, 0, 0.1)],
    # "booth_locs":[(4, 0, -0.4, 0), (1, 0, 0.0, 0), (3, -1, 0, 0.3)],
    "booth_locs": [(4, 0, -0.4, 0), (1, 0, 0.0, 0), (3, -1, 0, 0.5)],
    "episode_limit": 20,
    "right_r": correct_reward,
    "wrong_r": wrong_reward,
    "num_sender_decoy_booths": 0,
    "decoy_booths_fixed": 1,
    "use_intermediate_reward": False,
    "use_mi_shaping": True,
    "use_mi_loss": True,
    "use_oh_token": True,
}

# Gets its own copy of the "booth_locs" list (see _iql_variant).
multi_iql_env_config = _iql_variant(multi_env_config)
