# Environment selection and its parameters.
# NOTE(review): every key in this mapping sits directly under `environment`;
# nothing is nested under `reset_params`.
environment:
    type: "Passive_T_Maze_Flag"
    name: "Passive_T_Maze_Flag"
    seed: 1337
    # Explicit null: `reset_params` has no children in this file.
    # NOTE(review): if the keys that follow were meant to be children of
    # `reset_params`, they need one more indentation level — confirm against
    # the code that reads this config before changing the nesting.
    reset_params: null
    start-seed: 0
    num-seeds: 100000
    agent_scale: 0.25
    cardinal_origin_choice: [0, 1, 2, 3]
    show_origin: false
    show_goal: false
    visual_feedback: true
    reward_goal: 1.0
    reward_fall_off: 0.0
    reward_path_progress: 0.0

    episode_timeout: 15
    corridor_length: 13
    # NOTE(review): `goal_reward` (1) and `reward_goal` (1.0, above) both
    # exist — confirm which one the environment actually reads.
    goal_reward: 1
    # -0.0714 is approximately -1/14; presumably a per-step penalty — confirm.
    penalty: -0.0714


# Run bookkeeping: log identifier, logging backend, compute device, and the
# number of evaluation episodes.
# NOTE(review): `log_name` looks like a relative path (env/model/run) —
# confirm which root directory the logger resolves it against.
log_name: 'Passive_T_Maze_Flag/GTXL/GTXL_Passive_T_Maze_Flag_LONG_TERM_dense'
logger: 'tensorboard'
# Quoted so the colon is never interpreted by the YAML parser.
device: 'cuda:0'
eval_episodes: 100
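# Optional fixed list of evaluation seeds (currently disabled).
# NOTE(review): the seed 3 is listed twice (second occurrence follows 31) —
# verify the list before re-enabling it.
# eval_seeds: [0,2,3,4,6,9,13,15,18,24,25,31,3,40,41,42,43,44,48,49,50,
#               51,62,63,64,65,66,69,70,72,73,74,75,83,84,85,86,87,88,91,
#               92,95,96,97,98,100,102,105,106,107,1,5,7,8,10,11,12,14,16,
#               17,19,20,21,22,23,26,27,28,29,30,32,34,35,36,37,38,39,45,
#               46,47,52,53,54,55,56,57,58,59,60,61,67,68,71,76,77,78,79,80,81,82]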

# Checkpointing switches.
save_model: true
# NOTE(review): the unit of this interval (updates, epochs, ...) is not
# visible in this file — confirm against the trainer.
save_model_frequency: 30
save_best_model: true

# Optimisation hyperparameters.
# NOTE(review): meanings below are inferred from the key names only (they
# follow common PPO naming) — confirm against the trainer that reads them.
# Presumably the discount factor.
gamma: 0.996
# Presumably the GAE lambda. The key is spelled `lamda`; keep the spelling,
# since consumers look it up by this exact name.
lamda: 0.95
updates: 50000000
# NOTE(review): very large compared with `updates` — possibly chosen so that
# step-based saving never triggers; confirm.
save_model_steps: 100000000000
# eval_steps: 200
epochs: 10
n_workers: 16 # other values noted by the author: 64, 256
worker_steps: 512
n_mini_batch: 8
value_loss_coefficient: 0.5
hidden_layer_size: 128
max_grad_norm: 0.5
# Transformer model settings.
transformer:
    embed_per_obs_dim: 8
    num_blocks: 6
    embed_dim: 128
    num_heads: 8
    # Alternative value noted by the author: 32
    memory_length: 5
    # Options: "" | "relative" | "learned"
    positional_encoding: 'relative'
    # Options: "" | "pre" | "post"
    layer_norm: 'pre'
    gtrxl: true
    gtrxl_bias: 0.0
# Learning-rate schedule.
# `initial` equals `final`, so the rate is presumably constant regardless of
# `power` / `max_decay_steps` — confirm how the consumer interpolates.
learning_rate_schedule:
    initial: 3.0e-4
    final: 3.0e-4
    power: 1.0
    max_decay_steps: 200
# Schedule for `beta`.
# NOTE(review): presumably the entropy-bonus coefficient — confirm.
# `initial` is larger than `final`, so the value goes from 0.001 to 0.0001;
# presumably over `max_decay_steps` with the given `power` — confirm.
beta_schedule:
    initial: 0.001
    final: 0.0001
    power: 1.0
    max_decay_steps: 200
# Schedule for the clip range.
# `initial` equals `final`, so the value is presumably constant at 0.2 —
# confirm how the consumer interpolates.
clip_range_schedule:
    initial: 0.2
    final: 0.2
    power: 1.0
    max_decay_steps: 200