# Which environment to build (type / name / length) plus its reset_params
# mapping.
environment:
  type: "MinigridMemory"
  name: "MiniGrid-MemoryS13Random-v0"
  length: 21
  # The entries below must be indented beneath reset_params: written at the
  # same level as reset_params they leave it as null and become direct
  # children of `environment` instead.
  # NOTE(review): confirm that the environment loader reads these values from
  # environment.reset_params and not from environment directly.
  reset_params:
    start-seed: 0
    num-seeds: 100000
    agent_scale: 0.25
    cardinal_origin_choice: [0, 1, 2, 3]
    show_origin: false
    show_goal: false
    visual_feedback: true
    reward_goal: 1.0
    reward_fall_off: 0.0
    reward_path_progress: 0.0

# Run identification, logging backend, compute device and evaluation size.
project_name: "online_transformers"
type: "MinigridMemory"
log_name: "MinigridMemory/MinigridMemory/GTXL/ICLR_exp_2_paper_random"
logger: "tensorboard"
device: "cuda:0"
eval_episodes: 50

# Checkpointing switches.
# NOTE(review): the unit of save_model_frequency is not visible in this file —
# confirm against the code that reads it.
save_model: true
save_model_frequency: 3
save_best_model: true


# Optimisation hyperparameters, kept as flat top-level keys.
# NOTE(review): the key names suggest a PPO-style trainer (discount factor,
# GAE lambda, parallel workers, mini-batches) — confirm against the code that
# consumes this file before relying on that reading.
gamma: 0.995
# The key is spelled `lamda`, not `lambda`; presumably the consumer looks it
# up under this exact name, so do not "correct" the spelling here.
lamda: 0.95
updates: 100000000
epochs: 5
n_workers: 16
worker_steps: 512
n_mini_batch: 8
value_loss_coefficient: 0.5
hidden_layer_size: 128
max_grad_norm: 0.5
# Transformer model settings.
transformer:
  embed_per_obs_dim: 8
  num_blocks: 6
  embed_dim: 128
  num_heads: 8
  # 32 was noted next to this value (presumably an alternative setting).
  memory_length: 9
  # options: "" "relative" "learned"
  positional_encoding: "relative"
  # options: "" "pre" "post"
  layer_norm: "pre"
  gtrxl: true
  gtrxl_bias: 0.0
# Learning-rate schedule: starts at `initial` and ends at `final`.
# NOTE(review): `power` and `max_decay_steps` look like the exponent and
# horizon of a polynomial decay — confirm against the schedule implementation.
learning_rate_schedule:
  initial: 3.5e-4
  final: 1.0e-4
  power: 1.0
  max_decay_steps: 250
# Schedule for `beta`. `initial` and `final` are both 0.001, so the value
# stays constant if the schedule interpolates between the two.
# NOTE(review): beta is presumably the entropy-bonus coefficient — confirm.
beta_schedule:
  initial: 0.001
  final: 0.001
  power: 1.0
  max_decay_steps: 10000
# Schedule for the clip range. `initial` and `final` are both 0.1, so the
# value stays constant if the schedule interpolates between the two.
# NOTE(review): confirm how `power` and `max_decay_steps` are applied.
clip_range_schedule:
  initial: 0.1
  final: 0.1
  power: 1.0
  max_decay_steps: 10000