# Self-play experiment configurations, keyed by experiment name.
# Each entry names a game and an agent and lists that run's settings.
self_play_experiments:
  # Tic-tac-toe, LGAN_DQN agent: d_model 512, 4 heads, k_embedding_dim 24,
  # 3 layers, mean average_func.
  TTT_LGAN_DQN_24_mean:
    game: tic_tac_toe
    agent: LGAN_DQN
    gamma: 0.9
    batch_size: 32
    train_group_size: 1000
    num_layers: 3
    epsilon_start: 1
    epsilon_decay_duration: 20000
    epsilon_end: 0.1
    buffer_size: 10000
    d_model: 512
    num_heads: 4
    k_embedding_dim: 24
    # Set explicitly so the entry matches its `_mean` suffix, the same way
    # the other `*_mean` entries in this file do.
    average_func: mean
    lr: 0.0003
  # 8x8 Breakthrough, LGAN_A2C agent: d_model 512, 4 heads,
  # k_embedding_dim 24, 1 layer, mean average_func.
  # NOTE(review): agent is LGAN_A2C, yet this entry only carries epsilon_*,
  # buffer_size and target_net_update_interval keys and none of the
  # lam / entropy_coef / value_loss_coef keys that Go_LGAN_A2C_24_mean sets.
  # Confirm which keys the LGAN_A2C agent actually reads.
  Breakthrough_LGAN_A2C_512_4_24_mean:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_A2C
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 1
    k_embedding_dim: 24
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_A2C agent: d_model 512, 4 heads,
  # k_embedding_dim 16, 1 layer, mean average_func.
  # NOTE(review): agent is LGAN_A2C, yet this entry only carries epsilon_*,
  # buffer_size and target_net_update_interval keys and none of the
  # lam / entropy_coef / value_loss_coef keys that Go_LGAN_A2C_24_mean sets.
  # Confirm which keys the LGAN_A2C agent actually reads.
  Breakthrough_LGAN_A2C_512_4_16_mean:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_A2C
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 1
    k_embedding_dim: 16
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 2 heads,
  # k_embedding_dim 16, 1 layer, mean average_func.
  Breakthrough_LGAN_DQN_512_2_16_mean:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 2
    num_layers: 1
    k_embedding_dim: 16
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 16, 1 layer, mean average_func.
  Breakthrough_LGAN_DQN_512_4_16_mean:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 1
    k_embedding_dim: 16
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 24, 1 layer, mean average_func.
  Breakthrough_LGAN_DQN_512_4_24_mean:
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    train_group_size: 5000
    gamma: 0.99
    epsilon_start: 1
    epsilon_decay_duration: 1000000
    epsilon_end: 0.1
    batch_size: 128
    d_model: 512
    num_heads: 4
    average_func: mean
    buffer_size: 100000
    lr: 0.001
    k_embedding_dim: 24
    # 1 layer, as in the other non-3D Breakthrough LGAN entries. The
    # Breakthrough_LGAN_DQN_512_4_24_3D_mean entry is the num_layers: 3
    # variant; with 3 here the two entries would be identical.
    num_layers: 1
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 16, 3 layers, mean average_func.
  Breakthrough_LGAN_DQN_512_4_16_3D_mean:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 3
    k_embedding_dim: 16
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 24, 3 layers, mean average_func.
  Breakthrough_LGAN_DQN_512_4_24_3D_mean:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 3
    k_embedding_dim: 24
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 16, 1 layer; average_func is not set.
  Breakthrough_LGAN_DQN_512_4_16:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 1
    k_embedding_dim: 16
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 2 heads,
  # k_embedding_dim 16, 1 layer; average_func is not set.
  Breakthrough_LGAN_DQN_512_2_16:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 2
    num_layers: 1
    k_embedding_dim: 16
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 16, 3 layers; average_func is not set.
  Breakthrough_LGAN_DQN_512_4_16_3D:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 3
    k_embedding_dim: 16
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 24, 1 layer; average_func is not set.
  Breakthrough_LGAN_DQN_512_4_24:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 1
    k_embedding_dim: 24
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 24, 3 layers; average_func is not set.
  Breakthrough_LGAN_DQN_512_4_24_3D:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 3
    k_embedding_dim: 24
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, LGAN_DQN agent: d_model 512, 4 heads.
  # num_layers, k_embedding_dim and average_func are not set here, and
  # epsilon_decay_duration is 100000.
  Breakthrough_LGAN_DQN_512_4:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 100000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, Masked_DQN agent: d_model 128, train_group_size 500.
  Breakthrough_Masked_DQN:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: Masked_DQN
    # Model settings
    d_model: 128
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 100000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 500
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, Unmasked_DQN agent: d_model 128, train_group_size 500.
  Breakthrough_Unmasked_DQN:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: Unmasked_DQN
    # Model settings
    d_model: 128
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 100000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 500
    train_interval: 10
    target_net_update_interval: 1000
  # 8x8 Breakthrough, A2C agent: d_model 128, lam 0.95,
  # entropy_coef 0.01, value_loss_coef 0.8.
  Breakthrough_A2C:
    # Game and agent selection
    game: breakthrough(rows=8,columns=8)
    agent: A2C
    # Model settings
    d_model: 128
    # Learning settings
    lr: 0.0003
    gamma: 0.99
    lam: 0.95
    batch_size: 128
    # Loss coefficients
    entropy_coef: 0.01
    value_loss_coef: 0.8
    # Training intervals
    train_group_size: 1000

  # 7x7 Go (komi 4.5), LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 24; num_layers and average_func are not set.
  Go_LGAN_DQN_24_mlp:
    # Game and agent selection
    game: go(board_size=7,komi=4.5)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    k_embedding_dim: 24
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.0003
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 7x7 Go (komi 4.5), LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 24, 4 layers, mean average_func, lr 0.0003.
  # NOTE(review): apart from lr (0.0003 here, 0.001 there) this matches
  # Go_LGAN_DQN_24_mean, including num_layers: 4 — confirm what the `3D`
  # suffix is meant to vary.
  Go_LGAN_DQN_24_mean_3D:
    # Game and agent selection
    game: go(board_size=7,komi=4.5)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 4
    k_embedding_dim: 24
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.0003
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 7x7 Go (komi 4.5), LGAN_DQN agent: d_model 512, 4 heads,
  # k_embedding_dim 24, 4 layers, mean average_func, lr 0.001.
  Go_LGAN_DQN_24_mean:
    # Game and agent selection
    game: go(board_size=7,komi=4.5)
    agent: LGAN_DQN
    # Model settings
    d_model: 512
    num_heads: 4
    num_layers: 4
    k_embedding_dim: 24
    average_func: mean
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
  # 7x7 Go (komi 4.5), LGAN_A2C agent: d_model 512, 4 heads,
  # k_embedding_dim 24, mean average_func; num_layers is not set.
  Go_LGAN_A2C_24_mean:
    # Game and agent selection
    game: go(board_size=7,komi=4.5)
    agent: LGAN_A2C
    # Model settings
    d_model: 512
    num_heads: 4
    k_embedding_dim: 24
    average_func: mean
    # Learning settings
    lr: 0.0001
    gamma: 0.99
    lam: 0.95
    batch_size: 128
    # Loss coefficients
    entropy_coef: 0.1
    value_loss_coef: 0.5
    # Training intervals
    train_group_size: 5000
  # 7x7 Go (komi 4.5), RES_A2C agent: d_model 128, lam 0.95,
  # entropy_coef 0.1, value_loss_coef 0.5.
  Go_RES_A2C:
    # Game and agent selection
    game: go(board_size=7,komi=4.5)
    agent: RES_A2C
    # Model settings
    d_model: 128
    # Learning settings
    lr: 0.0001
    gamma: 0.99
    lam: 0.95
    batch_size: 128
    # Loss coefficients
    entropy_coef: 0.1
    value_loss_coef: 0.5
    # Training intervals
    train_group_size: 1000
  # 7x7 Go (komi 4.5), RES_DQN agent: d_model 128, lr 0.0001.
  Go_RES_DQN:
    # Game and agent selection
    game: go(board_size=7,komi=4.5)
    agent: RES_DQN
    # Model settings
    d_model: 128
    # Epsilon settings
    epsilon_start: 1
    epsilon_end: 0.1
    epsilon_decay_duration: 1000000
    # Learning settings
    lr: 0.0001
    gamma: 0.99
    batch_size: 128
    buffer_size: 100000
    # Training intervals
    train_group_size: 5000
    train_interval: 10
    target_net_update_interval: 1000
