---
# Connect Four Lesson 3
#
# Training / opponent settings.

# Opponent to train against. One of:
#   'random' - random agent
#   'weak'   - weak opponent
#   'strong' - strong opponent
#   'self'   - self-play
opponent: strong

# Size of opponent pool for self-play.
# Explicit null (same parsed value as the former bare empty entry).
# NOTE(review): presumably only read when opponent is 'self' - confirm.
opponent_pool_size: null

# Epoch frequency to update opponent pool.
# Explicit null (same parsed value as the former bare empty entry).
# NOTE(review): presumably only read when opponent is 'self' - confirm.
opponent_upgrade: null

# Opponent used for evaluation: 'random', 'weak' or 'strong'.
eval_opponent: strong

# Path to pretrained model weights (by filename, the Lesson 2 agent).
pretrained_path: ./env/Connect4Env/weight/lesson2_agent.pt

# Path to save trained model.
save_path: ./env/Connect4Env/weight/lesson3_agent.pt

# Maximum number of training episodes in environment.
max_train_episodes: 200000

## Game specific:

# Whether to fill the replay buffer with random experiences before training.
buffer_warm_up: false

# Difficulty level of warm up experiences.
# Explicit null (same parsed value as the former bare empty entry).
# NOTE(review): presumably ignored while buffer_warm_up is false - confirm.
warm_up_opponent: null

# Number of epochs to warm up agent by training on random experiences.
agent_warm_up: 0

# How many times more likely to block vertically.
block_vert_coef: 1
# Reward values keyed by game outcome/event.
# NOTE(review): per-key meanings below are inferred from the key names only;
# confirm against the code that consumes this mapping.
rewards: # Rewards for different outcomes
  # Win/lose entries use magnitude 1; the three-in-a-row entries use a much
  # smaller magnitude (0.02) with opposite signs for agent vs. opponent.
  win: 1
  # Same value as `win` in this lesson - presumably a win via a vertical line.
  vertical_win: 1
  # Presumably applied when the agent forms three in a row - TODO confirm.
  three_in_row: 0.02
  # Presumably applied when the opponent forms three in a row - TODO confirm.
  opp_three_in_row: -0.02
  lose: -1
  # Zero reward; presumably used for moves after which the game continues.
  play_continues: 0
