---
# Connect Four Lesson 1

# Opponent to train against. One of:
#   'random' - random agent
#   'weak'   - weak opponent
#   'strong' - strong opponent
#   'self'   - self-play
opponent: random

# Size of opponent pool for self-play. Explicitly null (unset) in this lesson.
opponent_pool_size: null

# Epoch frequency to update opponent pool. Explicitly null (unset) in this
# lesson.
opponent_upgrade: null

# Accepted values: 'random', 'weak' or 'strong'. Explicitly null (unset) in
# this lesson.
eval_opponent: null

# Path to pretrained model weights. Explicitly null (unset) in this lesson.
pretrained_path: null

# Path to save trained model.
save_path: ./env/Connect4Env/weight/lesson1_agent.pt

# Maximum number of training episodes in environment.
# NOTE(review): 0 requests no environment training episodes; presumably this
# lesson relies only on the agent warm-up configured further down - confirm.
max_train_episodes: 0

# Game-specific settings

# Whether to fill the replay buffer with random experiences.
buffer_warm_up: true

# Difficulty level of the warm-up experiences.
warm_up_opponent: random

# Number of epochs to warm up the agent by training on random experiences.
agent_warm_up: 30000

# How many times more likely to block vertically.
block_vert_coef: 4

# Reward value assigned to each outcome, keyed by outcome name.
rewards:
  win: 1
  vertical_win: 0.7
  three_in_row: 0.05
  opp_three_in_row: -0.05
  lose: -1
  play_continues: 0
