# @package _global_
# Hydra defaults list for this experiment config.
# Each entry uses the `override` keyword to replace an option that the primary
# config has already selected for that config group. The `group@package` form
# places the chosen option under the named package (e.g. `/data@dataset` puts
# the `tt_dataset` option of the `data` group under the `dataset` node).
# The keyword must be spelled `override`; Hydra does not recognise any other
# spelling as an override.
defaults:
  # - override /models: Encoder_v2
  - override /data@dataset: tt_dataset
  - override /loss@policy_loss: KLDivLoss
  - override /data@buffer: ReplayBuffer

# Top-level run settings. The code that reads these keys is not part of this
# file, so the notes below are assumptions inferred from key names and values;
# confirm them against the training script.
epochs: 1000
training_steps: 1000
# NOTE(review): presumably the minimum replay-buffer fill level before
# training begins - confirm.
start_training_buffer_size: 1000
batch_size: 128
# NOTE(review): `pretrained`, `path` and `model_name` look like they select a
# saved checkpoint to start from (presumably `<path>/<model_name>`) - confirm.
pretrained: true
# Points at a server on the local machine, port 5000 (presumably the MLflow
# tracking server - confirm); change this when running against a remote host.
mlflow_uri: http://localhost:5000
path: runs/pre_ShortCircuit_emb=256_heads=16_v0001/
model_name: model_270

max_nodes: 30
# The top-level `const_node` key is disabled; a key with the same name is set
# under `env:` further down in this file.
# const_node: True
# NOTE(review): presumably the random seed for the run - confirm which
# components actually consume it.
seed: 2024

# Data Collectors:
# NOTE(review): global and local world size are both 7 and the rank offset is
# 1 - presumably all collectors run on a single machine and rank 0 is reserved
# for another process (e.g. the trainer); confirm against the launcher code.
data_collectors_global_world_size: 7
data_collectors_local_world_size: 7
data_collectors_rank_offset: 1

# Settings grouped under the `env` node (presumably passed to the environment
# constructor - confirm).
env:
  # Same key name as the commented-out top-level `const_node` earlier in this
  # file.
  const_node: true
  reward_type: simple
  # NOTE(review): presumably the reward given for a failed or invalid outcome
  # - confirm how `reward_type` interacts with this value.
  negative_reward: -1.0

# Search settings grouped under the `AZ` node.
# NOTE(review): judging by the key names (`num_simulations`, `c_puct`,
# `dirichlet_alpha`) these look like AlphaZero-style tree-search parameters -
# confirm against the code that reads this node.
AZ:
  num_simulations: 64
  simulation_max_steps: 20 # 30 for chess
  c_puct: 2.0
  b_puct: 0.1
  dirichlet_alpha: 0.03
  use_value_network: true
  reutilize_tree: true

# Values set on the `dataset` node. This is the same package that the
# `/data@dataset` entry in the defaults list targets, so keys here sit
# alongside whatever that selected option provides.
dataset:
  path: data/truth_tables/extracted/8_inputs.txt # Example
