algorithm_name: escher  # Name of the algorithm.
iters: 100000000  # Number of iterations.
# Written with an explicit mantissa dot: YAML 1.1 resolvers (e.g. PyYAML)
# load a bare `1e-3` as the string "1e-3" rather than a float.
learning_rate: 1.0e-3  # Learning rate.
num_traversals: 100  # Number of traversals per iteration.
num_val_fn_traversals: 100  # Number of valuation function traversals.
eval_every: 1  # Evaluate every n iterations.
num_random_games: 100  # Number of episodes to sample from the policy.
regret_train_steps: 50  # Regret network training steps.
val_train_steps: 50  # Value network training steps.
policy_net_train_steps: 100  # Policy network training steps.
batch_size_regret: 2048  # Batch size for training regret network.
batch_size_val: 2048  # Batch size for training value network.
train_device: "cpu"  # Training device (cpu or cuda).
infer_device: "cpu"  # Inference device (cpu or cuda).
# NOTE(review): the two settings below are not documented in this file;
# the descriptions are assumptions — confirm against the consuming code.
num_cpus: 8  # Presumably the number of CPUs made available to the run.
num_workers: 8  # Presumably the number of parallel worker processes.
