# Hydra defaults list. `_self_` marks where this file's own keys are placed in
# the composition order; it is the only entry, so nothing else is composed in.
defaults:
  - _self_

# Environment / player settings
model_name: dp
# adv_num + agent_num = 2, which matches the two entries in state_dim_list and
# player_list below (presumably adversaries vs. controlled agents - confirm).
adv_num: 1
agent_num: 1
traj_length: 6
action_dim: 4
act_steps: 1
obs_steps: 2 # prev + current
# One entry per player in player_list (presumably the per-player state size - confirm).
state_dim_list: [72, 72]
player_list: [agent_0, agent_1]
# player_type names an entry of player_list; player_type_idx is its position
# there (agent_0 is index 0). Keep the two in sync when changing either.
player_type: agent_0
player_type_idx: 0

# Runtime: environment name, compute device, and random seed
env_name: holdem
# Plain string: the colon is not followed by a space, so YAML does not read it
# as a nested mapping.
device: cuda:0
seed: 0

# Paths (relative; presumably resolved against the working directory - confirm)
data_path: ./data
emb_path: ./model/_weight
# NOTE(review): `strength` is a number, not a path, yet sits in this section;
# its meaning is not visible from this file - confirm against the consumer.
strength: 500

# Training
gradient_steps: 1000
sample_steps: 15
batch_size: 256
diffusion_x: true
extra_sample_steps: 5
# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) only
# recognise a float when a `.` is present, so a bare `5e-4` would be loaded as
# the string "5e-4" by such tools. `5.0e-4` is a float under every loader.
lr: 5.0e-4
temperature: 1.0
# Evaluation / logging / checkpoint intervals (presumably counted in gradient
# steps - confirm against the training loop).
eval_freq: 100
log_freq: 100
save_freq: 100

# Model hyperparameters
model:
  # Both schedule keys are currently set to the same value; which one the model
  # code actually reads is not visible from this file.
  noise_schedule: cosine
  beta_schedule: cosine

  # network params
  latent_dim: 32
  hidden_dim: 256
  # Symmetric upper/lower bounds (presumably a clipping range for x - confirm).
  x_max: 3.0
  x_min: -3.0

# Inference / evaluation
eval:
  # Boolean flag; allowed values: true | false. Written in canonical lowercase
  # form, consistent with `diffusion_x: true` in the training section.
  IsAgent: true
  episodes: 100
  rounds: 10

  # The numbers below are presumably the checkpoint steps that have been used
  # for this path (model_<step>.pth): 4500, 10000, 1000 - confirm which exist.
  nfsp_path: ./env/holdem/result/model_1000.pth
  gif_path: ./evaluation/videos/holdem/
  result_path: ./evaluation/result

# Hydra runtime settings
hydra:
  run:
    # Use the current directory as the run directory rather than a separate
    # per-run output directory.
    dir: .
  # null turns off the per-run Hydra config subdirectory (named `.hydra` by default).
  output_subdir: null
