# AntMaze Medium-Play Dataset - AntMaze Environment
# MMD-QL configuration aligned with reference/offline.py diffusion hyperparameters

# Top-level run settings for this experiment: dataset/environment id, model
# selector, seed, device, log directory, and training/evaluation schedule.
env_name: antmaze-medium-play-v0
# NOTE(review): the file header calls this an "MMD-QL" configuration while the
# model key is `flow_ql` — confirm the two names refer to the same algorithm.
model: flow_ql
ms: offline
exp: 2
seed: 42
device: cuda
# Log directory; the path repeats the env_name value above.
dir: ./logs/antmaze-medium-play-v0
save_best_model: true
# NOTE(review): unit of eval_freq (epochs vs. steps) is not visible here —
# confirm against the training script.
eval_freq: 50
num_steps_per_epoch: 1000
num_epochs: 1000
batch_size: 256
lr_decay: true
early_stop: false
eval_episodes: 100
top_k: 1

# Model hyperparameters. Note the shape: this is a YAML *sequence* of
# single-key mappings (each `- key: value` item is its own one-entry map),
# not one flat mapping. Keep this shape unless the loader is confirmed to
# accept a plain mapping.
model_args:
  - model: MLP
  - lr: 0.00005
  - eta: 0.005
  - T: 0.9994
  - q_norm: true           # normalization is required for sparse-reward environments
  - max_q_backup: true
  - reward_tune: cql_antmaze
  - gn: 2.0
  # Adam optimizer parameter tuning
  - adam_beta1: 0.9            # standard Adam beta1
  - adam_beta2: 0.99           # slightly lower beta2, suited to sparse rewards

  - noise_schedule: "fm"
  - sigma_data: 0.2         # smaller scale; the sparse AntMaze environment needs fine-grained control
  - f_type: "euler_fm"
  - eps: 0.0001
  - temb_type: "identity"
  - time_scale: 1000.0
  - mmd_sigma: 0.8          # standard MMD kernel bandwidth
  - sample_t_mode: "lognormal"
  - P_mean: -0.8            # higher mean, suited to sparse-reward environments
  - P_std: 1.2              # lower variance, concentrates time sampling
  - matrix_size: 16
  - sample_repeat: 1
  - k: 8               # coarser granularity, for the sparse-reward AntMaze environment
  - a: 1.5             # moderate signal weight, for navigation tasks
  - b: 3               # lower bias, suited to sparse data
  # Explicit null: no value is set for this entry.
  - min_tr_gap: null