# @package _global_

# Experiment / run-level settings.
exp:
  mode: train  # One of 'train', 'eval', 'plan'
  name: null  # Unset here; explicit null instead of a bare empty value
  seed: 42
  gpus: 1  # Number of GPUs (0 for CPU)
  max_epochs: 10  # Adjust based on desired training length (original: 8000 iterations)
  # NOTE(review): patience (100) is larger than max_epochs (10); if patience is
  # counted in epochs, early stopping can never trigger - confirm the unit.
  patience: 100  # Early stopping patience
  batch_size: 1024  # Training batch size
  log_freq: 1000  # Log every N steps
  # NOTE(review): if this value is forwarded to PyTorch Lightning's
  # Trainer(val_check_interval=...), an int 1 means "after every training
  # batch"; "every epoch" would be the float 1.0 (or check_val_every_n_epoch: 1).
  # Confirm how the consumer interprets it before changing the value.
  val_check_interval: 1  # Check validation every epoch
  logging: false
  logger_type: tensorboard  # 'tensorboard' or 'mlflow'
  mlflow_uri: "http://localhost:5000"  # If using mlflow
  # processed_data_dir: data/processed/cip/ # Relative path from CWD where processed pkl lives
  load_data: false  # Whether to load pre-processed data
  save_processed_data: false  # Whether to save dataset if processed during run
  evaluation_mode: 'cip'  # Evaluation mode ('cip' or other)
  eval_episodes: 2000  # Number of samples for evaluation
  run_planning: false
  tau: 6  # Prediction horizon length

# Model settings: identifier, layer sizes, data dimensions and attention options.
model:
  # _target_: src.gift.gift_agent.GiftAgent
  name: gift  # Model identifier including experiment name
  hidden_dim: 72  # Hidden dimension size

  # One-element lists, written in flow style.
  # Presumably hidden-layer widths of the respective sub-networks - confirm
  # against the code that consumes them.
  hiddens_sac: [128]
  hiddens_enc: [64]
  hiddens_bhvr: [128]

  # Data dimension parameters
  output_dim: 1
  treatment_dim: 2
  static_dim: 1
  baserl: IQL

  # Attention parameters
  use_attention: false
  attention_heads: 4
  attention_dropout: 0.1

  # SAC parameters
  sac_params:
    # Written with a mantissa dot on purpose: a plain `1e-3` is resolved as a
    # string (not a float) by YAML 1.1 loaders such as PyYAML's safe_load.
    lr: 1.0e-3
    beta: 0.005  # Renamed from tau (soft update rate)
    discount: 0.9
    alpha: 0.2
    use_automatic_entropy: false
    target_entropy: -2.0
    DR: true
    recover: false
    action_diff: false
    use_cql: false

  # HER parameters
  her_params:
    target_hit_ratio: 0.8
    buffer_size: 500000
    buffer_sample_size: 500000
    k_future: 5
    # Mantissa dot keeps this a float under YAML 1.1 loaders; a plain `5e-3`
    # would be read as a string by PyYAML's safe_load.
    goal_threshold: 5.0e-3
    reward_mode: 'binary'  # Listed options: distance, binary
    min_history_length: 10
    # max_history_length: ${subtract:${dataset.max_seq_length},${model.her_params.future_length}}
    max_history_length: 15
    # Currently the same value as exp.tau (6) in this file.
    future_length: 6  # Match HER future sampling with dataset tau