# @package _global_

# Experiment-level settings: run identity, training loop, logging, data caching,
# and evaluation.
exp:
  name: gift_sac_dr_test  # Descriptive run name (e.g., based on DR/RECOVER/DIFF settings)
  mode: train  # One of 'train', 'eval', 'plan'
  seed: 42
  gpus: 1  # Number of GPUs (0 for CPU)
  max_epochs: 10  # Adjust to the desired training length (original setup: 8000 iterations)
  # Early-stopping patience.
  # NOTE(review): patience (100) is larger than max_epochs (10); if patience is
  # counted in epochs, early stopping can never trigger - confirm this is intended.
  patience: 100
  batch_size: 1024  # Training batch size
  log_freq: 1000  # Log every N steps
  # Intended meaning: run validation every epoch.
  # NOTE(review): if this value is forwarded verbatim to a PyTorch Lightning
  # Trainer as `val_check_interval`, the integer 1 means "every training batch",
  # whereas the float 1.0 means "once per epoch" - confirm how the consumer uses it.
  val_check_interval: 1
  logging: false
  logger_type: tensorboard  # 'tensorboard' or 'mlflow'
  mlflow_uri: 'http://localhost:5000'  # Only relevant when logger_type is mlflow
  # processed_data_dir: data/processed/cip/ # Relative path from CWD where processed pkl lives
  load_data: false  # Whether to load pre-processed data
  save_processed_data: false  # Whether to save the dataset if it is processed during the run
  evaluation_mode: 'cip'  # Evaluation mode ('cip' or other)
  eval_episodes: 2000  # Number of samples for evaluation
  run_planning: false
  tau: 6  # Prediction horizon length

# Model settings for the GIFT agent: data dimensions, optional attention,
# SAC hyperparameters, and HER (hindsight replay) hyperparameters.
model:
  # Dotted class path; presumably resolved by Hydra-style instantiation.
  _target_: src.gift.gift_agent.GiftAgent
  # Model identifier.
  # NOTE(review): an earlier comment said this includes the experiment name,
  # but the value is just `gift` - confirm which is intended.
  name: gift
  # Data dimension parameters
  output_dim: 2
  treatment_dim: 3
  static_dim: 44

  # Attention parameters
  use_attention: false
  attention_heads: 4
  attention_dropout: 0.1
  # hidden_dim: 72
  # SAC parameters
  sac_params:
    # lr: 3e-4
    beta: 0.005  # Soft update rate (renamed from tau)
    discount: 0.7
    alpha: 0.2
    use_automatic_entropy: false
    target_entropy: -2.0
    DR: true
    recover: false
    action_diff: false
    use_cql: false
  # HER parameters
  her_params:
    target_hit_ratio: 0.45
    buffer_size: 500000
    buffer_sample_size: 500000
    k_future: 5
    goal_threshold: 0.1
    reward_mode: 'binary'  # Options listed by the author: distance, binary
    min_history_length: 10
    max_history_length: 20
    future_length: 6  # Keep equal to the dataset horizon exp.tau (6 in this file)
    # NOTE(review): differs from exp.max_epochs (10) - confirm which one each
    # training loop actually reads.
    max_epochs: 30