# @package _global_

# Experiment-level settings: run identity, training loop knobs, logging, and evaluation.
exp:
  name: gift_scrl_test  # Descriptive run name (e.g., based on DR/RECOVER/DIFF settings)
  mode: train  # One of 'train', 'eval', 'plan'
  seed: 42
  gpus: 1  # Number of GPUs (0 for CPU)
  max_epochs: 10  # Adjust based on desired training length (original: 8000 iterations)
  patience: 100  # Early stopping patience
  batch_size: 2048  # Training batch size
  log_freq: 1000  # Log every N steps
  # NOTE(review): if this value is forwarded unchanged to a PyTorch Lightning
  # Trainer, an integer 1 means "validate every training batch", whereas once
  # per epoch is the float 1.0 — confirm how the consumer interprets it.
  val_check_interval: 1  # Intended: check validation every epoch
  logging: false
  logger_type: tensorboard  # 'tensorboard' or 'mlflow'
  mlflow_uri: 'http://localhost:5000'  # Only relevant when logger_type is 'mlflow'
  # processed_data_dir: data/processed/cip/ # Relative path from CWD where processed pkl lives
  load_data: false  # Whether to load pre-processed data
  save_processed_data: false  # Whether to save dataset if processed during run
  evaluation_mode: 'cip'  # Evaluation mode ('cip' or other)
  eval_episodes: 2000  # Number of samples for evaluation
  run_planning: false
  tau: 6  # Prediction horizon length

# Model settings: network sizes, agent selection, and per-algorithm hyperparameters.
model:
  name: scrl  # Model identifier

  # Network dimensions
  hiddens_sac:  # Used for Actor and SCRL Critic MLPs
    - 128
    - 128
    - 128
  hiddens_enc:  # Used for HistoryEncoder (LSTM dim)
    - 64

  baserl: SCRL  # Identifies which agent to use

  # Attention parameters (for HistoryEncoder)
  use_attention: false
  attention_heads: 4
  attention_dropout: 0.1

  # SCRL parameters (replaces sac_params)
  # Exponent-form floats carry an explicit ".0" mantissa so that YAML 1.1
  # loaders (e.g. PyYAML) resolve them as floats instead of strings.
  scrl_params:
    lr: 3.0e-4  # From SCRL paper (Table 2)
    discount: 0.7  # From SCRL paper (Table 2)
    use_data_aug: true  # For (D5), (default in scrl_agent.py)
    bc_reg_lambda: 1

  # HER parameters (re-used by SCRL_Agent)
  her_params:
    target_hit_ratio: 0.45
    buffer_size: 500000
    buffer_sample_size: 500000
    k_future: 5
    goal_threshold: 0.1
    reward_mode: 'binary'  # Options: 'distance' or 'binary'
    min_history_length: 10
    max_history_length: 20
    future_length: 6  # Match HER future sampling with dataset tau (exp.tau)
    # NOTE(review): differs from exp.max_epochs (10) — confirm which one applies.
    max_epochs: 30

  # NOTE(review): the scrl_params header says it replaces sac_params, yet this
  # section is still defined — confirm whether anything still reads it.
  sac_params:
    lr: 1.0e-3
    beta: 0.005  # Renamed from tau (soft update rate)
    discount: 0.7
    alpha: 0.2
    use_automatic_entropy: false
    target_entropy: -2.0
