# @package _global_

exp:
  # Run mode; documented choices: 'train', 'eval', 'plan'.
  mode: train
  # Experiment name.
  name: scrl_experiment
  seed: 42
  # Number of GPUs; 0 selects CPU.
  gpus: 1
  # Training length in epochs; adjust to the desired run duration.
  max_epochs: 10
  # Early-stopping patience.
  # NOTE(review): patience (100) is larger than max_epochs (10); if patience is
  # counted in epochs, early stopping can never trigger - confirm intent.
  patience: 100
  # Set to 2048 following the SCRL paper (D2, D5, F.13).
  batch_size: 2048
  # Emit a log entry every N steps.
  log_freq: 1000
  # Intended meaning: run validation every epoch.
  # NOTE(review): if this is forwarded to a PyTorch Lightning Trainer, an int
  # means "every N training batches" and a float 1.0 means "once per epoch" -
  # verify how the consumer interprets it.
  val_check_interval: 1
  logging: false
  # Logger backend: 'tensorboard' or 'mlflow'.
  logger_type: tensorboard
  # Tracking server address, relevant when using mlflow.
  mlflow_uri: 'http://localhost:5000'
  # Whether to load pre-processed data.
  load_data: false
  # Whether to save the dataset if it is processed during the run.
  save_processed_data: false
  # Evaluation mode ('cip' or other).
  evaluation_mode: cip
  # Number of samples used for evaluation.
  eval_episodes: 2000
  run_planning: false
  # Prediction horizon length.
  tau: 6

model:
  # Model identifier.
  name: scrl

  # Network dimensions
  # Hidden sizes used for the Actor and SCRL Critic MLPs.
  hiddens_sac: [128, 128, 128]
  # Hidden sizes used for HistoryEncoder (LSTM dim).
  hiddens_enc: [64]
  representation_dim: 32
  # Identifies which agent to use.
  baserl: SCRL

  # Attention parameters (for HistoryEncoder)
  use_attention: false
  attention_heads: 4
  attention_dropout: 0.1

  # SCRL parameters. The original note says these replace sac_params.
  # NOTE(review): a sac_params mapping is still defined later in this file -
  # confirm whether it is still read.
  scrl_params:
    # From the SCRL paper (Table 2). Written with an explicit mantissa dot so
    # YAML 1.1 loaders (e.g. PyYAML) resolve it as a float instead of the
    # string "3e-4"; the numeric value is unchanged.
    lr: 3.0e-4
    # From the SCRL paper (Table 2).
    discount: 0.9
    # For Eq. 4 (default in scrl_agent.py).
    bc_reg_lambda: 0.5
    # For (D5) (default in scrl_agent.py).
    use_data_aug: true

  # HER settings; these are re-used by SCRL_Agent.
  her_params:
    target_hit_ratio: 0.45
    buffer_size: 500000
    buffer_sample_size: 500000
    k_future: 5
    goal_threshold: 0.1
    # Listed choices: distance, binary.
    reward_mode: binary
    min_history_length: 10
    max_history_length: 20
    # Kept in line with the dataset tau so HER future sampling matches it.
    future_length: 6
    # NOTE(review): differs from exp.max_epochs (10) - confirm which value the
    # training loop actually uses.
    max_epochs: 30

  # SAC hyperparameters.
  # NOTE(review): the comment on scrl_params states that mapping replaces this
  # one - confirm whether sac_params is still consumed anywhere.
  sac_params:
    # Written with an explicit mantissa dot so YAML 1.1 loaders (e.g. PyYAML)
    # resolve it as a float instead of the string "1e-3"; same numeric value.
    lr: 1.0e-3
    # Soft update rate; renamed from tau.
    beta: 0.005
    discount: 0.9
    alpha: 0.2
    use_automatic_entropy: false
    target_entropy: -2.0