# src/benchrl/configs/algorithm/pbac.yaml
# Configuration for PAC-Bayesian Actor-Critic (PBAC)
# Based on Tasdighi et al. "Deep Exploration with PAC-Bayes"

# Algorithm identifier, followed by the dotted path of the implementing class.
# NOTE(review): `_target_` looks like a Hydra-style instantiation key —
# confirm against the config loader.
name: 'pbac'
_target_: 'benchrl.algorithms.pbac.PBAC'

# Binds each network role of the algorithm to a named architecture.
model_bindings:
  actor_network: 'multi_headed_mlp'
  critic_network: 'mlp'

# General RL hyperparameters
# NOTE(review): this file does not state whether the step-based values below
# count environment steps or gradient updates — confirm in the training loop.
total_timesteps: 1000000  # presumably the overall training budget — TODO confirm
buffer_size: 100000  # presumably the replay-buffer capacity — TODO confirm
learning_starts: 5000  # presumably steps collected before updates begin — TODO confirm
batch_size: 256
gamma: 0.99  # presumably the discount factor — TODO confirm
tau: 0.005  # Polyak averaging coefficient
train_freq: 1
gradient_steps: 1
policy_frequency: 2  # Update the actor every N steps (TD3-style delayed update)

# PBAC-specific hyperparameters (from paper)
n_critics: 10  # Number of critics in the ensemble
posterior_sampling_rate: 5  # Resample the actor head every N steps
bootstrap_rate: 0.05  # Bootstrap masking rate for the PAC-Bayes loss
# NOTE(review): presumably the variance of the PAC-Bayes prior — confirm
# against the PBAC implementation.
prior_variance: 5.0

# Learning rates for the actor and critic optimizers.
# Written with an explicit decimal point on purpose: YAML 1.1 loaders such as
# PyYAML only resolve scientific notation as a float when the mantissa
# contains a ".", so a bare `3e-4` would be loaded as the string "3e-4".
actor_lr: 3.0e-4
critic_lr: 3.0e-4

# Entropy coefficient: the string 'auto' selects automatic tuning; a float
# value may be given instead.
ent_coeff: 'auto'
# When null, -dim(action_space) is used as the target entropy.
target_entropy: null
