from ml_collections import ConfigDict
from typing import Any, Mapping


def get_config(updates: Mapping[str, Any] | None = None) -> ConfigDict:
    """Build the default ReBRAC hyperparameter configuration.

    Args:
        updates: Optional mapping of overrides. When provided, it is wrapped
            in a ``ConfigDict``, its references are resolved, and the result
            is merged into the defaults via ``ConfigDict.update``.

    Returns:
        A ``ConfigDict`` holding the defaults defined below, with any
        overrides applied.
    """
    # Scalar defaults are seeded through the constructor; nested sections are
    # attached afterwards.
    cfg = ConfigDict(
        {
            # Standard algorithm parameters.
            "discount": 0.99,
            "soft_target_update_rate": 5e-3,
            # TD3-specific parameters (inherited by ReBRAC).
            "target_policy_noise": 0.2,  # target smoothing noise
            "target_noise_clip": 0.5,  # clipping applied to the target policy noise
            "exploration_noise": 0.1,  # exploration noise during training
            "policy_delay": 2,  # delayed policy updates
            # ReBRAC-specific parameters.
            "actor_bc_coef": 0.01,  # BC regularization coefficient for the actor
            "critic_bc_coef": 0.001,  # BC regularization coefficient for the critic
            "normalize_q": True,  # Q normalization for loss balancing
            # Twin critics (ReBRAC always uses exactly 2, same as TD3).
            "critic_ensemble_size": 2,
            "critic_subsample_size": None,
        }
    )

    # Network architectures: critic and policy MLPs use the same defaults.
    # A fresh dict (and list) is built per section so the two stay independent.
    for section in ("critic_network_kwargs", "policy_network_kwargs"):
        mlp_defaults = ConfigDict(
            dict(
                hidden_dims=[256, 256, 256],
                activate_final=True,
                use_layer_norm=True,  # often used for stability in offline RL
            )
        )
        setattr(cfg, section, mlp_defaults)

    # Policy head settings for deterministic ReBRAC.
    cfg.policy_kwargs = ConfigDict(
        dict(
            tanh_squash_distribution=True,  # standard for continuous control
            std_parameterization="fixed",  # ReBRAC uses a deterministic policy
            fixed_std=0.0,  # deterministic (no intrinsic noise)
        )
    )

    # Optimizers: actor and critic learning rates.
    cfg.actor_optimizer_kwargs = ConfigDict(dict(learning_rate=1e-3))
    cfg.critic_optimizer_kwargs = ConfigDict(dict(learning_rate=1e-3))

    # Whether to share the encoder between actor and critic.
    cfg.shared_encoder = True

    # Nothing to merge: hand back the defaults as-is.
    if updates is None:
        return cfg

    overrides = ConfigDict(updates).copy_and_resolve_references()
    cfg.update(overrides)
    return cfg
