from typing import Any


class Config:
    device: str = "auto"
    verbose: int = 0
    seed: int | None = None

    # PPO Configs
    learning_rate: float = 3e-4
    n_steps: int = 2048
    n_envs: int = 1
    batch_size: int = 64
    n_epochs: int = 10
    gamma: float = 0.99
    gae_lambda: float = 0.95
    clip_range: float = 0.2
    clip_range_vf: float | None = None
    normalize_advantage: bool = True
    ent_coef: float = 0.0
    vf_coef: float = 0.5
    max_grad_norm: float = 0.5
    tensorboard_log: str | None = None
    policy_kwargs: dict[str, Any] = {}

    # EAPO
    tau: float = 0.1
    c2: float = 0.5
    e_gamma: float | None = None
    e_lambda: float | None = None

    pop_art_beta: float = 3.0e-4
    handle_timeout: bool = True
    entropy_advantage: bool = True
    augmented_reward: bool = False
