_current_progress_remaining:
    value: 1
_custom_logger:
    value: "False"
_episode_num:
    value: 0
_last_episode_starts:
    value: '[ True]'
_last_obs:
    value: |-
        [[0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1.
          0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1.]]
_last_original_obs:
    value: None
_logger:
    value: <stable_baselines3.common.logger.Logger object at 0x3020c5450>
_n_updates:
    value: 0
_num_timesteps_at_start:
    value: 0
_total_timesteps:
    value: 1000000
_vec_normalize_env:
    value: None
_wandb:
    value:
        cli_version: 0.19.10
        m: []
        python_version: 3.10.14
        t:
            "1":
                - 1
                - 55
            "2":
                - 1
                - 55
            "3":
                - 13
                - 22
                - 23
                - 35
                - 55
            "4": 3.10.14
            "5": 0.19.10
            "8":
                - 5
                - 9
            "12": 0.19.10
            "13": darwin-arm64
action_noise:
    value: None
action_space:
    value: Discrete(6)
algo:
    value: PPO
batch_size:
    value: 64
clip_range:
    value: <function constant_fn.<locals>.func at 0x3020f2170>
clip_range_vf:
    value: None
device:
    value: cpu
ent_coef:
    value: 0
env:
    value: <stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv object at 0x30204ed70>
ep_info_buffer:
    value: deque([], maxlen=100)
ep_success_buffer:
    value: deque([], maxlen=100)
eval_env:
    value: None
gae_lambda:
    value: 0.95
gamma:
    value: 0.99
learning_rate:
    value: 0.0003
lr_schedule:
    value: <function constant_fn.<locals>.func at 0x30935cc10>
max_grad_norm:
    value: 0.5
n_envs:
    value: 1
n_epochs:
    value: 10
n_steps:
    value: 2048
normalize_advantage:
    value: "True"
num_timesteps:
    value: 0
observation_space:
    value: |-
        Box([0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
         0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
         1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.], (40,), float32)
policy:
    value: |-
        ActorCriticPolicy(
          (features_extractor): FlattenExtractor(
            (flatten): Flatten(start_dim=1, end_dim=-1)
          )
          (mlp_extractor): MlpExtractor(
            (shared_net): Sequential(
              (0): Linear(in_features=40, out_features=64, bias=True)
              (1): Tanh()
              (2): Linear(in_features=64, out_features=64, bias=True)
              (3): Tanh()
            )
            (policy_net): Sequential()
            (value_net): Sequential()
          )
          (action_net): Linear(in_features=64, out_features=6, bias=True)
          (value_net): Linear(in_features=64, out_features=1, bias=True)
        )
policy_class:
    value: <class 'stable_baselines3.common.policies.ActorCriticPolicy'>
policy_kwargs:
    value: '{''net_arch'': [64, 64]}'
rollout_buffer:
    value: <stable_baselines3.common.buffers.RolloutBuffer object at 0x30204cd30>
sde_sample_freq:
    value: -1
seed:
    value: 94
start_time:
    value: 1747936229605021000
target_kl:
    value: None
tensorboard_log:
    value: /Users/ebates/yawning_titan/agents/logs/tensorboard
use_sde:
    value: "False"
verbose:
    value: 0
vf_coef:
    value: 0.5
