# Algorithm selection and the settings passed to it.
algorithm_config:
  share_param_critic: true
  clip_epsilon: 0.2
  entropy_coef: 0.0
  critic_coef: 1.0
  loss_critic_type: l2
  lmbda: 0.9
  # Quoted so the value is unmistakably a string identifier, not a number.
  scale_mapping: 'biased_softplus_1.0'
  use_tanh_normal: true
  minibatch_advantage: false
algorithm_name: mappo
continuous_actions: true
# Critic network settings (model type given by `critic_model_name`).
critic_model_config:
  num_cells: [256, 256]
  # These two values are Python class reprs, not importable paths. They are
  # quoted so every YAML parser keeps them as opaque strings.
  layer_class: "<class 'torch.nn.modules.linear.Linear'>"
  activation_class: "<class 'torch.nn.modules.activation.Tanh'>"
  # Written as `null`: a bare Python `None` is loaded by YAML as the
  # string "None", not as a null value.
  activation_kwargs: null
  norm_class: null
  norm_kwargs: null
  _is_critic: true
critic_model_name: mlp
environment_name: vmas

# Experiment-level settings. Unset options are written as `null`: a bare
# Python `None` is loaded by YAML as the string "None", not as a null value.
experiment_config:
  # Devices
  sampling_device: cpu
  train_device: cpu
  buffer_device: cpu

  share_policy_params: true
  prefer_continuous_actions: true
  collect_with_grad: false
  parallel_collection: false

  # Optimisation
  gamma: 0.99
  # Exponent floats carry an explicit decimal point: YAML 1.1 loaders
  # (e.g. PyYAML) read `5e-05` as a string rather than a float.
  lr: 5.0e-05
  adam_eps: 1.0e-06
  clip_grad_norm: true
  clip_grad_val: 5.0
  soft_target_update: true
  polyak_tau: 0.005
  hard_target_update_frequency: 5

  # Exploration
  exploration_eps_init: 0.8
  exploration_eps_end: 0.01
  exploration_anneal_frames: null

  # Run length
  max_n_iters: null
  max_n_frames: 3000000

  # on_policy_* settings
  on_policy_collected_frames_per_batch: 6000
  on_policy_n_envs_per_worker: 10
  on_policy_n_minibatch_iters: 45
  on_policy_minibatch_size: 400

  # off_policy_* settings
  off_policy_collected_frames_per_batch: 6000
  off_policy_n_envs_per_worker: 10
  off_policy_n_optimizer_steps: 1000
  off_policy_train_batch_size: 128
  off_policy_memory_size: 1000000
  off_policy_init_random_frames: 0
  off_policy_use_prioritized_replay_buffer: false
  off_policy_prb_alpha: 0.6
  off_policy_prb_beta: 0.4

  # Evaluation
  evaluation: true
  render: true
  evaluation_interval: 120000
  evaluation_episodes: 1
  evaluation_deterministic_actions: true
  evaluation_static: false

  # Logging
  loggers: [csv]
  project_name: benchmarl
  create_json: true

  # Saving, restoring and checkpointing
  save_folder: null
  restore_file: null
  restore_map_location: null
  checkpoint_interval: 0
  checkpoint_at_end: false
  keep_checkpoints_num: 3
# Model network settings (model type given by `model_name`).
model_config:
  num_cells: [256, 256]
  # These two values are Python class reprs, not importable paths. They are
  # quoted so every YAML parser keeps them as opaque strings.
  layer_class: "<class 'torch.nn.modules.linear.Linear'>"
  activation_class: "<class 'torch.nn.modules.activation.Tanh'>"
  # Written as `null`: a bare Python `None` is loaded by YAML as the
  # string "None", not as a null value.
  activation_kwargs: null
  norm_class: null
  norm_kwargs: null
  _is_critic: false
model_name: mlp
on_policy: true
seed: 0
# Settings for the task named by `task_name`.
task_config:
  max_steps: 100
  n_agents: 4
  random_package_pos_on_line: true
  package_mass: 5.0
task_name: balance