usage: train_mpe_trsyn.py [-h] [--algorithm_name ALGORITHM_NAME]
                          [--experiment_name EXPERIMENT_NAME] [--seed SEED]
                          [--cuda] [--cuda_deterministic]
                          [--n_training_threads N_TRAINING_THREADS]
                          [--n_rollout_threads N_ROLLOUT_THREADS]
                          [--n_eval_rollout_threads N_EVAL_ROLLOUT_THREADS]
                          [--n_render_rollout_threads N_RENDER_ROLLOUT_THREADS]
                          [--num_env_steps NUM_ENV_STEPS]
                          [--user_name USER_NAME] [--use_wandb]
                          [--env_name ENV_NAME] [--use_obs_instead_of_state]
                          [--episode_length EPISODE_LENGTH] [--share_policy]
                          [--use_centralized_V]
                          [--stacked_frames STACKED_FRAMES]
                          [--use_stacked_frames] [--hidden_size HIDDEN_SIZE]
                          [--layer_N LAYER_N] [--use_ReLU] [--use_popart]
                          [--use_valuenorm] [--use_feature_normalization]
                          [--use_orthogonal] [--gain GAIN]
                          [--use_naive_recurrent_policy]
                          [--use_recurrent_policy] [--recurrent_N RECURRENT_N]
                          [--data_chunk_length DATA_CHUNK_LENGTH] [--lr LR]
                          [--critic_lr CRITIC_LR] [--opti_eps OPTI_EPS]
                          [--weight_decay WEIGHT_DECAY]
                          [--ppo_epoch PPO_EPOCH] [--use_clipped_value_loss]
                          [--clip_param CLIP_PARAM]
                          [--num_mini_batch NUM_MINI_BATCH]
                          [--entropy_coef ENTROPY_COEF]
                          [--value_loss_coef VALUE_LOSS_COEF]
                          [--use_max_grad_norm]
                          [--max_grad_norm MAX_GRAD_NORM] [--use_gae]
                          [--gamma GAMMA] [--gae_lambda GAE_LAMBDA]
                          [--use_proper_time_limits] [--use_huber_loss]
                          [--use_value_active_masks]
                          [--use_policy_active_masks]
                          [--huber_delta HUBER_DELTA] [--use_linear_lr_decay]
                          [--save_interval SAVE_INTERVAL]
                          [--log_interval LOG_INTERVAL] [--use_eval]
                          [--eval_interval EVAL_INTERVAL]
                          [--eval_episodes EVAL_EPISODES] [--save_gifs]
                          [--use_render] [--render_episodes RENDER_EPISODES]
                          [--ifi IFI] [--model_dir MODEL_DIR]
                          [--output_use_tanh] [--action_scale ACTION_SCALE]
                          [--std_seperated] [--std_fixed]
                          [--policy_use_same_init] [--action_use_clip]
                          [--trsyn_use_refine] [--team_use_entropy]
                          [--idv_clip_use_adv] [--idv_clip_use_min]
                          [--trsyn_use_imp] [--CMT] [--idv_clip_update_refine]
                          [--idv_clip_flag IDV_CLIP_FLAG]
                          [--idv_clip_flag_refine IDV_CLIP_FLAG_REFINE]
                          [--idv_clip_use_time IDV_CLIP_USE_TIME]
                          [--idv_kl_use_time IDV_KL_USE_TIME]
                          [--team_kl_use_time TEAM_KL_USE_TIME]
                          [--idv_kl_coef_use_exp] [--team_kl_coef_use_exp]
                          [--idv_critic_ratio IDV_CRITIC_RATIO]
                          [--idv_critic_end_ratio IDV_CRITIC_END_RATIO]
                          [--idv_critic_episode IDV_CRITIC_EPISODE]
                          [--team_critic_ratio TEAM_CRITIC_RATIO]
                          [--team_critic_end_ratio TEAM_CRITIC_END_RATIO]
                          [--team_critic_episode TEAM_CRITIC_EPISODE]
                          [--ep_adv_surgery] [--ep_adv_use_ratio]
                          [--ir_no_contain_tr] [--reward_only_positive]
                          [--scenario_name SCENARIO_NAME]
                          [--num_landmarks NUM_LANDMARKS]
                          [--num_agents NUM_AGENTS]
                          [--num_good_agents NUM_GOOD_AGENTS]
                          [--num_adversaries NUM_ADVERSARIES]
                          [--collaborative] [--reward_shaping]
                          [--agent_view_radius AGENT_VIEW_RADIUS]
                          [--use_partial_obs] [--rew_bound]
                          [--game_mode GAME_MODE] [--discrete_action]
                          [--wandb_group WANDB_GROUP]
                          [--wandb_exp_name WANDB_EXP_NAME]
                          [--wandb_project WANDB_PROJECT]
                          [--eval_episode_length EVAL_EPISODE_LENGTH]
                          [--change_reward]
                          [--change_reward_episode CHANGE_REWARD_EPISODE]
                          [--change_use_policy CHANGE_USE_POLICY]
                          [--entropy_end_coef ENTROPY_END_COEF]
                          [--entropy_change_episode ENTROPY_CHANGE_EPISODE]
                          [--idv_use_shared_obs]
                          [--idv_clip_ratio IDV_CLIP_RATIO]
                          [--idv_end_clip_ratio IDV_END_CLIP_RATIO]
                          [--idv_clip_episodes IDV_CLIP_EPISODES]
                          [--team_clip_ratio TEAM_CLIP_RATIO]
                          [--team_end_clip_ratio TEAM_END_CLIP_RATIO]
                          [--team_clip_episodes TEAM_CLIP_EPISODES]
                          [--idv_use_two_clip] [--idv_use_kl_loss]
                          [--idv_use_cross_entropy]
                          [--idv_kl_coef IDV_KL_COEF]
                          [--idv_kl_end_coef IDV_KL_END_COEF]
                          [--idv_kl_episodes IDV_KL_EPISODES]
                          [--init_alpha INIT_ALPHA] [--alpha_lr ALPHA_LR]
                          [--alpha_g_clip ALPHA_G_CLIP]
                          [--alpha_clip ALPHA_CLIP] [--team_use_clip]
                          [--team_use_kl_loss] [--team_use_cross_entropy]
                          [--team_kl_coef TEAM_KL_COEF]
                          [--team_kl_end_coef TEAM_KL_END_COEF]
                          [--team_kl_episodes TEAM_KL_EPISODES]
                          [--idv_kl_loss_use_present]
                          [--team_kl_loss_use_present]
                          [--idv_clip_use_present] [--team_clip_use_present]
                          [--scenario_has_diff_rewards] [--sparse_reward]
                          [--agent_policy AGENT_POLICY]
train_mpe_trsyn.py: error: argument --init_alpha: invalid float value: '1--idv_clip_use_min'
