mode:train
env_name:ControlSlideEnv
lr_actor:0.0003
lr_critic:0.001
random_seed:0
action_std:0.4
action_std_decay_rate:0.04
min_action_std:0.01
action_std_decay_freq:2500
K_epochs:80
max_episode_length:10
max_training_epochs:5000000
ATTACK:False
attack_method:black
ls:1.0
p:0.5
rs_piece:32
ra_piece:32
attack_target_model:./TargetModel/
delta:0.05
isWeak:False
multiples_of_v:4
lrs:1
describe:PPO正常训练ControlSlideEnv
