policy:TD3
mode:train
env_name:CarFindFlagMEnv
seed:0
start_timesteps:2000
eval_freq:10000
max_timesteps:1000000000.0
expl_noise:0.3
batch_size:64
discount:0.99
tau:0.005
policy_noise:0.02
noise_clip:0.05
policy_freq:2
save_model:True
load_model:
directory:./results/CarFindFlagMEnv-run180/
max_episode_length:10
ATTACK:False
attack_method:black
ls:1.0
p:0.5
rs_piece:16
ra_piece:9
attack_target_model:./TargetModel/
delta:0.05
isWeak:False
lrs:1
multiples_of_v:4
describe:TD3 正常训练 CarFindFlagMEnv
