{"episode_reward_max": -20.3406, "episode_reward_min": -21.15109999999999, "episode_reward_mean": -20.69086125, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.7298, -20.83, -20.635099999999994, -20.73629999999999, -20.7565, -20.973900000000004, -20.655800000000003, -20.6964, -20.8288, -20.739700000000006, -20.668499999999995, -20.40630000000001, -20.846800000000005, -20.379000000000005, -20.718299999999996, -20.371899999999997, -20.421899999999997, -20.7872, -20.618100000000002, -20.874700000000004, -20.500000000000004, -20.580199999999998, -20.615599999999997, -20.570299999999996, -20.626699999999996, -20.50770000000001, -20.535200000000003, -20.617499999999993, -20.683199999999996, -20.6885, -20.6126, -20.722299999999997, -20.648599999999995, -20.645300000000006, -20.550399999999993, -20.7104, -20.6328, -20.947399999999995, -20.9246, -20.752499999999998, -20.785199999999993, -21.15109999999999, -20.511, -20.7126, -20.596600000000002, -20.633499999999998, -20.8748, -20.433200000000003, -20.8718, -20.628500000000003, -20.646700000000003, -20.960500000000007, -20.710799999999995, -20.5978, -20.636800000000008, -20.955500000000004, -20.833899999999993, -20.923299999999998, -20.629599999999996, -20.3406, -20.805600000000005, -20.8593, -20.369499999999995, -20.840700000000002, -20.64409999999999, -20.422099999999997, -20.6245, -20.935200000000002, -20.704399999999996, -20.899900000000006, -20.589400000000005, -20.6231, -20.615800000000004, -20.629500000000007, -20.635499999999997, -20.9527, -20.707900000000002, -20.896900000000002, -20.7916, -20.540599999999994], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.374498798739341, "mean_inference_ms": 146.50838883392095, "mean_action_processing_ms": 0.05156223370533709, "mean_env_wait_ms": 0.7651206166468331, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 4000, "agent_timesteps_total": 4000, "timers": {"sample_time_ms": 591276.663, "sample_throughput": 6.765, "learn_time_ms": 1294470.556, "learn_throughput": 3.09, "update_time_ms": 13.525}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.2, "cur_lr": 5e-05, "total_loss": 22.590385377407074, "policy_loss": -0.2513504714006558, "vf_loss": 22.837784230709076, "vf_explained_var": -5.869194865226746e-06, "kl": 0.019758088048547506, "entropy": 693.1238212585449, "entropy_coeff": 0.0}}}, "num_steps_sampled": 4000, "num_agent_steps_sampled": 4000, "num_steps_trained": 4000}, "done": false, "episodes_total": 80, "training_iteration": 1, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_16-18-33", "timestamp": 1618409913, "time_this_iter_s": 1885.7688109874725, "time_total_s": 1885.7688109874725, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 1885.7688109874725, "timesteps_since_restore": 0, "iterations_since_restore": 1, "perf": {"cpu_util_percent": 27.100185873605948, "ram_util_percent": 26.49791821561338}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.318900000000006, "episode_reward_min": -21.092699999999994, "episode_reward_mean": -20.659207000000002, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.805600000000005, -20.8593, -20.369499999999995, -20.840700000000002, -20.64409999999999, -20.422099999999997, -20.6245, -20.935200000000002, -20.704399999999996, -20.899900000000006, -20.589400000000005, -20.6231, -20.615800000000004, -20.629500000000007, -20.635499999999997, -20.9527, -20.707900000000002, -20.896900000000002, -20.7916, -20.540599999999994, -20.552200000000006, -21.046699999999998, -20.7691, -20.5135, -20.688899999999997, -20.735200000000003, -20.670300000000005, -20.6153, -20.646999999999995, -20.490199999999994, -20.345399999999998, -20.440500000000004, -20.483800000000002, -20.350400000000008, -20.599000000000007, -20.6843, -20.360999999999997, -20.603800000000003, -20.5153, -20.3914, -20.447799999999994, -20.889399999999995, -20.674400000000002, -20.699200000000005, -20.5629, -20.717000000000002, -20.701899999999995, -20.756700000000002, -20.704500000000007, -20.82960000000001, -20.657000000000004, -20.881800000000002, -20.5739, -20.5194, -20.5664, -20.929800000000004, -20.631299999999996, -20.6978, -20.784199999999995, -20.652799999999996, -20.547599999999996, -20.7387, -20.5217, -20.877, -20.609100000000005, -20.61830000000001, -20.6991, -20.759299999999996, -20.68650000000001, -20.830199999999998, -20.712100000000003, -20.919399999999996, -20.4196, -20.591399999999993, -20.8614, -20.686599999999988, -20.624100000000002, -20.606800000000003, -20.894499999999997, -21.092699999999994, -20.495100000000004, -20.8093, -20.842300000000005, -20.440500000000004, -20.318900000000006, -20.4424, -21.043400000000005, -20.721000000000004, -20.451800000000002, -20.6338, -20.555699999999998, -20.581999999999997, -20.6271, -20.491900000000005, -20.5921, -20.591499999999996, -20.9181, -20.631600000000002, -20.6085, -20.358199999999993], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3741286229627814, "mean_inference_ms": 146.5020659224579, "mean_action_processing_ms": 0.05188847513272108, "mean_env_wait_ms": 0.7848975202887803, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 8000, "agent_timesteps_total": 8000, "timers": {"sample_time_ms": 591240.345, "sample_throughput": 6.765, "learn_time_ms": 1294175.629, "learn_throughput": 3.091, "update_time_ms": 13.536}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.2, "cur_lr": 5e-05, "total_loss": 21.67828905582428, "policy_loss": -0.2622592162806541, "vf_loss": 21.93628066778183, "vf_explained_var": -3.548339009284973e-06, "kl": 0.021337210666388273, "entropy": 693.1142234802246, "entropy_coeff": 0.0}}}, "num_steps_sampled": 8000, "num_agent_steps_sampled": 8000, "num_steps_trained": 8000}, "done": false, "episodes_total": 160, "training_iteration": 2, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_16-49-58", "timestamp": 1618411798, "time_this_iter_s": 1885.1068572998047, "time_total_s": 3770.875668287277, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 3770.875668287277, "timesteps_since_restore": 0, "iterations_since_restore": 2, "perf": {"cpu_util_percent": 26.774637411677205, "ram_util_percent": 29.363108962439565}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.318900000000006, "episode_reward_min": -21.043400000000005, "episode_reward_mean": -20.643323, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.495100000000004, -20.8093, -20.842300000000005, -20.440500000000004, -20.318900000000006, -20.4424, -21.043400000000005, -20.721000000000004, -20.451800000000002, -20.6338, -20.555699999999998, -20.581999999999997, -20.6271, -20.491900000000005, -20.5921, -20.591499999999996, -20.9181, -20.631600000000002, -20.6085, -20.358199999999993, -20.5539, -20.637300000000003, -20.666999999999994, -20.716800000000003, -20.550800000000006, -20.43700000000001, -20.573000000000004, -20.8437, -20.754299999999994, -20.4801, -20.502399999999998, -20.922199999999997, -20.862599999999997, -20.335099999999997, -20.8511, -20.69330000000001, -20.547100000000004, -20.568, -20.6468, -20.65909999999999, -20.940499999999997, -20.6391, -20.698499999999996, -20.369600000000002, -20.613299999999995, -20.693699999999996, -20.4974, -20.637100000000007, -20.5074, -20.6019, -20.745900000000006, -21.0257, -20.6988, -20.435200000000002, -20.50400000000001, -20.861700000000003, -20.823299999999996, -20.380599999999998, -20.75729999999999, -20.739299999999986, -20.8841, -20.952900000000007, -20.605500000000003, -20.7388, -20.5042, -20.4249, -20.846, -20.679999999999996, -20.7959, -20.521500000000003, -20.75899999999999, -20.6825, -20.692899999999998, -20.4956, -20.4489, -20.410500000000003, -20.81049999999999, -20.5975, -20.6833, -20.556699999999992, -20.527900000000006, -20.816899999999997, -20.5888, -20.933900000000005, -20.9057, -20.620600000000003, -20.741499999999995, -20.361500000000007, -20.620000000000005, -20.613099999999992, -20.7326, -20.8811, -20.733399999999996, -20.630300000000005, -20.436199999999996, -20.544, -20.554099999999995, -20.6442, -20.695999999999998, -20.532199999999996], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37356781484519813, "mean_inference_ms": 146.5477077644274, "mean_action_processing_ms": 0.05196823143216485, "mean_env_wait_ms": 0.7894390497405601, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 12000, "agent_timesteps_total": 12000, "timers": {"sample_time_ms": 591440.376, "sample_throughput": 6.763, "learn_time_ms": 1293859.13, "learn_throughput": 3.092, "update_time_ms": 13.527}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": 21.640603840351105, "policy_loss": -0.26185741019435227, "vf_loss": 21.897608041763306, "vf_explained_var": 1.4049932360649109e-05, "kl": 0.01617751896264963, "entropy": 693.1126041412354, "entropy_coeff": 0.0}}}, "num_steps_sampled": 12000, "num_agent_steps_sampled": 12000, "num_steps_trained": 12000}, "done": false, "episodes_total": 240, "training_iteration": 3, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_17-21-23", "timestamp": 1618413683, "time_this_iter_s": 1885.0884997844696, "time_total_s": 5655.964168071747, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 5655.964168071747, "timesteps_since_restore": 0, "iterations_since_restore": 3, "perf": {"cpu_util_percent": 27.02792859799182, "ram_util_percent": 29.615991074748987}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.017300000000002, "episode_reward_min": -21.056099999999997, "episode_reward_mean": -20.655766, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.527900000000006, -20.816899999999997, -20.5888, -20.933900000000005, -20.9057, -20.620600000000003, -20.741499999999995, -20.361500000000007, -20.620000000000005, -20.613099999999992, -20.7326, -20.8811, -20.733399999999996, -20.630300000000005, -20.436199999999996, -20.544, -20.554099999999995, -20.6442, -20.695999999999998, -20.532199999999996, -20.869199999999996, -20.706899999999997, -20.726, -20.555699999999998, -20.716400000000007, -20.894500000000004, -20.9021, -20.6008, -20.790899999999997, -20.688599999999997, -20.649400000000004, -20.860899999999994, -20.5269, -20.7615, -20.771700000000003, -20.6459, -20.3379, -20.42890000000001, -20.972100000000005, -20.622600000000002, -20.589899999999997, -20.779700000000002, -20.6615, -20.538199999999996, -20.88140000000001, -20.906, -20.6228, -20.646599999999996, -21.056099999999997, -20.439900000000005, -20.325499999999998, -20.643299999999993, -20.829500000000007, -20.776200000000003, -20.405199999999997, -20.6962, -20.310399999999998, -20.855499999999996, -20.604699999999998, -20.9775, -20.869199999999996, -20.408200000000004, -20.460400000000003, -20.677699999999994, -20.8813, -20.5873, -20.6335, -20.52690000000001, -20.672300000000003, -20.458200000000005, -20.6341, -20.6178, -20.5564, -20.913500000000003, -20.833300000000005, -20.7383, -20.750000000000007, -20.8307, -20.4673, -20.4731, -20.3602, -20.865699999999993, -20.415399999999995, -20.627299999999995, -20.625300000000003, -20.757299999999997, -20.739900000000002, -20.767700000000005, -20.846400000000003, -20.426199999999994, -20.017300000000002, -20.7784, -20.24580000000001, -20.723399999999998, -20.57539999999999, -20.6998, -20.56, -20.60030000000001, -20.6571, -20.609200000000005], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37320632688421557, "mean_inference_ms": 146.55592611952568, "mean_action_processing_ms": 0.0519811229525525, "mean_env_wait_ms": 0.7930791746055864, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 16000, "agent_timesteps_total": 16000, "timers": {"sample_time_ms": 591423.992, "sample_throughput": 6.763, "learn_time_ms": 1293904.335, "learn_throughput": 3.091, "update_time_ms": 13.568}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": 0.8834251258522272, "policy_loss": -0.2608082079095766, "vf_loss": 1.140888823196292, "vf_explained_var": 0.9517655372619629, "kl": 0.011148422752739862, "entropy": 693.1163578033447, "entropy_coeff": 0.0}}}, "num_steps_sampled": 16000, "num_agent_steps_sampled": 16000, "num_steps_trained": 16000}, "done": false, "episodes_total": 320, "training_iteration": 4, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_17-52-49", "timestamp": 1618415569, "time_this_iter_s": 1885.4371409416199, "time_total_s": 7541.401309013367, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 7541.401309013367, "timesteps_since_restore": 0, "iterations_since_restore": 4, "perf": {"cpu_util_percent": 26.91082186686501, "ram_util_percent": 29.44068426924507}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.017300000000002, "episode_reward_min": -20.980900000000002, "episode_reward_mean": -20.619270999999998, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.3602, -20.865699999999993, -20.415399999999995, -20.627299999999995, -20.625300000000003, -20.757299999999997, -20.739900000000002, -20.767700000000005, -20.846400000000003, -20.426199999999994, -20.017300000000002, -20.7784, -20.24580000000001, -20.723399999999998, -20.57539999999999, -20.6998, -20.56, -20.60030000000001, -20.6571, -20.609200000000005, -20.374999999999993, -20.719299999999993, -20.891399999999997, -20.487600000000008, -20.31989999999999, -20.599400000000003, -20.690600000000003, -20.62329999999999, -20.409599999999998, -20.607300000000002, -20.594099999999994, -20.517200000000006, -20.378499999999995, -20.5714, -20.5539, -20.65470000000001, -20.822699999999998, -20.980900000000002, -20.760199999999998, -20.784300000000005, -20.5177, -20.844800000000006, -20.8137, -20.6128, -20.615399999999998, -20.867599999999996, -20.6643, -20.7682, -20.7941, -20.919499999999996, -20.516000000000005, -20.795099999999998, -20.435999999999996, -20.68190000000001, -20.600700000000003, -20.2926, -20.556499999999996, -20.572000000000006, -20.611800000000002, -20.5496, -20.58629999999999, -20.7668, -20.585499999999996, -20.559100000000008, -20.736500000000003, -20.799399999999995, -20.463500000000003, -20.754300000000004, -20.787000000000003, -20.581600000000005, -20.666199999999996, -20.7834, -20.784999999999997, -20.509200000000003, -20.42319999999999, -20.305700000000005, -20.5168, -20.38809999999999, -20.525399999999998, -20.742899999999995, -20.825300000000002, -20.6694, -20.576300000000003, -20.333399999999994, -20.5427, -20.799199999999995, -20.779900000000005, -20.557599999999997, -20.5703, -20.502200000000006, -20.6239, -20.643899999999995, -20.492600000000003, -20.574300000000004, -20.768599999999992, -20.740099999999998, -20.634100000000004, -20.7181, -20.423299999999998, -20.6163], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37301429820552173, "mean_inference_ms": 146.55418253050703, "mean_action_processing_ms": 0.051919721734193854, "mean_env_wait_ms": 0.7922941113800541, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 20000, "agent_timesteps_total": 20000, "timers": {"sample_time_ms": 591399.424, "sample_throughput": 6.764, "learn_time_ms": 1293785.621, "learn_throughput": 3.092, "update_time_ms": 13.52}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.06562743347603828, "policy_loss": -0.24702570959925652, "vf_loss": 0.17873771605081856, "vf_explained_var": 0.9938496351242065, "kl": 0.008868555596563965, "entropy": 693.1191310882568, "entropy_coeff": 0.0}}}, "num_steps_sampled": 20000, "num_agent_steps_sampled": 20000, "num_steps_trained": 20000}, "done": false, "episodes_total": 400, "training_iteration": 5, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_18-24-14", "timestamp": 1618417454, "time_this_iter_s": 1884.634004354477, "time_total_s": 9426.035313367844, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 9426.035313367844, "timesteps_since_restore": 0, "iterations_since_restore": 5, "perf": {"cpu_util_percent": 26.88232886904762, "ram_util_percent": 29.64125744047619}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.261800000000004, "episode_reward_min": -21.043399999999995, "episode_reward_mean": -20.644312, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.825300000000002, -20.6694, -20.576300000000003, -20.333399999999994, -20.5427, -20.799199999999995, -20.779900000000005, -20.557599999999997, -20.5703, -20.502200000000006, -20.6239, -20.643899999999995, -20.492600000000003, -20.574300000000004, -20.768599999999992, -20.740099999999998, -20.634100000000004, -20.7181, -20.423299999999998, -20.6163, -20.5677, -20.7446, -20.849999999999994, -20.657299999999996, -20.261800000000004, -20.8349, -20.8057, -20.4271, -20.714999999999996, -20.8683, -20.593300000000006, -20.440799999999996, -20.420700000000004, -20.448000000000008, -20.664400000000004, -20.7457, -21.0004, -20.6238, -20.333600000000004, -20.576399999999996, -20.629699999999996, -20.504200000000004, -20.8601, -20.612, -20.734199999999998, -20.6419, -20.762400000000007, -20.744099999999996, -20.638199999999998, -20.852499999999996, -20.743800000000004, -20.7609, -20.7155, -20.920800000000003, -20.61039999999999, -20.488299999999995, -20.761599999999998, -20.569000000000003, -20.5386, -20.867900000000002, -20.511600000000005, -20.598999999999997, -20.734099999999998, -20.809200000000004, -20.847900000000003, -20.817699999999995, -20.819200000000002, -20.4966, -20.601600000000005, -20.742800000000003, -20.54370000000001, -20.858500000000003, -20.402799999999996, -20.7663, -20.4126, -20.730500000000003, -20.5577, -21.043399999999995, -20.688200000000005, -20.6749, -20.6906, -20.74930000000001, -20.460999999999995, -20.607899999999997, -20.720900000000007, -20.3093, -20.440399999999993, -20.389599999999998, -20.793400000000005, -20.507600000000004, -20.710599999999992, -20.614100000000004, -20.6616, -20.3661, -20.516699999999997, -20.3644, -21.030700000000003, -20.6195, -20.537499999999994, -20.754599999999996], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3729362990048145, "mean_inference_ms": 146.5515716973978, "mean_action_processing_ms": 0.05194108004720599, "mean_env_wait_ms": 0.792794481974758, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 24000, "agent_timesteps_total": 24000, "timers": {"sample_time_ms": 591385.225, "sample_throughput": 6.764, "learn_time_ms": 1293827.555, "learn_throughput": 3.092, "update_time_ms": 13.537}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": 0.04293747415067628, "policy_loss": -0.24430382065474987, "vf_loss": 0.2850038097240031, "vf_explained_var": 0.992125391960144, "kl": 0.007458280248101801, "entropy": 693.1215896606445, "entropy_coeff": 0.0}}}, "num_steps_sampled": 24000, "num_agent_steps_sampled": 24000, "num_steps_trained": 24000}, "done": false, "episodes_total": 480, "training_iteration": 6, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_18-55-39", "timestamp": 1618419339, "time_this_iter_s": 1885.3735439777374, "time_total_s": 11311.408857345581, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 11311.408857345581, "timesteps_since_restore": 0, "iterations_since_restore": 6, "perf": {"cpu_util_percent": 27.083451097062103, "ram_util_percent": 29.445146894756416}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.213199999999997, "episode_reward_min": -21.076399999999992, "episode_reward_mean": -20.639321999999996, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.6906, -20.74930000000001, -20.460999999999995, -20.607899999999997, -20.720900000000007, -20.3093, -20.440399999999993, -20.389599999999998, -20.793400000000005, -20.507600000000004, -20.710599999999992, -20.614100000000004, -20.6616, -20.3661, -20.516699999999997, -20.3644, -21.030700000000003, -20.6195, -20.537499999999994, -20.754599999999996, -20.910999999999998, -20.5903, -20.542200000000005, -20.6823, -20.664299999999997, -20.303900000000002, -20.8132, -20.6507, -20.67739999999999, -20.330000000000005, -20.77129999999999, -20.416899999999995, -20.497700000000005, -20.834300000000002, -20.660800000000002, -20.757699999999996, -20.36140000000001, -20.530499999999996, -20.358299999999993, -20.797899999999995, -20.7466, -20.6816, -20.909300000000005, -20.706, -20.645999999999997, -20.604699999999994, -20.74180000000001, -21.032, -20.588000000000005, -20.514599999999998, -20.73, -20.8166, -20.479599999999998, -20.570299999999996, -20.581399999999995, -20.840200000000003, -20.3469, -20.81050000000001, -21.076399999999992, -20.810799999999993, -20.5316, -20.3643, -20.540800000000004, -20.635999999999996, -20.676299999999994, -20.706200000000003, -20.879400000000004, -20.474600000000002, -20.845099999999995, -20.571399999999997, -20.612299999999998, -20.65819999999999, -20.5681, -20.786299999999997, -20.7108, -20.858800000000002, -20.213199999999997, -20.336700000000004, -20.483400000000003, -20.6781, -20.9047, -20.643499999999996, -20.7288, -20.456500000000002, -20.4668, -20.762299999999996, -20.906499999999998, -20.6433, -20.785800000000002, -20.707800000000002, -20.535800000000005, -20.7129, -20.480999999999995, -20.7057, -20.897900000000003, -20.767200000000003, -20.696500000000004, -20.3978, -20.729800000000004, -20.638800000000003], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3728577640723381, "mean_inference_ms": 146.5354282176752, "mean_action_processing_ms": 0.05193524705157976, "mean_env_wait_ms": 0.7915503396081445, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 28000, "agent_timesteps_total": 28000, "timers": {"sample_time_ms": 591295.412, "sample_throughput": 6.765, "learn_time_ms": 1293896.447, "learn_throughput": 3.091, "update_time_ms": 13.493}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.009663206292316318, "policy_loss": -0.24441900150850415, "vf_loss": 0.23277510463958606, "vf_explained_var": 0.9950058460235596, "kl": 0.006602299021324143, "entropy": 693.1216011047363, "entropy_coeff": 0.0}}}, "num_steps_sampled": 28000, "num_agent_steps_sampled": 28000, "num_steps_trained": 28000}, "done": false, "episodes_total": 560, "training_iteration": 7, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_19-27-04", "timestamp": 1618421224, "time_this_iter_s": 1885.08815741539, "time_total_s": 13196.497014760971, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 13196.497014760971, "timesteps_since_restore": 0, "iterations_since_restore": 7, "perf": {"cpu_util_percent": 26.769133506879886, "ram_util_percent": 29.637039791744147}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.261700000000005, "episode_reward_min": -21.059400000000004, "episode_reward_mean": -20.654457, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.9047, -20.643499999999996, -20.7288, -20.456500000000002, -20.4668, -20.762299999999996, -20.906499999999998, -20.6433, -20.785800000000002, -20.707800000000002, -20.535800000000005, -20.7129, -20.480999999999995, -20.7057, -20.897900000000003, -20.767200000000003, -20.696500000000004, -20.3978, -20.729800000000004, -20.638800000000003, -21.059400000000004, -20.765299999999996, -20.552799999999998, -20.6475, -20.720799999999997, -20.775699999999997, -20.344800000000006, -20.596, -20.8096, -20.840900000000005, -20.852099999999997, -20.8322, -20.602999999999998, -20.7038, -20.7062, -20.719200000000008, -20.520499999999995, -20.9589, -20.5376, -20.532100000000003, -20.492000000000008, -20.56759999999999, -20.635800000000003, -20.429099999999995, -20.705899999999996, -20.55249999999999, -20.66729999999999, -20.4464, -20.8693, -20.573600000000006, -20.8067, -20.5098, -20.685000000000006, -20.730799999999995, -20.2949, -20.854599999999998, -20.597500000000004, -20.400599999999997, -20.930499999999995, -20.7772, -20.94259999999999, -20.614400000000003, -20.640400000000003, -20.9278, -20.360000000000003, -20.5662, -20.7116, -20.771100000000004, -20.551400000000005, -20.602799999999995, -20.261700000000005, -20.726899999999993, -20.555100000000003, -20.7274, -20.489500000000003, -20.5276, -20.342200000000005, -20.883699999999997, -20.788200000000003, -20.700999999999997, -21.008899999999997, -20.3544, -20.659899999999997, -20.6418, -20.575199999999995, -20.440800000000003, -20.674500000000002, -20.649600000000007, -20.8947, -20.5341, -20.475000000000005, -20.745900000000006, -20.5056, -20.320500000000003, -20.526100000000003, -20.697100000000006, -20.835000000000004, -20.779999999999994, -20.459899999999998, -20.800200000000007], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37280588151081745, "mean_inference_ms": 146.53234741512892, "mean_action_processing_ms": 0.051938672055824496, "mean_env_wait_ms": 0.7920473167923342, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 32000, "agent_timesteps_total": 32000, "timers": {"sample_time_ms": 591300.152, "sample_throughput": 6.765, "learn_time_ms": 1294065.798, "learn_throughput": 3.091, "update_time_ms": 13.514}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.05133576493244618, "policy_loss": -0.24214444449171424, "vf_loss": 0.18878903635777533, "vf_explained_var": 0.995282769203186, "kl": 0.00673215847928077, "entropy": 693.1215286254883, "entropy_coeff": 0.0}}}, "num_steps_sampled": 32000, "num_agent_steps_sampled": 32000, "num_steps_trained": 32000}, "done": false, "episodes_total": 640, "training_iteration": 8, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_19-58-31", "timestamp": 1618423111, "time_this_iter_s": 1886.607561826706, "time_total_s": 15083.104576587677, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 15083.104576587677, "timesteps_since_restore": 0, "iterations_since_restore": 8, "perf": {"cpu_util_percent": 27.03894463024898, "ram_util_percent": 29.444258639910814}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.292699999999996, "episode_reward_min": -21.068799999999996, "episode_reward_mean": -20.641191, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-21.008899999999997, -20.3544, -20.659899999999997, -20.6418, -20.575199999999995, -20.440800000000003, -20.674500000000002, -20.649600000000007, -20.8947, -20.5341, -20.475000000000005, -20.745900000000006, -20.5056, -20.320500000000003, -20.526100000000003, -20.697100000000006, -20.835000000000004, -20.779999999999994, -20.459899999999998, -20.800200000000007, -20.6432, -20.869400000000006, -20.683899999999998, -20.819899999999993, -20.545699999999997, -20.565699999999996, -20.8024, -20.698000000000008, -20.437500000000004, -20.5203, -20.5965, -21.068799999999996, -20.4303, -20.690599999999993, -20.6762, -20.6899, -20.738, -20.841300000000004, -20.670000000000005, -20.477499999999996, -20.565199999999997, -20.8424, -20.784999999999997, -20.602299999999996, -20.6612, -20.603500000000004, -20.644699999999993, -20.727399999999996, -20.4245, -20.691100000000002, -20.5924, -20.603700000000003, -20.38050000000001, -20.838300000000004, -20.779100000000003, -20.4772, -20.6762, -20.774800000000003, -20.6513, -20.7799, -20.4284, -20.5096, -20.449800000000007, -20.671699999999994, -20.9356, -20.625100000000003, -20.459300000000006, -20.512700000000002, -20.6198, -20.364900000000006, -20.757899999999992, -20.492299999999993, -20.692899999999998, -20.5678, -20.773599999999995, -20.759400000000003, -20.648099999999996, -20.66059999999999, -20.948900000000002, -20.667499999999997, -20.640500000000007, -20.67320000000001, -20.5349, -20.487999999999996, -20.786900000000003, -20.631199999999996, -20.535299999999992, -20.5528, -20.681100000000004, -20.8134, -20.5764, -20.5066, -20.292699999999996, -20.941800000000004, -20.720900000000007, -20.5076, -20.5472, -20.9874, -20.356400000000008, -20.653899999999997], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37279631901646665, "mean_inference_ms": 146.53399794608296, "mean_action_processing_ms": 0.05192410510887189, "mean_env_wait_ms": 0.7911190362862406, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 36000, "agent_timesteps_total": 36000, "timers": {"sample_time_ms": 591298.652, "sample_throughput": 6.765, "learn_time_ms": 1294112.861, "learn_throughput": 3.091, "update_time_ms": 13.498}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": 0.010168157634325325, "policy_loss": -0.24486132385209203, "vf_loss": 0.2531338626286015, "vf_explained_var": 0.9941298365592957, "kl": 0.0063187157502397895, "entropy": 693.1211414337158, "entropy_coeff": 0.0}}}, "num_steps_sampled": 36000, "num_agent_steps_sampled": 36000, "num_steps_trained": 36000}, "done": false, "episodes_total": 720, "training_iteration": 9, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_20-29-57", "timestamp": 1618424997, "time_this_iter_s": 1885.7982337474823, "time_total_s": 16968.90281033516, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 16968.90281033516, "timesteps_since_restore": 0, "iterations_since_restore": 9, "perf": {"cpu_util_percent": 26.77285236147266, "ram_util_percent": 29.63767199702492}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.287300000000005, "episode_reward_min": -21.069999999999997, "episode_reward_mean": -20.663445000000003, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.640500000000007, -20.67320000000001, -20.5349, -20.487999999999996, -20.786900000000003, -20.631199999999996, -20.535299999999992, -20.5528, -20.681100000000004, -20.8134, -20.5764, -20.5066, -20.292699999999996, -20.941800000000004, -20.720900000000007, -20.5076, -20.5472, -20.9874, -20.356400000000008, -20.653899999999997, -20.5533, -20.606800000000003, -20.410699999999995, -20.697000000000013, -20.5105, -20.738900000000005, -20.8259, -20.50170000000001, -20.84670000000001, -20.699000000000005, -20.496899999999997, -20.79449999999999, -20.81500000000001, -20.670699999999997, -20.287300000000005, -20.648800000000005, -20.502599999999997, -20.530099999999997, -20.733199999999997, -20.7874, -20.879400000000004, -20.4788, -20.7981, -20.86610000000001, -20.520799999999994, -20.349400000000003, -20.440099999999997, -20.7897, -20.817800000000005, -20.699399999999997, -21.069999999999997, -20.9726, -20.583, -20.598399999999998, -20.573899999999995, -20.728900000000003, -20.412799999999994, -20.445600000000002, -20.656799999999993, -20.6035, -21.019200000000005, -20.82439999999999, -20.574099999999998, -20.746600000000004, -20.739400000000003, -20.733400000000007, -20.858700000000002, -20.772400000000008, -20.9625, -20.8083, -20.8462, -20.6826, -20.7426, -20.540000000000006, -20.8361, -20.483700000000006, -20.490899999999993, -20.871799999999997, -20.725999999999996, -20.8849, -20.827, -20.601000000000003, -20.717400000000005, -20.715899999999994, -20.754100000000005, -20.8042, -20.937800000000003, -20.580799999999996, -20.3848, -20.728, -20.7353, -20.4482, -20.623299999999997, -20.654499999999995, -20.510499999999997, -20.465200000000003, -20.350599999999996, -20.7289, -20.679100000000005, -20.583800000000007], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37266292897954584, "mean_inference_ms": 146.52862993763645, "mean_action_processing_ms": 0.05188207412005699, "mean_env_wait_ms": 0.786204125079433, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 40000, "agent_timesteps_total": 40000, "timers": {"sample_time_ms": 591243.304, "sample_throughput": 6.765, "learn_time_ms": 1294195.929, "learn_throughput": 3.091, "update_time_ms": 13.456}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.06340700999135152, "policy_loss": -0.23882530513219535, "vf_loss": 0.17349185660714284, "vf_explained_var": 0.9966383576393127, "kl": 0.0064214397862087935, "entropy": 693.1200160980225, "entropy_coeff": 0.0}}}, "num_steps_sampled": 40000, "num_agent_steps_sampled": 40000, "num_steps_trained": 40000}, "done": false, "episodes_total": 800, "training_iteration": 10, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_21-01-22", "timestamp": 1618426882, "time_this_iter_s": 1885.7106738090515, "time_total_s": 18854.61348414421, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 18854.61348414421, "timesteps_since_restore": 0, "iterations_since_restore": 10, "perf": {"cpu_util_percent": 27.013940520446095, "ram_util_percent": 29.44420074349442}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.2544, "episode_reward_min": -21.054199999999998, "episode_reward_mean": -20.644026999999998, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.827, -20.601000000000003, -20.717400000000005, -20.715899999999994, -20.754100000000005, -20.8042, -20.937800000000003, -20.580799999999996, -20.3848, -20.728, -20.7353, -20.4482, -20.623299999999997, -20.654499999999995, -20.510499999999997, -20.465200000000003, -20.350599999999996, -20.7289, -20.679100000000005, -20.583800000000007, -21.054199999999998, -20.547900000000002, -20.438600000000005, -20.6767, -20.506999999999998, -20.541500000000003, -20.548800000000007, -20.6707, -20.5266, -20.560300000000005, -20.7473, -20.425900000000002, -20.723200000000006, -20.815299999999993, -20.413799999999995, -20.865799999999993, -20.745000000000008, -20.754900000000006, -20.9723, -20.722, -20.7685, -20.911, -20.557100000000002, -20.735800000000005, -20.40420000000001, -20.38610000000001, -20.787000000000006, -20.902199999999997, -20.715, -20.469000000000005, -20.667800000000003, -20.559599999999996, -20.657600000000006, -20.597100000000008, -20.798299999999998, -20.2544, -20.4661, -20.605300000000003, -20.6077, -20.4993, -20.55699999999999, -20.428, -20.5993, -20.688100000000002, -20.700799999999994, -20.637999999999998, -20.487399999999997, -20.763199999999994, -20.5496, -20.694400000000005, -20.7962, -20.785999999999998, -20.563100000000006, -20.724000000000007, -20.737900000000007, -20.803499999999996, -20.412200000000006, -20.798699999999997, -20.890600000000003, -20.588999999999995, -20.70529999999999, -20.7616, -21.018600000000003, -20.890099999999997, -20.716099999999997, -20.652199999999997, -20.379, -20.681400000000004, -20.389599999999998, -20.373, -20.629100000000005, -20.4222, -20.877299999999988, -20.5608, -20.341699999999992, -20.665000000000003, -20.835399999999996, -20.597799999999992, -20.746799999999993, -20.5154], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37261569553103624, "mean_inference_ms": 146.5318105092311, "mean_action_processing_ms": 0.051925336978452794, "mean_env_wait_ms": 0.7865938242558213, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 44000, "agent_timesteps_total": 44000, "timers": {"sample_time_ms": 591271.923, "sample_throughput": 6.765, "learn_time_ms": 1294143.081, "learn_throughput": 3.091, "update_time_ms": 13.44}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.18876290251500905, "policy_loss": -0.24425451923161745, "vf_loss": 0.05366173683432862, "vf_explained_var": 0.9988695979118347, "kl": 0.00609958652057685, "entropy": 693.119062423706, "entropy_coeff": 0.0}}}, "num_steps_sampled": 44000, "num_agent_steps_sampled": 44000, "num_steps_trained": 44000}, "done": false, "episodes_total": 880, "training_iteration": 11, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_21-32-48", "timestamp": 1618428768, "time_this_iter_s": 1885.5269854068756, "time_total_s": 20740.140469551086, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 20740.140469551086, "timesteps_since_restore": 0, "iterations_since_restore": 11, "perf": {"cpu_util_percent": 26.7608776496839, "ram_util_percent": 29.64198586835255}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.180800000000005, "episode_reward_min": -21.073900000000002, "episode_reward_mean": -20.653986, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.70529999999999, -20.7616, -21.018600000000003, -20.890099999999997, -20.716099999999997, -20.652199999999997, -20.379, -20.681400000000004, -20.389599999999998, -20.373, -20.629100000000005, -20.4222, -20.877299999999988, -20.5608, -20.341699999999992, -20.665000000000003, -20.835399999999996, -20.597799999999992, -20.746799999999993, -20.5154, -20.649400000000007, -20.829899999999995, -20.780300000000004, -20.56329999999999, -20.686200000000007, -20.578699999999998, -20.729399999999995, -20.649599999999996, -20.774400000000007, -20.634200000000007, -20.941200000000006, -20.5699, -20.784399999999998, -20.549099999999996, -20.840699999999995, -20.756600000000002, -20.428800000000006, -20.762200000000004, -20.689599999999995, -20.919500000000003, -20.758099999999995, -20.633099999999992, -20.676199999999998, -20.709999999999997, -20.730200000000004, -20.78760000000001, -20.547199999999997, -20.674999999999994, -20.781299999999995, -20.425499999999996, -20.454000000000004, -20.515900000000002, -20.701600000000003, -20.180800000000005, -20.703500000000002, -20.200900000000004, -20.9167, -20.441500000000005, -20.595700000000004, -20.602500000000003, -20.8483, -20.7667, -20.6311, -20.705999999999996, -20.701000000000008, -20.498199999999997, -20.282000000000004, -21.073900000000002, -20.508599999999998, -20.664199999999994, -20.593, -20.669999999999995, -20.738099999999996, -20.924500000000005, -20.632899999999996, -20.6581, -20.632799999999996, -20.672899999999995, -20.615299999999998, -20.858299999999996, -20.8147, -20.195999999999998, -20.887299999999996, -20.4755, -20.7845, -20.6398, -20.448800000000006, -20.665800000000004, -20.824499999999997, -20.67709999999999, -20.880799999999997, -20.655099999999997, -20.792299999999997, -20.295799999999996, -20.821399999999997, -20.729799999999997, -20.7307, -20.470400000000005, -20.418199999999995, -20.6011], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37247722759402196, "mean_inference_ms": 146.52108658040373, "mean_action_processing_ms": 0.051904141230831265, "mean_env_wait_ms": 0.7833675574779484, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 48000, "agent_timesteps_total": 48000, "timers": {"sample_time_ms": 591183.134, "sample_throughput": 6.766, "learn_time_ms": 1294267.63, "learn_throughput": 3.091, "update_time_ms": 13.445}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.12352394766639918, "policy_loss": -0.23862713330890983, "vf_loss": 0.11311586349620484, "vf_explained_var": 0.9973416328430176, "kl": 0.006624426343478262, "entropy": 693.1159706115723, "entropy_coeff": 0.0}}}, "num_steps_sampled": 48000, "num_agent_steps_sampled": 48000, "num_steps_trained": 48000}, "done": false, "episodes_total": 960, "training_iteration": 12, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_22-04-13", "timestamp": 1618430653, "time_this_iter_s": 1885.4643595218658, "time_total_s": 22625.604829072952, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 22625.604829072952, "timesteps_since_restore": 0, "iterations_since_restore": 12, "perf": {"cpu_util_percent": 27.08661212346597, "ram_util_percent": 29.448791372257347}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.1261, "episode_reward_min": -21.030499999999996, "episode_reward_mean": -20.670356, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.8147, -20.195999999999998, -20.887299999999996, -20.4755, -20.7845, -20.6398, -20.448800000000006, -20.665800000000004, -20.824499999999997, -20.67709999999999, -20.880799999999997, -20.655099999999997, -20.792299999999997, -20.295799999999996, -20.821399999999997, -20.729799999999997, -20.7307, -20.470400000000005, -20.418199999999995, -20.6011, -20.480299999999993, -20.525600000000004, -20.551699999999993, -20.728800000000007, -20.6844, -20.6553, -20.7719, -20.765900000000006, -20.6984, -20.8282, -20.601899999999997, -20.705699999999997, -21.030499999999996, -20.578899999999997, -20.699499999999997, -20.582199999999997, -20.733900000000002, -20.736900000000006, -20.759900000000002, -20.702699999999997, -20.772399999999998, -20.1261, -20.618299999999998, -20.650300000000005, -20.480400000000003, -20.910499999999992, -20.86, -20.6706, -20.6607, -20.420199999999998, -20.779, -20.74440000000001, -20.878300000000003, -20.882100000000005, -20.474199999999996, -20.5008, -20.596699999999988, -20.6365, -20.391999999999996, -20.898400000000006, -20.597099999999998, -20.57870000000001, -20.519200000000005, -20.8084, -20.837799999999998, -20.7999, -20.752899999999997, -21.018, -20.610200000000003, -20.758000000000006, -20.3962, -20.7271, -20.8576, -20.4326, -20.623599999999993, -20.697400000000002, -20.7187, -20.556700000000003, -20.935499999999998, -20.7398, -20.383300000000002, -20.447500000000005, -20.4106, -20.5106, -20.804899999999996, -20.8194, -21.029500000000002, -20.824400000000004, -20.6219, -20.50839999999999, -20.567899999999995, -20.8663, -20.688, -20.7854, -20.895000000000003, -20.763400000000004, -20.7227, -20.586799999999997, -20.768199999999997, -20.4819], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3724136294386539, "mean_inference_ms": 146.51517866800054, "mean_action_processing_ms": 0.05185722044167441, "mean_env_wait_ms": 0.7821987297558748, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 52000, "agent_timesteps_total": 52000, "timers": {"sample_time_ms": 591095.712, "sample_throughput": 6.767, "learn_time_ms": 1294363.401, "learn_throughput": 3.09, "update_time_ms": 13.449}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.17005631909705698, "policy_loss": -0.25071325129829347, "vf_loss": 0.07873027327877935, "vf_explained_var": 0.9985706210136414, "kl": 0.006422200502129272, "entropy": 693.1151695251465, "entropy_coeff": 0.0}}}, "num_steps_sampled": 52000, "num_agent_steps_sampled": 52000, "num_steps_trained": 52000}, "done": false, "episodes_total": 1040, "training_iteration": 13, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_22-35-39", "timestamp": 1618432539, "time_this_iter_s": 1885.1722784042358, "time_total_s": 24510.777107477188, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 24510.777107477188, "timesteps_since_restore": 0, "iterations_since_restore": 13, "perf": {"cpu_util_percent": 26.77560431387133, "ram_util_percent": 29.636853849014503}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.276000000000003, "episode_reward_min": -21.029500000000002, "episode_reward_mean": -20.657196, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.383300000000002, -20.447500000000005, -20.4106, -20.5106, -20.804899999999996, -20.8194, -21.029500000000002, -20.824400000000004, -20.6219, -20.50839999999999, -20.567899999999995, -20.8663, -20.688, -20.7854, -20.895000000000003, -20.763400000000004, -20.7227, -20.586799999999997, -20.768199999999997, -20.4819, -20.577999999999996, -20.660199999999996, -20.538600000000002, -20.3554, -20.715700000000002, -20.635199999999998, -20.3373, -20.609099999999998, -20.8438, -20.666999999999998, -20.67920000000001, -20.6462, -20.631499999999996, -20.459000000000003, -20.599099999999996, -20.572499999999994, -20.561900000000005, -20.538500000000003, -20.832899999999995, -20.782600000000002, -20.534799999999997, -21.008000000000003, -20.6393, -20.430599999999995, -20.4736, -20.782799999999995, -20.852199999999996, -20.743699999999997, -20.702600000000007, -20.776600000000002, -20.779900000000012, -20.910899999999998, -20.7107, -20.353699999999993, -20.681799999999996, -20.577, -20.865700000000004, -20.793899999999994, -20.7516, -20.780100000000004, -20.8863, -20.535000000000004, -20.6795, -20.557100000000002, -21.019800000000007, -20.539, -20.276000000000003, -20.542399999999994, -20.596799999999995, -20.821700000000007, -20.790899999999997, -20.666099999999997, -20.698200000000007, -20.8215, -20.625000000000004, -20.456899999999994, -20.454399999999996, -20.518399999999996, -20.56359999999999, -21.014700000000005, -20.624399999999998, -20.6412, -20.428599999999996, -20.839999999999996, -20.8548, -20.86320000000001, -20.879, -20.504600000000003, -20.694100000000002, -20.447699999999994, -20.5912, -20.82040000000001, -20.869400000000002, -20.824500000000004, -20.3956, -20.841500000000007, -20.3923, -20.3302, -20.520500000000006, -20.4138], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37246213350671087, "mean_inference_ms": 146.51577334821602, "mean_action_processing_ms": 0.05185620830541226, "mean_env_wait_ms": 0.7834344202320166, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 56000, "agent_timesteps_total": 56000, "timers": {"sample_time_ms": 591093.015, "sample_throughput": 6.767, "learn_time_ms": 1294424.469, "learn_throughput": 3.09, "update_time_ms": 13.442}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.17935174744343385, "policy_loss": -0.24064396508038044, "vf_loss": 0.059378106274380116, "vf_explained_var": 0.9987779855728149, "kl": 0.006380370905390009, "entropy": 693.1135063171387, "entropy_coeff": 0.0}}}, "num_steps_sampled": 56000, "num_agent_steps_sampled": 56000, "num_steps_trained": 56000}, "done": false, "episodes_total": 1120, "training_iteration": 14, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_23-07-05", "timestamp": 1618434425, "time_this_iter_s": 1886.020852804184, "time_total_s": 26396.797960281372, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 26396.797960281372, "timesteps_since_restore": 0, "iterations_since_restore": 14, "perf": {"cpu_util_percent": 27.002824228911187, "ram_util_percent": 29.44336677814939}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.187099999999994, "episode_reward_min": -21.044, "episode_reward_mean": -20.6604, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.624399999999998, -20.6412, -20.428599999999996, -20.839999999999996, -20.8548, -20.86320000000001, -20.879, -20.504600000000003, -20.694100000000002, -20.447699999999994, -20.5912, -20.82040000000001, -20.869400000000002, -20.824500000000004, -20.3956, -20.841500000000007, -20.3923, -20.3302, -20.520500000000006, -20.4138, -20.918399999999995, -20.687199999999997, -20.5187, -20.911399999999997, -20.5661, -20.781099999999995, -20.998499999999996, -20.511400000000005, -20.90800000000001, -20.574100000000005, -20.895599999999995, -20.8095, -20.644500000000004, -20.7045, -20.4124, -20.666700000000002, -20.95539999999999, -20.560600000000004, -20.627, -20.7983, -20.6742, -20.5922, -20.755300000000005, -20.396499999999993, -20.797, -20.535200000000007, -20.648099999999992, -20.647500000000008, -20.6788, -20.6027, -20.7213, -20.4888, -20.7433, -20.5469, -20.854999999999993, -20.727600000000006, -20.796800000000005, -20.503, -20.761599999999998, -20.5203, -20.6079, -20.6306, -20.835300000000004, -20.524799999999995, -20.462599999999995, -20.5708, -20.7232, -20.658900000000003, -20.630200000000006, -20.488299999999995, -20.454900000000002, -20.5683, -20.4392, -20.732499999999998, -20.489400000000003, -20.192700000000002, -20.543400000000002, -20.6806, -20.785699999999995, -20.792499999999997, -20.260700000000007, -20.772399999999994, -20.386800000000004, -20.80530000000001, -20.8345, -20.75250000000001, -20.187099999999994, -20.6785, -21.020799999999998, -20.8486, -21.044, -20.586999999999996, -20.473899999999997, -20.848899999999997, -20.807600000000004, -20.922799999999995, -20.67229999999999, -20.428400000000003, -20.882499999999997, -20.795099999999994], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37238352057159574, "mean_inference_ms": 146.50952435578776, "mean_action_processing_ms": 0.0518077280223399, "mean_env_wait_ms": 0.780412995328119, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 60000, "agent_timesteps_total": 60000, "timers": {"sample_time_ms": 591004.766, "sample_throughput": 6.768, "learn_time_ms": 1294517.686, "learn_throughput": 3.09, "update_time_ms": 13.432}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.13723472959827632, "policy_loss": -0.23648779070936143, "vf_loss": 0.0973084110992204, "vf_explained_var": 0.9981292486190796, "kl": 0.0064821658306755126, "entropy": 693.113302230835, "entropy_coeff": 0.0}}}, "num_steps_sampled": 60000, "num_agent_steps_sampled": 60000, "num_steps_trained": 60000}, "done": false, "episodes_total": 1200, "training_iteration": 15, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-14_23-38-29", "timestamp": 1618436309, "time_this_iter_s": 1884.683174610138, "time_total_s": 28281.48113489151, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 28281.48113489151, "timesteps_since_restore": 0, "iterations_since_restore": 15, "perf": {"cpu_util_percent": 26.766778273809525, "ram_util_percent": 29.639918154761908}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.187099999999994, "episode_reward_min": -21.128600000000006, "episode_reward_mean": -20.665166000000003, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.260700000000007, -20.772399999999994, -20.386800000000004, -20.80530000000001, -20.8345, -20.75250000000001, -20.187099999999994, -20.6785, -21.020799999999998, -20.8486, -21.044, -20.586999999999996, -20.473899999999997, -20.848899999999997, -20.807600000000004, -20.922799999999995, -20.67229999999999, -20.428400000000003, -20.882499999999997, -20.795099999999994, -21.128600000000006, -20.462400000000002, -20.743599999999997, -20.828299999999995, -20.6025, -20.901000000000007, -20.554900000000007, -20.473900000000004, -20.521300000000004, -20.365999999999996, -20.536499999999997, -20.572700000000008, -20.647199999999998, -20.650399999999998, -20.445399999999996, -20.483999999999998, -20.740299999999998, -20.728299999999997, -20.527199999999997, -20.783899999999992, -20.709099999999996, -20.542299999999997, -20.558800000000012, -20.82650000000001, -20.832700000000003, -20.747299999999996, -20.20999999999999, -20.658, -20.298600000000004, -20.716600000000003, -20.713100000000004, -20.640499999999996, -20.813100000000006, -20.359099999999998, -20.6099, -20.442, -20.3925, -20.600700000000003, -20.839000000000002, -20.416199999999996, -20.7655, -20.8507, -20.536900000000003, -20.5915, -20.362999999999996, -20.926200000000005, -20.673099999999998, -20.901600000000002, -20.895000000000003, -20.8451, -20.681900000000002, -20.52399999999999, -20.6361, -20.7701, -20.642500000000005, -20.623, -20.7676, -20.841199999999994, -20.922800000000002, -20.5895, -20.84850000000001, -20.92049999999999, -20.400199999999998, -20.782999999999998, -20.8139, -20.4272, -20.6295, -20.698400000000003, -20.745000000000005, -20.70019999999999, -20.760299999999997, -20.757199999999997, -20.521600000000003, -20.7489, -20.708700000000004, -20.6604, -20.518600000000003, -20.496100000000002, -20.711, -20.685999999999996], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37233048901771304, "mean_inference_ms": 146.50369725143716, "mean_action_processing_ms": 0.05179244648718884, "mean_env_wait_ms": 0.7786232809481295, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 64000, "agent_timesteps_total": 64000, "timers": {"sample_time_ms": 590944.012, "sample_throughput": 6.769, "learn_time_ms": 1294580.904, "learn_throughput": 3.09, "update_time_ms": 13.396}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2132619711337611, "policy_loss": -0.24487905739806592, "vf_loss": 0.029608261766952637, "vf_explained_var": 0.9994328618049622, "kl": 0.00669609202304855, "entropy": 693.1110744476318, "entropy_coeff": 0.0}}}, "num_steps_sampled": 64000, "num_agent_steps_sampled": 64000, "num_steps_trained": 64000}, "done": false, "episodes_total": 1280, "training_iteration": 16, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_00-09-55", "timestamp": 1618438195, "time_this_iter_s": 1885.3981354236603, "time_total_s": 30166.87927031517, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 30166.87927031517, "timesteps_since_restore": 0, "iterations_since_restore": 16, "perf": {"cpu_util_percent": 27.064596504276683, "ram_util_percent": 29.44611379695054}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.2856, "episode_reward_min": -21.059499999999996, "episode_reward_mean": -20.655941, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.84850000000001, -20.92049999999999, -20.400199999999998, -20.782999999999998, -20.8139, -20.4272, -20.6295, -20.698400000000003, -20.745000000000005, -20.70019999999999, -20.760299999999997, -20.757199999999997, -20.521600000000003, -20.7489, -20.708700000000004, -20.6604, -20.518600000000003, -20.496100000000002, -20.711, -20.685999999999996, -20.7039, -20.955700000000004, -20.508000000000003, -20.7909, -20.586300000000005, -20.4425, -20.415599999999998, -20.623399999999997, -20.603400000000008, -20.603900000000003, -20.5551, -20.662200000000002, -20.594000000000005, -20.837, -21.059499999999996, -20.850800000000003, -20.47340000000001, -20.6584, -20.493600000000008, -20.588499999999996, -20.882899999999996, -20.4445, -20.316599999999998, -20.7456, -20.7999, -20.919200000000004, -20.790499999999998, -20.566300000000005, -20.2856, -20.862900000000007, -20.759899999999995, -20.630500000000005, -20.555200000000003, -20.5495, -20.501900000000003, -20.8077, -20.435, -20.5227, -20.740099999999998, -20.682899999999997, -20.9855, -20.9918, -20.770099999999996, -20.420699999999993, -20.657500000000002, -20.9131, -20.799, -20.905599999999996, -20.661100000000005, -20.621599999999997, -20.855999999999998, -20.453999999999997, -20.4846, -20.882999999999996, -20.772000000000002, -20.347899999999996, -20.392999999999997, -20.901, -20.925899999999995, -20.4483, -20.59959999999999, -20.551799999999997, -20.4898, -20.6644, -20.370999999999995, -20.428999999999995, -20.502100000000002, -20.6447, -20.698000000000004, -20.706799999999998, -20.695300000000003, -20.3748, -20.5006, -20.672700000000003, -20.5842, -20.984299999999998, -20.535199999999996, -20.6721, -20.662099999999995, -20.7232], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3723272663295565, "mean_inference_ms": 146.5001316908647, "mean_action_processing_ms": 0.05180158453136819, "mean_env_wait_ms": 0.7780729318481906, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 68000, "agent_timesteps_total": 68000, "timers": {"sample_time_ms": 590954.149, "sample_throughput": 6.769, "learn_time_ms": 1294567.697, "learn_throughput": 3.09, "update_time_ms": 13.423}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.22286047833040357, "policy_loss": -0.24884735810337588, "vf_loss": 0.02405950706997828, "vf_explained_var": 0.9993934631347656, "kl": 0.0064245678804581985, "entropy": 693.1100730895996, "entropy_coeff": 0.0}}}, "num_steps_sampled": 68000, "num_agent_steps_sampled": 68000, "num_steps_trained": 68000}, "done": false, "episodes_total": 1360, "training_iteration": 17, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_00-41-20", "timestamp": 1618440080, "time_this_iter_s": 1885.0580430030823, "time_total_s": 32051.937313318253, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 32051.937313318253, "timesteps_since_restore": 0, "iterations_since_restore": 17, "perf": {"cpu_util_percent": 26.77865377463741, "ram_util_percent": 29.637039791744147}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.1933, "episode_reward_min": -21.049099999999996, "episode_reward_mean": -20.649545, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.59959999999999, -20.551799999999997, -20.4898, -20.6644, -20.370999999999995, -20.428999999999995, -20.502100000000002, -20.6447, -20.698000000000004, -20.706799999999998, -20.695300000000003, -20.3748, -20.5006, -20.672700000000003, -20.5842, -20.984299999999998, -20.535199999999996, -20.6721, -20.662099999999995, -20.7232, -20.636500000000005, -20.813100000000002, -20.746899999999997, -20.541, -20.469099999999994, -20.641599999999997, -20.548, -20.587099999999992, -20.44589999999999, -20.570299999999996, -20.7698, -20.602799999999995, -20.771400000000003, -20.431199999999993, -20.747200000000007, -20.798399999999994, -20.643099999999993, -20.707000000000004, -20.732400000000002, -20.9283, -20.8013, -20.839400000000005, -20.664099999999994, -20.774, -20.2149, -20.6543, -20.688999999999997, -20.8601, -20.347499999999997, -20.686700000000002, -20.686700000000002, -20.819600000000005, -20.655600000000003, -20.9143, -20.413700000000006, -20.630200000000006, -20.6046, -20.608700000000006, -20.406299999999998, -20.6801, -20.7456, -20.919099999999997, -20.872299999999992, -20.7953, -20.753300000000014, -20.6539, -20.5554, -20.531, -20.464700000000008, -20.3485, -20.659300000000005, -20.815499999999997, -20.866500000000006, -20.3826, -20.716500000000007, -20.372099999999996, -20.634300000000003, -20.7995, -20.763599999999993, -20.657000000000004, -20.7314, -20.67610000000001, -20.420199999999998, -20.7298, -20.1933, -21.049099999999996, -20.680700000000005, -20.586700000000004, -20.488399999999995, -20.6299, -20.696699999999996, -20.651700000000005, -20.9678, -20.8247, -20.786000000000005, -20.963900000000006, -20.549500000000002, -20.357999999999997, -20.888100000000005, -20.6326], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3723458257826696, "mean_inference_ms": 146.49820638246194, "mean_action_processing_ms": 0.05180891152380825, "mean_env_wait_ms": 0.7786111572849238, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 72000, "agent_timesteps_total": 72000, "timers": {"sample_time_ms": 590923.191, "sample_throughput": 6.769, "learn_time_ms": 1294578.855, "learn_throughput": 3.09, "update_time_ms": 13.403}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.22315283655188978, "policy_loss": -0.24295901460573077, "vf_loss": 0.01771352900823331, "vf_explained_var": 0.9996328353881836, "kl": 0.006975485564908013, "entropy": 693.1086883544922, "entropy_coeff": 0.0}}}, "num_steps_sampled": 72000, "num_agent_steps_sampled": 72000, "num_steps_trained": 72000}, "done": false, "episodes_total": 1440, "training_iteration": 18, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_01-12-46", "timestamp": 1618441966, "time_this_iter_s": 1886.409081697464, "time_total_s": 33938.34639501572, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 33938.34639501572, "timesteps_since_restore": 0, "iterations_since_restore": 18, "perf": {"cpu_util_percent": 27.085055762081787, "ram_util_percent": 29.447546468401484}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.1933, "episode_reward_min": -21.200100000000003, "episode_reward_mean": -20.669468000000002, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.7314, -20.67610000000001, -20.420199999999998, -20.7298, -20.1933, -21.049099999999996, -20.680700000000005, -20.586700000000004, -20.488399999999995, -20.6299, -20.696699999999996, -20.651700000000005, -20.9678, -20.8247, -20.786000000000005, -20.963900000000006, -20.549500000000002, -20.357999999999997, -20.888100000000005, -20.6326, -20.475599999999993, -20.9397, -20.505, -20.593700000000002, -20.699099999999998, -20.519800000000004, -20.438699999999997, -20.749100000000006, -20.417399999999997, -20.580699999999993, -21.200100000000003, -20.679700000000008, -20.720599999999997, -20.6056, -20.635999999999992, -20.6425, -20.984399999999994, -20.600499999999993, -20.566600000000005, -20.741400000000006, -20.887499999999992, -20.523500000000002, -20.8404, -20.5102, -20.619799999999998, -20.678700000000006, -20.238299999999995, -20.753, -20.624999999999996, -20.759199999999996, -20.542900000000003, -20.3315, -20.544500000000006, -20.7065, -20.559800000000006, -20.879800000000003, -20.850099999999998, -20.613200000000003, -20.877499999999998, -21.0212, -20.689300000000003, -20.579700000000003, -20.4797, -20.871399999999998, -20.757399999999997, -20.7879, -20.899, -20.72650000000001, -20.702900000000003, -20.6253, -20.537699999999997, -20.652699999999996, -20.6306, -20.647800000000004, -20.727300000000003, -20.575499999999998, -20.810100000000006, -20.6276, -20.731000000000005, -20.457, -20.408600000000007, -20.8971, -20.926699999999993, -20.608899999999995, -20.544399999999996, -20.551300000000005, -20.896200000000004, -20.5599, -20.689100000000003, -20.693599999999993, -20.641699999999997, -20.588600000000003, -20.5334, -20.713599999999996, -20.508200000000002, -20.579, -20.9522, -20.888700000000007, -20.299899999999994, -20.78590000000001], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.372366144564319, "mean_inference_ms": 146.49421731854022, "mean_action_processing_ms": 0.05179824356831801, "mean_env_wait_ms": 0.7784880526659131, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 76000, "agent_timesteps_total": 76000, "timers": {"sample_time_ms": 590864.839, "sample_throughput": 6.77, "learn_time_ms": 1294557.027, "learn_throughput": 3.09, "update_time_ms": 13.434}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2249826507177204, "policy_loss": -0.2440562886185944, "vf_loss": 0.01715167991551425, "vf_explained_var": 0.9996123313903809, "kl": 0.006406521599274129, "entropy": 693.1071128845215, "entropy_coeff": 0.0}}}, "num_steps_sampled": 76000, "num_agent_steps_sampled": 76000, "num_steps_trained": 76000}, "done": false, "episodes_total": 1520, "training_iteration": 19, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_01-44-11", "timestamp": 1618443851, "time_this_iter_s": 1884.9966249465942, "time_total_s": 35823.34301996231, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 35823.34301996231, "timesteps_since_restore": 0, "iterations_since_restore": 19, "perf": {"cpu_util_percent": 26.774191149126064, "ram_util_percent": 29.637300111565644}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.085299999999997, "episode_reward_min": -21.247500000000002, "episode_reward_mean": -20.63794, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.408600000000007, -20.8971, -20.926699999999993, -20.608899999999995, -20.544399999999996, -20.551300000000005, -20.896200000000004, -20.5599, -20.689100000000003, -20.693599999999993, -20.641699999999997, -20.588600000000003, -20.5334, -20.713599999999996, -20.508200000000002, -20.579, -20.9522, -20.888700000000007, -20.299899999999994, -20.78590000000001, -20.4534, -20.517999999999997, -20.632300000000004, -20.824600000000007, -20.726199999999995, -20.4553, -20.734399999999997, -21.024699999999996, -20.264, -20.750299999999996, -20.952399999999994, -20.319499999999998, -20.783700000000003, -20.607200000000002, -20.302999999999997, -20.196599999999997, -20.910800000000002, -20.823900000000002, -20.7082, -20.443399999999997, -20.749500000000005, -20.587799999999994, -20.331700000000005, -20.88640000000001, -20.4576, -20.5944, -20.757099999999998, -20.739900000000006, -20.411000000000005, -20.730900000000005, -20.5996, -20.3565, -20.409399999999998, -20.631299999999996, -20.790000000000003, -20.66160000000001, -20.8017, -20.5955, -20.7361, -20.843499999999995, -20.4192, -20.533399999999993, -20.630799999999997, -20.623200000000008, -20.369199999999996, -20.085299999999997, -20.424600000000005, -20.534599999999994, -20.538, -20.6991, -20.298499999999997, -20.6802, -20.618899999999996, -20.873100000000004, -20.753100000000003, -20.77990000000001, -20.7453, -20.813599999999997, -20.676999999999996, -20.561999999999998, -20.6558, -21.247500000000002, -20.528799999999997, -20.605099999999997, -20.6787, -20.8163, -20.737899999999993, -20.719399999999997, -20.818299999999997, -20.4803, -20.547299999999996, -20.754500000000004, -20.390700000000002, -20.453200000000002, -20.631200000000003, -21.003799999999995, -20.8187, -20.469499999999996, -20.614599999999996, -20.79300000000001], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3723421117865924, "mean_inference_ms": 146.4975645899476, "mean_action_processing_ms": 0.0517870976964404, "mean_env_wait_ms": 0.7774141759462495, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 80000, "agent_timesteps_total": 80000, "timers": {"sample_time_ms": 590928.582, "sample_throughput": 6.769, "learn_time_ms": 1294511.51, "learn_throughput": 3.09, "update_time_ms": 13.489}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23100904701277614, "policy_loss": -0.24810071056708694, "vf_loss": 0.015045782372453687, "vf_explained_var": 0.9997512102127075, "kl": 0.006819599177106284, "entropy": 693.1062755584717, "entropy_coeff": 0.0}}}, "num_steps_sampled": 80000, "num_agent_steps_sampled": 80000, "num_steps_trained": 80000}, "done": false, "episodes_total": 1600, "training_iteration": 20, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_02-15-37", "timestamp": 1618445737, "time_this_iter_s": 1885.893117427826, "time_total_s": 37709.23613739014, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 37709.23613739014, "timesteps_since_restore": 0, "iterations_since_restore": 20, "perf": {"cpu_util_percent": 27.014052044609667, "ram_util_percent": 29.44453531598513}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.181000000000008, "episode_reward_min": -21.247500000000002, "episode_reward_mean": -20.654722000000003, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.6558, -21.247500000000002, -20.528799999999997, -20.605099999999997, -20.6787, -20.8163, -20.737899999999993, -20.719399999999997, -20.818299999999997, -20.4803, -20.547299999999996, -20.754500000000004, -20.390700000000002, -20.453200000000002, -20.631200000000003, -21.003799999999995, -20.8187, -20.469499999999996, -20.614599999999996, -20.79300000000001, -20.557099999999995, -21.052300000000002, -20.860100000000003, -20.431100000000008, -20.884800000000002, -20.611200000000004, -20.484299999999994, -20.8394, -21.002800000000004, -20.6194, -20.624300000000005, -20.727799999999988, -20.408199999999994, -20.570999999999998, -20.665699999999998, -20.704499999999996, -20.338899999999995, -20.944300000000002, -20.790999999999997, -20.78770000000001, -20.753700000000002, -20.7511, -20.181000000000008, -20.8331, -20.379900000000006, -20.5556, -20.818499999999997, -20.3376, -20.9264, -20.990299999999998, -20.432199999999998, -20.344899999999996, -20.6414, -20.525199999999998, -20.630399999999998, -20.7173, -20.742399999999996, -20.7744, -20.475, -20.6185, -20.503, -20.811200000000003, -20.798400000000008, -20.7165, -20.263799999999993, -20.462000000000003, -20.56010000000001, -20.447299999999995, -20.5089, -20.854099999999995, -20.701600000000006, -20.620599999999996, -20.7041, -20.629500000000004, -20.677200000000006, -20.239700000000003, -20.649499999999993, -20.698900000000002, -20.645099999999996, -20.426500000000008, -20.72699999999999, -20.621899999999993, -20.543699999999998, -20.440699999999996, -20.840700000000002, -20.58030000000001, -20.753199999999996, -20.931800000000003, -20.787599999999998, -20.650199999999998, -20.6708, -20.637599999999996, -20.394599999999997, -20.691499999999994, -20.675799999999995, -20.517200000000003, -20.724099999999996, -20.6558, -20.712600000000002, -20.8977], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37231814204982683, "mean_inference_ms": 146.4944298922982, "mean_action_processing_ms": 0.051748903471357986, "mean_env_wait_ms": 0.7760458067240851, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 84000, "agent_timesteps_total": 84000, "timers": {"sample_time_ms": 590823.621, "sample_throughput": 6.77, "learn_time_ms": 1294577.831, "learn_throughput": 3.09, "update_time_ms": 13.489}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23021459608571604, "policy_loss": -0.24111864436417818, "vf_loss": 0.008742399437551285, "vf_explained_var": 0.9998021125793457, "kl": 0.007205489528132603, "entropy": 693.1045742034912, "entropy_coeff": 0.0}}}, "num_steps_sampled": 84000, "num_agent_steps_sampled": 84000, "num_steps_trained": 84000}, "done": false, "episodes_total": 1680, "training_iteration": 21, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_02-47-02", "timestamp": 1618447622, "time_this_iter_s": 1885.1406903266907, "time_total_s": 39594.37682771683, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 39594.37682771683, "timesteps_since_restore": 0, "iterations_since_restore": 21, "perf": {"cpu_util_percent": 26.775604313871323, "ram_util_percent": 29.63756043138713}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.2949, "episode_reward_min": -21.080000000000002, "episode_reward_mean": -20.651984000000002, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.72699999999999, -20.621899999999993, -20.543699999999998, -20.440699999999996, -20.840700000000002, -20.58030000000001, -20.753199999999996, -20.931800000000003, -20.787599999999998, -20.650199999999998, -20.6708, -20.637599999999996, -20.394599999999997, -20.691499999999994, -20.675799999999995, -20.517200000000003, -20.724099999999996, -20.6558, -20.712600000000002, -20.8977, -20.905799999999996, -20.88510000000001, -20.849999999999998, -20.725900000000006, -20.495499999999993, -20.550299999999996, -20.5313, -20.453399999999995, -20.6975, -20.454500000000003, -20.374100000000002, -20.705000000000005, -20.5562, -20.653600000000004, -20.6207, -20.602799999999995, -20.803299999999997, -20.408700000000007, -20.6155, -20.439799999999995, -20.813800000000004, -20.586199999999998, -20.4962, -20.708399999999994, -20.5795, -20.652399999999997, -20.613999999999997, -20.553200000000004, -20.555, -20.736400000000003, -20.4092, -20.604499999999994, -21.080000000000002, -20.734, -20.480899999999995, -20.350399999999997, -20.56959999999999, -20.3506, -20.778000000000002, -20.612399999999997, -20.377699999999994, -20.752000000000006, -20.6306, -20.347800000000003, -20.703200000000002, -20.639400000000002, -20.6298, -20.711700000000004, -20.792000000000005, -20.76309999999999, -20.90930000000001, -20.764699999999994, -20.768799999999995, -20.8385, -20.909599999999998, -20.488100000000003, -20.899600000000003, -20.2949, -20.382599999999996, -20.8241, -20.9891, -20.94130000000001, -20.611400000000003, -20.6568, -20.568300000000008, -20.503499999999995, -20.63349999999999, -20.5703, -20.481299999999994, -20.7006, -20.634899999999995, -20.7672, -20.790500000000005, -20.747700000000005, -20.9361, -20.45040000000001, -20.450499999999998, -20.773400000000002, -20.913999999999998, -20.601599999999994], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37220076654045364, "mean_inference_ms": 146.49244860530933, "mean_action_processing_ms": 0.051695074157213236, "mean_env_wait_ms": 0.7722438259696495, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 88000, "agent_timesteps_total": 88000, "timers": {"sample_time_ms": 590846.168, "sample_throughput": 6.77, "learn_time_ms": 1294480.117, "learn_throughput": 3.09, "update_time_ms": 13.482}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.22913688281551003, "policy_loss": -0.2447925927117467, "vf_loss": 0.013742832656589599, "vf_explained_var": 0.999664306640625, "kl": 0.006376264849677682, "entropy": 693.1039714813232, "entropy_coeff": 0.0}}}, "num_steps_sampled": 88000, "num_agent_steps_sampled": 88000, "num_steps_trained": 88000}, "done": false, "episodes_total": 1760, "training_iteration": 22, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_03-18-27", "timestamp": 1618449507, "time_this_iter_s": 1884.712951183319, "time_total_s": 41479.08977890015, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 41479.08977890015, "timesteps_since_restore": 0, "iterations_since_restore": 22, "perf": {"cpu_util_percent": 27.00397917441428, "ram_util_percent": 29.44421718110822}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.2747, "episode_reward_min": -21.059899999999995, "episode_reward_mean": -20.651522, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.9891, -20.94130000000001, -20.611400000000003, -20.6568, -20.568300000000008, -20.503499999999995, -20.63349999999999, -20.5703, -20.481299999999994, -20.7006, -20.634899999999995, -20.7672, -20.790500000000005, -20.747700000000005, -20.9361, -20.45040000000001, -20.450499999999998, -20.773400000000002, -20.913999999999998, -20.601599999999994, -20.627, -20.90020000000001, -20.811599999999995, -20.434500000000003, -20.510900000000007, -20.360699999999994, -20.376100000000005, -20.334500000000006, -20.4259, -20.441699999999997, -20.462800000000005, -20.777899999999995, -20.640800000000002, -20.911500000000007, -20.2863, -20.712799999999998, -20.524800000000003, -20.7699, -20.9131, -20.628399999999996, -20.468899999999994, -20.5118, -21.059899999999995, -20.7822, -20.5084, -20.6039, -20.717399999999994, -20.652100000000004, -20.409, -20.5721, -20.2747, -20.7767, -20.996199999999995, -20.3958, -20.7538, -20.8908, -20.645699999999998, -20.6294, -20.639799999999997, -20.361600000000006, -20.4818, -20.712500000000002, -20.524199999999997, -20.808900000000005, -20.814600000000002, -20.328, -20.828699999999998, -20.389799999999997, -20.839399999999994, -20.935500000000005, -20.310600000000008, -20.6445, -20.412899999999997, -20.692000000000004, -20.514599999999998, -20.6826, -20.69189999999999, -20.591500000000003, -20.709100000000003, -20.843699999999995, -20.734399999999994, -20.6555, -20.895199999999996, -20.872099999999993, -20.5582, -20.58279999999999, -20.692899999999998, -20.778200000000005, -20.540800000000004, -20.664600000000004, -20.761799999999997, -20.636799999999997, -20.709900000000005, -20.528599999999997, -20.79220000000001, -20.932300000000005, -20.780999999999995, -20.77520000000001, -20.571300000000004, -20.707599999999996], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37207648190559295, "mean_inference_ms": 146.4836185377606, "mean_action_processing_ms": 0.051660624355717104, "mean_env_wait_ms": 0.7680276402162951, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 92000, "agent_timesteps_total": 92000, "timers": {"sample_time_ms": 590713.665, "sample_throughput": 6.771, "learn_time_ms": 1294515.584, "learn_throughput": 3.09, "update_time_ms": 13.49}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2204457934712991, "policy_loss": -0.23550260055344552, "vf_loss": 0.013037414355153487, "vf_explained_var": 0.9998165369033813, "kl": 0.006731304645654745, "entropy": 693.103437423706, "entropy_coeff": 0.0}}}, "num_steps_sampled": 92000, "num_agent_steps_sampled": 92000, "num_steps_trained": 92000}, "done": false, "episodes_total": 1840, "training_iteration": 23, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_03-49-51", "timestamp": 1618451391, "time_this_iter_s": 1884.2020180225372, "time_total_s": 43363.291796922684, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 43363.291796922684, "timesteps_since_restore": 0, "iterations_since_restore": 23, "perf": {"cpu_util_percent": 26.843452380952375, "ram_util_percent": 29.639880952380953}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.250099999999996, "episode_reward_min": -20.9645, "episode_reward_mean": -20.646427000000003, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.734399999999994, -20.6555, -20.895199999999996, -20.872099999999993, -20.5582, -20.58279999999999, -20.692899999999998, -20.778200000000005, -20.540800000000004, -20.664600000000004, -20.761799999999997, -20.636799999999997, -20.709900000000005, -20.528599999999997, -20.79220000000001, -20.932300000000005, -20.780999999999995, -20.77520000000001, -20.571300000000004, -20.707599999999996, -20.807, -20.405400000000004, -20.793400000000002, -20.6638, -20.524299999999997, -20.568399999999997, -20.638299999999994, -20.590400000000006, -20.9645, -20.781800000000004, -20.5452, -20.6964, -20.607900000000004, -20.700600000000005, -20.940299999999997, -20.337100000000003, -20.755, -20.510299999999997, -20.838699999999996, -20.9504, -20.6723, -20.4955, -20.534100000000006, -20.5922, -20.5429, -20.793400000000002, -20.6867, -20.7712, -20.294099999999997, -20.765299999999996, -20.392899999999994, -20.616200000000003, -20.612500000000008, -20.777300000000004, -20.7539, -20.685, -20.510299999999997, -20.5454, -20.566100000000006, -20.581100000000003, -20.5265, -20.5183, -20.851100000000002, -20.579399999999996, -20.655700000000007, -20.767300000000002, -20.679999999999996, -20.576299999999996, -20.914300000000008, -20.625499999999995, -20.572999999999993, -20.835399999999996, -20.677699999999998, -20.950299999999995, -20.492900000000002, -20.548, -20.528699999999994, -20.642800000000005, -20.621000000000002, -20.250099999999996, -20.513399999999997, -20.722000000000005, -20.473599999999998, -20.5183, -20.456100000000003, -20.6833, -20.6152, -20.522999999999996, -20.66930000000001, -20.6982, -20.698999999999998, -20.559700000000007, -20.751400000000004, -20.5525, -20.495000000000005, -20.7327, -20.58020000000001, -20.403500000000008, -20.724299999999996, -20.478700000000003], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3720181321412698, "mean_inference_ms": 146.4825154364355, "mean_action_processing_ms": 0.05163975554895129, "mean_env_wait_ms": 0.7658623055343395, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 96000, "agent_timesteps_total": 96000, "timers": {"sample_time_ms": 590671.241, "sample_throughput": 6.772, "learn_time_ms": 1294425.078, "learn_throughput": 3.09, "update_time_ms": 13.468}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.22703030903358012, "policy_loss": -0.2456137929111719, "vf_loss": 0.016736622318944683, "vf_explained_var": 0.9997180700302124, "kl": 0.006156224117148668, "entropy": 693.103328704834, "entropy_coeff": 0.0}}}, "num_steps_sampled": 96000, "num_agent_steps_sampled": 96000, "num_steps_trained": 96000}, "done": false, "episodes_total": 1920, "training_iteration": 24, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_04-21-16", "timestamp": 1618453276, "time_this_iter_s": 1884.6912705898285, "time_total_s": 45247.98306751251, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 45247.98306751251, "timesteps_since_restore": 0, "iterations_since_restore": 24, "perf": {"cpu_util_percent": 27.08958333333333, "ram_util_percent": 29.447209821428572}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.306900000000002, "episode_reward_min": -21.026100000000003, "episode_reward_mean": -20.653594000000002, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.513399999999997, -20.722000000000005, -20.473599999999998, -20.5183, -20.456100000000003, -20.6833, -20.6152, -20.522999999999996, -20.66930000000001, -20.6982, -20.698999999999998, -20.559700000000007, -20.751400000000004, -20.5525, -20.495000000000005, -20.7327, -20.58020000000001, -20.403500000000008, -20.724299999999996, -20.478700000000003, -20.735899999999997, -20.513600000000004, -20.5212, -20.623800000000003, -20.862700000000004, -20.908899999999996, -20.779200000000003, -20.749700000000008, -20.543699999999998, -20.544, -20.5393, -20.717800000000004, -20.546300000000002, -20.376200000000004, -20.7978, -20.709799999999998, -20.736400000000003, -20.8875, -20.69, -20.4827, -20.951899999999995, -20.7215, -20.785, -20.889100000000006, -20.8068, -20.699400000000004, -20.6416, -20.450600000000005, -20.487500000000004, -20.577399999999997, -20.625800000000005, -20.795100000000005, -20.905899999999995, -20.792000000000005, -20.590799999999998, -20.997099999999996, -20.7341, -20.651500000000006, -20.7268, -20.562999999999995, -20.458599999999997, -20.8479, -20.59509999999999, -20.549699999999998, -20.737100000000005, -20.4093, -20.731699999999993, -20.776899999999998, -21.026100000000003, -20.306900000000002, -20.653599999999997, -20.85, -20.6657, -20.541699999999995, -20.662399999999998, -20.526599999999995, -20.4875, -20.6178, -20.5532, -20.7443, -20.544700000000002, -20.707599999999996, -20.5616, -20.534999999999997, -20.669900000000005, -20.837699999999998, -20.646100000000004, -20.767999999999997, -20.669100000000004, -20.722500000000004, -20.511599999999994, -20.608900000000002, -20.65650000000001, -20.358899999999995, -20.694300000000002, -20.6197, -20.75690000000001, -20.4818, -20.973099999999995, -20.7846], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37197766664006465, "mean_inference_ms": 146.4774145898023, "mean_action_processing_ms": 0.051631167602646695, "mean_env_wait_ms": 0.7641696705807862, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 100000, "agent_timesteps_total": 100000, "timers": {"sample_time_ms": 590641.438, "sample_throughput": 6.772, "learn_time_ms": 1294629.659, "learn_throughput": 3.09, "update_time_ms": 13.471}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.24505892605520785, "policy_loss": -0.25163770373910666, "vf_loss": 0.0043459544943971196, "vf_explained_var": 0.9999750852584839, "kl": 0.0074427525396458805, "entropy": 693.1005153656006, "entropy_coeff": 0.0}}}, "num_steps_sampled": 100000, "num_agent_steps_sampled": 100000, "num_steps_trained": 100000}, "done": false, "episodes_total": 2000, "training_iteration": 25, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_04-52-42", "timestamp": 1618455162, "time_this_iter_s": 1886.431224822998, "time_total_s": 47134.41429233551, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 47134.41429233551, "timesteps_since_restore": 0, "iterations_since_restore": 25, "perf": {"cpu_util_percent": 26.831772575250838, "ram_util_percent": 29.639873652917128}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.1638, "episode_reward_min": -21.070999999999998, "episode_reward_mean": -20.647373, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.544700000000002, -20.707599999999996, -20.5616, -20.534999999999997, -20.669900000000005, -20.837699999999998, -20.646100000000004, -20.767999999999997, -20.669100000000004, -20.722500000000004, -20.511599999999994, -20.608900000000002, -20.65650000000001, -20.358899999999995, -20.694300000000002, -20.6197, -20.75690000000001, -20.4818, -20.973099999999995, -20.7846, -20.555100000000003, -20.887500000000003, -20.619900000000005, -20.765400000000007, -20.539099999999998, -20.575, -20.896000000000008, -20.649099999999997, -20.4386, -20.613400000000006, -20.52060000000001, -20.7533, -20.475800000000007, -20.591500000000003, -20.615500000000004, -20.5718, -21.070999999999998, -20.4907, -20.928499999999996, -20.791000000000007, -20.749400000000005, -20.703300000000002, -20.8434, -20.8607, -20.746100000000002, -20.735699999999998, -20.749899999999997, -20.65559999999999, -20.70079999999999, -20.6497, -20.838, -20.354899999999997, -20.484299999999998, -20.897799999999997, -20.6049, -20.4099, -20.8222, -20.710000000000008, -20.805699999999998, -20.560000000000002, -20.5287, -20.453000000000003, -20.6977, -20.637299999999996, -20.311999999999998, -20.8679, -20.792900000000003, -20.511800000000004, -20.406000000000002, -20.523200000000003, -20.495800000000003, -20.1638, -20.5302, -20.607199999999995, -20.733200000000004, -20.868100000000002, -20.799599999999998, -20.469100000000005, -20.8894, -20.70530000000001, -20.52949999999999, -20.695099999999996, -20.825100000000003, -20.623599999999993, -20.563100000000006, -20.340099999999993, -20.713200000000004, -20.756400000000006, -20.492900000000002, -20.436099999999993, -20.9083, -20.8438, -20.298799999999996, -20.732300000000002, -20.4938, -20.695500000000003, -20.580699999999997, -20.613699999999998, -20.6023, -20.6612], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3719153969666842, "mean_inference_ms": 146.47397799088122, "mean_action_processing_ms": 0.05161963447962034, "mean_env_wait_ms": 0.7619337913719016, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 104000, "agent_timesteps_total": 104000, "timers": {"sample_time_ms": 590610.64, "sample_throughput": 6.773, "learn_time_ms": 1294502.102, "learn_throughput": 3.09, "update_time_ms": 13.487}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23351323208771646, "policy_loss": -0.24198565888218582, "vf_loss": 0.006278803933128074, "vf_explained_var": 0.9999083280563354, "kl": 0.007312076704693027, "entropy": 693.0985622406006, "entropy_coeff": 0.0}}}, "num_steps_sampled": 104000, "num_agent_steps_sampled": 104000, "num_steps_trained": 104000}, "done": false, "episodes_total": 2080, "training_iteration": 26, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_05-24-06", "timestamp": 1618457046, "time_this_iter_s": 1883.8147802352905, "time_total_s": 49018.2290725708, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 49018.2290725708, "timesteps_since_restore": 0, "iterations_since_restore": 26, "perf": {"cpu_util_percent": 26.970476545048403, "ram_util_percent": 29.445085629188384}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.162900000000008, "episode_reward_min": -21.027199999999993, "episode_reward_mean": -20.658039000000002, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.52949999999999, -20.695099999999996, -20.825100000000003, -20.623599999999993, -20.563100000000006, -20.340099999999993, -20.713200000000004, -20.756400000000006, -20.492900000000002, -20.436099999999993, -20.9083, -20.8438, -20.298799999999996, -20.732300000000002, -20.4938, -20.695500000000003, -20.580699999999997, -20.613699999999998, -20.6023, -20.6612, -20.605400000000007, -20.606200000000005, -20.423200000000005, -20.7179, -20.738999999999997, -20.922499999999992, -20.6632, -20.545600000000004, -20.534499999999998, -20.770300000000006, -20.84130000000001, -20.503700000000006, -20.6631, -20.862600000000004, -20.6029, -20.6202, -20.575300000000002, -20.625099999999996, -20.162900000000008, -20.7805, -20.911599999999993, -20.497899999999998, -20.5343, -20.724699999999995, -20.665600000000005, -20.635000000000005, -20.3462, -20.967, -20.490599999999997, -20.569699999999997, -20.7309, -20.8107, -20.3881, -20.925600000000003, -20.761599999999998, -20.781999999999996, -20.520199999999992, -20.575800000000005, -20.552599999999998, -20.674500000000002, -20.472900000000006, -20.643900000000002, -20.6021, -20.630800000000004, -20.744699999999995, -20.5946, -20.627300000000005, -20.526999999999997, -20.751300000000004, -20.882500000000004, -20.834500000000013, -20.7641, -20.812099999999997, -20.550200000000004, -20.843999999999998, -20.597800000000003, -20.7717, -20.291100000000007, -20.842200000000002, -20.658700000000007, -20.689099999999996, -20.8179, -20.9966, -20.5302, -20.5492, -20.674100000000003, -20.577099999999998, -20.540900000000008, -20.8093, -20.445100000000007, -20.7704, -20.782699999999995, -21.027199999999993, -20.671400000000002, -20.644, -20.6123, -20.825400000000005, -21.0096, -20.6964, -20.456000000000003], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37189553798685315, "mean_inference_ms": 146.47269351387718, "mean_action_processing_ms": 0.051628252188133425, "mean_env_wait_ms": 0.7612565252318769, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 108000, "agent_timesteps_total": 108000, "timers": {"sample_time_ms": 590603.165, "sample_throughput": 6.773, "learn_time_ms": 1294467.489, "learn_throughput": 3.09, "update_time_ms": 13.51}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2271666378946975, "policy_loss": -0.23536799545399845, "vf_loss": 0.006127870509942568, "vf_explained_var": 0.9998664855957031, "kl": 0.006911609903909266, "entropy": 693.0976657867432, "entropy_coeff": 0.0}}}, "num_steps_sampled": 108000, "num_agent_steps_sampled": 108000, "num_steps_trained": 108000}, "done": false, "episodes_total": 2160, "training_iteration": 27, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_05-55-31", "timestamp": 1618458931, "time_this_iter_s": 1884.6371097564697, "time_total_s": 50902.86618232727, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 50902.86618232727, "timesteps_since_restore": 0, "iterations_since_restore": 27, "perf": {"cpu_util_percent": 26.984598214285718, "ram_util_percent": 29.64534970238096}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.271499999999996, "episode_reward_min": -21.0479, "episode_reward_mean": -20.651186, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.689099999999996, -20.8179, -20.9966, -20.5302, -20.5492, -20.674100000000003, -20.577099999999998, -20.540900000000008, -20.8093, -20.445100000000007, -20.7704, -20.782699999999995, -21.027199999999993, -20.671400000000002, -20.644, -20.6123, -20.825400000000005, -21.0096, -20.6964, -20.456000000000003, -20.528900000000004, -20.553799999999995, -20.271499999999996, -20.753600000000002, -20.534299999999998, -20.5148, -20.688299999999998, -20.664699999999993, -20.894299999999994, -20.518900000000006, -21.0479, -20.691599999999994, -20.788000000000004, -20.673200000000005, -20.7389, -20.452400000000004, -20.7225, -20.611, -20.606899999999992, -20.578200000000002, -20.6855, -20.55290000000001, -20.501499999999993, -20.53140000000001, -20.3272, -20.421199999999995, -20.690699999999993, -20.594599999999993, -20.444399999999998, -20.671100000000003, -20.787899999999997, -20.772499999999997, -20.7276, -20.518399999999996, -20.616399999999995, -20.577800000000007, -20.41819999999999, -20.696100000000005, -20.737599999999997, -20.765099999999997, -20.336999999999996, -20.697400000000005, -20.7081, -20.997400000000003, -20.547800000000002, -20.5982, -20.6952, -20.5272, -20.43449999999999, -20.758900000000008, -20.851100000000002, -20.796099999999992, -20.572000000000003, -20.3553, -20.514799999999994, -20.5193, -20.83640000000001, -20.504, -20.708, -20.975899999999996, -20.6539, -20.5016, -20.8585, -20.7377, -20.505, -20.425100000000004, -20.713200000000004, -20.711699999999993, -20.5312, -20.824299999999994, -20.721000000000004, -20.699799999999993, -20.702999999999992, -20.618199999999998, -20.599900000000005, -20.9962, -20.5024, -20.5589, -20.615399999999994, -20.7103], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37191083426048477, "mean_inference_ms": 146.47502770617092, "mean_action_processing_ms": 0.051648534265781816, "mean_env_wait_ms": 0.7618702181543923, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 112000, "agent_timesteps_total": 112000, "timers": {"sample_time_ms": 590636.73, "sample_throughput": 6.772, "learn_time_ms": 1294418.434, "learn_throughput": 3.09, "update_time_ms": 13.523}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.22437080671079457, "policy_loss": -0.23887600330635905, "vf_loss": 0.012328344345519326, "vf_explained_var": 0.9998055100440979, "kl": 0.007256159296957776, "entropy": 693.0950126647949, "entropy_coeff": 0.0}}}, "num_steps_sampled": 112000, "num_agent_steps_sampled": 112000, "num_steps_trained": 112000}, "done": false, "episodes_total": 2240, "training_iteration": 28, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_06-26-57", "timestamp": 1618460817, "time_this_iter_s": 1886.25368475914, "time_total_s": 52789.11986708641, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 52789.11986708641, "timesteps_since_restore": 0, "iterations_since_restore": 28, "perf": {"cpu_util_percent": 26.77447045707915, "ram_util_percent": 29.437495354886654}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.21200000000001, "episode_reward_min": -21.102599999999995, "episode_reward_mean": -20.645587999999996, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.6539, -20.5016, -20.8585, -20.7377, -20.505, -20.425100000000004, -20.713200000000004, -20.711699999999993, -20.5312, -20.824299999999994, -20.721000000000004, -20.699799999999993, -20.702999999999992, -20.618199999999998, -20.599900000000005, -20.9962, -20.5024, -20.5589, -20.615399999999994, -20.7103, -20.2999, -20.5845, -20.316799999999997, -20.992499999999996, -20.643000000000004, -20.964000000000002, -20.463799999999996, -20.662499999999998, -20.909099999999995, -20.596, -20.587600000000005, -21.102599999999995, -20.532200000000003, -20.626199999999994, -21.0096, -20.621000000000002, -20.728099999999994, -20.746199999999998, -20.5831, -20.770699999999994, -20.57520000000001, -20.21200000000001, -20.411299999999997, -20.41539999999999, -20.343200000000007, -20.7048, -20.5277, -20.9486, -20.664199999999997, -20.872600000000006, -20.337800000000005, -20.6966, -20.451000000000004, -20.3813, -20.658900000000003, -20.8479, -20.620500000000003, -20.637999999999998, -20.7021, -20.863100000000003, -20.9478, -20.798900000000007, -20.8589, -20.46779999999999, -20.524099999999997, -20.741600000000005, -20.8222, -20.556099999999994, -20.8504, -20.7848, -20.576500000000006, -20.571199999999997, -20.725500000000004, -20.329000000000004, -20.620000000000005, -20.3525, -20.532999999999994, -20.641099999999998, -20.583400000000005, -20.532099999999996, -20.246100000000006, -20.737699999999997, -20.620000000000005, -20.96699999999999, -20.508499999999994, -20.735099999999992, -20.636699999999998, -20.515399999999993, -20.7405, -20.841600000000003, -20.595999999999993, -20.4998, -20.4108, -20.541600000000003, -20.7304, -20.804000000000002, -20.825499999999998, -20.6091, -20.739199999999997, -20.637999999999998], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37195476938472055, "mean_inference_ms": 146.4861464011315, "mean_action_processing_ms": 0.05166549823905425, "mean_env_wait_ms": 0.7625802972881245, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 116000, "agent_timesteps_total": 116000, "timers": {"sample_time_ms": 590819.741, "sample_throughput": 6.77, "learn_time_ms": 1294479.603, "learn_throughput": 3.09, "update_time_ms": 13.496}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.22803771495819092, "policy_loss": -0.24155639577656984, "vf_loss": 0.01152498211973807, "vf_explained_var": 0.9998551607131958, "kl": 0.006645675748586655, "entropy": 693.0952110290527, "entropy_coeff": 0.0}}}, "num_steps_sampled": 116000, "num_agent_steps_sampled": 116000, "num_steps_trained": 116000}, "done": false, "episodes_total": 2320, "training_iteration": 29, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_06-58-25", "timestamp": 1618462705, "time_this_iter_s": 1887.4379031658173, "time_total_s": 54676.55777025223, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 54676.55777025223, "timesteps_since_restore": 0, "iterations_since_restore": 29, "perf": {"cpu_util_percent": 27.569205052005945, "ram_util_percent": 29.65583209509659}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.226, "episode_reward_min": -20.96699999999999, "episode_reward_mean": -20.630081, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.246100000000006, -20.737699999999997, -20.620000000000005, -20.96699999999999, -20.508499999999994, -20.735099999999992, -20.636699999999998, -20.515399999999993, -20.7405, -20.841600000000003, -20.595999999999993, -20.4998, -20.4108, -20.541600000000003, -20.7304, -20.804000000000002, -20.825499999999998, -20.6091, -20.739199999999997, -20.637999999999998, -20.546400000000006, -20.6222, -20.606499999999997, -20.656, -20.6119, -20.226, -20.779800000000005, -20.591700000000003, -20.6043, -20.4491, -20.458700000000004, -20.836000000000002, -20.5581, -20.536900000000003, -20.5657, -20.934299999999997, -20.656100000000002, -20.820999999999998, -20.939999999999994, -20.641799999999996, -20.597700000000003, -20.457799999999995, -20.845100000000013, -20.763399999999997, -20.686100000000007, -20.867900000000002, -20.6344, -20.635800000000003, -20.565199999999997, -20.578499999999995, -20.659099999999995, -20.707199999999997, -20.864900000000002, -20.317499999999995, -20.668599999999998, -20.424400000000002, -20.5346, -20.7709, -20.3914, -20.650800000000004, -20.7058, -20.5503, -20.710900000000002, -20.723099999999995, -20.602000000000004, -20.584600000000012, -20.381600000000002, -20.848499999999994, -20.608599999999996, -20.553900000000006, -20.5257, -20.7845, -20.6438, -20.5725, -20.607300000000006, -20.958000000000002, -20.816999999999997, -20.6786, -20.602999999999998, -20.796899999999997, -20.541499999999996, -20.555300000000003, -20.950200000000006, -20.5739, -20.584400000000002, -20.298700000000004, -20.514300000000002, -20.451999999999998, -20.6056, -20.7993, -20.6982, -20.6855, -20.625699999999995, -20.258000000000003, -20.757699999999996, -20.417399999999994, -20.5616, -20.282900000000005, -20.661800000000003, -20.720699999999997], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3718943086902016, "mean_inference_ms": 146.48624443301705, "mean_action_processing_ms": 0.051639500890381634, "mean_env_wait_ms": 0.760039904780308, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 120000, "agent_timesteps_total": 120000, "timers": {"sample_time_ms": 590699.732, "sample_throughput": 6.772, "learn_time_ms": 1294603.234, "learn_throughput": 3.09, "update_time_ms": 13.484}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23356322071049362, "policy_loss": -0.24178679031319916, "vf_loss": 0.006181207423878732, "vf_explained_var": 0.999911367893219, "kl": 0.006807861267589033, "entropy": 693.0959243774414, "entropy_coeff": 0.0}}}, "num_steps_sampled": 120000, "num_agent_steps_sampled": 120000, "num_steps_trained": 120000}, "done": false, "episodes_total": 2400, "training_iteration": 30, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_07-29-51", "timestamp": 1618464591, "time_this_iter_s": 1885.9294044971466, "time_total_s": 56562.487174749374, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 56562.487174749374, "timesteps_since_restore": 0, "iterations_since_restore": 30, "perf": {"cpu_util_percent": 26.782000743770915, "ram_util_percent": 29.4387132763109}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.152399999999997, "episode_reward_min": -21.055300000000006, "episode_reward_mean": -20.62201, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.541499999999996, -20.555300000000003, -20.950200000000006, -20.5739, -20.584400000000002, -20.298700000000004, -20.514300000000002, -20.451999999999998, -20.6056, -20.7993, -20.6982, -20.6855, -20.625699999999995, -20.258000000000003, -20.757699999999996, -20.417399999999994, -20.5616, -20.282900000000005, -20.661800000000003, -20.720699999999997, -20.5394, -20.445499999999996, -20.517799999999994, -20.8352, -20.759900000000005, -20.85260000000001, -20.8855, -20.99170000000001, -20.7612, -20.264599999999998, -20.5907, -20.486, -20.644499999999994, -20.7472, -20.6339, -20.549500000000002, -20.6392, -20.474900000000005, -20.4381, -20.6468, -20.575699999999998, -20.545699999999997, -20.772800000000004, -20.455700000000007, -20.947299999999995, -20.5237, -20.8185, -20.686199999999996, -20.783200000000004, -20.642599999999998, -20.3179, -20.721299999999992, -20.669999999999998, -20.779799999999998, -20.309299999999997, -20.505499999999998, -20.550800000000002, -20.213, -20.3414, -20.933399999999995, -20.8091, -20.428700000000003, -20.538900000000005, -20.906000000000002, -20.495500000000007, -20.511499999999995, -20.700999999999997, -20.681400000000004, -20.527699999999996, -20.78849999999999, -20.4166, -20.724399999999996, -20.6956, -20.7078, -21.055300000000006, -20.522200000000005, -20.6732, -20.1767, -20.87369999999999, -20.152399999999997, -20.717200000000002, -20.578699999999998, -20.733699999999995, -20.460600000000003, -20.691300000000005, -20.7368, -20.5988, -20.770700000000005, -20.689099999999996, -20.927700000000005, -20.6413, -20.643100000000004, -20.6838, -20.616100000000007, -20.604999999999993, -20.595600000000005, -20.806199999999997, -20.933500000000002, -20.5035, -20.539399999999993], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3718804121057323, "mean_inference_ms": 146.48616796362495, "mean_action_processing_ms": 0.05162338718050135, "mean_env_wait_ms": 0.7589205297277304, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 124000, "agent_timesteps_total": 124000, "timers": {"sample_time_ms": 590744.976, "sample_throughput": 6.771, "learn_time_ms": 1294572.125, "learn_throughput": 3.09, "update_time_ms": 13.499}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.24098415474873036, "policy_loss": -0.245600143680349, "vf_loss": 0.0024413660881918986, "vf_explained_var": 0.999980092048645, "kl": 0.007248725029057823, "entropy": 693.0940208435059, "entropy_coeff": 0.0}}}, "num_steps_sampled": 124000, "num_agent_steps_sampled": 124000, "num_steps_trained": 124000}, "done": false, "episodes_total": 2480, "training_iteration": 31, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_08-01-16", "timestamp": 1618466476, "time_this_iter_s": 1885.282462835312, "time_total_s": 58447.769637584686, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 58447.769637584686, "timesteps_since_restore": 0, "iterations_since_restore": 31, "perf": {"cpu_util_percent": 26.98252788104089, "ram_util_percent": 29.642899628252785}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -19.977499999999996, "episode_reward_min": -21.0362, "episode_reward_mean": -20.645734, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.717200000000002, -20.578699999999998, -20.733699999999995, -20.460600000000003, -20.691300000000005, -20.7368, -20.5988, -20.770700000000005, -20.689099999999996, -20.927700000000005, -20.6413, -20.643100000000004, -20.6838, -20.616100000000007, -20.604999999999993, -20.595600000000005, -20.806199999999997, -20.933500000000002, -20.5035, -20.539399999999993, -20.7804, -20.509500000000003, -20.777400000000004, -20.8085, -20.745400000000004, -20.727999999999998, -20.573000000000004, -20.802999999999997, -20.6271, -20.617400000000004, -20.29140000000001, -20.6356, -20.8457, -20.5781, -20.578399999999995, -20.950400000000002, -20.775, -20.855099999999997, -20.7033, -20.600700000000003, -20.568799999999996, -20.598, -20.589799999999997, -20.426999999999996, -20.7894, -20.800799999999995, -21.011900000000004, -20.430200000000003, -20.5091, -20.642599999999995, -21.0362, -20.77, -20.233600000000003, -20.767100000000006, -20.556099999999994, -20.791700000000006, -20.651999999999994, -20.4256, -20.5178, -20.632299999999997, -20.756400000000006, -20.589899999999997, -20.879700000000003, -20.45680000000001, -20.804400000000008, -20.976799999999997, -20.758100000000002, -20.8751, -20.650600000000008, -20.713900000000006, -20.444799999999994, -20.454700000000003, -20.7469, -20.745099999999994, -20.477000000000007, -20.587499999999995, -20.6436, -20.6749, -20.748700000000007, -20.682500000000005, -20.868700000000004, -20.4481, -20.592, -20.381200000000003, -20.485300000000002, -20.6806, -20.5068, -19.977499999999996, -20.427999999999994, -20.5495, -20.508200000000002, -20.480900000000002, -20.856, -20.4844, -20.789300000000004, -20.429899999999993, -20.623900000000003, -20.407900000000005, -20.683099999999996, -20.719200000000008], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3718800446515243, "mean_inference_ms": 146.4882603809952, "mean_action_processing_ms": 0.05162081872151596, "mean_env_wait_ms": 0.7589376315352115, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 128000, "agent_timesteps_total": 128000, "timers": {"sample_time_ms": 590819.542, "sample_throughput": 6.77, "learn_time_ms": 1294609.528, "learn_throughput": 3.09, "update_time_ms": 13.468}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2359600713243708, "policy_loss": -0.24043913558125496, "vf_loss": 0.002357352882697228, "vf_explained_var": 0.9999706745147705, "kl": 0.007072385531500913, "entropy": 693.0943641662598, "entropy_coeff": 0.0}}}, "num_steps_sampled": 128000, "num_agent_steps_sampled": 128000, "num_steps_trained": 128000}, "done": false, "episodes_total": 2560, "training_iteration": 32, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_08-32-42", "timestamp": 1618468362, "time_this_iter_s": 1885.831934928894, "time_total_s": 60333.60157251358, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 60333.60157251358, "timesteps_since_restore": 0, "iterations_since_restore": 32, "perf": {"cpu_util_percent": 26.784163568773238, "ram_util_percent": 29.43758364312268}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -19.977499999999996, "episode_reward_min": -21.0845, "episode_reward_mean": -20.643293, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.868700000000004, -20.4481, -20.592, -20.381200000000003, -20.485300000000002, -20.6806, -20.5068, -19.977499999999996, -20.427999999999994, -20.5495, -20.508200000000002, -20.480900000000002, -20.856, -20.4844, -20.789300000000004, -20.429899999999993, -20.623900000000003, -20.407900000000005, -20.683099999999996, -20.719200000000008, -20.707300000000004, -20.874900000000004, -20.43759999999999, -20.669500000000003, -20.940999999999995, -20.38019999999999, -20.687299999999997, -20.796300000000002, -20.639799999999997, -20.752799999999997, -20.549700000000005, -20.601300000000002, -21.0845, -20.7452, -20.540100000000006, -20.609, -20.552199999999996, -20.97379999999999, -20.5487, -20.639600000000005, -20.61589999999999, -20.23999999999999, -20.609700000000007, -20.6876, -20.558700000000005, -20.717899999999997, -20.864399999999996, -20.606499999999997, -20.7214, -20.8402, -20.926899999999996, -20.447999999999993, -20.5915, -20.563499999999998, -20.235300000000002, -20.9252, -20.509999999999987, -20.398600000000005, -20.7282, -20.574300000000004, -20.969300000000004, -20.524999999999995, -20.56620000000001, -20.49940000000001, -20.622, -20.770799999999998, -20.705999999999996, -20.6761, -20.490599999999997, -20.64, -20.710299999999997, -20.4398, -20.4676, -20.3626, -20.769499999999997, -20.514100000000003, -20.480800000000002, -20.813499999999998, -20.348699999999994, -20.7147, -20.6766, -20.6091, -20.938399999999998, -20.708999999999993, -20.9782, -20.7891, -20.939600000000002, -20.971999999999994, -20.632499999999993, -20.827099999999998, -20.5111, -20.9131, -20.7309, -20.838500000000003, -20.874799999999997, -20.843299999999992, -20.380300000000002, -20.663600000000002, -20.623000000000005, -20.797000000000004], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37188675345173167, "mean_inference_ms": 146.49017984034091, "mean_action_processing_ms": 0.05162005625274174, "mean_env_wait_ms": 0.7593177601822154, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 132000, "agent_timesteps_total": 132000, "timers": {"sample_time_ms": 590981.02, "sample_throughput": 6.768, "learn_time_ms": 1294724.918, "learn_throughput": 3.089, "update_time_ms": 13.455}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.234774524345994, "policy_loss": -0.23885373584926128, "vf_loss": 0.001946693868433158, "vf_explained_var": 0.9999779462814331, "kl": 0.007108403107849881, "entropy": 693.0925235748291, "entropy_coeff": 0.0}}}, "num_steps_sampled": 132000, "num_agent_steps_sampled": 132000, "num_steps_trained": 132000}, "done": false, "episodes_total": 2640, "training_iteration": 33, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_09-04-09", "timestamp": 1618470249, "time_this_iter_s": 1886.9706511497498, "time_total_s": 62220.57222366333, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 62220.57222366333, "timesteps_since_restore": 0, "iterations_since_restore": 33, "perf": {"cpu_util_percent": 27.019620958751396, "ram_util_percent": 29.64604236343367}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.2089, "episode_reward_min": -21.061700000000002, "episode_reward_mean": -20.702777, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.6766, -20.6091, -20.938399999999998, -20.708999999999993, -20.9782, -20.7891, -20.939600000000002, -20.971999999999994, -20.632499999999993, -20.827099999999998, -20.5111, -20.9131, -20.7309, -20.838500000000003, -20.874799999999997, -20.843299999999992, -20.380300000000002, -20.663600000000002, -20.623000000000005, -20.797000000000004, -20.642699999999998, -20.749599999999997, -20.619199999999992, -20.615899999999996, -20.9453, -20.7062, -20.6149, -20.734200000000005, -20.597400000000004, -20.686400000000003, -20.616199999999992, -20.724199999999993, -20.658699999999996, -20.770100000000006, -21.035500000000003, -20.7365, -20.821299999999997, -20.589999999999996, -20.405099999999997, -20.6821, -20.639100000000003, -20.809599999999996, -20.6396, -20.601200000000002, -20.697000000000003, -20.692899999999998, -20.927000000000003, -20.9845, -20.739900000000002, -20.806800000000006, -20.962699999999998, -20.5703, -20.400700000000008, -20.43569999999999, -20.927799999999994, -20.879199999999994, -20.480800000000006, -20.69110000000001, -20.92850000000001, -20.725099999999994, -20.527900000000002, -20.386000000000003, -20.6214, -20.552799999999994, -20.733500000000003, -20.446099999999994, -21.061700000000002, -20.573799999999995, -20.419400000000003, -20.7762, -20.4517, -20.4713, -20.764200000000002, -20.7655, -20.9589, -20.2089, -20.6276, -20.790900000000008, -20.97840000000001, -20.491400000000002, -20.600200000000005, -20.586000000000002, -20.8918, -20.557099999999995, -20.746299999999994, -20.9583, -20.796900000000004, -20.729400000000002, -20.625, -20.570899999999998, -20.72509999999999, -20.6412, -20.567600000000002, -20.658, -20.673199999999987, -20.706999999999997, -20.882399999999997, -20.658900000000006, -20.667699999999996, -20.6889], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3718609029391504, "mean_inference_ms": 146.4879346519725, "mean_action_processing_ms": 0.05160436167189674, "mean_env_wait_ms": 0.7578940290878041, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 136000, "agent_timesteps_total": 136000, "timers": {"sample_time_ms": 590914.494, "sample_throughput": 6.769, "learn_time_ms": 1294727.786, "learn_throughput": 3.089, "update_time_ms": 13.461}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.238634996348992, "policy_loss": -0.24191599106416106, "vf_loss": 0.0012020758932465014, "vf_explained_var": 0.9999827146530151, "kl": 0.006929741895874031, "entropy": 693.0900058746338, "entropy_coeff": 0.0}}}, "num_steps_sampled": 136000, "num_agent_steps_sampled": 136000, "num_steps_trained": 136000}, "done": false, "episodes_total": 2720, "training_iteration": 34, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_09-35-33", "timestamp": 1618472133, "time_this_iter_s": 1884.054898262024, "time_total_s": 64104.627121925354, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 64104.627121925354, "timesteps_since_restore": 0, "iterations_since_restore": 34, "perf": {"cpu_util_percent": 26.750725716412354, "ram_util_percent": 29.437848902121324}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.232300000000013, "episode_reward_min": -21.031599999999997, "episode_reward_mean": -20.68259, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.600200000000005, -20.586000000000002, -20.8918, -20.557099999999995, -20.746299999999994, -20.9583, -20.796900000000004, -20.729400000000002, -20.625, -20.570899999999998, -20.72509999999999, -20.6412, -20.567600000000002, -20.658, -20.673199999999987, -20.706999999999997, -20.882399999999997, -20.658900000000006, -20.667699999999996, -20.6889, -20.762900000000005, -20.939, -20.87660000000001, -20.4835, -20.618100000000005, -20.939199999999996, -20.724200000000003, -20.696900000000003, -20.7555, -20.6208, -20.8452, -20.493400000000005, -20.468100000000007, -20.933600000000006, -20.358600000000003, -20.645300000000002, -20.539900000000006, -20.523999999999997, -20.726100000000002, -20.657999999999998, -20.629, -20.864800000000002, -20.590099999999996, -20.703699999999994, -20.7235, -20.631500000000003, -20.773199999999996, -20.6727, -20.580700000000007, -20.413800000000002, -20.656199999999995, -20.3036, -20.858199999999997, -20.473999999999993, -20.518099999999997, -20.232300000000013, -20.8094, -20.583899999999996, -20.509900000000005, -20.477700000000002, -20.636000000000003, -20.8778, -20.760999999999996, -20.720700000000008, -20.7967, -20.59240000000001, -20.664599999999997, -20.504399999999997, -20.549899999999997, -20.834199999999996, -20.901599999999995, -20.6549, -20.591799999999992, -20.689200000000007, -20.831400000000002, -20.849500000000003, -20.701, -20.6967, -20.84230000000001, -20.634400000000003, -20.768300000000004, -20.696300000000004, -20.673900000000003, -20.66689999999999, -20.681799999999996, -20.904500000000006, -20.687899999999996, -20.554800000000004, -20.350700000000003, -20.945099999999993, -20.8168, -20.9407, -20.525500000000005, -20.70460000000001, -20.902099999999997, -20.600300000000004, -21.031599999999997, -20.5065, -20.649300000000004, -20.773800000000005], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37189264629160573, "mean_inference_ms": 146.49089725451532, "mean_action_processing_ms": 0.05160277373810222, "mean_env_wait_ms": 0.7582165202808665, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 140000, "agent_timesteps_total": 140000, "timers": {"sample_time_ms": 591073.895, "sample_throughput": 6.767, "learn_time_ms": 1294636.839, "learn_throughput": 3.09, "update_time_ms": 13.482}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.24194179032929242, "policy_loss": -0.2456299535697326, "vf_loss": 0.0015039521427979707, "vf_explained_var": 0.9999885559082031, "kl": 0.007280703794094734, "entropy": 693.0890846252441, "entropy_coeff": 0.0}}}, "num_steps_sampled": 140000, "num_agent_steps_sampled": 140000, "num_steps_trained": 140000}, "done": false, "episodes_total": 2800, "training_iteration": 35, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_10-07-00", "timestamp": 1618474020, "time_this_iter_s": 1887.1161501407623, "time_total_s": 65991.74327206612, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 65991.74327206612, "timesteps_since_restore": 0, "iterations_since_restore": 35, "perf": {"cpu_util_percent": 27.1092867756315, "ram_util_percent": 29.64706537890045}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.128700000000006, "episode_reward_min": -21.031599999999997, "episode_reward_mean": -20.649520000000003, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.768300000000004, -20.696300000000004, -20.673900000000003, -20.66689999999999, -20.681799999999996, -20.904500000000006, -20.687899999999996, -20.554800000000004, -20.350700000000003, -20.945099999999993, -20.8168, -20.9407, -20.525500000000005, -20.70460000000001, -20.902099999999997, -20.600300000000004, -21.031599999999997, -20.5065, -20.649300000000004, -20.773800000000005, -20.801100000000005, -20.666700000000006, -20.452500000000004, -20.6779, -20.6444, -20.5796, -20.800500000000007, -20.6216, -20.637500000000003, -20.351299999999995, -20.628700000000006, -20.837799999999994, -20.3665, -20.480399999999992, -20.4007, -20.571500000000004, -20.863899999999997, -20.433600000000002, -20.417899999999992, -20.710300000000004, -20.281000000000002, -20.6366, -20.434699999999992, -20.6394, -20.767000000000007, -20.979200000000002, -20.4082, -20.62619999999999, -20.257600000000007, -20.5965, -20.756200000000003, -20.7451, -20.775100000000002, -20.5169, -20.4573, -20.824199999999998, -20.5565, -20.574299999999994, -20.865399999999998, -20.525000000000002, -20.322499999999998, -20.542199999999998, -20.8959, -20.5938, -20.423900000000007, -20.8805, -20.97019999999999, -20.572400000000005, -20.863399999999995, -20.645100000000003, -20.678199999999997, -20.732400000000002, -20.570300000000003, -20.128700000000006, -20.9372, -20.912000000000003, -20.480199999999996, -20.546699999999994, -20.800300000000004, -20.906100000000002, -20.71549999999999, -20.862399999999997, -20.868199999999995, -20.575100000000003, -20.832600000000003, -20.734299999999994, -20.6044, -20.525499999999994, -20.483500000000003, -20.568500000000004, -20.5399, -20.631099999999996, -20.4469, -20.7992, -20.709100000000003, -20.554499999999994, -20.415399999999995, -20.544299999999993, -20.796100000000003, -20.7953], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37190149526029403, "mean_inference_ms": 146.49088131065594, "mean_action_processing_ms": 0.05162148208113738, "mean_env_wait_ms": 0.7587467964915874, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 144000, "agent_timesteps_total": 144000, "timers": {"sample_time_ms": 591121.188, "sample_throughput": 6.767, "learn_time_ms": 1294712.336, "learn_throughput": 3.089, "update_time_ms": 13.49}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23753666505217552, "policy_loss": -0.24480001372285187, "vf_loss": 0.005224795582819297, "vf_explained_var": 0.9999656081199646, "kl": 0.0067951743403682485, "entropy": 693.0892314910889, "entropy_coeff": 0.0}}}, "num_steps_sampled": 144000, "num_agent_steps_sampled": 144000, "num_steps_trained": 144000}, "done": false, "episodes_total": 2880, "training_iteration": 36, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_10-38-25", "timestamp": 1618475905, "time_this_iter_s": 1885.0424230098724, "time_total_s": 67876.78569507599, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 67876.78569507599, "timesteps_since_restore": 0, "iterations_since_restore": 36, "perf": {"cpu_util_percent": 26.766158423205656, "ram_util_percent": 29.438192636667907}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.308299999999996, "episode_reward_min": -21.201199999999996, "episode_reward_mean": -20.66196, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.71549999999999, -20.862399999999997, -20.868199999999995, -20.575100000000003, -20.832600000000003, -20.734299999999994, -20.6044, -20.525499999999994, -20.483500000000003, -20.568500000000004, -20.5399, -20.631099999999996, -20.4469, -20.7992, -20.709100000000003, -20.554499999999994, -20.415399999999995, -20.544299999999993, -20.796100000000003, -20.7953, -20.4511, -20.8394, -20.8919, -20.554400000000005, -20.6074, -20.788500000000006, -20.519300000000005, -20.632799999999996, -20.9103, -20.475500000000004, -20.438199999999995, -20.63629999999999, -20.587300000000003, -20.688000000000006, -20.655399999999986, -20.3135, -20.4463, -20.719100000000005, -20.583599999999993, -20.72609999999999, -20.767400000000006, -20.458, -20.468, -20.5603, -20.665499999999998, -20.931100000000008, -20.492600000000003, -20.531900000000004, -20.973899999999993, -20.599899999999998, -20.764200000000006, -20.7933, -20.308299999999996, -20.588799999999996, -21.201199999999996, -20.857600000000005, -20.844199999999994, -20.762500000000003, -20.9612, -20.806, -20.516100000000005, -20.8056, -20.3761, -20.536900000000003, -20.741500000000002, -20.4472, -20.667199999999998, -20.762900000000002, -20.6207, -20.7163, -21.079099999999993, -20.553599999999992, -20.616500000000002, -20.506099999999993, -20.613400000000002, -20.705399999999997, -20.4788, -20.676500000000004, -20.768300000000004, -20.872800000000005, -20.930799999999994, -20.7703, -20.9539, -21.088699999999996, -20.575100000000003, -20.599700000000002, -20.643099999999997, -20.5996, -20.975600000000004, -20.582300000000004, -20.4966, -20.445200000000007, -20.370199999999993, -20.7193, -20.7069, -20.608600000000003, -20.6224, -20.336499999999997, -20.932299999999998, -20.375799999999995], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3719148982822959, "mean_inference_ms": 146.49296509185325, "mean_action_processing_ms": 0.051629203099044944, "mean_env_wait_ms": 0.7593955134020703, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 148000, "agent_timesteps_total": 148000, "timers": {"sample_time_ms": 591193.516, "sample_throughput": 6.766, "learn_time_ms": 1294808.195, "learn_throughput": 3.089, "update_time_ms": 13.447}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2319785028230399, "policy_loss": -0.24056889419443905, "vf_loss": 0.006408478571017895, "vf_explained_var": 0.9998952150344849, "kl": 0.007273042210726999, "entropy": 693.0875625610352, "entropy_coeff": 0.0}}}, "num_steps_sampled": 148000, "num_agent_steps_sampled": 148000, "num_steps_trained": 148000}, "done": false, "episodes_total": 2960, "training_iteration": 37, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_11-09-51", "timestamp": 1618477791, "time_this_iter_s": 1886.3187766075134, "time_total_s": 69763.1044716835, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 69763.1044716835, "timesteps_since_restore": 0, "iterations_since_restore": 37, "perf": {"cpu_util_percent": 26.98784838350056, "ram_util_percent": 29.64541062801933}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.332399999999996, "episode_reward_min": -21.088699999999996, "episode_reward_mean": -20.64734, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.930799999999994, -20.7703, -20.9539, -21.088699999999996, -20.575100000000003, -20.599700000000002, -20.643099999999997, -20.5996, -20.975600000000004, -20.582300000000004, -20.4966, -20.445200000000007, -20.370199999999993, -20.7193, -20.7069, -20.608600000000003, -20.6224, -20.336499999999997, -20.932299999999998, -20.375799999999995, -20.458, -20.660500000000006, -20.9125, -20.7707, -20.463299999999993, -20.703000000000003, -20.8675, -20.6934, -20.4929, -20.700700000000005, -20.548900000000007, -20.781200000000005, -20.63770000000001, -20.6305, -20.8052, -20.571000000000005, -20.720399999999998, -20.4447, -20.835600000000003, -20.552400000000002, -20.7349, -20.544600000000003, -20.50090000000001, -20.775299999999998, -20.332399999999996, -20.715700000000002, -20.497299999999996, -20.703099999999996, -20.539, -20.434099999999994, -20.7747, -20.3895, -20.6309, -20.5086, -20.55860000000001, -20.842099999999995, -20.51739999999999, -20.424899999999997, -20.589999999999996, -20.662999999999993, -20.642799999999998, -20.863699999999998, -20.6908, -20.532299999999992, -20.4128, -20.75669999999999, -20.837899999999994, -20.5991, -20.441199999999995, -20.604400000000002, -20.704900000000002, -20.426999999999996, -20.73, -20.661399999999993, -20.765300000000003, -20.797100000000004, -20.495000000000008, -20.808200000000003, -20.753800000000002, -20.712500000000002, -20.682, -20.568600000000007, -20.477199999999996, -20.78159999999999, -20.595499999999998, -20.71299999999999, -20.803099999999993, -20.42459999999999, -20.724199999999996, -20.9538, -20.4395, -20.438499999999994, -20.618799999999997, -20.863400000000002, -20.688699999999994, -20.73429999999999, -20.582, -20.58839999999999, -20.6679, -20.892000000000003], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37192647920994887, "mean_inference_ms": 146.4937276217914, "mean_action_processing_ms": 0.05162549677510668, "mean_env_wait_ms": 0.7592415510137124, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 152000, "agent_timesteps_total": 152000, "timers": {"sample_time_ms": 591153.812, "sample_throughput": 6.766, "learn_time_ms": 1294708.523, "learn_throughput": 3.089, "update_time_ms": 13.465}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2501127205323428, "policy_loss": -0.2534285488072783, "vf_loss": 0.0010796988630659143, "vf_explained_var": 0.9999680519104004, "kl": 0.007453753496520221, "entropy": 693.0846652984619, "entropy_coeff": 0.0}}}, "num_steps_sampled": 152000, "num_agent_steps_sampled": 152000, "num_steps_trained": 152000}, "done": false, "episodes_total": 3040, "training_iteration": 38, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_11-41-16", "timestamp": 1618479676, "time_this_iter_s": 1884.8604135513306, "time_total_s": 71647.96488523483, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 71647.96488523483, "timesteps_since_restore": 0, "iterations_since_restore": 38, "perf": {"cpu_util_percent": 26.758519345238092, "ram_util_percent": 29.43716517857143}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.2048, "episode_reward_min": -21.008299999999995, "episode_reward_mean": -20.661611999999995, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.682, -20.568600000000007, -20.477199999999996, -20.78159999999999, -20.595499999999998, -20.71299999999999, -20.803099999999993, -20.42459999999999, -20.724199999999996, -20.9538, -20.4395, -20.438499999999994, -20.618799999999997, -20.863400000000002, -20.688699999999994, -20.73429999999999, -20.582, -20.58839999999999, -20.6679, -20.892000000000003, -20.544400000000007, -20.6085, -20.554199999999998, -20.627100000000002, -20.509700000000002, -20.872100000000003, -20.706299999999995, -20.665300000000006, -20.43399999999999, -20.375000000000004, -20.733499999999996, -20.484200000000005, -20.6935, -21.008299999999995, -20.6688, -20.887699999999995, -20.855999999999998, -20.7799, -20.7631, -20.371799999999997, -20.95920000000001, -20.367599999999996, -20.807000000000006, -20.9354, -20.8854, -20.554999999999996, -20.792800000000007, -20.8817, -20.8371, -20.9215, -20.533100000000005, -20.651300000000006, -20.873200000000004, -20.687099999999997, -21.002399999999998, -20.542099999999998, -20.686500000000006, -20.550099999999997, -20.3612, -20.496999999999996, -20.405399999999993, -20.504400000000008, -20.691299999999995, -20.716500000000003, -20.692399999999992, -20.660800000000002, -20.37289999999999, -20.92579999999999, -20.314499999999995, -20.7511, -20.667299999999997, -20.4897, -20.923699999999997, -20.5537, -20.802100000000006, -20.5939, -20.621799999999997, -20.799400000000002, -20.6358, -20.760600000000004, -20.7562, -20.617900000000002, -20.81219999999999, -20.650099999999995, -20.8996, -20.966200000000004, -20.7189, -20.955099999999995, -20.636000000000006, -20.3944, -20.558099999999996, -20.5065, -20.345599999999997, -20.6018, -20.608699999999995, -20.51110000000001, -20.576999999999998, -20.497500000000006, -20.2048, -20.7532], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37192255607121416, "mean_inference_ms": 146.49314597273832, "mean_action_processing_ms": 0.05162118693137325, "mean_env_wait_ms": 0.7590018435027226, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 156000, "agent_timesteps_total": 156000, "timers": {"sample_time_ms": 590983.28, "sample_throughput": 6.768, "learn_time_ms": 1294789.21, "learn_throughput": 3.089, "update_time_ms": 13.473}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23343120608478785, "policy_loss": -0.23972313525155187, "vf_loss": 0.004081871698133455, "vf_explained_var": 0.9999557733535767, "kl": 0.007366869962424971, "entropy": 693.085636138916, "entropy_coeff": 0.0}}}, "num_steps_sampled": 156000, "num_agent_steps_sampled": 156000, "num_steps_trained": 156000}, "done": false, "episodes_total": 3120, "training_iteration": 39, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_12-12-43", "timestamp": 1618481563, "time_this_iter_s": 1886.539827823639, "time_total_s": 73534.50471305847, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 73534.50471305847, "timesteps_since_restore": 0, "iterations_since_restore": 39, "perf": {"cpu_util_percent": 26.99368264585656, "ram_util_percent": 29.6463768115942}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.167899999999996, "episode_reward_min": -21.055899999999994, "episode_reward_mean": -20.637561999999992, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.7562, -20.617900000000002, -20.81219999999999, -20.650099999999995, -20.8996, -20.966200000000004, -20.7189, -20.955099999999995, -20.636000000000006, -20.3944, -20.558099999999996, -20.5065, -20.345599999999997, -20.6018, -20.608699999999995, -20.51110000000001, -20.576999999999998, -20.497500000000006, -20.2048, -20.7532, -20.591, -20.732300000000002, -20.605700000000006, -20.914700000000003, -20.468599999999995, -20.531200000000005, -20.621299999999998, -20.8658, -20.6011, -20.890699999999995, -20.779100000000003, -20.7305, -20.818100000000005, -20.860099999999996, -20.778999999999993, -20.814199999999992, -20.8304, -20.7662, -20.4008, -20.27659999999999, -20.5624, -20.167899999999996, -20.7528, -20.401500000000002, -20.7949, -20.6898, -20.782500000000006, -20.4237, -20.6605, -20.465800000000005, -20.5619, -20.756400000000003, -20.7415, -20.4917, -20.656399999999998, -20.458199999999998, -20.786399999999997, -20.5155, -20.574200000000005, -20.4619, -20.690199999999994, -20.448800000000006, -20.595699999999997, -20.515000000000004, -20.793899999999994, -20.5604, -20.766299999999998, -20.7549, -20.682899999999993, -21.055899999999994, -20.730799999999995, -20.6848, -20.5353, -20.645100000000006, -20.688899999999997, -20.7562, -20.552999999999994, -20.5344, -20.878699999999995, -20.649700000000006, -20.54740000000001, -20.7748, -20.657600000000006, -20.828200000000002, -20.78150000000001, -20.728299999999994, -20.8766, -20.507800000000003, -20.624499999999994, -20.815500000000007, -20.556899999999995, -20.4246, -20.289399999999993, -20.6689, -20.3978, -20.482800000000008, -20.3935, -20.826500000000006, -20.367099999999997, -20.531399999999994], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3718973916093519, "mean_inference_ms": 146.48910731958543, "mean_action_processing_ms": 0.051611408018691754, "mean_env_wait_ms": 0.7575303590659136, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 160000, "agent_timesteps_total": 160000, "timers": {"sample_time_ms": 590956.057, "sample_throughput": 6.769, "learn_time_ms": 1294644.226, "learn_throughput": 3.09, "update_time_ms": 13.466}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2410966765601188, "policy_loss": -0.2443203017610358, "vf_loss": 0.0010090292980891036, "vf_explained_var": 0.9999884963035583, "kl": 0.007381958988844417, "entropy": 693.0845203399658, "entropy_coeff": 0.0}}}, "num_steps_sampled": 160000, "num_agent_steps_sampled": 160000, "num_steps_trained": 160000}, "done": false, "episodes_total": 3200, "training_iteration": 40, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_12-44-07", "timestamp": 1618483447, "time_this_iter_s": 1884.2071990966797, "time_total_s": 75418.71191215515, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 75418.71191215515, "timesteps_since_restore": 0, "iterations_since_restore": 40, "perf": {"cpu_util_percent": 26.760364719017495, "ram_util_percent": 29.440714551544477}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.1917, "episode_reward_min": -21.130299999999995, "episode_reward_mean": -20.615099, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.54740000000001, -20.7748, -20.657600000000006, -20.828200000000002, -20.78150000000001, -20.728299999999994, -20.8766, -20.507800000000003, -20.624499999999994, -20.815500000000007, -20.556899999999995, -20.4246, -20.289399999999993, -20.6689, -20.3978, -20.482800000000008, -20.3935, -20.826500000000006, -20.367099999999997, -20.531399999999994, -20.428499999999996, -20.7243, -20.3693, -20.6045, -20.5377, -21.130299999999995, -20.505000000000006, -20.450399999999995, -20.744699999999995, -20.731100000000005, -20.671000000000003, -20.456700000000005, -20.792899999999996, -20.6924, -20.849800000000002, -20.5667, -20.5743, -20.5989, -20.8234, -20.476399999999998, -20.766200000000005, -20.3539, -20.376299999999993, -20.663999999999998, -20.5719, -20.57530000000001, -20.611700000000003, -20.618699999999997, -20.82679999999999, -20.7678, -20.744899999999994, -20.5505, -20.283799999999996, -20.519899999999996, -20.643199999999993, -20.3224, -20.582899999999995, -20.809900000000003, -20.5536, -20.810699999999997, -20.44739999999999, -20.5025, -20.614700000000003, -20.4222, -20.7301, -20.6167, -20.949999999999996, -20.623399999999997, -20.584799999999998, -20.702299999999997, -20.619500000000013, -20.650499999999994, -20.4295, -20.562299999999997, -20.78060000000001, -20.7442, -20.814199999999996, -20.7048, -20.796100000000003, -20.561799999999998, -20.476499999999994, -20.647700000000007, -20.582900000000002, -20.7907, -20.601399999999995, -20.721999999999998, -20.63180000000001, -20.7085, -20.6813, -20.7775, -20.5555, -20.8492, -20.349999999999994, -20.510599999999997, -20.486200000000004, -20.73339999999999, -20.3595, -20.1917, -20.752399999999994, -20.47970000000001], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3718803590922717, "mean_inference_ms": 146.48777744991773, "mean_action_processing_ms": 0.051606429146012475, "mean_env_wait_ms": 0.756946918027354, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 164000, "agent_timesteps_total": 164000, "timers": {"sample_time_ms": 590942.127, "sample_throughput": 6.769, "learn_time_ms": 1294609.056, "learn_throughput": 3.09, "update_time_ms": 13.475}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23821589129511267, "policy_loss": -0.24172608507797122, "vf_loss": 0.0013303172063672264, "vf_explained_var": 0.9999839067459106, "kl": 0.0072662549500819296, "entropy": 693.0842895507812, "entropy_coeff": 0.0}}}, "num_steps_sampled": 164000, "num_agent_steps_sampled": 164000, "num_steps_trained": 164000}, "done": false, "episodes_total": 3280, "training_iteration": 41, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_13-15-32", "timestamp": 1618485332, "time_this_iter_s": 1884.7915451526642, "time_total_s": 77303.50345730782, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 77303.50345730782, "timesteps_since_restore": 0, "iterations_since_restore": 41, "perf": {"cpu_util_percent": 27.066046857567866, "ram_util_percent": 29.647861658609152}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.109700000000007, "episode_reward_min": -21.077399999999997, "episode_reward_mean": -20.623353999999996, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.476499999999994, -20.647700000000007, -20.582900000000002, -20.7907, -20.601399999999995, -20.721999999999998, -20.63180000000001, -20.7085, -20.6813, -20.7775, -20.5555, -20.8492, -20.349999999999994, -20.510599999999997, -20.486200000000004, -20.73339999999999, -20.3595, -20.1917, -20.752399999999994, -20.47970000000001, -20.5486, -20.5555, -20.529300000000003, -20.725199999999997, -20.8038, -20.732599999999998, -20.533900000000006, -20.5424, -20.488800000000005, -20.582600000000003, -20.557599999999997, -20.3334, -20.441999999999997, -20.553199999999997, -20.635799999999996, -20.52700000000001, -20.518399999999993, -20.9118, -20.771800000000006, -20.9536, -20.78260000000001, -20.750300000000003, -20.600800000000003, -20.876699999999992, -20.9108, -20.661200000000004, -20.523800000000012, -20.9, -20.5, -20.7766, -20.645499999999995, -20.581900000000008, -20.601599999999998, -20.920299999999997, -20.311000000000003, -20.5325, -20.9378, -20.436700000000002, -20.549999999999997, -20.6278, -20.6606, -20.541999999999994, -20.5064, -20.370199999999997, -20.443999999999996, -20.9244, -20.8368, -20.589000000000002, -20.746099999999995, -20.5669, -20.1603, -20.509299999999996, -20.535999999999998, -20.544199999999996, -20.744300000000006, -20.434500000000003, -20.668300000000002, -20.780299999999997, -20.803100000000004, -20.7251, -20.5548, -20.550300000000004, -20.788100000000004, -20.672199999999993, -20.700300000000002, -21.037099999999995, -20.6727, -20.6677, -20.7673, -20.109700000000007, -20.6502, -20.519900000000003, -20.669200000000004, -20.8122, -20.720699999999997, -20.499899999999997, -20.3209, -21.077399999999997, -20.483600000000006, -20.4057], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37186178876857895, "mean_inference_ms": 146.4839623782478, "mean_action_processing_ms": 0.05159436456727671, "mean_env_wait_ms": 0.7560636234853325, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 168000, "agent_timesteps_total": 168000, "timers": {"sample_time_ms": 590811.744, "sample_throughput": 6.77, "learn_time_ms": 1294576.104, "learn_throughput": 3.09, "update_time_ms": 13.504}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.22793993528466672, "policy_loss": -0.23129869403783232, "vf_loss": 0.0012695686114909677, "vf_explained_var": 0.99998939037323, "kl": 0.006963962805457413, "entropy": 693.0828762054443, "entropy_coeff": 0.0}}}, "num_steps_sampled": 168000, "num_agent_steps_sampled": 168000, "num_steps_trained": 168000}, "done": false, "episodes_total": 3360, "training_iteration": 42, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_13-46-56", "timestamp": 1618487216, "time_this_iter_s": 1884.1992514133453, "time_total_s": 79187.70270872116, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 79187.70270872116, "timesteps_since_restore": 0, "iterations_since_restore": 42, "perf": {"cpu_util_percent": 26.760401935243767, "ram_util_percent": 29.43881652400447}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.109700000000007, "episode_reward_min": -21.080000000000005, "episode_reward_mean": -20.666980000000002, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.5548, -20.550300000000004, -20.788100000000004, -20.672199999999993, -20.700300000000002, -21.037099999999995, -20.6727, -20.6677, -20.7673, -20.109700000000007, -20.6502, -20.519900000000003, -20.669200000000004, -20.8122, -20.720699999999997, -20.499899999999997, -20.3209, -21.077399999999997, -20.483600000000006, -20.4057, -20.487999999999996, -20.4553, -20.6343, -20.985999999999997, -20.530499999999996, -20.848999999999997, -20.47970000000001, -20.599600000000006, -20.69229999999999, -20.780899999999995, -20.649900000000002, -20.6813, -20.683399999999992, -20.628300000000003, -20.398999999999994, -20.621799999999997, -20.901800000000005, -20.408399999999993, -20.669500000000003, -20.701700000000002, -20.621700000000004, -20.4684, -20.579, -20.127100000000002, -20.578699999999998, -21.0655, -20.724300000000003, -20.647199999999998, -20.608500000000003, -20.9903, -20.601300000000002, -20.543999999999997, -21.0675, -20.754800000000007, -20.6947, -20.972399999999997, -20.861699999999992, -20.427300000000002, -21.080000000000005, -20.64, -20.52760000000001, -20.6783, -20.429600000000008, -20.8103, -20.480299999999996, -20.8411, -20.608000000000004, -20.808399999999995, -20.709200000000003, -20.864199999999997, -20.635999999999992, -20.6819, -20.476800000000004, -20.7629, -20.862900000000003, -20.7577, -20.6979, -20.599699999999995, -20.662899999999997, -20.958399999999994, -20.898299999999992, -20.760599999999993, -20.9677, -20.632, -20.6382, -20.570099999999996, -20.792600000000007, -20.628599999999995, -20.7625, -20.662399999999998, -20.4421, -20.307899999999997, -20.8368, -20.586900000000004, -20.622300000000003, -20.691000000000003, -20.553300000000004, -20.980200000000004, -20.2369, -20.6985], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3718147568360092, "mean_inference_ms": 146.48273105902945, "mean_action_processing_ms": 0.05156926764338593, "mean_env_wait_ms": 0.7541969477281393, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 172000, "agent_timesteps_total": 172000, "timers": {"sample_time_ms": 590735.137, "sample_throughput": 6.771, "learn_time_ms": 1294485.077, "learn_throughput": 3.09, "update_time_ms": 13.514}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2404723239596933, "policy_loss": -0.24628432327881455, "vf_loss": 0.0034348256000527044, "vf_explained_var": 0.999923825263977, "kl": 0.007923942248453386, "entropy": 693.0803298950195, "entropy_coeff": 0.0}}}, "num_steps_sampled": 172000, "num_agent_steps_sampled": 172000, "num_steps_trained": 172000}, "done": false, "episodes_total": 3440, "training_iteration": 43, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_14-18-21", "timestamp": 1618489101, "time_this_iter_s": 1885.294361114502, "time_total_s": 81072.99706983566, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 81072.99706983566, "timesteps_since_restore": 0, "iterations_since_restore": 43, "perf": {"cpu_util_percent": 27.024507251766455, "ram_util_percent": 29.646002231312757}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.2369, "episode_reward_min": -20.980200000000004, "episode_reward_mean": -20.621205, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.898299999999992, -20.760599999999993, -20.9677, -20.632, -20.6382, -20.570099999999996, -20.792600000000007, -20.628599999999995, -20.7625, -20.662399999999998, -20.4421, -20.307899999999997, -20.8368, -20.586900000000004, -20.622300000000003, -20.691000000000003, -20.553300000000004, -20.980200000000004, -20.2369, -20.6985, -20.711100000000002, -20.3165, -20.864000000000004, -20.453300000000002, -20.4786, -20.614899999999995, -20.557800000000007, -20.7555, -20.672100000000004, -20.508100000000006, -20.288, -20.364199999999997, -20.7191, -20.531499999999998, -20.416399999999992, -20.6164, -20.484699999999997, -20.691300000000002, -20.746199999999998, -20.8563, -20.644799999999996, -20.687099999999994, -20.752000000000002, -20.721900000000005, -20.339299999999998, -20.7781, -20.4068, -20.803499999999993, -20.767199999999995, -20.566599999999994, -20.689499999999995, -20.392, -20.3362, -20.629400000000004, -20.660300000000003, -20.782400000000003, -20.956699999999998, -20.756600000000006, -20.600200000000005, -20.5448, -20.906200000000002, -20.8243, -20.585800000000006, -20.736099999999993, -20.780000000000005, -20.751199999999997, -20.503800000000002, -20.8365, -20.759700000000002, -20.315199999999997, -20.443400000000004, -20.697699999999998, -20.648199999999996, -20.8186, -20.3979, -20.650900000000007, -20.623900000000003, -20.663000000000007, -20.802700000000005, -20.5501, -20.557000000000002, -20.6915, -20.323599999999995, -20.493699999999997, -20.583400000000008, -20.6419, -20.492499999999996, -20.634999999999998, -20.39699999999999, -20.311299999999996, -20.4545, -20.639699999999998, -20.889999999999997, -20.463900000000002, -20.9095, -20.677300000000002, -20.418, -20.393299999999996, -20.502499999999998, -20.619400000000002], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37174458333878624, "mean_inference_ms": 146.47807726967375, "mean_action_processing_ms": 0.05154221640218763, "mean_env_wait_ms": 0.7515320652969707, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 176000, "agent_timesteps_total": 176000, "timers": {"sample_time_ms": 590648.774, "sample_throughput": 6.772, "learn_time_ms": 1294585.334, "learn_throughput": 3.09, "update_time_ms": 13.521}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23208064958453178, "policy_loss": -0.23451260710135102, "vf_loss": 0.00024212308576920805, "vf_explained_var": 0.9999974370002747, "kl": 0.00729944376507774, "entropy": 693.0800590515137, "entropy_coeff": 0.0}}}, "num_steps_sampled": 176000, "num_agent_steps_sampled": 176000, "num_steps_trained": 176000}, "done": false, "episodes_total": 3520, "training_iteration": 44, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_14-49-46", "timestamp": 1618490986, "time_this_iter_s": 1884.1936919689178, "time_total_s": 82957.19076180458, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 82957.19076180458, "timesteps_since_restore": 0, "iterations_since_restore": 44, "perf": {"cpu_util_percent": 26.737983630952378, "ram_util_percent": 29.440104166666668}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.203200000000002, "episode_reward_min": -21.1045, "episode_reward_mean": -20.642560000000003, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.557000000000002, -20.6915, -20.323599999999995, -20.493699999999997, -20.583400000000008, -20.6419, -20.492499999999996, -20.634999999999998, -20.39699999999999, -20.311299999999996, -20.4545, -20.639699999999998, -20.889999999999997, -20.463900000000002, -20.9095, -20.677300000000002, -20.418, -20.393299999999996, -20.502499999999998, -20.619400000000002, -20.287299999999995, -20.831100000000003, -20.816200000000002, -20.81980000000001, -20.763900000000003, -20.493399999999998, -20.734999999999996, -20.4798, -20.456500000000002, -20.690499999999997, -20.938299999999998, -20.4483, -20.666800000000002, -20.975599999999993, -20.848000000000003, -20.670100000000005, -20.807599999999997, -20.5857, -20.638400000000004, -20.874000000000002, -20.278199999999995, -20.604599999999998, -21.033899999999992, -20.881899999999998, -20.863200000000003, -20.3757, -21.034199999999995, -20.7217, -20.7063, -20.449799999999996, -20.6023, -20.638399999999997, -20.6304, -20.695300000000003, -20.4366, -20.8074, -20.5192, -20.993299999999998, -20.972100000000005, -20.672000000000004, -20.7022, -20.865899999999996, -20.8301, -20.562299999999997, -20.790700000000008, -20.763800000000007, -20.217799999999993, -20.543500000000005, -20.609899999999996, -20.793499999999998, -20.761699999999998, -20.711, -20.7427, -20.491299999999995, -20.203200000000002, -20.304799999999997, -20.5985, -20.4114, -20.5896, -20.634899999999995, -20.7025, -21.1045, -20.876599999999996, -20.450099999999996, -20.7313, -20.3244, -20.7867, -20.3457, -20.568299999999997, -20.8801, -20.2461, -20.62959999999999, -20.8713, -20.471399999999996, -20.664399999999997, -20.761499999999995, -20.813000000000006, -20.7156, -20.6936, -20.6527], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.371688313788962, "mean_inference_ms": 146.47678810541078, "mean_action_processing_ms": 0.05152204891665365, "mean_env_wait_ms": 0.7494459827877226, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 180000, "agent_timesteps_total": 180000, "timers": {"sample_time_ms": 590526.581, "sample_throughput": 6.774, "learn_time_ms": 1294479.011, "learn_throughput": 3.09, "update_time_ms": 13.533}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23479816690087318, "policy_loss": -0.23967908264603466, "vf_loss": 0.0026637780328968574, "vf_explained_var": 0.999962329864502, "kl": 0.007390449303784408, "entropy": 693.0776271820068, "entropy_coeff": 0.0}}}, "num_steps_sampled": 180000, "num_agent_steps_sampled": 180000, "num_steps_trained": 180000}, "done": false, "episodes_total": 3600, "training_iteration": 45, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_15-21-10", "timestamp": 1618492870, "time_this_iter_s": 1884.8310215473175, "time_total_s": 84842.0217833519, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 84842.0217833519, "timesteps_since_restore": 0, "iterations_since_restore": 45, "perf": {"cpu_util_percent": 27.007065823726293, "ram_util_percent": 29.64629973968018}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.1803, "episode_reward_min": -21.1045, "episode_reward_mean": -20.649798, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.7025, -21.1045, -20.876599999999996, -20.450099999999996, -20.7313, -20.3244, -20.7867, -20.3457, -20.568299999999997, -20.8801, -20.2461, -20.62959999999999, -20.8713, -20.471399999999996, -20.664399999999997, -20.761499999999995, -20.813000000000006, -20.7156, -20.6936, -20.6527, -20.780200000000004, -20.546900000000004, -20.98800000000001, -20.760900000000003, -20.9092, -20.818099999999994, -20.7347, -20.5671, -20.381500000000003, -20.685, -20.8936, -20.776399999999995, -20.7839, -20.355599999999995, -20.557700000000008, -20.543999999999993, -20.737099999999995, -20.317200000000003, -20.7559, -20.633100000000002, -20.523500000000002, -20.55599999999999, -20.7203, -20.626099999999997, -20.947399999999995, -20.801000000000002, -20.250000000000004, -20.764100000000003, -20.7219, -20.812200000000004, -20.758199999999995, -20.5562, -20.923199999999994, -20.6563, -20.487700000000004, -20.7983, -20.583300000000012, -20.999700000000004, -20.5662, -20.7372, -20.584600000000002, -20.450499999999995, -20.784700000000004, -20.528000000000006, -20.619899999999994, -20.384800000000002, -20.81910000000001, -20.478500000000007, -20.40879999999999, -20.72639999999999, -20.3954, -20.649, -20.593600000000013, -20.74529999999999, -20.4766, -20.369499999999995, -20.973399999999994, -20.625800000000005, -20.726300000000002, -20.754199999999997, -20.729799999999997, -20.335299999999997, -20.657, -20.768300000000004, -20.79259999999999, -20.589100000000002, -20.6946, -20.4155, -20.4467, -20.431200000000004, -20.721500000000006, -20.6105, -20.749200000000002, -20.7074, -20.927899999999998, -20.684300000000004, -20.522100000000002, -20.1803, -20.741200000000013, -20.576600000000003], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3715978141532142, "mean_inference_ms": 146.47030448010912, "mean_action_processing_ms": 0.05148649286628177, "mean_env_wait_ms": 0.7463531088542917, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 184000, "agent_timesteps_total": 184000, "timers": {"sample_time_ms": 590313.234, "sample_throughput": 6.776, "learn_time_ms": 1294410.807, "learn_throughput": 3.09, "update_time_ms": 13.525}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23669174720998853, "policy_loss": -0.24293866916559637, "vf_loss": 0.0041391716945042845, "vf_explained_var": 0.9999783039093018, "kl": 0.007025841550785117, "entropy": 693.0784130096436, "entropy_coeff": 0.0}}}, "num_steps_sampled": 184000, "num_agent_steps_sampled": 184000, "num_steps_trained": 184000}, "done": false, "episodes_total": 3680, "training_iteration": 46, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_15-52-33", "timestamp": 1618494753, "time_this_iter_s": 1882.2269237041473, "time_total_s": 86724.24870705605, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 86724.24870705605, "timesteps_since_restore": 0, "iterations_since_restore": 46, "perf": {"cpu_util_percent": 26.75271982116244, "ram_util_percent": 29.438934426229505}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.177799999999994, "episode_reward_min": -20.97060000000001, "episode_reward_mean": -20.639457000000004, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.729799999999997, -20.335299999999997, -20.657, -20.768300000000004, -20.79259999999999, -20.589100000000002, -20.6946, -20.4155, -20.4467, -20.431200000000004, -20.721500000000006, -20.6105, -20.749200000000002, -20.7074, -20.927899999999998, -20.684300000000004, -20.522100000000002, -20.1803, -20.741200000000013, -20.576600000000003, -20.757700000000003, -20.960100000000004, -20.177799999999994, -20.539899999999992, -20.676199999999998, -20.45680000000001, -20.3913, -20.6115, -20.770199999999996, -20.516700000000004, -20.644300000000005, -20.619100000000007, -20.644700000000004, -20.615499999999997, -20.564400000000003, -20.720699999999994, -20.227500000000003, -20.570099999999996, -20.4687, -20.4673, -20.557500000000005, -20.799700000000005, -20.498000000000005, -20.7547, -20.424900000000004, -20.7147, -20.872900000000012, -20.753699999999995, -20.48240000000001, -20.751500000000004, -20.554900000000004, -20.679499999999997, -20.838299999999993, -20.4418, -20.870099999999997, -20.777499999999996, -20.4309, -20.692399999999996, -20.835099999999997, -20.711599999999997, -20.716899999999995, -20.425800000000002, -20.851399999999998, -20.3916, -20.453599999999994, -20.573, -20.5666, -20.789699999999996, -20.8561, -20.613299999999995, -20.6877, -20.5668, -20.857200000000002, -20.771099999999993, -20.798600000000004, -20.69010000000001, -20.534000000000006, -20.911899999999996, -20.3926, -20.720100000000002, -20.640400000000003, -20.88189999999999, -20.438100000000006, -20.536399999999993, -20.724, -20.7573, -20.686199999999996, -20.8703, -20.700099999999992, -20.744800000000005, -20.6638, -20.625800000000005, -20.588500000000003, -20.717499999999998, -20.97060000000001, -20.4555, -20.7867, -20.6076, -20.480399999999996, -20.778], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37150169679099415, "mean_inference_ms": 146.46600165680508, "mean_action_processing_ms": 0.05145743178625249, "mean_env_wait_ms": 0.7434536728792105, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 188000, "agent_timesteps_total": 188000, "timers": {"sample_time_ms": 590127.311, "sample_throughput": 6.778, "learn_time_ms": 1294383.672, "learn_throughput": 3.09, "update_time_ms": 13.545}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.24497021548449993, "policy_loss": -0.24786670808680356, "vf_loss": 0.0007771206148845522, "vf_explained_var": 0.9999945163726807, "kl": 0.007064575067488477, "entropy": 693.0780334472656, "entropy_coeff": 0.0}}}, "num_steps_sampled": 188000, "num_agent_steps_sampled": 188000, "num_steps_trained": 188000}, "done": false, "episodes_total": 3760, "training_iteration": 47, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_16-23-57", "timestamp": 1618496637, "time_this_iter_s": 1884.1880495548248, "time_total_s": 88608.43675661087, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 88608.43675661087, "timesteps_since_restore": 0, "iterations_since_restore": 47, "perf": {"cpu_util_percent": 27.087090773809525, "ram_util_percent": 29.650111607142858}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.207299999999993, "episode_reward_min": -21.1513, "episode_reward_mean": -20.646962, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.640400000000003, -20.88189999999999, -20.438100000000006, -20.536399999999993, -20.724, -20.7573, -20.686199999999996, -20.8703, -20.700099999999992, -20.744800000000005, -20.6638, -20.625800000000005, -20.588500000000003, -20.717499999999998, -20.97060000000001, -20.4555, -20.7867, -20.6076, -20.480399999999996, -20.778, -20.321500000000004, -20.508700000000005, -20.2684, -20.828699999999994, -20.496899999999997, -20.73669999999999, -20.798500000000004, -20.620400000000004, -20.542300000000004, -20.983800000000006, -20.385899999999996, -20.5744, -20.4094, -20.58, -20.9555, -20.801600000000004, -20.832199999999993, -20.813400000000005, -20.617200000000004, -20.482699999999994, -20.549899999999997, -20.719399999999997, -20.5308, -20.687200000000004, -20.7699, -20.71109999999999, -20.777000000000005, -20.802300000000006, -20.497999999999998, -20.7369, -21.148500000000002, -20.5843, -20.535200000000007, -20.207299999999993, -20.8868, -20.4651, -20.6879, -20.3867, -20.610000000000003, -20.833900000000007, -20.3322, -20.615899999999996, -20.794000000000004, -20.77229999999999, -20.770500000000002, -20.5118, -20.412000000000003, -20.820999999999994, -20.6517, -20.855799999999995, -20.717000000000002, -20.4284, -21.150800000000004, -20.541100000000004, -20.635499999999997, -20.548300000000005, -20.693799999999996, -20.583500000000004, -20.7016, -20.4185, -20.352200000000003, -20.625899999999994, -20.554799999999997, -21.1513, -20.7113, -20.7406, -20.5145, -20.3944, -20.507499999999993, -20.528000000000002, -20.742500000000003, -20.800499999999996, -20.6019, -20.5422, -20.735300000000006, -20.499999999999996, -20.9881, -20.4144, -20.466500000000003, -20.528300000000005], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.371438094348359, "mean_inference_ms": 146.46016752582628, "mean_action_processing_ms": 0.05143226865647334, "mean_env_wait_ms": 0.7412268332627161, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 192000, "agent_timesteps_total": 192000, "timers": {"sample_time_ms": 589945.513, "sample_throughput": 6.78, "learn_time_ms": 1294542.456, "learn_throughput": 3.09, "update_time_ms": 13.598}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2347892103716731, "policy_loss": -0.23775440594181418, "vf_loss": 0.0006323659278422156, "vf_explained_var": 0.999988853931427, "kl": 0.007776114027365111, "entropy": 693.0738849639893, "entropy_coeff": 0.0}}}, "num_steps_sampled": 192000, "num_agent_steps_sampled": 192000, "num_steps_trained": 192000}, "done": false, "episodes_total": 3840, "training_iteration": 48, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_16-55-22", "timestamp": 1618498522, "time_this_iter_s": 1884.6307752132416, "time_total_s": 90493.06753182411, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 90493.06753182411, "timesteps_since_restore": 0, "iterations_since_restore": 48, "perf": {"cpu_util_percent": 26.992113095238096, "ram_util_percent": 29.44895833333333}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.307099999999995, "episode_reward_min": -21.1513, "episode_reward_mean": -20.638332000000002, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.352200000000003, -20.625899999999994, -20.554799999999997, -21.1513, -20.7113, -20.7406, -20.5145, -20.3944, -20.507499999999993, -20.528000000000002, -20.742500000000003, -20.800499999999996, -20.6019, -20.5422, -20.735300000000006, -20.499999999999996, -20.9881, -20.4144, -20.466500000000003, -20.528300000000005, -20.406100000000002, -20.627000000000002, -20.55719999999999, -20.766699999999997, -20.588299999999997, -20.847900000000003, -20.457800000000002, -20.371600000000004, -20.554000000000002, -20.506399999999992, -20.809100000000004, -20.8666, -20.477999999999998, -20.46259999999999, -20.7355, -20.707900000000002, -20.771500000000003, -20.421, -20.399100000000004, -20.640900000000002, -20.6789, -20.39230000000001, -20.7997, -20.931699999999992, -20.871500000000005, -20.78, -20.632, -20.7055, -20.660099999999996, -20.6185, -20.860799999999998, -20.6256, -20.5677, -20.307099999999995, -20.7605, -20.912300000000002, -20.767500000000002, -20.648999999999994, -20.817500000000006, -20.5567, -20.735799999999998, -20.67610000000001, -20.541400000000003, -20.633199999999988, -20.519399999999994, -20.6041, -20.949199999999994, -20.4334, -20.70329999999999, -20.3778, -20.895099999999996, -20.5133, -20.439799999999998, -20.7399, -20.777200000000004, -20.62, -21.044200000000007, -20.5458, -20.526999999999994, -20.6329, -20.615899999999996, -20.6248, -20.416300000000007, -20.790799999999994, -20.728199999999998, -20.7205, -20.574299999999997, -20.717899999999997, -20.5871, -20.72209999999999, -20.492800000000006, -20.429399999999994, -20.4728, -20.456799999999994, -20.7366, -20.539500000000004, -20.5707, -20.8891, -20.6524, -20.916], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.3713507164155726, "mean_inference_ms": 146.45122588768467, "mean_action_processing_ms": 0.05139945038381383, "mean_env_wait_ms": 0.7379269705855285, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 196000, "agent_timesteps_total": 196000, "timers": {"sample_time_ms": 589678.623, "sample_throughput": 6.783, "learn_time_ms": 1294339.894, "learn_throughput": 3.09, "update_time_ms": 13.619}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23483945918269455, "policy_loss": -0.24187680007889867, "vf_loss": 0.004802719639783959, "vf_explained_var": 0.9999651908874512, "kl": 0.00744872038194444, "entropy": 693.0753650665283, "entropy_coeff": 0.0}}}, "num_steps_sampled": 196000, "num_agent_steps_sampled": 196000, "num_steps_trained": 196000}, "done": false, "episodes_total": 3920, "training_iteration": 49, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_17-26-43", "timestamp": 1618500403, "time_this_iter_s": 1881.8453903198242, "time_total_s": 92374.91292214394, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 92374.91292214394, "timesteps_since_restore": 0, "iterations_since_restore": 49, "perf": {"cpu_util_percent": 26.74456035767511, "ram_util_percent": 29.639828614008945}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.204600000000006, "episode_reward_min": -20.9839, "episode_reward_mean": -20.634566, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.615899999999996, -20.6248, -20.416300000000007, -20.790799999999994, -20.728199999999998, -20.7205, -20.574299999999997, -20.717899999999997, -20.5871, -20.72209999999999, -20.492800000000006, -20.429399999999994, -20.4728, -20.456799999999994, -20.7366, -20.539500000000004, -20.5707, -20.8891, -20.6524, -20.916, -20.6148, -20.820399999999996, -20.866199999999996, -20.8818, -20.603199999999998, -20.4917, -20.591599999999993, -20.457700000000003, -20.5792, -20.666900000000005, -20.678100000000008, -20.7196, -20.413199999999993, -20.498599999999996, -20.745900000000002, -20.2394, -20.465500000000002, -20.7732, -20.9511, -20.799899999999994, -20.754800000000003, -20.688399999999994, -20.838399999999996, -20.496500000000005, -20.4397, -20.4924, -20.513099999999998, -20.588500000000003, -20.512599999999996, -20.580199999999994, -20.9334, -20.739900000000002, -20.621999999999996, -20.3917, -20.845899999999997, -20.678, -20.580799999999996, -20.58480000000001, -20.6979, -20.749899999999997, -20.740999999999996, -20.424799999999987, -20.826000000000008, -20.9839, -20.8192, -20.574399999999994, -20.4628, -20.5261, -20.524000000000004, -20.3594, -20.6495, -20.563600000000005, -20.7328, -20.7975, -20.5664, -20.4176, -20.779299999999996, -20.456, -20.825300000000006, -20.596200000000007, -20.681000000000004, -20.669600000000006, -20.391899999999993, -20.4033, -20.844199999999997, -20.6525, -20.5009, -20.789299999999997, -20.791, -20.6148, -20.5189, -20.710100000000004, -20.716500000000003, -20.519099999999995, -20.672999999999995, -20.605900000000002, -20.813999999999997, -20.679899999999993, -20.809399999999997, -20.204600000000006], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.371280944612854, "mean_inference_ms": 146.44323841405986, "mean_action_processing_ms": 0.05137191451537257, "mean_env_wait_ms": 0.7353125225548334, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 200000, "agent_timesteps_total": 200000, "timers": {"sample_time_ms": 589557.79, "sample_throughput": 6.785, "learn_time_ms": 1294408.428, "learn_throughput": 3.09, "update_time_ms": 13.608}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.2431848575361073, "policy_loss": -0.2473535577300936, "vf_loss": 0.0019886312254016048, "vf_explained_var": 0.9999889135360718, "kl": 0.007266907618031837, "entropy": 693.0743103027344, "entropy_coeff": 0.0}}}, "num_steps_sampled": 200000, "num_agent_steps_sampled": 200000, "num_steps_trained": 200000}, "done": false, "episodes_total": 4000, "training_iteration": 50, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_17-58-07", "timestamp": 1618502287, "time_this_iter_s": 1883.684240579605, "time_total_s": 94258.59716272354, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 94258.59716272354, "timesteps_since_restore": 0, "iterations_since_restore": 50, "perf": {"cpu_util_percent": 27.016046165301567, "ram_util_percent": 29.449180938198065}, "trial_id": "dc0ca_00000"}
{"episode_reward_max": -20.204600000000006, "episode_reward_min": -21.0706, "episode_reward_mean": -20.645246, "episode_len_mean": 50.0, "episode_media": {}, "episodes_this_iter": 80, "policy_reward_min": {}, "policy_reward_max": {}, "policy_reward_mean": {}, "custom_metrics": {}, "hist_stats": {"episode_reward": [-20.681000000000004, -20.669600000000006, -20.391899999999993, -20.4033, -20.844199999999997, -20.6525, -20.5009, -20.789299999999997, -20.791, -20.6148, -20.5189, -20.710100000000004, -20.716500000000003, -20.519099999999995, -20.672999999999995, -20.605900000000002, -20.813999999999997, -20.679899999999993, -20.809399999999997, -20.204600000000006, -20.5835, -20.671200000000002, -20.519900000000007, -20.648400000000002, -20.533400000000004, -20.721099999999996, -20.600899999999992, -20.757500000000004, -20.618699999999993, -20.5216, -20.436700000000002, -20.615699999999997, -20.870199999999997, -20.779700000000002, -20.801699999999993, -20.724699999999995, -20.586199999999998, -20.749900000000004, -20.703499999999995, -20.558400000000013, -20.579900000000002, -20.6054, -20.951500000000003, -20.5521, -20.63060000000001, -20.471600000000006, -20.710599999999996, -20.681800000000003, -20.780399999999997, -20.5026, -20.695899999999998, -20.7675, -20.6801, -20.6375, -20.48189999999999, -20.5769, -20.631699999999988, -20.6186, -20.6345, -20.624299999999998, -20.843799999999995, -20.667900000000003, -20.568399999999993, -20.249200000000002, -20.584199999999996, -20.703099999999996, -20.622000000000003, -20.479100000000006, -20.517099999999992, -20.478299999999997, -20.788, -20.422599999999996, -20.7058, -20.533599999999996, -20.7937, -20.92120000000001, -20.838399999999996, -20.8877, -20.5008, -20.33659999999999, -20.554799999999993, -20.614700000000003, -21.064800000000005, -20.756800000000002, -20.78330000000001, -20.59359999999999, -20.746200000000005, -20.661200000000004, -20.534000000000002, -20.374899999999986, -20.8087, -20.622700000000002, -20.572900000000004, -20.564600000000002, -20.450899999999994, -20.750400000000006, -21.0706, -20.844100000000005, -20.393999999999995, -20.9117], "episode_lengths": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.37120004150602526, "mean_inference_ms": 146.43449341333863, "mean_action_processing_ms": 0.051333914752292396, "mean_env_wait_ms": 0.7322519133562173, "mean_env_render_ms": 0.0}, "off_policy_estimator": {}, "num_healthy_workers": 1, "timesteps_total": 204000, "agent_timesteps_total": 204000, "timers": {"sample_time_ms": 589289.428, "sample_throughput": 6.788, "learn_time_ms": 1294427.716, "learn_throughput": 3.09, "update_time_ms": 13.611}, "info": {"learner": {"default_policy": {"learner_stats": {"allreduce_latency": 0.0, "cur_kl_coeff": 0.30000000000000004, "cur_lr": 5e-05, "total_loss": -0.23153593065217137, "policy_loss": -0.24068683898076415, "vf_loss": 0.0067963251235596545, "vf_explained_var": 0.9999163746833801, "kl": 0.007848609136999585, "entropy": 693.0739936828613, "entropy_coeff": 0.0}}}, "num_steps_sampled": 204000, "num_agent_steps_sampled": 204000, "num_steps_trained": 204000}, "done": false, "episodes_total": 4080, "training_iteration": 51, "experiment_id": "579742dac3714fe6bdd8147e1d644b48", "date": "2021-04-15_18-29-29", "timestamp": 1618504169, "time_this_iter_s": 1882.3003280162811, "time_total_s": 96140.89749073982, "pid": 20853, "hostname": "hpc13", "node_ip": "129.187.240.128", "config": {"num_workers": 1, "num_envs_per_worker": 1, "create_env_on_driver": false, "rollout_fragment_length": 200, "batch_mode": "complete_episodes", "train_batch_size": 4000, "model": {"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "num_framestacks": "auto", "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1, "framestack": true}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env": "IntegerRoadEnv", "env_config": {"agentsize": 1000, "yellow": false, "global_re1": 0.1}, "render_env": false, "record_env": false, "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 5e-05, "log_level": "WARN", "callbacks": "<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "fake_sampler": false, "framework": "torch", "eager_tracing": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": null, "evaluation_num_episodes": 10, "evaluation_parallel_to_training": false, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": null, "sample_async": false, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 126291, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1.0, "_fake_gpus": false, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "placement_strategy": "PACK", "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {}, "policy_mapping_fn": null, "policies_to_train": null, "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "logger_config": "pretty_print", "simple_optimizer": true, "monitor": -1, "use_critic": true, "use_gae": true, "lambda": 1.0, "kl_coeff": 0.2, "sgd_minibatch_size": 128, "shuffle_sequences": true, "num_sgd_iter": 30, "lr_schedule": null, "vf_loss_coeff": 1.0, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, "clip_param": 0.3, "vf_clip_param": 10.0, "grad_clip": null, "kl_target": 0.01, "vf_share_layers": -1}, "time_since_restore": 96140.89749073982, "timesteps_since_restore": 0, "iterations_since_restore": 51, "perf": {"cpu_util_percent": 26.737430167597765, "ram_util_percent": 29.639255121042833}, "trial_id": "dc0ca_00000"}
