{"self": "<wasserstein_mdp.WassersteinMarkovDecisionProcess object at 0x2b65a1058fd0>", "state_shape": "(6,)", "action_shape": "(3,)", "reward_shape": "(1,)", "label_shape": "(7,)", "discretize_action_space": "False", "state_encoder_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='state_encoder_network_base')", "action_decoder_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='action_decoder_network_base')", "transition_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='transition_network_base')", "reward_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='reward_network_base')", "decoder_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='state_decoder_network_base')", "latent_policy_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='discrete_policy_network_base')", "steady_state_lipschitz_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='steady_state_network_base')", "transition_loss_lipschitz_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='transition_loss_network_base')", "latent_state_size": "13", "number_of_discrete_actions": "16", "action_encoder_network": "ModelArchitecture(hidden_units=[512, 512], activation='leaky_relu', name='action_encoder_network_base')", "state_encoder_pre_processing_network": "None", "state_decoder_pre_processing_network": "None", "time_stacked_states": "False", "state_encoder_temperature": "0.6666666666666666", "state_prior_temperature": "0.1", "action_encoder_temperature": "0.99", "latent_policy_temperature": "0.5", "wasserstein_regularizer_scale_factor": "WassersteinRegularizerScaleFactor(global_scaling=10.0, global_gradient_penalty_multiplier=20.0, steady_state_scaling=10.0, steady_state_gradient_penalty_multiplier=None, local_transition_loss_scaling=10.0, local_transition_loss_gradient_penalty_multiplier=None)", "encoder_temperature_decay_rate": "0.0", "prior_temperature_decay_rate": "0.0", "reset_state_label": "True", "autoencoder_optimizer": "None", "wasserstein_regularizer_optimizer": "None", "entropy_regularizer_scale_factor": "0.0", "entropy_regularizer_decay_rate": "0.0", "entropy_regularizer_scale_factor_min_value": "0.0", "importance_sampling_exponent": "0.4", "importance_sampling_exponent_growth_rate": "1e-05", "time_stacked_lstm_units": "128", "reward_bounds": "None", "latent_stationary_network": "None", "action_entropy_regularizer_scaling": "0.0", "enforce_upper_bound": "False", "squared_wasserstein": "True", "n_critic": "20", "trainable_prior": "False", "state_encoder_type": "EncodingType.DETERMINISTIC", "policy_based_decoding": "False", "deterministic_state_embedding": "True", "state_encoder_softclipping": "True", "args": "()", "kwargs": "{'evaluation_window_size': 0}", "__class__": "<class 'wasserstein_mdp.WassersteinMarkovDecisionProcess'>", "eval_policy": "-70.2", "local_reward_loss": "0.034769785", "local_transition_loss": "0.64947826", "training_step": "430000"}