{"self": "<wasserstein_mdp.WassersteinMarkovDecisionProcess object at 0x2ad85f220190>", "state_shape": "(2,)", "action_shape": "(3,)", "reward_shape": "(1,)", "label_shape": "(3,)", "discretize_action_space": "False", "state_encoder_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='state_encoder_network_base')", "action_decoder_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='action_decoder_network_base')", "transition_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='transition_network_base')", "reward_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='reward_network_base')", "decoder_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='state_decoder_network_base')", "latent_policy_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='discrete_policy_network_base')", "steady_state_lipschitz_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='steady_state_network_base')", "transition_loss_lipschitz_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='transition_loss_network_base')", "latent_state_size": "10", "number_of_discrete_actions": "5", "action_encoder_network": "ModelArchitecture(hidden_units=[64, 64], activation='relu', name='action_encoder_network_base')", "state_encoder_pre_processing_network": "None", "state_decoder_pre_processing_network": "None", "time_stacked_states": "False", "state_encoder_temperature": "0.1", "state_prior_temperature": "0.3333333333333333", "action_encoder_temperature": "-1.0", "latent_policy_temperature": "0.3333333333333333", "wasserstein_regularizer_scale_factor": "WassersteinRegularizerScaleFactor(global_scaling=None, global_gradient_penalty_multiplier=10, steady_state_scaling=100.0, steady_state_gradient_penalty_multiplier=None, local_transition_loss_scaling=25.0, local_transition_loss_gradient_penalty_multiplier=None)", "encoder_temperature_decay_rate": "0.0", "prior_temperature_decay_rate": "0.0", "reset_state_label": "True", "autoencoder_optimizer": "None", "wasserstein_regularizer_optimizer": "None", "entropy_regularizer_scale_factor": "0.0", "entropy_regularizer_decay_rate": "0.0", "entropy_regularizer_scale_factor_min_value": "0.0", "importance_sampling_exponent": "1.0", "importance_sampling_exponent_growth_rate": "1.0", "time_stacked_lstm_units": "128", "reward_bounds": "None", "latent_stationary_network": "None", "action_entropy_regularizer_scaling": "0.0", "enforce_upper_bound": "False", "squared_wasserstein": "False", "n_critic": "20", "trainable_prior": "False", "state_encoder_type": "EncodingType.DETERMINISTIC", "policy_based_decoding": "False", "deterministic_state_embedding": "True", "state_encoder_softclipping": "True", "args": "()", "kwargs": "{}", "__class__": "<class 'wasserstein_mdp.WassersteinMarkovDecisionProcess'>", "eval_policy": "-92.1", "local_reward_loss": "0.014176323", "local_transition_loss": "0.38232273", "training_step": "232000"}