{"self": "<wasserstein_mdp.WassersteinMarkovDecisionProcess object at 0x2addb46fe040>", "state_shape": "(4,)", "action_shape": "(2,)", "reward_shape": "(1,)", "label_shape": "(2,)", "discretize_action_space": "False", "state_encoder_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='state_encoder_network_base')", "action_decoder_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='action_decoder_network_base')", "transition_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='transition_network_base')", "reward_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='reward_network_base')", "decoder_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='state_decoder_network_base')", "latent_policy_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='discrete_policy_network_base')", "steady_state_lipschitz_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='steady_state_network_base')", "transition_loss_lipschitz_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='transition_loss_network_base')", "latent_state_size": "9", "number_of_discrete_actions": "16", "action_encoder_network": "ModelArchitecture(hidden_units=[64, 64, 64], activation='tanh', name='action_encoder_network_base')", "state_encoder_pre_processing_network": "None", "state_decoder_pre_processing_network": "None", "time_stacked_states": "False", "state_encoder_temperature": "0.3333333333333333", "state_prior_temperature": "0.3333333333333333", "action_encoder_temperature": "0.99", "latent_policy_temperature": "0.6666666666666666", "wasserstein_regularizer_scale_factor": "WassersteinRegularizerScaleFactor(global_scaling=10.0, global_gradient_penalty_multiplier=20.0, steady_state_scaling=75.0, steady_state_gradient_penalty_multiplier=None, local_transition_loss_scaling=75.0, local_transition_loss_gradient_penalty_multiplier=None)", "encoder_temperature_decay_rate": "1e-06", "prior_temperature_decay_rate": "2e-06", "reset_state_label": "True", "autoencoder_optimizer": "None", "wasserstein_regularizer_optimizer": "None", "entropy_regularizer_scale_factor": "0.0", "entropy_regularizer_decay_rate": "0.0", "entropy_regularizer_scale_factor_min_value": "0.0", "importance_sampling_exponent": "0.4", "importance_sampling_exponent_growth_rate": "1e-05", "time_stacked_lstm_units": "128", "reward_bounds": "None", "latent_stationary_network": "None", "action_entropy_regularizer_scaling": "0.0", "enforce_upper_bound": "False", "squared_wasserstein": "False", "n_critic": "5", "trainable_prior": "False", "state_encoder_type": "EncodingType.DETERMINISTIC", "policy_based_decoding": "False", "deterministic_state_embedding": "True", "state_encoder_softclipping": "False", "args": "()", "kwargs": "{'evaluation_window_size': 5}", "__class__": "<class 'wasserstein_mdp.WassersteinMarkovDecisionProcess'>", "eval_policy": "200.0", "local_reward_loss": "0.0037750935", "local_transition_loss": "0.40456417", "training_step": "120000"}