{"self": "<wasserstein_mdp.WassersteinMarkovDecisionProcess object at 0x2b74bebf5040>", "state_shape": "(8,)", "action_shape": "(2,)", "reward_shape": "(1,)", "label_shape": "(6,)", "discretize_action_space": "True", "state_encoder_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='state_encoder_network_base')", "action_decoder_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='action_decoder_network_base')", "transition_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='transition_network_base')", "reward_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='reward_network_base')", "decoder_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='state_decoder_network_base')", "latent_policy_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='discrete_policy_network_base')", "steady_state_lipschitz_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='steady_state_network_base')", "transition_loss_lipschitz_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='transition_loss_network_base')", "latent_state_size": "14", "number_of_discrete_actions": "3", "action_encoder_network": "ModelArchitecture(hidden_units=[256], activation='relu', name='action_encoder_network_base')", "state_encoder_pre_processing_network": "None", "state_decoder_pre_processing_network": "None", "time_stacked_states": "False", "state_encoder_temperature": "0.6", "state_prior_temperature": "0.75", "action_encoder_temperature": "0.3333333333333333", "latent_policy_temperature": "0.5", "wasserstein_regularizer_scale_factor": "WassersteinRegularizerScaleFactor(global_scaling=10.0, global_gradient_penalty_multiplier=20.0, steady_state_scaling=100.0, steady_state_gradient_penalty_multiplier=None, local_transition_loss_scaling=50.0, local_transition_loss_gradient_penalty_multiplier=None)", "encoder_temperature_decay_rate": "0.0", "prior_temperature_decay_rate": "0.0", "reset_state_label": "True", "autoencoder_optimizer": "None", "wasserstein_regularizer_optimizer": "None", "entropy_regularizer_scale_factor": "0.0", "entropy_regularizer_decay_rate": "0.0", "entropy_regularizer_scale_factor_min_value": "0.0", "importance_sampling_exponent": "0.4", "importance_sampling_exponent_growth_rate": "1e-05", "time_stacked_lstm_units": "128", "reward_bounds": "None", "latent_stationary_network": "None", "action_entropy_regularizer_scaling": "0.0", "enforce_upper_bound": "False", "squared_wasserstein": "True", "n_critic": "15", "trainable_prior": "False", "state_encoder_type": "EncodingType.DETERMINISTIC", "policy_based_decoding": "False", "deterministic_state_embedding": "True", "state_encoder_softclipping": "False", "args": "()", "kwargs": "{'evaluation_window_size': 0}", "__class__": "<class 'wasserstein_mdp.WassersteinMarkovDecisionProcess'>", "eval_policy": "282.56876", "local_reward_loss": "0.020720486", "local_transition_loss": "0.13135736", "training_step": "320000"}