import numpy as np
import os
from math import pi
import tensorflow as tf
import tensorflow_datasets as tfds
from gmmvi.experiments.target_distributions.lnpdf import LNPDF
from gmmvi.models.gmm_wrapper import GmmWrapper
def create_MNIST_splits(num_seeds):
def normalize_img(image, label):
"""Normalizes images: `uint8` -> `float32`."""
return tf.cast(image, tf.float32) / 255., label
for seed in range(num_seeds):
tf.keras.utils.set_random_seed(seed)
(ds_train, ds_test), ds_info = tfds.load(
'mnist',
split=['train', 'test'],
shuffle_files=True,
as_supervised=True,
with_info=True,
)
ds_train = ds_train.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = ds_test.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    # Note: unlike create_WINE_splits below, this function does not currently persist
    # the splits; the normalized datasets are discarded after each loop iteration.
    # return ds_train, ds_test, 784, 10
def create_WINE_splits(num_seeds):
my_path = os.path.dirname(os.path.realpath(__file__))
for i in range(num_seeds):
tf.keras.utils.set_random_seed(i)
DATASET_SIZE = 4898
train_size = int(DATASET_SIZE * 0.60)
test_size = int(DATASET_SIZE * 0.20)
vali_size = DATASET_SIZE - train_size - test_size
dataset = tfds.load(name="wine_quality", as_supervised=True, split="train").shuffle(DATASET_SIZE)
features, labels = dataset.batch(DATASET_SIZE).get_single_element()
feature_mat = tf.transpose(tf.stack([tf.reshape(tf.cast(a, tf.float32), [-1]) for a in features.values()]))
feature_mean = tf.reduce_mean(feature_mat, axis=0)
feature_mat -= feature_mean
feature_std = tf.math.reduce_std(feature_mat, axis=0)
feature_mat /= feature_std
features_train = feature_mat[:train_size]
features_test = feature_mat[train_size:train_size + test_size]
features_vali = feature_mat[train_size + test_size:]
labels_train = labels[:train_size]
labels_test = labels[train_size:train_size + test_size]
labels_vali = labels[train_size + test_size:]
os.makedirs(os.path.join(my_path, "datasets", "wine"), exist_ok=True)
np.savez(os.path.join(my_path, "datasets", "wine", f"wine_seed_{i}.npz"),
features_train=features_train, features_vali=features_vali, features_test=features_test,
labels_train=labels_train, labels_vali=labels_vali, labels_test=labels_test)
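# Usage sketch (illustrative; the directory layout is simply what create_WINE_splits above and
# BNN_WINE.prepare_data() further below assume): create_WINE_splits(10) downloads the wine_quality
# dataset via tensorflow_datasets, standardizes the features, and writes ten 60/20/20
# train/test/validation splits to datasets/wine/wine_seed_{i}.npz next to this file, e.g.:
#
#   if not os.path.isdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), "datasets", "wine")):
#       create_WINE_splits(num_seeds=10)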
class BNN_LNPDF(LNPDF):
"""This class is used for implementing the target distribution given by the posterior for a Bayesian Neural Network.
Parameters:
likelihood_scaling: float
a coefficient that can be used to scale the effect of the likelihood
dataset_seed: int
The dataset_seed is used for reproducible train/test-splits
prior_std: float
The standard deviation of the (zero-mean) prior over the network weights
batch_size: int
size of the minibatches
hidden_units: list[int]
The length of the list defines the number of hidden layers, the entries define their width
loss: a tf.Keras.losses
The loss function used for computing the log-likelihood
activations: a list of Tensorflow activation functions
activations for each hidden layer and the output layer
"""
def __init__(self, likelihood_scaling, dataset_seed, prior_std, batch_size, hidden_units, loss, activations):
super(BNN_LNPDF, self).__init__(use_log_density_and_grad=True)
self.dataset_seed = dataset_seed
self.likelihood_scaling = tf.Variable(likelihood_scaling, dtype=tf.float32)
self.hidden_units = hidden_units
self.activations = activations
self.batch_size = batch_size
ds_train, ds_test, ds_vali, self.input_dim, self.output_dim = self.prepare_data()
self.train_size = len(ds_train)
self.ds_train = ds_train.cache()\
.repeat()\
.shuffle(len(ds_train), reshuffle_each_iteration=True)\
.batch(self.batch_size, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False, drop_remainder=True)\
.prefetch(tf.data.AUTOTUNE)
self.test_size = len(ds_test)
self.ds_test = ds_test.cache()\
.shuffle(len(ds_test), reshuffle_each_iteration=False)\
.batch(self.batch_size, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False, drop_remainder=False)\
.prefetch(tf.data.AUTOTUNE)
self.vali_size = len(ds_vali)
self.ds_vali = ds_vali.cache()\
.shuffle(len(ds_vali), reshuffle_each_iteration=False)\
.batch(self.batch_size, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False, drop_remainder=False)\
.prefetch(tf.data.AUTOTUNE)
        self.loss = loss  # e.g. tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) for classification
last_layer_width = self.input_dim
self.layer_size = []
self.layer_shape = []
for width in self.hidden_units:
self.layer_shape.append([last_layer_width, width]) # weights
self.layer_size.append(last_layer_width * width) # weights
self.layer_shape.append([width]) # bias
self.layer_size.append(width) # bias
last_layer_width = width
self.layer_shape.append([last_layer_width, self.output_dim])
self.layer_size.append(last_layer_width * self.output_dim)
self.layer_shape.append([self.output_dim])
self.layer_size.append(self.output_dim)
self._num_dimensions = tf.reduce_sum(self.layer_size)
self._prior_std = prior_std * tf.ones(self._num_dimensions) # zero-mean prior is implicitly assumed
self.prior_lnpdf_constant_part = - 0.5 * tf.cast(self._num_dimensions, dtype=tf.float32) * tf.math.log(2 * pi) \
- tf.reduce_sum(tf.math.log(self._prior_std))
self.model, self.metric = self.create_model()
    def prepare_data(self):
raise NotImplementedError
    def create_model(self):
raise NotImplementedError
    def get_num_dimensions(self):
return self._num_dimensions
@property
def prior_std(self):
return self._prior_std
    def forward_from_weight_vector(self, features, x):
        # Unpack the flat weight vector x into per-layer weight matrices and bias vectors
        # and apply the network to the given batch of features.
        output = tf.reshape(features, [-1, self.input_dim])
start = 0
i = 0
j = 0
while i < len(self.layer_shape):
W = tf.reshape(x[start:start+self.layer_size[i]], self.layer_shape[i])
start += self.layer_size[i]
i += 1
b = tf.reshape(x[start:start+self.layer_size[i]], self.layer_shape[i])
start += self.layer_size[i]
i += 1
output = self.activations[j](output @ W + b)
j += 1
return output
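    # Worked example of the flat parameterization (illustrative): for the BNN_WINE setup below,
    # with input_dim=11, hidden_units=[8, 8] and output_dim=1, the loop in __init__ yields
    #   layer_shape = [[11, 8], [8], [8, 8], [8], [8, 1], [1]]
    #   layer_size  = [88, 8, 64, 8, 8, 1]
    # so forward_from_weight_vector expects weight vectors with 88+8+64+8+8+1 = 177 entries,
    # consumed in exactly this order (weight matrix, then bias, for each layer).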
# def set_weights(self, weights_as_vector):
# start = 0
# for i in range(len(self.layer_size)):
# flat_layer = weights_as_vector[start:start + self.layer_size[i]]
# start += self.layer_size[i]
# self.model.trainable_variables[i].assign(tf.reshape(flat_layer, self.layer_shape[i]))
    def log_likelihood(self, x):
lls = tf.TensorArray(size=tf.shape(x)[0], dtype=tf.float32)
i = 0
for features, labels in self.ds_train.take(tf.cast(tf.shape(x)[0], dtype=tf.int64)):
output = self.forward_from_weight_vector(features, (x[i]))
ll = - self.train_size * self.loss(labels, output)
lls = lls.write(i, ll)
            i += 1
return lls.stack()
    def log_likelihood_and_grad(self, x):
lls = tf.TensorArray(size=tf.shape(x)[0], dtype=tf.float32)
ll_grads = tf.TensorArray(size=tf.shape(x)[0], dtype=tf.float32)
i = 0
for features, labels in self.ds_train.take(tf.cast(tf.shape(x)[0], dtype=tf.int64)):
this_x = x[i]
with tf.GradientTape() as tape:
tape.watch(this_x)
output = self.forward_from_weight_vector(features, this_x)
ll = - self.train_size * self.loss(labels, output)
lls = lls.write(i, ll)
ll_grads = ll_grads.write(i, tape.gradient(ll, this_x))
            i += 1
return lls.stack(), ll_grads.stack()
    def log_likelihood_old(self, x):
lls = tf.TensorArray(size=tf.shape(x)[0], dtype=tf.float32)
for i in range(tf.shape(x)[0]):
features, labels = self.ds_train.take(1).get_single_element()
output = self.forward_from_weight_vector(features, (x[i]))
ll = - self.train_size * self.loss(labels, output)
lls = lls.write(i, ll)
return lls.stack()
# def weight_vector_to_network_parameters(self, x):
# start = 0
# weights_and_biases = []
# for i in range(len(self.layer_shape)):
# weights_and_biases.append(tf.reshape(x[start:start+self.layer_size[0]], self.layer_shape[0]))
# start += self.layer_size[i]
# return weights_and_biases
    def log_density(self, x):
log_posterior = self.likelihood_scaling * (self.log_likelihood(x) + self.log_prior(x, ignore_constant=True))
return log_posterior
    def log_density_and_grad(self, x: tf.Tensor) -> [tf.Tensor, tf.Tensor]:
ll, ll_grad = self.log_likelihood_and_grad(x)
log_prior, log_prior_grad = self.log_prior_and_grad(x, ignore_constant=True)
log_posterior = self.likelihood_scaling * (ll + log_prior)
log_posterior_grad = self.likelihood_scaling * (ll_grad + log_prior_grad)
return log_posterior, log_posterior_grad
    def log_prior(self, x, ignore_constant=False):
if ignore_constant:
return - 0.5 * tf.reduce_sum(tf.square(x / self.prior_std), axis=1)
else:
return self.prior_lnpdf_constant_part - 0.5 * tf.reduce_sum(tf.square(x / self.prior_std), axis=1)
    def log_prior_and_grad(self, x, ignore_constant=False):
with tf.GradientTape() as tape:
tape.watch(x)
log_prior = self.log_prior(x, ignore_constant=ignore_constant)
log_prior_grad = tape.gradient(log_prior, x)
return log_prior, log_prior_grad
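    # For reference (this only restates what __init__ and log_prior already compute): with a zero-mean
    # isotropic Gaussian prior N(0, prior_std^2 I) over the D = num_dimensions weights,
    #   log p(x) = -D/2 * log(2*pi) - sum(log(prior_std)) - 1/2 * sum((x / prior_std)^2),
    # where the first two terms are precomputed as prior_lnpdf_constant_part and the quadratic term
    # is what log_prior() returns when ignore_constant=True.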
    @tf.function
    def bayesian_inference_test_loss(self, x):
        features, labels = self.ds_test.take(1).get_single_element()
        # Average the network outputs over all weight samples before applying loss and metric.
        output = self.forward_from_weight_vector(features, x[0])
        for i in tf.range(1, tf.shape(x)[0]):
            output += self.forward_from_weight_vector(features, x[i])
        mean_output = output / tf.cast(tf.shape(x)[0], tf.float32)
        test_loss = self.loss(labels, mean_output)
        self.metric.reset_state()
        self.metric.update_state(labels, mean_output)
        return test_loss, self.metric.result()
# @tf.function
    def average_loss(self, x, dataset):
        all_losses = []
        if dataset == "test":
            ds = self.ds_test
        elif dataset == "vali":
            ds = self.ds_vali
        else:
            raise ValueError(f"unknown dataset: {dataset}")
        test_loss = 0.
for params in x:
this_test_loss = 0.
num_batches = 0
for features, labels in ds:
output = self.forward_from_weight_vector(features, params)
this_loss = self.loss(labels, output)
this_test_loss += this_loss
all_losses.append(this_loss)
num_batches += 1
test_loss += this_test_loss / num_batches
return test_loss / tf.cast(tf.shape(x)[0], tf.float32), tf.stack(all_losses)
    @tf.function
def avg_bayesian_inference_test_loss(self, x, num_batches):
test_loss = 0.
test_metric = 0.
for _ in tf.range(num_batches):
features, labels = self.ds_test.take(1).get_single_element()
output = self.forward_from_weight_vector(features, (x[0]))
for i in tf.range(1, tf.shape(x)[0]):
output += self.forward_from_weight_vector(features, (x[i]))
test_loss += self.loss(labels, output / tf.cast(tf.shape(x)[0], tf.float32))
self.metric.reset_state()
self.metric.update_state(labels, output / tf.cast(tf.shape(x)[0], tf.float32))
test_metric += self.metric.result()
return test_loss / num_batches, test_metric / num_batches
    @tf.function
    def avg_bayesian_inference_loss(self, x, dataset):
        if dataset == "train":
            # Note: ds_train repeats indefinitely, so iterating over it here would not terminate;
            # in this file the method is only called with the finite "test" and "vali" splits.
            ds = self.ds_train
        elif dataset == "test":
            ds = self.ds_test
        elif dataset == "vali":
            ds = self.ds_vali
        else:
            raise ValueError(f"unknown dataset: {dataset}")
loss = 0.
metric = 0.
num_batches = 0.
for features, labels in ds:
output = self.forward_from_weight_vector(features, (x[0]))
for i in tf.range(1, tf.shape(x)[0]):
output += self.forward_from_weight_vector(features, (x[i]))
loss += self.loss(labels, output / tf.cast(tf.shape(x)[0], tf.float32))
self.metric.reset_state()
self.metric.update_state(labels, output / tf.cast(tf.shape(x)[0], tf.float32))
metric += self.metric.result()
num_batches += 1.
return loss / num_batches, metric / num_batches
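# Note: average_loss() averages the per-weight-sample losses, whereas the avg_bayesian_inference_*
# methods above first average the network outputs over all weight samples and only then apply the
# loss and metric to that averaged prediction (an ensemble-style approximation of Bayesian inference).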
class BNN_MNIST(BNN_LNPDF):
def __init__(self, likelihood_scaling, prior_std, batch_size):
super(BNN_MNIST, self).__init__(likelihood_scaling=likelihood_scaling, prior_std=prior_std,
batch_size=batch_size, dataset_seed=-1,
hidden_units = [128],
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
activations = [tf.nn.relu, tf.keras.activations.linear])
def prepare_data(self):
(ds_train, ds_test), ds_info = tfds.load(
'mnist',
split=['train', 'test'],
shuffle_files=True,
as_supervised=True,
with_info=True,
)
def normalize_img(image, label):
"""Normalizes images: `uint8` -> `float32`."""
return tf.cast(image, tf.float32) / 255., label
ds_train = ds_train.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = ds_test.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
return ds_train, ds_test.take(5000), ds_test.skip(5000), 784, 10
def create_model(self):
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(10)
])
accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=self.loss,
metrics=[accuracy],
)
return model, accuracy
def expensive_metrics(self, model: GmmWrapper, samples: tf.Tensor) -> dict:
""" This method computes four task-specific metric:
1. bi_test_loss: Expected loss on the test set when using Bayesian inference
2. bi_test_accuracy: Expected accuracy on the test set when using Bayesian inference
3. bi_vali_loss: Expected loss on the validation set when using Bayesian inference
4. bi_vali_accuracy: Expected accuracy on the validation set when using Bayesian inference
Parameters:
model: :py:class:`GmmWrapper<gmmvi.models.gmm_wrapper.GmmWrapper>`
The learned model that we want to evaluate for this target distribution.
samples: tf.Tensor
Samples that have been drawn from the model, which can be used for evaluations.
Returns:
dict: a dictionary containing the four task-specific metrics
"""
expensive_metrics = dict()
bi_test_loss, bi_test_accuracy = self.avg_bayesian_inference_loss(samples, "test")
expensive_metrics.update({"bi_test_loss": bi_test_loss, "bi_test_accuracy": bi_test_accuracy})
bi_vali_loss, bi_vali_accuracy = self.avg_bayesian_inference_loss(samples, "vali")
expensive_metrics.update({"bi_vali_loss": bi_vali_loss, "bi_vali_accuracy": bi_vali_accuracy})
return expensive_metrics
def make_MNIST_target(likelihood_scaling, prior_std, batch_size):
return BNN_MNIST(likelihood_scaling=likelihood_scaling, prior_std=prior_std, batch_size=batch_size)
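# Sketch of how the expensive metrics could be queried directly (the model argument is not used
# inside expensive_metrics, so None is passed here purely for illustration):
#   target = make_MNIST_target(likelihood_scaling=1., prior_std=1., batch_size=128)
#   samples = tf.random.normal([10, int(target.get_num_dimensions())])
#   metrics = target.expensive_metrics(model=None, samples=samples)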
class BNN_WINE(BNN_LNPDF):
def __init__(self, dataset_seed, likelihood_scaling, prior_std, batch_size):
super(BNN_WINE, self).__init__(dataset_seed=dataset_seed, likelihood_scaling=likelihood_scaling,
prior_std=prior_std, batch_size=batch_size, hidden_units=[8, 8],
loss=tf.keras.losses.MeanSquaredError(),
activations=[tf.math.sigmoid, tf.math.sigmoid, tf.keras.activations.linear])
def prepare_data(self):
dataset_seed = self.dataset_seed % 10
print(f"using dataset seed: {dataset_seed}")
        my_path = os.path.dirname(os.path.realpath(__file__))
        # The per-seed .npz files are created by create_WINE_splits() at the top of this file.
        dataset = np.load(os.path.join(my_path, "datasets", "wine", f"wine_seed_{dataset_seed}.npz"))
ds_train = tf.data.Dataset.from_tensor_slices((dataset["features_train"], dataset["labels_train"]))
ds_test = tf.data.Dataset.from_tensor_slices((dataset["features_test"], dataset["labels_test"]))
ds_vali = tf.data.Dataset.from_tensor_slices((dataset["features_vali"], dataset["labels_vali"]))
return ds_train, ds_test, ds_vali, 11, 1
def create_model(self):
inputs = tf.keras.Input(shape=[11], dtype=tf.float32)
features = inputs
for units in self.hidden_units:
features = tf.keras.layers.Dense(units, activation="sigmoid")(features)
outputs = tf.keras.layers.Dense(units=1)(features)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
rmse = tf.keras.metrics.RootMeanSquaredError()
model.compile(
optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
            loss=self.loss,
metrics=[rmse],
)
return model, rmse
def expensive_metrics(self, model: GmmWrapper, samples: tf.Tensor) -> dict:
""" This method computes four task-specific metric:
1. bi_test_loss: Expected loss on the test set when using Bayesian inference
2. bi_test_accuracy: Expected accuracy on the test set when using Bayesian inference
3. bi_vali_loss: Expected loss on the validation set when using Bayesian inference
4. bi_vali_accuracy: Expected accuracy on the validation set when using Bayesian inference
Parameters:
model: :py:class:`GmmWrapper<gmmvi.models.gmm_wrapper.GmmWrapper>`
The learned model that we want to evaluate for this target distribution.
samples: tf.Tensor
Samples that have been drawn from the model, which can be used for evaluations.
Returns:
dict: a dictionary containing the four task-specific metrics
"""
expensive_metrics = dict()
        bi_test_loss, bi_test_rmse = self.avg_bayesian_inference_loss(samples, "test")
        expensive_metrics.update({"bi_test_loss": bi_test_loss, "bi_test_rmse": bi_test_rmse})
        bi_vali_loss, bi_vali_rmse = self.avg_bayesian_inference_loss(samples, "vali")
        expensive_metrics.update({"bi_vali_loss": bi_vali_loss, "bi_vali_rmse": bi_vali_rmse})
return expensive_metrics
def make_WINE_target(likelihood_scaling, dataset_seed, prior_std, batch_size):
return BNN_WINE(likelihood_scaling=likelihood_scaling, dataset_seed=dataset_seed,
prior_std=prior_std, batch_size=batch_size)
if __name__ == "__main__":
ds0_test_losses = []
for seed in range(10):
tf.keras.utils.set_random_seed(seed)
# test = make_MNIST_target(1., 1., 128)
test = make_WINE_target(likelihood_scaling=1., dataset_seed=seed, prior_std=1., batch_size=128)
best_vali_loss = tf.float32.max
test_losses = []
for i in range(2000):
test.model.fit(
test.ds_train,
epochs=1,
verbose=0,
steps_per_epoch=test.train_size // test.batch_size,
validation_data=test.ds_vali,
)
params = tf.concat([tf.reshape(x, [-1]) for x in test.model.trainable_variables], axis=0)
vali_loss = test.average_loss(tf.reshape(params, [1,-1]), "vali")[0]
if vali_loss < best_vali_loss:
test_loss = test.average_loss(tf.reshape(params, [1, -1]), "test")[0]
test_losses.append(test_loss)
print(f"iter: {i}, new best params, test_loss: {test_loss}, vali_loss: {vali_loss}")
best_params = params
best_vali_loss = vali_loss
print(f"run {seed}, test_loss {tf.stack(test_losses)[-1]}")
ds0_test_losses.append(tf.stack(test_losses))
params = tf.concat([tf.reshape(x, [-1]) for x in test.model.trainable_variables], axis=0)
print("done")