Source code for gmmvi.gmmvi_runner

import logging
import os
from time import time

import psutil
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # ERROR
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import tensorflow as tf
import numpy as np
import matplotlib

from gmmvi.experiments.evaluation.mmd import MMD
from gmmvi.optimization.gmmvi import GMMVI
from gmmvi.experiments.setup_experiment import init_experiment

matplotlib.use("Agg")

def get_process_memory():
    # Returns the resident set size (RSS) of the current process in bytes.
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    return mem_info.rss

class GmmviRunner:
    """ This class runs :py:class:`GMMVI<gmmvi.optimization.gmmvi.GMMVI>`, but also evaluates learning metrics
    and provides logging functionality.

    Parameters:
        config: dict
            A dictionary containing the hyperparameters and environment specifications.

        log_metrics_interval: int
            Metrics that take non-negligible overhead are evaluated every *log_metrics_interval* iterations.
    """

    def __init__(self, config, log_metrics_interval):
        if "seed" not in config.keys():
            config["seed"] = config["start_seed"]
        tf.keras.utils.set_random_seed(config["seed"])
        self.wall_times = []
        self.config = config
        self.log_metrics_interval = log_metrics_interval
        target_distribution, initial_model = init_experiment(self.config)
        self.gmmvi = GMMVI.build_from_config(self.config, target_distribution, initial_model)

        if "mmd_evaluation_config" in config.keys():
            print("building graph for MMD evaluation... (this may take a while)")
            dir_path = os.path.dirname(os.path.realpath(__file__))
            samples = np.load(os.path.join(dir_path, config['mmd_evaluation_config']['sample_dir']))
            self.mmd = MMD(samples, config['mmd_evaluation_config']["alpha"])
            # Ensure that the TF graph for compute_MMD() is built during initialization:
            mmd = self.mmd.compute_MMD(self.gmmvi.model.sample(2000)[0])
            print("done")
        else:
            self.mmd = None

        if "dump_gmm_path" not in self.config:
            self.dump_gmms = False
        else:
            self.dump_gmms = True
            self.dump_gmm_path = os.path.join(self.config["dump_gmm_path"], str(time()))
            os.makedirs(self.dump_gmm_path, exist_ok=True)
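
    # A sketch of the configuration entries consumed directly by this constructor (all
    # concrete values below are hypothetical; the remaining entries, e.g. the environment
    # specification consumed by init_experiment(), are omitted):
    #
    #   config = {
    #       "start_seed": 0,                    # used when no "seed" is given
    #       "dump_gmm_path": "/tmp/gmm_dumps",  # optional, enables dumping the model
    #       "mmd_evaluation_config": {          # optional, enables MMD evaluation
    #           "sample_dir": "samples.npy",    # groundtruth samples, relative to this file
    #           "alpha": 0.5,
    #       },
    #       ...
    #   }
    #   runner = GmmviRunner(config, log_metrics_interval=10)
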
    @staticmethod
    def build_from_config(config: dict):
        """Create a :py:class:`GmmviRunner<gmmvi.gmmvi_runner.GmmviRunner>` instance from a configuration dictionary.

        This static method provides a convenient way to create a
        :py:class:`GmmviRunner<gmmvi.gmmvi_runner.GmmviRunner>` instance, based on a dictionary containing the
        types and parameters of the :py:mod:`GMMVI modules<gmmvi.optimization.gmmvi_modules>`.

        Parameters:
            config: dict
                For each :py:mod:`GMMVI module<gmmvi.optimization.gmmvi_modules>`, the dictionary should contain
                an entry of the form XXX_type (a string) and XXX_config (a dict), specifying the type of the
                module and its module-specific hyperparameters. For example, the dictionary could contain
                sample_selector_type="component-based" and
                sample_selector_config={"desired_samples_per_component": 100,
                "ratio_reused_samples_to_desired": 2.}. Refer to the example yml-configs, or to the individual
                GMMVI modules, for the expected parameters and type-strings.
        """
        return GmmviRunner(config=config, **config['gmmvi_runner_config'])
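
    # Usage sketch (assuming `config` additionally contains a "gmmvi_runner_config" dict,
    # e.g. {"log_metrics_interval": 10}, whose entries are forwarded to the constructor):
    #
    #   runner = GmmviRunner.build_from_config(config)
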
    @tf.function
    def get_samples_and_entropy(self, num_samples):
        """ Draws *num_samples* samples from the model and uses them to estimate the model's entropy.

        Parameters:
            num_samples: int
                The number of samples to be drawn.

        Returns:
            tuple(tf.Tensor, float):

            **test_samples** - The drawn samples.

            **entropy** - A Monte-Carlo estimate of the model's entropy.
        """
        test_samples = self.gmmvi.model.sample(num_samples)[0]
        entropy = -tf.reduce_mean(self.gmmvi.model.log_density(test_samples))
        return test_samples, entropy
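
    # The entropy above is the standard Monte-Carlo estimate obtained from samples
    # x_i ~ q of the current model q:
    #   H(q) = -E_q[log q(x)] ~= -(1/N) * sum_i log q(x_i),   with N = num_samples.
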
    def get_cheap_metrics(self):
        """ Returns a dictionary of 'cheap' metrics, e.g. the current number of components, that we can obtain
        after every iteration without adding computational overhead.

        Returns:
            dict: A dictionary containing the cheap metrics.
        """
        cheap_metrics = dict()
        num_samples = self.gmmvi.sample_db.num_samples_written.numpy()
        cheap_metrics.update({"num_samples": num_samples,
                              "num_components": self.gmmvi.model.num_components,
                              "max_weight": tf.reduce_max(self.gmmvi.model.weights),
                              "num_db_samples": tf.shape(self.gmmvi.sample_db.samples)[0],
                              "num_db_components": tf.shape(self.gmmvi.sample_db.means)[0],
                              })
        return cheap_metrics
    def get_expensive_metrics(self):
        """ Computes 'expensive' metrics, such as plots, test-set evaluations, etc. Some of these metrics can be
        task-specific (see
        :py:meth:`LNPDF.expensive_metrics()<gmmvi.experiments.target_distributions.lnpdf.LNPDF.expensive_metrics>`).

        Returns:
            dict: A dictionary containing the expensive metrics.
        """
        expensive_metrics = dict()
        test_samples, entropy = self.get_samples_and_entropy(2000)
        mean_reward = tf.reduce_mean(self.gmmvi.sample_selector.target_uld(test_samples))
        elbo = mean_reward + self.gmmvi.temperature * entropy
        expensive_metrics.update({"-elbo": -elbo,
                                  "entropy": entropy,
                                  "target_density": mean_reward,
                                  "algo_time": np.sum(self.wall_times)})
        expensive_metrics.update(self.gmmvi.sample_selector.target_distribution.expensive_metrics(
            self.gmmvi.model, test_samples))
        if self.mmd is not None:
            mmd = self.mmd.compute_MMD(test_samples)
            expensive_metrics.update({"MMD": mmd})
        return expensive_metrics
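
    # Note on the ELBO estimate above: with test_samples x_i ~ q, it computes
    #   ELBO ~= (1/N) * sum_i log p~(x_i) + temperature * H(q),
    # where log p~ is the unnormalized target log-density (target_uld) and H(q) is the
    # Monte-Carlo entropy estimate from get_samples_and_entropy().
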
    def iterate_and_log(self, n: int) -> dict:
        """ Performs one learning iteration, computes metrics, and logs them.

        Parameters:
            n: int
                The current iteration.

        Returns:
            dict: A dictionary containing the metrics and plots that we want to log.
        """
        output_dict = {}
        ts1 = time()
        self.gmmvi.train_iter()
        ts2 = time()
        wall_time = ts2 - ts1
        output_dict.update({"walltime": wall_time})
        self.wall_times.append(wall_time)
        output_dict.update(self.get_cheap_metrics())

        # get the expensive metrics only every log_metrics_interval iterations
        if n % self.log_metrics_interval == 0:
            eval_dict = self.get_expensive_metrics()
            print("Checkpoint {:3d} | FEVALS: {:10d} | avg. sample logpdf: {:05.05f} | ELBO: {:05.05f}".format(
                n, output_dict["num_samples"], eval_dict["target_density"], -eval_dict["-elbo"]))
            print(f"{self.gmmvi.model.num_components} components\n")
            output_dict.update(eval_dict)
        return output_dict
    def log_to_disk(self, n: int):
        """ Saves the model parameters to the hard drive.

        Parameters:
            n: int
                The current iteration.
        """
        if self.dump_gmms:
            if n < 100 or n % 50 == 0:
                np.savez(self.dump_gmm_path + '/gmm_dump_' + "%01d" % n + '.npz',
                         weights=np.exp(self.gmmvi.model.log_weights.numpy()),
                         means=self.gmmvi.model.means.numpy(),
                         covs=self.gmmvi.model.covs.numpy(),
                         timestamps=time(),
                         fevals=self.gmmvi.sample_db.num_samples_written.numpy())
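
    # The dumps written above are plain npz archives and can be inspected with numpy,
    # e.g. (the path below is hypothetical):
    #
    #   data = np.load("/tmp/gmm_dumps/<timestamp>/gmm_dump_50.npz")
    #   weights, means, covs = data["weights"], data["means"], data["covs"]
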
    def finalize(self):
        """ Can be called after learning. Saves the final model parameters to the hard drive.
        """
        if self.dump_gmms:
            np.savez(self.dump_gmm_path + '/final_gmm_dump.npz',
                     weights=self.gmmvi.model.weights.numpy(),
                     means=self.gmmvi.model.means.numpy(),
                     covs=self.gmmvi.model.covs.numpy(),
                     timestamps=time(),
                     fevals=self.gmmvi.sample_db.num_samples_written.numpy())
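
# A minimal usage sketch (not part of the original module). It assumes that `config` is
# a full experiment configuration, e.g. loaded from one of the example yml-configs,
# including a "gmmvi_runner_config" entry, and that `num_iterations` is chosen by the
# user:
#
#   runner = GmmviRunner.build_from_config(config)
#   for n in range(num_iterations):
#       runner.iterate_and_log(n)
#       runner.log_to_disk(n)
#   runner.finalize()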