In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
import evaluation.evaluate as evaluate
import matplotlib.pyplot as plt
import math_utils.matrix_utils as matrix_utils
import models.regression_models as reg_models
import models.PLDS as PLDS
import numpy as np
import os
import PGLDSID.PGLDSID as PGLDSID
import PGLDSID.SDP_optimize as SDP_optimize
import visualization.visualization as viz_utils
In [3]:
data = np.load(os.path.join(os.getcwd(), 'sample_data', 'sample_sys.npz'), allow_pickle=True)
In [4]:
### Prepare the data.
Y, Z, params = data['Y'], data['Z'], data['params'][()]
Ytrain, Ztrain = Y[:int(1e6), :].astype('float64'), Z[:int(1e6), :]
Ytest, Ztest = Y[-int(1e6):, :].astype('float64'), Z[-int(1e6):, :]

##### Without loss of generality, demean the Gaussian observations.
Zmean = np.mean(Ztrain, axis=0, keepdims=True)
Ztrain -= Zmean
Ztest -= Zmean
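A note on the demeaning above: it is without loss of generality for the evaluation below, assuming 'CC' denotes the Pearson correlation coefficient, which is invariant to constant shifts. A quick numerical check of that invariance:

rng = np.random.default_rng(0)
a = rng.standard_normal(1000)
b = a + rng.standard_normal(1000)
# Pearson CC is unchanged by adding a constant offset to either signal.
assert np.isclose(np.corrcoef(a, b)[0, 1], np.corrcoef(a, b + 5.0)[0, 1])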
In [5]:
n1 = nx = 4
i = 10 # horizon

### PG-LDS-ID Stage 1 only.
pgldsid_params = PGLDSID.PGLDSID(i, nx, Ytrain.T, Z=Ztrain.T, n1=n1, Z_horizon=i)
pgldsid_params = SDP_optimize.optimize(pgldsid_params)

### PLDSID.
pldsid_params = PGLDSID.PGLDSID(i, nx, Ytrain.T, n1=0)
pldsid_params = SDP_optimize.optimize(pldsid_params)
The optimal value is 46.354467039999236 with status: optimal
The optimal value is 6.152844128904793 with status: optimal
In [6]:
### Extract the ground-truth modes: shared between Y and Z, present in Y only, and the unencoded (i.e., Z-only) modes.
shared_eigs = np.linalg.eigvals(matrix_utils.extract_block(params['A'], params['rev_statesz']))
disjoint_dims = np.delete(params['rev_statesy'], params['rev_statesz'])
y_only_eigs = np.linalg.eigvals(matrix_utils.extract_block(params['A'], disjoint_dims))
z_only_eigs = np.linalg.eigvals(params['eps_sysz']['A'])

pgldsid_eigs = np.linalg.eigvals(pgldsid_params['A'])
pldsid_eigs = np.linalg.eigvals(pldsid_params['A'])
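For reference, assuming matrix_utils.extract_block(A, idx) returns the square submatrix of A indexed by idx along both axes (an assumption about this repo's helper), the shared-mode eigenvalues above could equivalently be computed as:

idx = params['rev_statesz']  # indices of the shared latent states
shared_eigs_check = np.linalg.eigvals(params['A'][np.ix_(idx, idx)])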
In [7]:
### Visualize the modes identified by both learning algorithms against the ground-truth modes.
colors = { # Colors for visualizing the modes.
  'shared': '#39b54a', 'y only': '#e5542c', 'z only': '#0071bd', 'pgldsid': '#39b54a', 'pldsid': '#e5542c'}

viz_utils.plot_eigenvalues(
  [shared_eigs, y_only_eigs, z_only_eigs, pgldsid_eigs, pldsid_eigs],
  'n1=nx=4', labels=['shared', 'y only', 'z only', 'pgldsid', 'pldsid'],
   markers=['o', 'o', 'o', 'x', 'x'],
   edgecolors=[colors['shared'], colors['y only'], colors['z only'], None, None],
   facecolors=['None', 'None', 'None', colors['pgldsid'], colors['pldsid']],
   alpha_vals=[0.4, 0.4, 0.4, 0.8, 0.8],
   bound_lims_to_circle=False, show_legend=True, legend_location='upper left', fig=None, ax=None)
plt.xlim([0.8, 1.0])
plt.ylim([-0.35, 0.35])
plt.show()
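Beyond the visual comparison, one way to quantify mode recovery (not part of the original analysis) is an optimal one-to-one pairing of estimated and ground-truth eigenvalues by absolute distance. A minimal sketch, assuming the ground-truth system has n1 = 4 shared modes so the pairing is square:

from scipy.optimize import linear_sum_assignment

cost = np.abs(shared_eigs[:, None] - pgldsid_eigs[None, :])  # pairwise |lambda_true - lambda_est|
row, col = linear_sum_assignment(cost)  # optimal one-to-one matching
print('Mean matched eigenvalue error (PG-LDS-ID):', cost[row, col].mean())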
In [8]:
### Demonstrating decoding performance. This block takes a while to run: we test on 1e6 contiguous
### samples and the point-process filter runs recursively.

#### PG-LDS-ID
pgldsid_sys = PLDS.update_missing_params_and_construct_PLDS(pgldsid_params)

other_out = {'Z': pgldsid_params['Cz']}
pgldsid_y_pred, _, other_est = pgldsid_sys.predict(Ytest, other_out)
pgldsid_z_pred = other_est['Z']
pgldsid_z_evalres = evaluate.evaluate_results(Ztest, pgldsid_z_pred, ['CC'])['CC']

#### Handle prediction covariances to compute AUC.
Pp = other_est.get('Pp', None) # State prediction error covariances, shape (time, nx, nx).
assert (Pp is not None)
log_rate_covariances = Pp @ pgldsid_params['C'].T # (time, nx, nr)
log_rate_covariances = np.transpose(log_rate_covariances, (0, 2, 1)) @ pgldsid_params['C'].T # C @ Pp @ C.T -> (time, nr, nr)
pgldsid_y_evalres = evaluate.evaluate_results(
                      Ytest, pgldsid_y_pred, ['AUC'], predicted_covariances=log_rate_covariances)['AUC']
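The covariance term above matters because the rate is the exponential of a Gaussian log rate, so its predicted mean follows the log-normal identity E[exp(u)] = exp(mu + sigma^2/2). A sketch of the moment matching (whether evaluate_results does exactly this internally is an assumption, and log_rates is a hypothetical (time, nr) array of predicted log-rate means):

log_rate_var = np.diagonal(log_rate_covariances, axis1=1, axis2=2)  # (time, nr) marginal log-rate variances
rate_mean = np.exp(log_rates + 0.5 * log_rate_var)  # log-normal mean; 'log_rates' is hypothetical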
In [9]:
#### PLDSID
pldsid_sys = PLDS.update_missing_params_and_construct_PLDS(pldsid_params)

##### First fit Cz.
_, x_pred, _ = pldsid_sys.predict(Ytrain, {}) # Predict the latent states on the training data.
reg_kwargs = {'fit_intercept': True}
regression = reg_models.RegressionModel(reg_models.RegressionMethod.OLS_REG)
## Inputs and targets must be shaped (num_samples, num_features).
regression.fit(x_pred, Ztrain, **reg_kwargs)
reg_weights, reg_intercept = regression.weights()
pldsid_params['Cz'] = reg_weights

##### Now predict test data.
other_out = {'Z': pldsid_params['Cz']}
pldsid_y_pred, _, other_est = pldsid_sys.predict(Ytest, other_out)
pldsid_z_pred = other_est['Z']

pldsid_z_evalres = evaluate.evaluate_results(Ztest, pldsid_z_pred, ['CC'])['CC']

#### Handle prediction covariances to compute AUC.
Pp = other_est.get('Pp', None) # State prediction error covariances, shape (time, nx, nx).
assert (Pp is not None)
log_rate_covariances = Pp @ pldsid_params['C'].T # (time, nx, nr)
log_rate_covariances = np.transpose(log_rate_covariances, (0, 2, 1)) @ pldsid_params['C'].T # C @ Pp @ C.T -> (time, nr, nr)
pldsid_y_evalres = evaluate.evaluate_results(
                    Ytest, pldsid_y_pred, ['AUC'], predicted_covariances=log_rate_covariances)['AUC']
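Assuming RegressionMethod.OLS_REG with fit_intercept=True solves ordinary least squares with a bias term (an assumption about this repo's regression wrapper), the Cz refit above amounts to:

X = np.hstack([x_pred, np.ones((x_pred.shape[0], 1))])  # latent states plus intercept column
coef, *_ = np.linalg.lstsq(X, Ztrain, rcond=None)       # (nx + 1, nz) solution
Cz_check, intercept_check = coef[:-1].T, coef[-1]       # Cz shaped (nz, nx)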
In [10]:
print(f'PG-LDS-ID Z prediction CC {pgldsid_z_evalres.mean()}, PLDSID Z prediction CC {pldsid_z_evalres.mean()}')
print(f'PG-LDS-ID Y prediction AUC {pgldsid_y_evalres.mean()}, PLDSID Y prediction AUC {pldsid_y_evalres.mean()}')
PG-LDS-ID Z prediction CC 0.46216264610684094, PLDSID Z prediction CC 0.4096154663324887
PG-LDS-ID Y prediction AUC 0.5447589112937984, PLDSID Y prediction AUC 0.5592364736580343
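As a sanity check on the reported CC, assuming 'CC' is the Pearson correlation computed per Z dimension and then averaged, the PG-LDS-ID number can be recomputed directly:

cc_manual = np.array([np.corrcoef(Ztest[:, k], pgldsid_z_pred[:, k])[0, 1]
                      for k in range(Ztest.shape[1])])
print('Manual mean CC (PG-LDS-ID):', cc_manual.mean())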

Including stage 2 and using the true shared and disjoint latent state dimensions

In [11]:
n1 = 4; nx = 8
i = 10 # horizon

# PG-LDS-ID Stages 1 and 2.
pgldsid_params = PGLDSID.PGLDSID(i, nx, Ytrain.T, Z=Ztrain.T, n1=n1, Z_horizon=i)
pgldsid_params = SDP_optimize.optimize(pgldsid_params)

# PLDSID.
pldsid_params = PGLDSID.PGLDSID(i, nx, Ytrain.T, n1=0)
pldsid_params = SDP_optimize.optimize(pldsid_params)
The optimal value is 6.034638926762543 with status: optimal
The optimal value is 2.548239739182338 with status: optimal
In [12]:
### Visualize the modes identified by both learning algorithms against the ground-truth modes.

pgldsid_eigs = np.linalg.eigvals(pgldsid_params['A'])
pldsid_eigs = np.linalg.eigvals(pldsid_params['A'])

viz_utils.plot_eigenvalues(
  [shared_eigs, y_only_eigs, z_only_eigs, pgldsid_eigs, pldsid_eigs],
  'n1=4, nx=8', labels=['shared', 'y only', 'z only', 'pgldsid', 'pldsid'],
   markers=['o', 'o', 'o', 'x', 'x'],
   edgecolors=[colors['shared'], colors['y only'], colors['z only'], None, None],
   facecolors=['None', 'None', 'None', colors['pgldsid'], colors['pldsid']],
   alpha_vals=[0.4, 0.4, 0.4, 0.8, 0.8],
   bound_lims_to_circle=False, show_legend=True, legend_location='upper left', fig=None, ax=None)
plt.xlim([0.8, 1.0])
plt.ylim([-0.35, 0.35])
plt.show()
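The eigenvalue-matching check sketched earlier applies here too, now against all modes driving Y (shared plus Y-only), under the same assumptions:

true_y_eigs = np.concatenate([shared_eigs, y_only_eigs])     # all ground-truth Y modes
cost = np.abs(true_y_eigs[:, None] - pgldsid_eigs[None, :])  # pairwise distances
row, col = linear_sum_assignment(cost)                       # handles rectangular costs
print('Mean matched eigenvalue error (PG-LDS-ID, all Y modes):', cost[row, col].mean())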