All the experiments in this notebook are run for e_{f}=1 and e_{cf}=0. To change this, modify the values of label_real_test and label_cf_test, and in the cells below swap y_data_1_test and y_data_0_test.
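
For example, a run with e_{f}=0 and e_{cf}=1 would only require flipping the two label arrays and swapping the two test targets. A minimal sketch (the variables are created in the data-generation cell below; this is not one of the executed cells):

# hypothetical alternative setting: e_f = 0, e_cf = 1
label_real_test = np.full([len(x_data_test), 1], 0.)
label_cf_test = np.full([len(x_data_test), 1], 1.)
# ...then use y_data_0_test wherever y_data_1_test appears below, and vice versa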

In [23]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sbs
import tensorflow as tf
from sklearn.metrics import accuracy_score

from synthetic_data_generators import create_dataset, create_dataset_counterfactuals
import sys
sys.path.append('../..')
from Models import CSAE, CVAE, event_predictor, forecast_model

Synthetic data generation¶

In [26]:
uniform_change = 0.7
scale_param = 0.1
key_step = 20
seq_len = 30

# train and eval data for the model
train_labels, train_data = create_dataset(n = 2000, seq_len=seq_len, key_step=key_step, uniform_change=uniform_change, scale_param=scale_param) 
x_train, y_train = train_data[:,:key_step,:], train_data[:,key_step:,:]
eval_labels, eval_data = create_dataset(n = 500, seq_len=seq_len, key_step=key_step, uniform_change=uniform_change, scale_param=scale_param) 
x_eval, y_eval = eval_data[:,:key_step,:], eval_data[:,key_step:,:]

# test data
data_0_test, data_1_test = create_dataset_counterfactuals(n = 500, seq_len=seq_len, key_step=key_step, uniform_change=uniform_change, scale_param=scale_param) 
x_data_test, y_data_0_test, _, y_data_1_test = data_0_test[:,:key_step,:], data_0_test[:,key_step:,:], data_1_test[:,:key_step,:], data_1_test[:,key_step:,:]
label_real_test = np.full([len(x_data_test),1], 1.)  #modify 1. or 0. depending on the desired setting 
label_cf_test = np.full([len(x_data_test),1], 0.)  #modify 1. or 0. depending on the desired setting 

Train predictor for effectiveness metric¶

In [ ]:
predictor = event_predictor()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
predictor.compile(optimizer=optimizer, loss=tf.keras.losses.BinaryCrossentropy(), metrics = "accuracy")
predictor.fit(y_train, train_labels, epochs= 300, validation_data = (y_eval, eval_labels), batch_size=32, verbose=0)
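
Since the effectiveness metric below relies entirely on this predictor, it can help to check its held-out accuracy first. A minimal sketch using only objects defined above (not one of the executed cells):

# sanity check: accuracy of the event predictor on the evaluation split
eval_probs = np.array(predictor(y_eval)).reshape(-1)
eval_acc = np.mean((eval_probs >= 0.5) == np.array(eval_labels).reshape(-1))
print("Predictor eval accuracy:", eval_acc)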

CSAE¶

In [6]:
# Training CSAE
latent_dim = 7
Lambda = 0.19
series_size = seq_len - key_step
batch_size = 32
model_sparse = CSAE(seq_len = series_size, latent_dim = latent_dim, feat_dim = 1, hidden_layer_sizes = [100,200], Lambda=Lambda)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model_sparse.compile(optimizer, loss=model_sparse.loss_, metrics=[model_sparse.reconstruction, model_sparse.regularization])
history = model_sparse.fit(x =[train_labels, x_train, y_train], y=y_train, validation_data = [[eval_labels, x_eval, y_eval], y_eval], epochs=200, batch_size=batch_size, verbose=0)
In [17]:
# counterfactual MAE and MBE
pred = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
mae_list = np.mean((np.abs(y_data_0_test- pred)), axis=1)
mae = np.mean(mae_list)
mbe = np.mean(y_data_0_test- pred)
print("MAE: ", mae, "MBE: ", mbe)
MAE:  0.0629630140544818 MBE:  -0.011144542554720596
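
The same MAE/MBE computation is repeated for the CVAE and the forecast baseline below, so a small helper along these lines (hypothetical, not part of the original cells) would remove the duplication:

def cf_errors(target, pred):
    # hypothetical helper: MAE and MBE of a counterfactual estimate against the ground-truth counterfactuals
    pred = np.array(pred)
    return np.mean(np.abs(target - pred)), np.mean(target - pred)

# usage, e.g.:
# mae, mbe = cf_errors(y_data_0_test, model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
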
In [21]:
# total-steps and altered-steps metrics: how a localized alteration of the factual series propagates to the counterfactual
seq_length = 10
dif_rel_tot_csae = []
dif_rel_steps_csae = []
for i in range(3):
    ini_step = 2 + i
    fin_step = 6 + i
    for j in range(21):
        val = -1 + 0.1*j
        if val != 0:

            # y_data_1_test holds the factual (observed) sequences
            actuals = y_data_1_test
            alteration_ = np.concatenate([np.zeros([1,ini_step,1]), np.full(([1,fin_step-ini_step,1]), val), np.zeros([1,seq_length-fin_step,1])], axis=1)
            alteration = np.repeat(alteration_, len(x_data_test), axis=0)
            altered_actuals = actuals + alteration

            cf = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=actuals))
            altered_cf = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=altered_actuals))

            # change in the counterfactual: overall, within the altered steps, and outside them
            instance_dif = np.sum((altered_cf - cf), axis=1)
            difference_steps_alt = np.sum((altered_cf[:,ini_step:fin_step,:] - cf[:,ini_step:fin_step,:]), axis=1)
            difference_steps_no_alt = np.sum((altered_cf[:,:ini_step,:] - cf[:,:ini_step,:]), axis=1) + np.sum((altered_cf[:,fin_step:,:] - cf[:,fin_step:,:]), axis=1)

            dif_ideal = (fin_step - ini_step)*val
            dif_rel_tot_ = np.mean(instance_dif)/dif_ideal
            dif_rel_steps_ = np.mean(difference_steps_alt)/dif_ideal
            dif_rel_tot_csae.append(dif_rel_tot_)
            dif_rel_steps_csae.append(dif_rel_steps_)

print("Total diference: ", np.mean(dif_rel_tot_csae), "Step diference: ", np.mean(dif_rel_steps_csae))
Total diference:  0.9996108959157944 Step diference:  0.9850414102867481
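
Both ratios normalize the change induced in the counterfactual by the ideal total change (fin_step - ini_step)*val: roughly mean_i sum_t (cf(y_i + alteration)_t - cf(y_i)_t) / ((fin_step - ini_step)*val), computed either over all steps or over the altered steps only. Values close to 1 therefore mean the injected alteration is carried over to the counterfactual almost one-to-one.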
In [11]:
# composition, reversibility and effectiveness metrics
reconstruction = model_sparse.composition(label_real_test, x_data_test, y_data_1_test, 1)
rec_error = np.mean((np.abs(y_data_1_test- reconstruction)), axis=1)
reversibility = model_sparse.reversibility(label_real_test, label_cf_test, x_data_test, y_data_1_test, 1)
rev_error = np.mean((np.abs(y_data_1_test- reversibility)), axis=1)
cf_estimation = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
pred_labels = predictor(cf_estimation)
pred_labels_error = np.mean((np.abs(label_cf_test - pred_labels)), axis=1)
pred_labels_binary = list(map(lambda x: 1 if(x>=0.5) else 0 , pred_labels))
acc = accuracy_score(label_cf_test, pred_labels_binary)

mean_rec_error = np.mean(rec_error)
mean_rev_error = np.mean(rev_error)
mean_pred_labels_error = np.mean(pred_labels_error)
mean_eff_error = np.mean(acc)

print("Composition: ", mean_rec_error, "Reversibility: ", mean_rev_error, "Effectiveness: ", mean_eff_error)
Composition:  0.053748786 Reversibility:  0.06969808 Effectiveness:  1.0
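
Here composition is the MAE between y_data_1_test and the output of composition(...) under the factual label, reversibility the MAE against the output of reversibility(...) (which additionally takes the counterfactual label), and effectiveness the accuracy with which the trained predictor assigns the target label label_cf_test to the generated counterfactuals.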

CVAE¶

In [ ]:
latent_dim = 3
recon_weight = 90
series_size = seq_len - key_step
batch_size = 32
vae_model = CVAE(seq_len = series_size, latent_dim = latent_dim, feat_dim = 1, hidden_layer_sizes = [100,200], recon_weight=recon_weight)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
vae_model.compile(optimizer, loss=vae_model.loss_, metrics=[vae_model.reconstruction, vae_model.kl])
history = vae_model.fit(x =[train_labels, x_train, y_train], y=y_train, validation_data = [[eval_labels, x_eval, y_eval], y_eval], epochs=250, batch_size=batch_size, verbose=0)
In [16]:
# counterfactual MAE and MBE
pred = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
mae_list = np.mean((np.abs(y_data_0_test- pred)), axis=1)
mae = np.mean(mae_list)
mbe = np.mean(y_data_0_test- pred)
print("MAE: ", mae, "MBE: ", mbe)
MAE:  0.13047234035219277 MBE:  0.05347146836099079
In [17]:
# total-steps and altered-steps metrics: how a localized alteration of the factual series propagates to the counterfactual
seq_length = 10
dif_rel_tot_cvae = []
dif_rel_steps_cvae = []
for i in range(3):
    ini_step = 2 + i
    fin_step = 6 + i
    for j in range(21):
        val = -1 + 0.1*j
        if val != 0:

            # y_data_1_test holds the factual (observed) sequences
            actuals = y_data_1_test
            alteration_ = np.concatenate([np.zeros([1,ini_step,1]), np.full(([1,fin_step-ini_step,1]), val), np.zeros([1,seq_length-fin_step,1])], axis=1)
            alteration = np.repeat(alteration_, len(x_data_test), axis=0)
            altered_actuals = actuals + alteration

            cf = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=actuals))
            altered_cf = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=altered_actuals))

            instance_dif = np.sum((altered_cf - cf), axis=1)
            difference_steps_alt = np.sum((altered_cf[:,ini_step:fin_step,:] - cf[:,ini_step:fin_step,:]),axis=1)
            difference_steps_no_alt = np.sum((altered_cf[:,:ini_step,:] - cf[:,:ini_step,:]),axis=1) + np.sum((altered_cf[:,fin_step:,:] - cf[:,fin_step:,:]),axis=1)

            dif_ideal = (fin_step - ini_step)*val
            dif_rel_tot_ = np.mean(instance_dif)/dif_ideal
            dif_rel_steps_ = np.mean(difference_steps_alt)/dif_ideal
            dif_rel_tot_cvae.append(dif_rel_tot_)
            dif_rel_steps_cvae.append(dif_rel_steps_)

print("Total diference: ", np.mean(dif_rel_tot_cvae), "Step diference: ", np.mean(dif_rel_steps_cvae))
Total diference:  0.4586754665958877 Step diference:  0.3496101022130652
In [18]:
# composition, reversibility and effectiveness metrics
reconstruction = vae_model.composition(label_real_test, x_data_test, y_data_1_test, 1)
rec_error = np.mean((np.abs(y_data_1_test- reconstruction)), axis=1)
reversibility = vae_model.reversibility(label_real_test, label_cf_test, x_data_test, y_data_1_test, 1)
rev_error = np.mean((np.abs(y_data_1_test- reversibility)), axis=1)
cf_estimation = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
pred_labels = predictor(cf_estimation)
pred_labels_error = np.mean((np.abs(label_cf_test - pred_labels)), axis=1)
pred_labels_binary = list(map(lambda x: 1 if(x>=0.5) else 0 , pred_labels))
acc = accuracy_score(label_cf_test, pred_labels_binary)

mean_rec_error = np.mean(rec_error)
mean_rev_error = np.mean(rev_error)
mean_pred_labels_error = np.mean(pred_labels_error)
mean_eff_error = np.mean(acc)

print("Composition: ", mean_rec_error, "Reversibility: ", mean_rev_error, "Effectiveness: ", mean_eff_error)
Composition:  0.11366185 Reversibility:  0.12815796 Effectiveness:  1.0

Forecast¶

In [27]:
model_forecast = forecast_model(pred_steps=10)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model_forecast.compile(optimizer=optimizer, loss="mse", metrics = "mae")
model_forecast.fit([train_labels, x_train], y_train, epochs= 500, validation_data = ([eval_labels, x_eval], y_eval), batch_size=32, verbose=0)
Out[27]:
<keras.src.callbacks.History at 0x7fd95ffb3a00>
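
The forecast baseline produces a counterfactual estimate simply by conditioning the forecaster on the counterfactual label, i.e. model_forecast([label_cf_test, x_data_test]) in the next cell.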
In [31]:
# counterfactual MAE and MBE
pred = model_forecast([label_cf_test, x_data_test])
mae = np.mean(np.abs(y_data_0_test.reshape(-1,10) - pred))
mbe = np.mean(y_data_0_test.reshape(-1,10) - pred)

print("MAE: ", mae, "MBE: ", mbe)
MAE:  0.20356284 MBE:  0.015091702