All the experiments in this notebook are made for e_{f}=1 and e_{cf}=0. To change that, change the values of label_real_test and label_cf_test, the parameter eval123_test_event in the preprocessing() function and, in the cells, exchange y_data_1_test and y_data_0_test
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sbs
import tensorflow as tf
import sys
sys.path.append('../..')
from Models import CSAE, CVAE, event_predictor, forecast_model
import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
dir = "./rosseman_data/train.csv"
df_train = pd.read_csv(dir)
list_counts = []
df_train_ = df_train.copy()
df_train_["Date"] = df_train_["Date"].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d'))
x_series = []
y_series = []
dates = [datetime.datetime(2013, 3, 4, 0, 0), datetime.datetime(2014, 3, 3, 0, 0) ,datetime.datetime(2015, 3, 2, 0, 0)]
for store in df_train_["Store"].unique():
df_ = df_train_[df_train_["Store"]==store]
for date in dates:
initial = date - datetime.timedelta(days=28)
final = date + datetime.timedelta(days=21)
candidate_x = list(df_[(df_["Date"] >= initial) & (df_["Date"] < date)]["Sales"])
candidate_y = list(df_[(df_["Date"] >= date) & (df_["Date"] < final)]["Sales"])
candidate_x.reverse()
candidate_y.reverse()
x_series.append(list(candidate_x))
y_series.append(list(candidate_y))
#normalize
mean = np.mean(x_series, axis=1)
x_ = []
y_ = []
for i in range(len(x_series)):
if mean[i] != 0.:
x_.append(x_series[i]/mean[i])
y_.append(y_series[i]/mean[i])
def preprocessing(input_x=x_, input_y=y_, eval_test_event=1., seed=None):
if seed:
train_x, eval_test_x, train_y, eval_test_y = train_test_split(input_x, input_y, eval_size=0.2, random_state=seed)
eval_x, test_x, eval_y, test_y = train_test_split(eval_test_x, eval_test_y, eval_size=0.5, random_state=seed)
train_x_0, train_x_1, train_y_0, train_y_1 = train_test_split(train_x, train_y, eval_size=0.5, random_state=seed)
else:
train_x, eval_test_x, train_y, eval_test_y = train_test_split(input_x, input_y, eval_size=0.2)
eval_x, test_x, eval_y, test_y = train_test_split(eval_test_x, eval_test_y, eval_size=0.5)
train_x_0, train_x_1, train_y_0, train_y_1 = train_test_split(train_x, train_y, eval_size=0.5)
train_x_0 = np.array(train_x_0)
train_x_1 = np.array(train_x_1)
train_y_0 = np.array(train_y_0)
train_y_1 = np.array(train_y_1)
eval_x = np.array(eval_x)
eval_y = np.array(eval_y)
eval_y_event = eval_y.copy()
test_x = np.array(test_x)
test_y = np.array(test_y)
test_y_event = test_y.copy()
#alternative intervention
train_y_1[:,0], train_y_1[:,1], train_y_1[:,2:] = train_y_1[:,0]*1.1, train_y_1[:,1]*1.2, train_y_1[:,2:]*1.3
eval_y_event[:,0], eval_y_event[:,1], eval_y_event[:,2:] = eval_y_event[:,0]*1.1, eval_y_event[:,1]*1.2, eval_y_event[:,2:]*1.3
test_y_event[:,0], test_y_event[:,1], test_y_event[:,2:] = test_y_event[:,0]*1.1, test_y_event[:,1]*1.2, test_y_event[:,2:]*1.3
x_train = np.concatenate([train_x_0, train_x_1]).reshape(-1,28,1)
y_train = np.concatenate([train_y_0, train_y_1]).reshape(-1,21,1)
label_0_train = np.full((len(train_x_0),1), 0.)
label_1_train = np.full((len(train_x_1),1), 1.)
train_labels = np.concatenate([label_0_train, label_1_train])
#eval data
x_data_eval = eval_x.reshape(-1,28,1)
y_data_0_eval = eval_y.reshape(-1,21,1)
y_data_1_eval = eval_y_event.reshape(-1,21,1)
eval_labels = np.full((len(x_data_eval),1), eval_test_event)
#test data
x_data_test = eval_x.reshape(-1,28,1)
y_data_0_test = eval_y.reshape(-1,21,1)
y_data_1_test = eval_y_event.reshape(-1,21,1)
test_labels = np.full((len(x_data_test),1), eval_test_event)
return x_train, y_train, train_labels, x_data_eval, y_data_0_eval, y_data_1_eval, eval_labels, x_data_test, y_data_0_test, y_data_1_test, test_labels
x_train, y_train, train_labels, x_data_eval, y_data_0_eval, y_data_1_eval, eval_labels, x_data_test, y_data_0_test, y_data_1_test, test_labels = preprocessing(eval_test_event=1.)
#eval data for the counterfactual pipeline
#choose among data_0 or data_1, in the x_data, y_data and y_data_cf parameters to testuate TS with nevative or positive event. Modify also 1. or 0. in label_real and label_cf
label_real = eval_labels
label_cf = np.full_like(eval_labels, 0.) #modify 1. or 0.
#change y_data and y_data_cf if eval event = 0./1.
#test data
label_real_test = test_labels
label_cf_test = np.full_like(test_labels, 0.) #modify 1. or 0.
predictor = event_predictor()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
predictor.compile(optimizer=optimizer, loss=tf.keras.losses.BinaryCrossentropy(), metrics = "accuracy")
predictor.fit(y_train, train_labels, epochs= 200, batch_size=32, verbose=0)
<keras.src.callbacks.History at 0x7fc81e0eceb0>
latent_dim = 8
Lambda = 0.15
batch_size = 32
series_size=21
model_sparse = CSAE(seq_len = series_size, latent_dim = latent_dim, feat_dim = 1, hidden_layer_sizes = [100,200], Lambda=Lambda)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model_sparse.compile(optimizer, loss=model_sparse.loss_, metrics=[model_sparse.reconstruction, model_sparse.regularization])
history = model_sparse.fit(x =[train_labels, x_train, y_train], y=y_train, validation_data = [[eval_labels, x_data_eval, y_data_1_eval], y_data_1_eval], epochs=250, batch_size=batch_size, verbose=0)
#cf mae and mbe
pred = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
mae_list = np.mean((np.abs(y_data_0_test- pred)), axis=1)
mae = np.mean(mae_list)
mbe = np.mean(y_data_0_test- pred)
print("MAE: ", mae, "MBE: ", mbe)
MAE: 0.058011309491545815 MBE: 0.0062942160193514085
# total steps and aletered steps metrics
seq_length = 21
dif_rel_tot_csae = []
dif_rel_steps_csae = []
for i in range(12):
ini_step= 4 +i
fin_step = 8 +i
for j in range(21):
val = -1 + 0.1*j
if val != 0:
# y_data_1_test are the actuals
actuals = y_data_1_test
alteration_ = np.concatenate([np.zeros([1,ini_step,1]), np.full(([1,fin_step-ini_step,1]), val), np.zeros([1,seq_length-fin_step,1])], axis=1)
alteration = np.repeat(alteration_, len(x_data_test), axis=0)
altered_actuals = actuals + alteration
cf = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=actuals))
altered_cf = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=altered_actuals))
instance_dif = np.sum((altered_cf - cf), axis=1)
difference_steps_alt = np.sum((altered_cf[:,ini_step:fin_step,:] - cf[:,ini_step:fin_step,:]),axis=1)
difference_steps_no_alt = np.sum((altered_cf[:,:ini_step,:] - cf[:,:ini_step,:]),axis=1) + np.sum((altered_cf[:,fin_step:,:] - cf[:,fin_step:,:]),axis=1)
dif_ideal = (fin_step - ini_step)*val
dif_rel_tot_ = np.mean(instance_dif)/dif_ideal
dif_rel_steps_ = np.mean(difference_steps_alt)/dif_ideal
dif_rel_tot_csae.append(dif_rel_tot_)
dif_rel_steps_csae.append(dif_rel_steps_)
print("Total diference: ", np.mean(dif_rel_tot_csae), "Step diference: ", np.mean(dif_rel_steps_csae))
Total diference: 0.7236494183289035 Step diference: 0.4944469327853608
# composition, reversibility and effectiveness metrics
reconstruction = model_sparse.composition(label_real_test, x_data_test, y_data_1_test, 1)
rec_error = np.mean((np.abs(y_data_1_test- reconstruction)), axis=1)
reversibility = model_sparse.reversibility(label_real_test, label_cf_test, x_data_test, y_data_1_test, 1)
rev_error = np.mean((np.abs(y_data_1_test- reversibility)), axis=1)
cf_estimation = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
pred_labels = predictor(cf_estimation)
pred_labels_error = np.mean((np.abs(label_cf_test - pred_labels)), axis=1)
pred_labels_binary = list(map(lambda x: 1 if(x>=0.5) else 0 , pred_labels))
acc = accuracy_score(label_cf_test, pred_labels_binary)
mean_rec_error = np.mean(rec_error)
mean_rev_error = np.mean(rev_error)
mean_pred_labels_error = np.mean(pred_labels_error)
mean_eff_error = np.mean(acc)
print("Composition: ", mean_rec_error, "Reversibility: ", mean_rev_error, "Effectiveness: ", mean_eff_error)
Composition: 0.057749063 Reversibility: 0.064619824 Effectiveness: 0.9850299401197605
latent_dim = 6
recon_weight = 200
batch_size = 32
series_size=21
vae_model = CVAE(seq_len = series_size, latent_dim = latent_dim, feat_dim = 1, hidden_layer_sizes = [100,200], recon_weight=recon_weight)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
vae_model.compile(optimizer, loss=vae_model.loss_, metrics=[vae_model.reconstruction, vae_model.kl])
history = vae_model.fit(x =[train_labels, x_train, y_train], y=y_train, validation_data = [[eval_labels, x_data_eval, y_data_1_eval], y_data_1_eval], epochs=250, batch_size=batch_size, verbose=0)
#cf mae and mbe
pred = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
mae_list = np.mean((np.abs(y_data_0_test- pred)), axis=1)
mae = np.mean(mae_list)
mbe = np.mean(y_data_0_test- pred)
std = np.std(mae_list)
print("MAE: ", mae, "MBE: ", mbe)
MAE: 0.0831350622519281 MBE: -0.010847777158230846
# total steps and aletered steps metrics
seq_length = 21
dif_rel_tot_csae = []
dif_rel_steps_csae = []
for i in range(12):
ini_step= 4 +i
fin_step = 8 +i
for j in range(21):
val = -1 + 0.1*j
if val != 0:
# y_data_1_test are the actuals
actuals = y_data_1_test
alteration_ = np.concatenate([np.zeros([1,ini_step,1]), np.full(([1,fin_step-ini_step,1]), val), np.zeros([1,seq_length-fin_step,1])], axis=1)
alteration = np.repeat(alteration_, len(x_data_test), axis=0)
altered_actuals = actuals + alteration
cf = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=actuals))
altered_cf = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=altered_actuals))
instance_dif = np.sum((altered_cf - cf), axis=1)
difference_steps_alt = np.sum((altered_cf[:,ini_step:fin_step,:] - cf[:,ini_step:fin_step,:]),axis=1)
difference_steps_no_alt = np.sum((altered_cf[:,:ini_step,:] - cf[:,:ini_step,:]),axis=1) + np.sum((altered_cf[:,fin_step:,:] - cf[:,fin_step:,:]),axis=1)
dif_ideal = (fin_step - ini_step)*val
dif_rel_tot_ = np.mean(instance_dif)/dif_ideal
dif_rel_steps_ = np.mean(difference_steps_alt)/dif_ideal
dif_rel_tot_csae.append(dif_rel_tot_)
dif_rel_steps_csae.append(dif_rel_steps_)
print("Total diference: ", np.mean(dif_rel_tot_csae), "Step diference: ", np.mean(dif_rel_steps_csae))
Total diference: -0.024628333956686562 Step diference: 0.09543485683194983
# composition, reversibility and effectiveness metrics
reconstruction = vae_model.composition(label_real_test, x_data_test, y_data_1_test, 1)
rec_error = np.mean((np.abs(y_data_1_test- reconstruction)), axis=1)
reversibility = vae_model.reversibility(label_real_test, label_cf_test, x_data_test, y_data_1_test, 1)
rev_error = np.mean((np.abs(y_data_1_test- reversibility)), axis=1)
cf_estimation = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))
pred_labels = predictor(cf_estimation)
pred_labels_error = np.mean((np.abs(label_cf_test - pred_labels)), axis=1)
pred_labels_binary = list(map(lambda x: 1 if(x>=0.5) else 0 , pred_labels))
acc = accuracy_score(label_cf_test, pred_labels_binary)
mean_rec_error = np.mean(rec_error)
mean_rev_error = np.mean(rev_error)
mean_pred_labels_error = np.mean(pred_labels_error)
mean_pred_labels_accuracy = np.mean(acc)
print("Composition: ", mean_rec_error, "Reversibility: ", mean_rev_error, "Effectiveness: ", mean_eff_error)
Composition: 0.10214457 Reversibility: 0.10964968 Effectiveness: 0.9850299401197605
model_forecast = forecast_model(pred_steps=21)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model_forecast.compile(optimizer=optimizer, loss="mse", metrics = "mae")
model_forecast.fit([train_labels, x_train], y_train, epochs= 500, validation_data = ([np.full([len(x_data_eval),1], 0.), x_data_eval], y_data_0_eval), batch_size=32, verbose=0)
<keras.src.callbacks.History at 0x7fc81c980400>
#cf mae and mbe
mae = np.mean(np.abs(y_data_0_test - pred))
mbe = np.mean(y_data_0_test - pred)
print("MAE: ", mae, "MBE: ", mbe)
MAE: 0.08313506225192811 MBE: -0.010847777158230846