%Cascaded tanks derivatives Gaussian Process 5-fold verification
%Models of the five folds are saved and loaded in CasTanks_GP_RK4Int for the evaluation of
%the integrated version of this model

%Read the two experiment files. From each file the fields u and y are
%taken and transposed; columns 1 and 2 of the transposed y are then split
%into separate vectors (h11/h12 for Tank1.mat, h21/h22 for Tank2.mat).
S  = load('Tank1.mat');
S2 = load('Tank2.mat');

%Data set 1 (Tank1.mat)
u1  = S.u';
y1  = S.y';
h11 = y1(:, 1);
h12 = y1(:, 2);

%Data set 2 (Tank2.mat)
u2  = S2.u';
y2  = S2.y';
h21 = y2(:, 1);
h22 = y2(:, 2);

%Time value of every datapoint: the sample index scaled by 5 for data
%set 1 and by 4 for data set 2
t1 = 5*(1:length(u1));
t2 = 4*(1:length(u2));

%First group of hyperparameters for the Gaussian Process.
%a and atau are fixed constants; bh1/bh2 are derived from the variances
%as var*(a+1).
a    = 1000;
atau = 4;

%Both variances share the same value
varh1 = 10^(-3);
varh2 = varh1;

%Half the absolute sum of the two per-data-set means of each signal
%(not referenced again in the visible part of this script)
scaleh1 = 0.5*abs(mean(h11) + mean(h21));
scaleh2 = 0.5*abs(mean(h12) + mean(h22));

bh1 = (a + 1)*varh1;
bh2 = (a + 1)*varh2;


%Derivative targets: each measured signal is differentiated against its
%own time vector with center_difference and the result is smoothed with
%the 'lowess' method of smooth.
%Signal 1, data sets 1 and 2:
derivh11   = center_difference(h11, t1);
derivh21   = center_difference(h21, t2);
smderivh11 = smooth(derivh11, 'lowess');
smderivh21 = smooth(derivh21, 'lowess');
%Signal 2, data sets 1 and 2:
derivh12   = center_difference(h12, t1);
derivh22   = center_difference(h22, t2);
smderivh12 = smooth(derivh12, 'lowess');
smderivh22 = smooth(derivh22, 'lowess');

%Forcing function input, smoothed with the default method of smooth
smu1 = smooth(u1);
smu2 = smooth(u2);

%Stack data set 1 on top of data set 2; the stacked vectors are what gets
%split into test/train folds further down
h1 = [h11; h21];
h2 = [h12; h22];
u  = [u1; u2];
tot_u = u;                  %same contents as u, kept under its own name
smu = [smu1; smu2];
smderivh1 = [smderivh11; smderivh21];
smderivh2 = [smderivh12; smderivh22];

%Model inputs, one row per datapoint:
%  dh1/dt model uses h1, h2 and the smoothed u
%  dh2/dt model uses h1, h2 and the raw u
inputs_h1 = [h1, h2, smu];
inputs_h2 = [h1, h2, u];

%Remaining hyperparameters, derived from the smoothed derivative targets.
%scalederh* is the absolute mean of the target, tauh* = sqrt(scale/var^2)
%and btauh* = tauh*(atau+1).
scalederh1 = abs(mean(smderivh1));
tauh1      = sqrt(scalederh1/varh1^2);
btauh1     = (atau + 1)*tauh1;

scalederh2 = abs(mean(smderivh2));
tauh2      = sqrt(scalederh2/varh2^2);
btauh2     = (atau + 1)*tauh2;

%Spline coefficients are read from a text file and converted by
%splineconvert500 into the form passed to emulator_Xin and bss_eval
x    = dlmread('spline_coefficient_500.txt');
phis = splineconvert500(x);

%Number of draws requested from emulator_Xin, and a 6x1 column of ones
%that is passed to emulator_Xin as relats
draws  = 2000;
relats = ones(6, 1);

%Per-fold result vectors (one entry per fold), all starting at zero:
%mean absolute errors of both models and the train/predict wall times
[MAE_h1, MAE_h2]     = deal(zeros(5, 1));
[T_Train, T_Predict] = deal(zeros(5, 1));

%Per-fold containers (5x1 cells, initially empty) that hold the models and
%the normalization bounds so they can be reused for the integration problem
[matrixh1, matrixh2] = deal(cell(5, 1));
[betash1, betash2]   = deal(cell(5, 1));
norms                = cell(5, 1);

%Ordered 5-fold verification. Fold i is tested on the i-th consecutive
%group of fold_size rows and trained on every other row. For each fold the
%script builds both derivative models, stores them (matrixh*, betash*,
%norms) and records the mean absolute error and the train/predict times.
fold_size = 2000;       %rows in each test fold
burn_in   = 1000;       %number of leading draws that are discarded
col_order = [3,1,2];    %bounds are stored as [input col 3, col 1, col 2]
for i=1:5
    %Rows of the current test fold
    test_rows = (1+fold_size*(i-1)):(fold_size*i);
    test_inputs_h1 = inputs_h1(test_rows,:);
    test_inputs_h2 = inputs_h2(test_rows,:);
    test_derivh1 = smderivh1(test_rows,:);
    test_derivh2 = smderivh2(test_rows,:);

    %Everything outside the test fold is training data. One mask replaces
    %the former i==1 / i==5 special cases. With exactly 5*fold_size rows
    %the result is unchanged; any rows beyond that are now in the training
    %set of every fold (previously the last fold alone dropped them).
    is_train = true(size(inputs_h1,1),1);
    is_train(test_rows) = false;
    train_inputs_h1 = inputs_h1(is_train,:);
    train_inputs_h2 = inputs_h2(is_train,:);
    train_derivh1 = smderivh1(is_train,:);
    train_derivh2 = smderivh2(is_train,:);

    %Minimums and maximums of the training inputs, used to normalize both
    %the training and the testing set. Stored in col_order, i.e. entry 1
    %belongs to input column 3, entry 2 to column 1, entry 3 to column 2.
    maxim_h1 = max(train_inputs_h1(:,col_order),[],1);
    minim_h1 = min(train_inputs_h1(:,col_order),[],1);
    norms_h1 = [minim_h1;maxim_h1];
    maxim_h2 = max(train_inputs_h2(:,col_order),[],1);
    minim_h2 = min(train_inputs_h2(:,col_order),[],1);
    %Fixed: this used to be assembled from the h1 bounds
    norms_h2 = [minim_h2;maxim_h2];
    %Bounds to be saved and reused in the integration model (only the h1
    %bounds are stored, as before)
    norms{i} = norms_h1;

    %Normalize the inputs column by column with the matching bounds
    ntrain_inputs_h1 = [normalize(train_inputs_h1(:,1),minim_h1(2),maxim_h1(2)), ...
                        normalize(train_inputs_h1(:,2),minim_h1(3),maxim_h1(3)), ...
                        normalize(train_inputs_h1(:,3),minim_h1(1),maxim_h1(1))];
    ntrain_inputs_h2 = [normalize(train_inputs_h2(:,1),minim_h2(2),maxim_h2(2)), ...
                        normalize(train_inputs_h2(:,2),minim_h2(3),maxim_h2(3)), ...
                        normalize(train_inputs_h2(:,3),minim_h2(1),maxim_h2(1))];
    ntest_inputs_h1 = [normalize(test_inputs_h1(:,1),minim_h1(2),maxim_h1(2)), ...
                       normalize(test_inputs_h1(:,2),minim_h1(3),maxim_h1(3)), ...
                       normalize(test_inputs_h1(:,3),minim_h1(1),maxim_h1(1))];
    ntest_inputs_h2 = [normalize(test_inputs_h2(:,1),minim_h2(2),maxim_h2(2)), ...
                       normalize(test_inputs_h2(:,2),minim_h2(3),maxim_h2(3)), ...
                       normalize(test_inputs_h2(:,3),minim_h2(1),maxim_h2(1))];

    %Build the models (and measure the time it takes)
    T_TrainS = tic;
    [over_betash1,matrixh1{i},evsh1] = emulator_Xin(varh1, ntrain_inputs_h1, train_derivh1, relats, phis, a, bh1, atau, btauh1, 3, draws, 0, 1,1);
    [over_betash2,matrixh2{i},evsh2] = emulator_Xin(varh2, ntrain_inputs_h2, train_derivh2, relats, phis, a, bh2, atau, btauh2, 5, draws, 0, 1,1);
    T_Train(i) = toc(T_TrainS);

    %Throw out the first burn_in draws. Fixed off-by-one: indexing from
    %row 1000 kept that row and therefore discarded only 999 draws.
    act_draws = draws - burn_in;
    betash1{i} = over_betash1(burn_in+1:end,:);
    betash2{i} = over_betash2(burn_in+1:end,:);

    %Mean model over the retained draws; the explicit dimension keeps the
    %result a row of coefficients even if a single draw is retained
    mean_betash1 = mean(betash1{i},1);
    mean_betash2 = mean(betash2{i},1);

    %Test the models (and measure the time it takes)
    T_PredictS = tic;
    h1_model = bss_eval(ntest_inputs_h1, mean_betash1, phis, matrixh1{i});
    h2_model = bss_eval(ntest_inputs_h2, mean_betash2, phis, matrixh2{i});
    T_Predict(i) = toc(T_PredictS);

    %Mean absolute error of this fold. Assigned directly rather than
    %accumulated, so re-running the loop cannot add onto stale values.
    %(:) makes both operands columns; this assumes bss_eval returns one
    %prediction per test row - a size mismatch raises an error here.
    MAE_h1(i) = mean(abs(h1_model(:) - test_derivh1(:)));
    MAE_h2(i) = mean(abs(h2_model(:) - test_derivh2(:)));
end

%Summary over the five folds. None of the statements below is terminated
%with a semicolon, so every value is echoed to the command window.

%Mean of the per-fold MAEs:
MAE_h1_tot = mean(MAE_h1)
MAE_h2_tot = mean(MAE_h2)

%Mean wall time for building the models and for predicting:
Mean_Train_Time = mean(T_Train)
Mean_Predict_Time = mean(T_Predict)

%Spread of the per-fold MAEs: square root of the mean squared deviation
%from the mean (divided by the number of folds, not by one less):
MAE_h1_stdev = (sum((MAE_h1 - MAE_h1_tot).^2)/numel(MAE_h1))^0.5
MAE_h2_stdev = (sum((MAE_h2 - MAE_h2_tot).^2)/numel(MAE_h2))^0.5
