%% Load the raw recording of one subject
clear;
clc;
cd('G:\EEG_AV_speech');

subj = 21;                   % subject number; selects the folder and file below
subj_str = num2str(subj);
% The loaded file is expected to provide the matrix `record` used in the next
% section (it is not defined anywhere else in this script).
load(['.\subj_' subj_str '\subj_' subj_str '_data.mat'])

% Sampling rate (Hz). Assigned after load so that this value is the one in
% effect even if the .mat file carries a variable with the same name.
fs = 128;

% Optional quick look at the first hour of each of the 64 scalp channels:
% figure;for ch =1:64;subplot(8,8,ch),plot(record(1:fs*3600,ch));ylim([-(1e+4) 1e+4]);title(ch);end
%% Band-pass filter design, bad-channel table, trigger detection, filtering
% 1-30 Hz linear-phase FIR band-pass of order fs*6 (cut-offs are normalised
% to the Nyquist frequency fs/2).
b_pass = fir1(fs*6,2*[1 30]/fs,'bandpass');

% Bad scalp channels, one cell per subject (cell index = subject number).
% Entries written as k+32 address channels 33-64.
% NOTE(review): presumably the second bank of 32 electrodes - confirm
% against the recording montage.
bad{1} = [28+32 20+32 32];
bad{2} = [32 28+32];
bad{3} = [32];
bad{4} = [32];
bad{5} = [32];
bad{6} = [32];
bad{7} = [32 25+32];
bad{8} = [32];
bad{9} = [13,28,24+32,25+32,31+32,32+32];
bad{10} = [15+32,25+32];
bad{11} = [25+32];
bad{12} = [25+32];
bad{13} = [17+32 25+32];
bad{14} = [23 26+32];
bad{15} = [24 23+32];
bad{16} = [30 31+32];
bad{17} = [20];
bad{18} = [29 25+32];
bad{19} = [25+32 31+32];
bad{20} = [30];
bad{21} = [5 30 25+32];

% Prepend 5 s of zeros to every column. The pad width follows the actual
% column count of `record` instead of a hard-coded 71.
record = [zeros(fs*5,size(record,2));record];

% The last column is used as the trigger channel: a +1 step marks the first
% sample of a trial, a -1 step marks its last sample.
trig = diff(record(:,end));
pt_start = find(trig == 1)+1;
pt_end = find(trig == -1);
prestim = 0;
% poststim = T;
if subj == 1
   % Subject 1: the first detected start/end pair is discarded.
   pt_start = pt_start(2:end);
   pt_end = pt_end(2:end);
end
T = (pt_end-pt_start+1)/fs; % trial durations in seconds

% Zero everything outside the trials (all columns, trigger included).
% The loop must run over every gap between consecutive trials; the previous
% `for i = length(T)-1` visited only the last gap.
for i = 1:length(T)-1
    record(pt_end(i)+1:pt_start(i+1)-1,:) = 0;
end
record(1:pt_start(1)-1,:) = 0;
record(pt_end(end)+1:end,:) = 0;

% Filter columns 1-70 (everything except the trigger column when `record`
% has 71 columns).
record_filter = filter(b_pass,1,record(:,1:70));
% Compensate the FIR group delay of (length(b_pass)-1)/2 samples by dropping
% that many leading rows, so that row n of record_filter lines up with row n
% of record. record_filter is therefore shorter than record by that amount.
record_filter = record_filter((length(b_pass)-1)/2+1:end,:);

%% delete bad segments
% For every channel that is not listed as bad for this subject, samples whose
% absolute filtered amplitude exceeds 500 are located. Exceedances that lie
% within fs*10 samples (10 s) of each other are merged into one segment, and
% each segment is zeroed from its first to its last exceedance.
% NOTE(review): the threshold of 500 is presumably in microvolts - confirm
% against the units of the recording.
record_filter_c = record_filter;
bad_seg = cell(70,1); % per-channel indices of supra-threshold samples
for ch = 1:70
    % Reset for every channel so that a skipped (bad) channel records an
    % empty list instead of the previous channel's indices.
    id0 = [];
    if ~ismember(ch,bad{subj})
        temp = record_filter(:,ch);
        id0 = find(abs(temp)>500);
        if ~isempty(id0)
            id1 = diff(id0);        % gaps between successive exceedances
            id2 = find(id1>fs*10);  % positions where a new segment begins
            tt_start = [id0(1);id0(id2+1)];
            tt_end   = [id0(id2);id0(end)];
            for t = 1:length(tt_start)
               temp(tt_start(t):tt_end(t)) = 0;
            end
        end
        record_filter_c(:,ch) = temp;
    end
    bad_seg{ch,1} = id0;
end

% Visual check of the six extra channels (columns 65-70): a fixed window from
% sample 2110*128 to sample 2120*128 is drawn in a 3-by-2 grid of axes.
figure;
win = 2110*128:2120*128;
for k = 1:6
    subplot(3,2,k)
    plot(record_filter_c(win,64+k))
    ylim([-300 300])
end
clear k win
%% regress eye channels
% Two bipolar derivations are built from the extra channels (65-66 and 69-70)
% and regressed out of all 70 channels by least squares.
% NOTE(review): presumably these pairs are the EOG electrodes - confirm.
eyem = [record_filter_c(:,65)-record_filter_c(:,66), ...
        record_filter_c(:,69)-record_filter_c(:,70)];
eye_weights = record_filter_c(:,1:70)'/eyem';       % 70-by-2 regression weights
record_eye = record_filter_c(:,1:70)-(eye_weights*eyem')';

% Split into the 64 scalp channels and the remaining extra channels.
record_extr = record_eye(:,65:end);
record2 = record_eye(:,1:64);

% EEGLAB reads the data from the workspace variable named in the 'data'
% argument, so the channels-by-samples matrix must be called record_raw.
record_raw = record2';
EEG = pop_importdata('dataformat','array','nbchan',64,'data','record_raw','srate',fs,'pnts',0,'xmin',0);
EEG = pop_chanedit(EEG, 'lookup','G:\EEG_AV_speech\BioSemi64.loc','load',...
            {'G:\EEG_AV_speech\BioSemi64.loc','filetype','autodetect'});
% Replace this subject's bad channels by spherical interpolation.
EEG = pop_interp(EEG, bad{subj}, 'spherical');
record3(:,1:64) = EEG.data';

% Two output files: the interpolated scalp data that goes into EEGLAB/ICA,
% and the side information needed again after ICA.
save(['.\subj_' num2str(subj) '\subj_' num2str(subj) '_data_pre_eeglab_0.mat'],'record3');
save(['.\subj_' num2str(subj) '\subj_' num2str(subj) '_data_pre_eeglab.mat'],'record_extr','pt_start','pt_end','T');

%% run ICA
% This section is a manual procedure and is not meant to be executed in one
% go: `clear` empties the workspace, so the EEG structure read further down
% has to be produced by the interactive EEGLAB session in between.
clear;
eeglab;
% Manual steps inside EEGLAB:
%   1. load record3 and confirm the sampling rate
%   2. run ICA
%   3. delete the artefactual ICA components by hand
% NOTE(review): assumes EEGLAB leaves the cleaned dataset in the workspace
% variable EEG, and that the current folder is still G:\EEG_AV_speech
% (this section does not call cd itself) - confirm before running.
subj = 21;
% Store the cleaned data as samples-by-channels.
record_ica = EEG.data';
save(['.\subj_' num2str(subj) '\subj_' num2str(subj) '_data_post_eeglab.mat'],'record_ica');

%% Re-reference, cut into trials, and clip outliers
clear;
cd('G:\EEG_AV_speech');
subj = 21;
load(['.\subj_' num2str(subj) '\subj_' num2str(subj) '_data_post_eeglab.mat'],'record_ica');
load(['.\subj_' num2str(subj) '\subj_' num2str(subj) '_data_pre_eeglab.mat'],'record_extr','pt_start','pt_end','T');

addpath G:\EEG_AV_speech\preprocessing_EEG
% Put the extra channels back next to the ICA-cleaned scalp channels, so that
% columns 65-70 are the extra channels again.
record4 = [record_ica record_extr];
record_baseline = demean(record4); % demean comes from the folder added above

% Re-reference the 64 scalp channels.
% NOTE(review): columns 67 and 68 are presumably the two mastoid electrodes -
% confirm against the montage.
if subj == 1 || subj == 7
    % Subjects 1 and 7: only column 67 is used as the reference (a single
    % channel, not the bilateral average).
    record_reference = record_baseline(:,1:64)-record_baseline(:,67);
else
    % All other subjects: average of columns 67 and 68.
    record_reference = record_baseline(:,1:64)-mean(record_baseline(:,[67 68]),2);
end

fs = 128;

% Cut the continuous data into trials, stored in single precision.
n_trial = length(pt_start);
x = cell(n_trial,1);
for tv = 1:n_trial
    x{tv,1} = single(record_reference(pt_start(tv):pt_end(tv),:));
end

% Per-channel median over all trials, computed on the concatenated data.
% One vertcat call replaces growing the matrix trial by trial.
x_re = vertcat(x{:});
median_all = median(x_re,1);

% For every trial: subtract the across-trial median of each channel, then
% clip each channel at +/- 6 times the median absolute value of that channel
% within the trial.
x_2 = cell(size(x));
for tv = 1:size(x,1)
    x_trial = x{tv,1}-median_all;             % baseline correction across trials
    limit = 6*median(abs(x_trial),1);         % one limit per channel
    limit = repmat(limit,size(x_trial,1),1);  % same size as the trial
    too_high = x_trial > limit;
    x_trial(too_high) = limit(too_high);
    too_low = x_trial < -limit;
    x_trial(too_low) = -limit(too_low);
    x_2{tv,1} = x_trial;
end
clear x_trial limit too_high too_low

save(['.\subj_' num2str(subj) '\subj_' num2str(subj) '-trial.mat'], 'x_2');

%% read trial for each participant
% Sorts the 48 trials of one subject into three conditions and trims the three
% versions of every item to a common length.
clear;
cd('G:\EEG_AV_speech');
% NOTE(review): this section uses subject 19 while the preceding sections use
% subject 21 - set this to the subject you actually want to process.
subj = 19;
load(['.\subj_' num2str(subj) '\subj_' num2str(subj) '-trial.mat'], 'x_2');
load(['.\data_beh\' num2str(subj) '_trialorder.mat'],'order');

% order(s,1) is the condition code of trial s and order(s,2) is the row the
% trial is stored in. Code 1 goes to A_only, code 2 to AV_con, and any other
% code to AV_incon.
for s = 1:48
    switch order(s,1)
        case 1
            A_only{order(s,2),1} = x_2{s};
        case 2
            AV_con{order(s,2),1} = x_2{s};
        otherwise
            AV_incon{order(s,2),1} = x_2{s};
    end
end

% Number of samples of every item in each of the three conditions.
T1 = zeros(16,3);
for j = 1:16
    T1(j,:) = [size(A_only{j,1},1) size(AV_con{j,1},1) size(AV_incon{j,1},1)];
end

% Shortest version of each item; computed once instead of on every pass of
% the loop below.
aa = min(T1,[],2);
for j = 1:16
    A_only_2{j,1} = A_only{j,1}(1:aa(j),:);
    AV_con_2{j,1} = AV_con{j,1}(1:aa(j),:);
    AV_incon_2{j,1} = AV_incon{j,1}(1:aa(j),:);
end
save(['.\subj_' num2str(subj) '\subj_' num2str(subj) '_condition.mat'],'A_only_2','AV_con_2','AV_incon_2');

%% read trials of all participants
% Collects the trials of subjects 1-21 into three item-by-subject cell arrays
% and forces every trial to the length of the matching stimulus envelope.
clear;
cd('G:\EEG_AV_speech');
for subj = 1:21
    clear x_2 order
    load(['.\subj_' num2str(subj) '\subj_' num2str(subj) '-trial.mat'], 'x_2');
    load(['.\data_beh\' num2str(subj) '_trialorder.mat'],'order');

    % order(s,1): condition code (1 -> A_only, 2 -> AV_con, anything else ->
    % AV_incon); order(s,2): row the trial is stored in.
    for s = 1:48
        if order(s,1) == 1
            A_only{order(s,2),subj} = x_2{s};
        elseif order(s,1) == 2
            AV_con{order(s,2),subj} = x_2{s};
        else
            AV_incon{order(s,2),subj} = x_2{s};
        end
    end
end

% NOTE(review): assumes envelope.mat provides the cell array `env` with one
% envelope per item - confirm the file contents.
load('G:\EEG_AV_speech\stimuli feature\envelope.mat');
T_env = zeros(16,1);
for j = 1:16
   T_env(j,1) = size(env{j,1},1); % number of samples of envelope j
end

% Trim trials that are at least as long as the envelope and zero-pad shorter
% ones. The row count is taken with size(...,1): length() returns the largest
% dimension and would report the channel count for a trial with fewer samples
% than channels. The three conditions share one loop instead of three copies
% of the same code.
conds = {A_only, AV_con, AV_incon};
for c = 1:numel(conds)
    for j = 1:16
        for subj = 1:21
            trial = conds{c}{j,subj};
            n_samp = size(trial,1);
            if n_samp >= T_env(j,1)
                trial = trial(1:T_env(j,1),:);
            else
                aa = T_env(j,1)-n_samp; % number of rows to append
                trial = [trial;zeros(aa,size(trial,2))];
            end
            conds{c}{j,subj} = trial;
        end
    end
end
[A_only, AV_con, AV_incon] = conds{:};
clear conds trial n_samp c

save(['.\eegdata_condition.mat'],'A_only','AV_con','AV_incon');

%% filter env and EEG in 1-10 hz, and resample to 25 hz
clearvars -except A_only AV_con AV_incon
cd('G:\EEG_AV_speech');
load('G:\EEG_AV_speech\stimuli feature\envelope.mat');
load(['G:\EEG_AV_speech\eegdata_condition.mat'],'A_only','AV_con','AV_incon');

fs = 128;
% 1-10 Hz FIR band-pass of order fs*6; it is applied with filtfilt below, i.e.
% forwards and backwards.
b_pass = fir1(fs*6,[1 10]/(fs/2),'bandpass');

% Stimulus envelopes: filter, then resample from fs to 25 Hz.
for j = 1:16
    env_dsp{j,1} = resample(filtfilt(b_pass,1,env{j,1}),25,fs);
end
% The same 16 envelopes stacked three times, matching the row order of `data`.
env_dsp_all = [env_dsp;env_dsp;env_dsp];

% EEG: rows 1-16 are A_only, rows 17-32 AV_con, rows 33-48 AV_incon.
data = [A_only; AV_con; AV_incon];
clear data_dsp
for j = 1:48
    for subj = 1:21
        % filtfilt is run on a double-precision copy of the trial.
        trial_filt = filtfilt(b_pass,1,double(data{j,subj}));
        % Subjects are stacked along the third dimension.
        data_dsp{j,1}(:,:,subj) = resample(trial_filt,25,fs);
    end
end
clear trial_filt
save(['G:\EEG_AV_speech\data_dsp.mat'],'data_dsp','env_dsp');


%% PCA of multi-model output
clear
% Exactly one of the following loads has to be enabled before running this
% section; with all three commented out, multi_outp does not exist after the
% `clear` above.
% load('G:\EEG_AV_speech\multi_output_con_12_layer.mat','multi_outp');
% load('G:\EEG_AV_speech\multi_output_incon_12_layer.mat','multi_outp');
% load('G:\EEG_AV_speech\multi_output_aonly_12_layer.mat','multi_outp');

% PCA of every item (16) by layer (12) cell, together with the cumulative
% percentage of variance explained by the leading components.
clear coeff score explained sum_explained
for j = 1:16
    for lay = 1:12
        [coeff{j,lay},score{j,lay},~,~,explained{j,lay}] = pca(multi_outp{j,lay});
        sum_explained(:,lay,j) = cumsum(explained{j,lay});
    end
end

% One figure per item: the cumulative-variance curve of each layer with a
% vertical line at the first component count that reaches 90 %.
for j = 1:16
    cum_item = sum_explained(:,:,j);
    figure;
    for lay = 1:12
        n90 = find(cum_item(:,lay)>=90);
        id(lay,j) = n90(1);
        plot(cum_item(:,lay));
        hold on
        xline(id(lay,j));
    end
    ylim([0 100])
end
clear cum_item n90

% Keep the first 205 principal-component scores of every item/layer. Rename
% the output variable to match whichever file was loaded above.
for j = 1:16
    for lay = 1:12
        pca_multi_con{j,lay} = score{j,lay}(:,1:205);
%         pca_multi_incon{j,lay} = score{j,lay}(:,[1:205]);
%         pca_multi_aonly{j,lay} = score{j,lay}(:,[1:205]);
    end
end
% Average number of components needed for 90 % (printed to the console).
mean(id,'all')
% mean id = 185.4
% save('G:\EEG_AV_speech\pca_multi_con_12layer_205.mat','pca_multi_con');
% mean id = 131
% save('G:\EEG_AV_speech\pca_multi_incon_12layer_205.mat','pca_multi_incon');
% mean id = 133
% save('G:\EEG_AV_speech\pca_multi_aonly_12layer_205.mat','pca_multi_aonly');
% mean id = 111
%%