%/usr/bin/env matlab
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Created on 12:01, May. 14th, 2023
% 
% @author: Anonymous
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Initialize environment configuration.
% Start from a clean session: no variables, cleared console, default search path, no figures.
clear;
clc;
restoredefaultpath;
close('all');
% FSL: export its install location & output format, then append its matlab helpers to the path.
setenv('FSLDIR', fullfile(getenv('HOME'), 'fsl'));
setenv('FSLOUTPUTTYPE', 'NIFTI_GZ');
path(path, sprintf('%s/etc/matlab', getenv('FSLDIR')));
% Folder that holds the matlab toolboxes registered below.
toolbox_root = fullfile(getenv('HOME'), 'Documents', 'storage', 'Data', 'matlab');
% OSL: register the toolbox & its core folder, then run its startup routine.
addpath(fullfile(toolbox_root, 'osl'));
addpath(fullfile(toolbox_root, 'osl', 'osl-core'));
osl_startup();
% SPM12: register the toolbox, then call `spm` with the `eeg` argument.
addpath(fullfile(toolbox_root, 'spm', 'spm12'));
spm('eeg');
% Close every figure (hidden ones included) left over from the initialization.
close('all', 'hidden');

%% Define macros & global variables.
% `markers` maps a condition name (`image_<stimulus>` / `audio_<stimulus>`) to its event label,
% i.e. the letter `S` followed by the event code right-aligned in a 3-character field.
global markers;
% Stimulus names, listed in code order: image codes are 1..15, audio codes are 101..115.
stimulus_names = {'alarm', 'apple', 'ball', 'book', 'box', 'chair', 'kiwi', 'microphone', ...
    'motorcycle', 'pepper', 'sheep', 'shoes', 'strawberry', 'tomato', 'watch'};
n_stimuli = numel(stimulus_names);
% Build the condition names & their event codes side by side.
condition_names = [strcat('image_', stimulus_names), strcat('audio_', stimulus_names)];
condition_codes = [1:n_stimuli, 100 + (1:n_stimuli)];
% Format every code as its event label, e.g. 1 -> 'S  1', 101 -> 'S101'.
condition_labels = arrayfun(@(code) sprintf('S%3d', code), condition_codes, 'UniformOutput', false);
% `UniformValues=false` keeps the value type of the map as `any`.
markers = containers.Map(condition_names, condition_labels, 'UniformValues', false);

%% Execute preprocess.
% Project root, resolved relative to the current working directory.
base = fullfile(pwd, '..', '..', '..', '..');
% Every known run, written as `<subject>/<date>`.
all_runs = {'013/20230308', '014/20230313', '015/20230315', '016/20230327', '017/20230329', '018/20230331', ...
    '019/20230403', '020/20230405', '021/20230407', '022/20230410', '023/20230412', '024/20230414', '025/20230417', ...
    '026/20230419', '027/20230421', '028/20230424', '029/20230428', '030/20230504', '031/20230510', '032/20230512', ...
    '033/20230517', '034/20230519', '035/20230524', '036/20230526', '037/20230529', '038/20230531', '039/20230605', ...
    '040/20230607', '041/20230612', '042/20230614', '043/20230616', '044/20230619', '045/20230626', '046/20230628', ...
    '047/20230703', '048/20230705', '049/20230710', '050/20230712', '051/20230717', '052/20230719', '053/20230724', ...
    '054/20230726'};
% Keep only the runs to preprocess, selected by their position in `all_runs`.
kept_positions = [1, 6:9, 11:19, 21:22, 24:28, 30:numel(all_runs)];
subj_runs = all_runs(kept_positions);
% Preprocess the kept runs one after another.
for subj_run = subj_runs
    % Split `<subject>/<date>` into its two parts.
    run_parts = strsplit(subj_run{1}, '/');
    assert(length(run_parts) == 2);
    % Folder of the current run.
    path_run_i = fullfile(base, 'data', 'eeg.anonymous', run_parts{1}, run_parts{2});
    % Task sessions first, then tmr sessions.
    preprocess_task(path_run_i);
    preprocess_tmr(path_run_i);
end

%% Finish preprocess.
% Terminate the current matlab process.
exit;

%% Define tool funcs.
% def preprocess_task func
function preprocess_task(path_run)
    % Preprocess all task eeg sessions of the specified run, e.g. [013,...].
    %
    % Every task session type is handed, in order, to `preprocess_task_helper`.
    %
    % Args:
    %     path_run: str - The path of specified eeg run.
    %
    % Returns:
    %     None

    % Iterating a row cell yields one 1x1 cell per step, hence the `{1}` unwrapping.
    for session_type = {'audio-image-pre', 'audio-image-post', 'image-audio-pre', 'image-audio-post'}
        preprocess_task_helper(path_run, session_type{1});
    end
end

% def preprocess_tmr func
function preprocess_tmr(path_run)
    % Preprocess all tmr eeg sessions of the specified run, e.g. [013,...].
    %
    % Every tmr session type is handed, in order, to `preprocess_tmr_helper`.
    %
    % Args:
    %     path_run: str - The path of specified eeg run.
    %
    % Returns:
    %     None

    % Iterating a row cell yields one 1x1 cell per step, hence the `{1}` unwrapping.
    for session_type = {'N23', 'REM'}
        preprocess_tmr_helper(path_run, session_type{1});
    end
end

% def preprocess_task_helper func
function preprocess_task_helper(path_run, session_type)
    % The whole pipeline to preprocess task eeg data of specified run/session_type, e.g. [013,...].
    %
    % Steps: convert `data-lvbj.vhdr` with `spm_eeg_convert`, epoch it around every marker of the
    % global `markers` map, downsample the epochs (`fsample_new = 100`), flag bad epochs with
    % `osl_detect_artefacts`, split the epochs into audio & image by their position parity, and
    % save the result to `<path_run>/dataset.lvbj.task/<session_type>.mat`.
    %
    % The function returns without saving anything when `path_run` holds no `output*task`
    % folder (a warning is issued) or when the session folder holds no raw data.
    %
    % Args:
    %     path_run: str - The path of specified eeg run.
    %     session_type: str - The type of specified session, default is `audio-image-pre`.
    %         It must start with `audio-image` or `image-audio`.
    %
    % Returns:
    %     None

    global markers;
    % Check whether `session_type` is provided.
    if nargin < 2
        session_type = 'audio-image-pre';
    end

    %% Prepare for preprocess.
    % Initialize path of dataset.
    path_run_dataset = fullfile(path_run, 'dataset.lvbj.task');
    if ~exist(path_run_dataset, 'dir'), mkdir(path_run_dataset); end
    % Initialize path of output, i.e. the first folder named `output*task` inside `path_run`.
    listing = dir(path_run); fnames = {listing.name};
    is_output = [listing.isdir] & startsWith(fnames, 'output') & endsWith(fnames, 'task');
    if ~any(is_output)
        % Nothing to preprocess for this run: skip it instead of failing the whole batch.
        warning('preprocess_task_helper:noOutputDir', ...
            'No output*task folder found in %s, skip session %s.', path_run, session_type);
        return;
    end
    output_fname = fnames{find(is_output, 1)};
    path_run_output = fullfile(path_run, output_fname);
    path_run_output_session = fullfile(path_run_output, session_type);
    raw_fname = 'data-lvbj.vhdr'; data_fname = 'data-lvbj';
    path_raw = fullfile(path_run_output_session, raw_fname);
    path_converted = fullfile(path_run_output_session, [data_fname, '.mat']);
    % Check whether raw data exists. If not, directly return.
    if ~exist(path_raw, 'file'), return; end

    %% Execute preprocess.
    % Convert raw EEG format to SPM12 format.
    S_spm_eeg_convert.dataset = path_raw;
    S_spm_eeg_convert.outfile = fullfile(path_run_output_session, data_fname);
    S_spm_eeg_convert.Fdataset = path_raw;
    D_spm_eeg_convert = spm_eeg_convert(S_spm_eeg_convert);
    % Define epochs from converted SPM12 data, one trial definition per marker.
    markers_keys = markers.keys; markers_values = markers.values;
    S_spm_eeg_definetrial.D = path_converted;
    % The upper bound is shortened by `1000 / fsample`, i.e. one sample period if the window is in ms.
    S_spm_eeg_definetrial.timewin = [-200, 800-(1000./D_spm_eeg_convert.fsample)];
    S_spm_eeg_definetrial.save = 0; S_spm_eeg_definetrial.reviewtrials = 0;
    for marker_idx = 1:length(markers_values)
        S_spm_eeg_definetrial.trialdef(marker_idx).conditionlabel = markers_keys{marker_idx};
        S_spm_eeg_definetrial.trialdef(marker_idx).eventtype = 'Stimulus';
        S_spm_eeg_definetrial.trialdef(marker_idx).eventvalue = markers_values{marker_idx};
        S_spm_eeg_definetrial.trialdef(marker_idx).trlshift = 0;
    end
    [trl, trl_markers, S_spm_eeg_definetrial] = spm_eeg_definetrial(S_spm_eeg_definetrial);
    % Epoch converted SPM12 data.
    S_spm_eeg_epochs.D = path_converted;
    S_spm_eeg_epochs.bc = 0; S_spm_eeg_epochs.prefix = 'epoch-';
    S_spm_eeg_epochs.trl = trl; S_spm_eeg_epochs.conditionlabels = trl_markers;
    D_spm_eeg_epochs = spm_eeg_epochs(S_spm_eeg_epochs);
    % Downsample epoched data, read back from the file written by the epoching step.
    S_spm_eeg_downsample.D = fullfile(path_run_output_session, [S_spm_eeg_epochs.prefix, data_fname, '.mat']);
    S_spm_eeg_downsample.fsample_new = 100; S_spm_eeg_downsample.prefix = 'dsample-';
    D_spm_eeg_downsample = spm_eeg_downsample(S_spm_eeg_downsample);
    % Use osl to automatically detect bad epochs.
    modalities = {'EEG',};
    D = osl_detect_artefacts(D_spm_eeg_downsample, 'badchannels', false, ...
        'badtimes', true, 'modalities', modalities);
    D.save;

    %% Separate different channels.
    % Get the whole data from `D`, then move its 3rd dimension to the front.
    % data - (n_samples, seq_len, n_channels)
    data = permute(D(:,:,:), [3, 2, 1]);
    % Initialize the indices of audio & image, the two are expected to alternate.
    n_events = size(data, 1);
    assert(mod(n_events, 2) == 0, ...
        'ERROR: The number of events (%d) is not divisible by 2 in %s.preprocess_task_helper.', ...
        n_events, mfilename);
    if startsWith(session_type, 'audio-image')
        audio_idxs = 1:2:n_events; image_idxs = 2:2:n_events;
    elseif startsWith(session_type, 'image-audio')
        image_idxs = 1:2:n_events; audio_idxs = 2:2:n_events;
    else
        error('ERROR: Unknown session_type (%s) in %s.preprocess_task_helper.', session_type, mfilename);
    end
    % Get the corresponding [markers,bad_idxs] of audio & image.
    audio_markers = D.conditions(audio_idxs); image_markers = D.conditions(image_idxs);
    % NOTE(review): the bad indices are kept as positions in the interleaved epoch sequence,
    % they are not re-based to the rows of `audio_eeg` / `image_eeg` - confirm with the consumers.
    bad_idxs = D.badtrials;
    audio_badidxs = bad_idxs(ismember(bad_idxs, audio_idxs));
    image_badidxs = bad_idxs(ismember(bad_idxs, image_idxs));
    % Get the indices of eeg channels.
    eeg_idxs = find(strcmp(D.chantype, 'EEG'));
    % Get audio_eeg & image_eeg from `data`.
    audio_eeg = data(audio_idxs,:,eeg_idxs); image_eeg = data(image_idxs,:,eeg_idxs);
    % Save bad_idxs & markers & data to `path_run_dataset`.
    save(fullfile(path_run_dataset, [session_type, '.mat']), ...
        "audio_markers", "image_markers", "audio_badidxs", "image_badidxs", "audio_eeg", "image_eeg");
end

% def preprocess_tmr_helper func
function preprocess_tmr_helper(path_run, session_type)
    % The whole pipeline to preprocess tmr eeg data of specified run/session_type, e.g. [013,...].
    %
    % Steps: convert `data-lvbj.vhdr` with `spm_eeg_convert`, epoch it around every `audio*`
    % marker of the global `markers` map, downsample the epochs (`fsample_new = 100`), flag bad
    % epochs with `osl_detect_artefacts`, and save the result to
    % `<path_run>/dataset.lvbj.tmr/<session_type>.mat`.
    %
    % The function returns without saving anything when `path_run` holds no `output*tmr`
    % folder (a warning is issued) or when the session folder holds no raw data.
    %
    % Args:
    %     path_run: str - The path of specified eeg run.
    %     session_type: str - The type of specified session, default is `N23`.
    %
    % Returns:
    %     None

    global markers;
    % Check whether `session_type` is provided. The default matches the `N23` folder name used
    % by `preprocess_tmr`; a name holding a path separator would break the paths built below.
    if nargin < 2
        session_type = 'N23';
    end

    %% Prepare for preprocess.
    % Initialize path of dataset.
    path_run_dataset = fullfile(path_run, 'dataset.lvbj.tmr');
    if ~exist(path_run_dataset, 'dir'), mkdir(path_run_dataset); end
    % Initialize path of output, i.e. the first folder named `output*tmr` inside `path_run`.
    listing = dir(path_run); fnames = {listing.name};
    is_output = [listing.isdir] & startsWith(fnames, 'output') & endsWith(fnames, 'tmr');
    if ~any(is_output)
        % Nothing to preprocess for this run: skip it instead of failing the whole batch.
        warning('preprocess_tmr_helper:noOutputDir', ...
            'No output*tmr folder found in %s, skip session %s.', path_run, session_type);
        return;
    end
    output_fname = fnames{find(is_output, 1)};
    path_run_output = fullfile(path_run, output_fname);
    path_run_output_session = fullfile(path_run_output, session_type);
    raw_fname = 'data-lvbj.vhdr'; data_fname = 'data-lvbj';
    path_raw = fullfile(path_run_output_session, raw_fname);
    path_converted = fullfile(path_run_output_session, [data_fname, '.mat']);
    % Check whether raw data exists. If not, directly return.
    if ~exist(path_raw, 'file'), return; end

    %% Execute preprocess.
    % Convert raw EEG format to SPM12 format.
    S_spm_eeg_convert.dataset = path_raw;
    S_spm_eeg_convert.outfile = fullfile(path_run_output_session, data_fname);
    S_spm_eeg_convert.Fdataset = path_raw;
    D_spm_eeg_convert = spm_eeg_convert(S_spm_eeg_convert);
    % Define epochs from converted SPM12 data, one trial definition per audio marker.
    markers_keys = markers.keys; markers_values = markers.values;
    available_markers = startsWith(markers_keys, 'audio');
    markers_keys = markers_keys(available_markers); markers_values = markers_values(available_markers);
    S_spm_eeg_definetrial.D = path_converted;
    % The upper bound is shortened by `1000 / fsample`, i.e. one sample period if the window is in ms.
    S_spm_eeg_definetrial.timewin = [-200, 800-(1000./D_spm_eeg_convert.fsample)];
    S_spm_eeg_definetrial.save = 0; S_spm_eeg_definetrial.reviewtrials = 0;
    for marker_idx = 1:length(markers_values)
        S_spm_eeg_definetrial.trialdef(marker_idx).conditionlabel = markers_keys{marker_idx};
        S_spm_eeg_definetrial.trialdef(marker_idx).eventtype = 'Stimulus';
        S_spm_eeg_definetrial.trialdef(marker_idx).eventvalue = markers_values{marker_idx};
        S_spm_eeg_definetrial.trialdef(marker_idx).trlshift = 0;
    end
    [trl, trl_markers, S_spm_eeg_definetrial] = spm_eeg_definetrial(S_spm_eeg_definetrial);
    % Epoch converted SPM12 data.
    S_spm_eeg_epochs.D = path_converted;
    S_spm_eeg_epochs.bc = 0; S_spm_eeg_epochs.prefix = 'epoch-';
    S_spm_eeg_epochs.trl = trl; S_spm_eeg_epochs.conditionlabels = trl_markers;
    D_spm_eeg_epochs = spm_eeg_epochs(S_spm_eeg_epochs);
    % Downsample epoched data, read back from the file written by the epoching step.
    S_spm_eeg_downsample.D = fullfile(path_run_output_session, [S_spm_eeg_epochs.prefix, data_fname, '.mat']);
    S_spm_eeg_downsample.fsample_new = 100; S_spm_eeg_downsample.prefix = 'dsample-';
    D_spm_eeg_downsample = spm_eeg_downsample(S_spm_eeg_downsample);
    % Use osl to automatically detect bad epochs.
    modalities = {'EEG',};
    D = osl_detect_artefacts(D_spm_eeg_downsample, 'badchannels', false, ...
        'badtimes', true, 'modalities', modalities);
    D.save;

    %% Separate different channels.
    % Get the whole data from `D`, then move its 3rd dimension to the front.
    % data - (n_samples, seq_len, n_channels)
    data = permute(D(:,:,:), [3, 2, 1]);
    % Every epoch is an audio epoch here, so markers & bad indices are taken as they are.
    audio_markers = D.conditions; audio_badidxs = D.badtrials;
    % Get the indices of eeg channels.
    eeg_idxs = find(strcmp(D.chantype, 'EEG'));
    % Get audio_eeg from `data`, keeping all epochs.
    audio_eeg = data(:,:,eeg_idxs);
    % Save bad_idxs & markers & data to `path_run_dataset`.
    save(fullfile(path_run_dataset, [session_type, '.mat']), ...
        "audio_markers", "audio_badidxs", "audio_eeg");
end

