#%% import and function module

import pandas as pd
import numpy as np
import sys
import os

# Project root: when set, it becomes the working directory and is appended
# to the import search path. Leave it empty to keep the current working
# directory and sys.path untouched.
run_path = ''
if run_path:
    # os.chdir('') raises FileNotFoundError, so only switch directories when
    # a path is configured and we are not already inside it. Both sides are
    # made absolute so a relative run_path compares correctly.
    if os.path.abspath('.') != os.path.abspath(run_path):
        os.chdir(run_path)
    sys.path.append(run_path)


#%% preprocessing of dataset

class Pre_Processing():
    """Load one dataset CSV, standardise it and attach its tuning settings.

    The constructor reads ``<data_dir>/<dataset>.csv``, discards the first
    column, z-scores every remaining column with statistics taken from the
    leading ``train_rate`` share of rows, derives the train / valid / predict
    split sizes, and finally copies the settings registered for ``dataset``
    in ``_PRESETS`` onto the instance as attributes.

    Attribute spellings such as ``vaild_len`` and ``foriour_len_global`` are
    kept exactly as they were so existing callers keep working.
    """

    # Settings per dataset name; every key becomes an instance attribute.
    # What each knob means is decided by the code that consumes this object.
    _PRESETS = {
        'ETTh2': {
            'max_k_global': 10,
            'foriour_len_global': 24*365+1,  # min*hour*day
            'max_k_roll': 10,
            'global_opti': 5e-5,

            'a': 0.8,
            'b': 1,
            'c': 0.95,
            'sep': 0,

            'valid_div': 3,
            'valid_opti': 5e-3,
            'valid_step': 10,

            'local_judge_1': 0.8,
            'local_judge_2': 0.4,
            'predict_step': 10,
            'local_opti': 1e-1,
            'local_opti_2': 5e-5,
        },
        'ETTm2': {
            'max_k_global': 150,
            # NOTE(review): 6*24 implies six samples per hour; confirm this
            # matches the sampling interval of the ETTm2 file in use.
            'foriour_len_global': 6*24*365+1,
            'max_k_roll': 5,
            'global_opti': 5e-5,

            'a': 0.85,
            'b': 1,
            'c': 0.95,
            'sep': 0,

            'valid_div': 3,
            'valid_opti': 5e-3,
            'valid_step': 60,

            'local_judge_1': 0.8,
            'local_judge_2': 0.4,
            'predict_step': 60,
            'local_opti': 2e-3,
            'local_opti_2': 5e-3,
        },
        'default': {
            'max_k_global': 100,
            'foriour_len_global': 24*365+1,
            'max_k_roll': 5,
            'global_opti': 1e-1,     # All set default 1e-1

            # NOTE(review): this preset defines a_1 / a_2 where the others
            # define a single `a`; confirm which one downstream code reads.
            'a_1': 0.8,
            'a_2': 0.1,
            'b': 1,
            'c': 0.9,

            'sep': 0,

            'valid_div': 3,
            'valid_opti': 1e-1,
            'valid_step': 60,

            'local_judge_1': 0.8,
            'local_judge_2': 0.4,
            'predict_step': 600,
            'local_opti': 1e-1,
            'local_opti_2': 1e-1,
        },
    }

    def __init__(self, dataset, data_dir='./Data/'):
        """Read, normalise and split ``dataset``.

        Parameters
        ----------
        dataset : str
            Base name of the CSV file (without ``.csv``). If it is also a
            key of ``_PRESETS`` the matching settings are applied; any other
            name leaves those settings undefined on the instance.
        data_dir : str, optional
            Directory that contains the CSV file. Defaults to ``'./Data/'``.

        Raises
        ------
        FileNotFoundError
            If ``<data_dir>/<dataset>.csv`` does not exist (from pandas).
        ValueError
            If a column after the first cannot be converted to float.
        """
        self.dataset = dataset

        # train rate, valid_rate, predict_rate
        self.train_rate = 0.7
        self.valid_rate = 0.1
        self.predict_rate = 1- self.train_rate - self.valid_rate

        self.mae_SI_total = 0
        self.mse_SI_total = 0
        self.opti_iter = 10000

        self.df = pd.read_csv(os.path.join(data_dir, self.dataset + '.csv'), header=0)
        # The first column is dropped; presumably a timestamp/index column.
        self.value_need = self.df.values[:, 1:].astype('float')

        # Standardise each column in place using only the leading training
        # rows. need_mean / need_std are overwritten on every pass, so after
        # the loop they describe the LAST column.
        train_end = int(self.train_rate*len(self.value_need))
        for col in range(self.value_need.shape[1]):
            train_col = self.value_need[:train_end, col]
            self.need_mean = np.mean(train_col)
            col_std = np.std(train_col)
            # A constant training column has zero spread; dividing by it
            # would fill the column with NaN/inf, so fall back to 1.
            self.need_std = col_std if col_std != 0 else np.float64(1.0)
            self.value_need[:, col] = (self.value_need[:, col] - self.need_mean) / self.need_std

        self.x = self.value_need
        self.t = np.arange(len(self.x))
        self.t_len = len(self.t)

        # Split sizes: predict and valid are truncated from their rates and
        # train takes whatever rows remain.
        self.predict_len = int(self.t_len*self.predict_rate)
        self.vaild_len = int(self.t_len*self.valid_rate)
        self.train_len = self.t_len - self.predict_len - self.vaild_len
        self.predict_point = self.train_len + self.vaild_len

        self.variables_len = self.x.shape[1]

        # Copy the dataset-specific settings onto the instance. The CSV was
        # already loaded above, so no preset needs to read a file again.
        for attr_name, attr_value in self._PRESETS.get(dataset, {}).items():
            setattr(self, attr_name, attr_value)
    
    
    
    
    
    
    
    
    