#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 22 11:43:18 2020

@author: pooya
"""

import csv
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from scipy.io import savemat
from sklearn import preprocessing
#week = ('mon','tue','wed','thu','fri','sat','sun')
#week_days =[0,0,0,0,0,0,0]
#month = ('jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec')
#month_num =[0,0,0,0,0,0,0,0,0,0,0,0]
#dict_month = {}
#dict_week = {}
#i=0
#for x in month:
#    month_num[i]=1
#    dict_month[x]=cp(month_num)
#    month_num[i]=0
#    i=i+1
# Load the raw table as a 2-D array of strings. The first CSV row is treated
# as a header and discarded. newline='' is what the csv module recommends so
# that line breaks embedded in quoted fields are parsed correctly.
with open('forestfires.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # skip the header row; no-op if the file is empty
    data = list(reader)
data = np.array(data)
def _one_hot(values):
    """Return a dense one-hot matrix for a 1-D array of category labels.

    Labels are first mapped to integers with LabelEncoder and then expanded
    with OneHotEncoder, giving one column per distinct label.

    The encoder is left at its default (sparse) output and densified with
    ``.toarray()``: the keyword that used to request dense output was renamed
    from ``sparse`` to ``sparse_output`` between scikit-learn releases, so
    passing either name breaks on some versions.
    """
    # integer encode
    integer_encoded = LabelEncoder().fit_transform(values).reshape(-1, 1)
    # binary encode
    return OneHotEncoder().fit_transform(integer_encoded).toarray()


# Columns 2 and 3 hold categorical strings (presumably month and day of week,
# going by the variable names -- confirm against the CSV header).
month_coded = _one_hot(data[:, 2])
week_coded = _one_hot(data[:, 3])
# Column 12 is used as the target; keep it as an (n_samples, 1) float column.
# The builtin float is used because the np.float alias no longer exists in
# current NumPy releases.
Y = data[:, 12].astype(float).reshape(-1, 1)
# Drop the two categorical columns (2, 3) and the target column (12) in one
# call, using their original indices, then convert what is left to float.
data = np.delete(data, [2, 3, 12], axis=1).astype(float)
# Feature matrix: numeric columns followed by the one-hot encoded columns.
X = np.concatenate((data, week_coded, month_coded), axis=1)

# Random train/test split: the first ceil(80%) of a random row permutation
# becomes the training set, the remainder the test set.
N = X.shape
n_train = int(np.ceil(0.8 * N[0]))
select = np.random.permutation(N[0])
train_idx, test_idx = select[:n_train], select[n_train:]
Xtr, Ytr = X[train_idx], Y[train_idx]
X_test, Y_test = X[test_idx], Y[test_idx]
#### dimensionality reduction due to rank deficiency (rank(Xtr)=27 or 26)
# np.linalg.svd returns (U, S, Vh) and the ROWS of Vh are the right singular
# vectors, ordered by decreasing singular value. The projection basis is
# therefore the first n_components rows of Vh, transposed into columns.
# The basis is estimated on the training split only and reused for the test
# split.
n_components = 27
_, _, vh = np.linalg.svd(Xtr, full_matrices=False)
projection = vh[:n_components].T
Xtr = np.matmul(Xtr, projection)
X_test = np.matmul(X_test, projection)

#### normalization
# Fit the scaler on the training split only, then apply the same fitted
# transform to the test split.
scaler = preprocessing.StandardScaler()
Xtr = scaler.fit_transform(Xtr)
X_test = scaler.transform(X_test)
####
# Write the train/test arrays to a MATLAB .mat file; each dict key becomes
# the variable name inside the file.
loc = '/home/pooya/Desktop/Projects_codes/Project10_(Reg_SRNN)/forest_fires_mat.mat'
mat_contents = {
    'Xtr': Xtr,
    'Ytr': Ytr,
    'X_test': X_test,
    'Y_test': Y_test,
}
savemat(loc, mat_contents)
