import numpy as np
import torch 
from torch.utils.data import Dataset
import torchvision.transforms as tfs
import cv2
from PIL import Image
import pandas as pd

class Melanoma(Dataset):
    """Map-style dataset of melanoma images listed in a CSV file.

    The CSV must provide an ``image_name`` column (file name without the
    ``.jpg`` extension) and a ``target`` column.  Images are always read from
    ``image_root_path + '/train/' + image_name + '.jpg'``, whatever ``mode`` is.

    Each item is a tuple ``(image, label)``:

    * ``image`` -- float32 array of shape ``(3, image_size, image_size)``,
      scaled to [0, 1] and then normalized per channel.
    * ``label`` -- flat float32 array with one entry per column in
      ``train_cols``.

    Args:
        csv_path: Path of the CSV file describing the samples.
        image_root_path: Directory that contains the ``train`` image folder.
            Must not be empty.
        image_size: Side length (pixels) of the square output image.
        shuffle: If true, permute the rows once at construction time.
        seed: Seed passed to ``np.random.seed`` before shuffling.  Note that
            this reseeds NumPy's *global* random state.
        verbose: If true, print a short class-balance summary.
        train_cols: Columns used as labels; defaults to ``['target']``.
        mode: Random augmentation is applied only when this is ``'train'``.

    Raises:
        ValueError: If ``image_root_path`` is empty.
    """

    # Per-channel normalization constants, shaped (1, 1, 3) so they broadcast
    # over an (H, W, 3) image.  These match the commonly used ImageNet
    # statistics.
    _MEAN = np.array([[[0.485, 0.456, 0.406]]])
    _STD = np.array([[[0.229, 0.224, 0.225]]])

    def __init__(self,
                 csv_path,
                 image_root_path='',
                 image_size=32,
                 shuffle=True,
                 seed=123,
                 verbose=True,
                 train_cols=None,
                 mode='train'):
        # Validate up front with a real exception: `assert` is stripped
        # under `python -O`, and there is no point reading the CSV first.
        if image_root_path == '':
            raise ValueError('You need to pass the correct location for the dataset!')
        if train_cols is None:
            train_cols = ['target']

        # load data from csv
        self.df = pd.read_csv(csv_path)
        self._num_images = len(self.df)

        # shuffle data (deterministic for a given seed)
        if shuffle:
            data_index = list(range(self._num_images))
            np.random.seed(seed)
            np.random.shuffle(data_index)
            self.df = self.df.iloc[data_index]

        self.select_cols = ['target']  # this var determines the number of classes
        self.value_counts_dict = self.df[self.select_cols[0]].value_counts().to_dict()

        self.mode = mode
        self.image_size = image_size

        self._images_list = [image_root_path+'/train/'+path+'.jpg' for path in self.df['image_name'].tolist()]
        self._labels_list = self.df[train_cols].values.tolist()

        # Always compute the imbalance ratio so that `imbalance_ratio` works
        # regardless of `verbose`.  A class missing from the CSV counts as 0
        # instead of raising KeyError; an empty dataset yields a ratio of 0.0.
        num_pos = self.value_counts_dict.get(1, 0)
        num_neg = self.value_counts_dict.get(0, 0)
        total = num_pos + num_neg
        self.imratio = num_pos / total if total else 0.0

        if verbose:
            print ('-'*30)
            print('Found %s images in total, %s positive images, %s negative images'%(self._num_images, num_pos, num_neg))
            print ('%s: imbalance ratio is %.4f'%(self.select_cols[0], self.imratio ))
            print ('-'*30)

    @property
    def class_counts(self):
        """Mapping from each ``target`` value to its number of rows."""
        return self.value_counts_dict

    @property
    def imbalance_ratio(self):
        """Positives divided by (positives + negatives) for ``target``."""
        return self.imratio

    @property
    def num_classes(self):
        """Number of entries in ``select_cols`` (currently always 1)."""
        return len(self.select_cols)

    @property
    def data_size(self):
        """Number of rows in the CSV; same value as ``len(self)``."""
        return self._num_images

    def image_augmentation(self, image):
        """Apply a random affine transform to ``image`` and return the result.

        Rotation is within +/-15 degrees, translation up to 5% per axis,
        scale within [0.95, 1.05]; uncovered pixels are filled with 128.
        ``__getitem__`` calls this with a PIL image.
        """
        # `fill` is the current name of the argument formerly called
        # `fillcolor` in older torchvision releases.
        img_aug = tfs.Compose([tfs.RandomAffine(degrees=(-15, 15), translate=(0.05, 0.05), scale=(0.95, 1.05), fill=128)])
        return img_aug(image)

    def __len__(self):
        return self._num_images

    def __getitem__(self, idx):
        """Load, optionally augment, resize and normalize sample ``idx``.

        Returns:
            ``(image, label)`` as described in the class docstring.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        path = self._images_list[idx]
        # Read as single-channel grayscale (flag value 0).
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError('Could not read image: %s' % path)

        # The torchvision transform operates on PIL images, so round-trip
        # through PIL even when no augmentation is applied.
        image = Image.fromarray(image)
        if self.mode == 'train':
            image = self.image_augmentation(image)
        image = np.array(image)

        # Replicate the gray channel to obtain an (H, W, 3) image.
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # resize and normalize; e.g., ToTensor()
        image = cv2.resize(image, dsize=(self.image_size, self.image_size), interpolation=cv2.INTER_LINEAR)
        image = image/255.0
        image = (image-self._MEAN)/self._STD
        # HWC -> CHW, the layout PyTorch models expect.
        image = image.transpose((2, 0, 1)).astype(np.float32)
        label = np.array(self._labels_list[idx]).reshape(-1).astype(np.float32)
        return image, label

    def get_labels(self):
        """Return all labels, in dataset order, as one flat array."""
        return np.array(self._labels_list).reshape(-1)

def collect_batches(loader):
    """Stack every ``(data, label)`` batch yielded by ``loader``.

    Batches are gathered in lists and concatenated once at the end, which
    avoids re-copying the whole accumulated array on every iteration.

    Args:
        loader: Iterable yielding ``(data, label)`` pairs of array-likes
            (NumPy arrays or CPU tensors) that agree on all but the first
            dimension.

    Returns:
        ``(X, Y)``: two NumPy arrays, concatenated along axis 0.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    data_parts, label_parts = [], []
    for batch_data, batch_label in loader:
        data_parts.append(np.asarray(batch_data))
        label_parts.append(np.asarray(batch_label))
    if not data_parts:
        raise ValueError('loader produced no batches; nothing to save')
    return np.concatenate(data_parts, axis=0), np.concatenate(label_parts, axis=0)


if __name__ == '__main__':
    # Convert the jpg datasets into binary .npy files (np.save appends the
    # '.npy' extension to the paths below).
    root = '/dual_data/not_backed_up/dixzhu/melanoma/'
    train_set = Melanoma(csv_path=root+'train.csv', image_root_path=root, image_size=128, mode='train')
    test_set = Melanoma(csv_path=root+'valid.csv', image_root_path=root, image_size=128, mode='valid')

    # NOTE(review): with mode='train' the saved training images carry one
    # fixed draw of the random augmentation, and drop_last=True discards the
    # final partial batch, so up to 31 samples are not exported -- confirm
    # that both are intended for this conversion.
    trainloader = torch.utils.data.DataLoader(train_set, batch_size=32, num_workers=2, drop_last=True, shuffle=True)
    testloader = torch.utils.data.DataLoader(test_set, batch_size=32, num_workers=2, drop_last=False, shuffle=False)

    trX, trY = collect_batches(trainloader)
    np.save(root+'melanoma_train_X', trX)
    np.save(root+'melanoma_train_Y', trY)

    teX, teY = collect_batches(testloader)
    np.save(root+'melanoma_test_X', teX)
    np.save(root+'melanoma_test_Y', teY)
    
