import os
import numpy as np
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image
# from utils import text_helper
import utils
from utils import VocabDict

class FineTuningImageLoader(data.Dataset):
    """Dataset yielding ``(image, index)`` pairs for a collection of image paths.

    :param images_PATHs: indexable collection of image file paths
    :param transform: optional callable applied to the RGB ``PIL.Image``;
            when ``None`` the converted image is returned as-is
    """

    def __init__(self, images_PATHs, transform=None):
        self.transform = transform
        self.images_PATHs = images_PATHs

    def __getitem__(self, idx):
        """Load, convert to RGB and (optionally) transform the image at ``idx``.

        :param idx: position inside ``images_PATHs``
        :return: ``(image, idx)``
        """
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(self.images_PATHs[idx]) as raw:
            image = raw.convert('RGB')

        # ``transform`` defaults to None; calling it unconditionally raised
        # "TypeError: 'NoneType' object is not callable".
        if self.transform is not None:
            image = self.transform(image)

        return image, idx

    def __len__(self):
        """Return the number of image paths."""
        return len(self.images_PATHs)

class FineTuningImageLoaderTest(data.Dataset):
    """Dataset over a selected subset of image paths.

    Item ``i`` is the image whose position in ``images_PATHs`` is
    ``index[i]``; that original position is returned next to the image.

    :param images_PATHs: indexable collection of image file paths
    :param index: sequence of positions into ``images_PATHs`` to expose
    :param transform: optional callable applied to the RGB ``PIL.Image``;
            when ``None`` the converted image is returned as-is
    """

    def __init__(self, images_PATHs, index, transform=None):
        self.transform = transform
        self.images_PATHs = images_PATHs
        self.ret_indexs = index

    def __getitem__(self, idx):
        """Load the image selected by the ``idx``-th entry of the index list.

        :param idx: position inside the index list (not inside ``images_PATHs``)
        :return: ``(image, original_index)``
        """
        # Resolve the indirection once instead of looking it up twice.
        original_index = self.ret_indexs[idx]

        with Image.open(self.images_PATHs[original_index]) as raw:
            image = raw.convert('RGB')

        # ``transform`` defaults to None; calling it unconditionally raised
        # "TypeError: 'NoneType' object is not callable".
        if self.transform is not None:
            image = self.transform(image)

        return image, original_index

    def __len__(self):
        """Return the number of selected images."""
        return len(self.ret_indexs)
    


class FineTuningDataset(data.Dataset):
    """Dataset yielding ``(image, sim, feat, idx)`` for fine-tuning.

    :param sim_PATH: path handed to ``np.load``; the loaded array is indexed
            by sample position
    :param images_PATHs: indexable collection of image file paths
    :param transform: optional callable applied to the RGB ``PIL.Image``;
            when ``None`` the converted image is returned as-is
    :param feat_PATH: optional path handed to ``np.load`` providing the
            per-sample feature array read by ``__getitem__``. When omitted,
            ``feat`` must be assigned on the instance before indexing.
    """

    def __init__(self, sim_PATH, images_PATHs, transform=None, feat_PATH=None):
        self.transform = transform
        self.sim = np.load(sim_PATH)
        self.images_PATHs = images_PATHs
        # ``__getitem__`` always read ``self.feat`` but nothing ever set it.
        # It is only assigned when a path is given so that code which attaches
        # ``dataset.feat`` after construction keeps working unchanged.
        if feat_PATH is not None:
            self.feat = np.load(feat_PATH)

    def __getitem__(self, idx):
        """Return the transformed image with its similarity and feature rows.

        :param idx: sample position
        :return: ``(image, sim, feat, idx)`` where ``sim`` and ``feat`` are
                float32 tensors built from row ``idx`` of the loaded arrays
        :raises AttributeError: if no feature array has been provided
        """
        # Fail before the (comparatively expensive) image decode, and with a
        # message that says how to fix it.
        feat_table = getattr(self, 'feat', None)
        if feat_table is None:
            raise AttributeError(
                "FineTuningDataset has no feature array: pass feat_PATH=... "
                "to the constructor or assign `dataset.feat` before indexing"
            )

        with Image.open(self.images_PATHs[idx]) as raw:
            image = raw.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        sim = torch.from_numpy(self.sim[idx]).float()
        feat = torch.from_numpy(feat_table[idx]).float()

        return image, sim, feat, idx

    def __len__(self):
        """Return the number of image paths."""
        return len(self.images_PATHs)


class VqaDataset(data.Dataset):
    """VQA dataset yielding ``(image, question_indices, answer_index, image_id)``.

    :param input_dir: directory containing ``input_vqa`` and the two
            vocabulary files ``vocab_questions.txt`` / ``vocab_answers.txt``
    :param input_vqa: file name (inside ``input_dir``) of the array of
            sample records loaded with ``np.load``
    :param max_qst_length: stored on the instance; not used by this class
    :param max_num_ans: stored on the instance; not used by this class
    :param transform: optional callable applied to the RGB ``PIL.Image``;
            when ``None`` the converted image is returned as-is
    :param image_root: prefix prepended to the tail of each record's
            ``image_path`` to locate the image on disk
    """

    # Answer index returned when the records carry no ``valid_answers``.
    NO_ANSWER = -1

    def __init__(self, input_dir, input_vqa, max_qst_length=30, max_num_ans=10, transform=None,
                 image_root="./../Dataset/Images/"):
        self.input_dir = input_dir
        # NOTE(security): allow_pickle=True unpickles arbitrary objects; only
        # load files that come from a trusted source.
        self.vqa = np.load(input_dir+'/'+input_vqa, allow_pickle=True)
        self.qst_vocab = VocabDict(input_dir+'/vocab_questions.txt')
        self.ans_vocab = VocabDict(input_dir+'/vocab_answers.txt')
        self.max_qst_length = max_qst_length
        self.max_num_ans = max_num_ans
        # Answer availability is decided from the first record; an empty file
        # simply has no answers instead of raising IndexError.
        self.load_ans = (
            len(self.vqa) > 0
            and 'valid_answers' in self.vqa[0]
            and self.vqa[0]['valid_answers'] is not None
        )
        self.transform = transform
        self.image_root = image_root

    def __getitem__(self, idx):
        """Build one sample.

        :param idx: record position
        :return: ``(image, qst2idc, ans2idx, image_id)`` where ``qst2idc`` is
                a 1-D tensor of question word indices wrapped in
                ``<start>``/``<end>``, ``ans2idx`` is the index of one
                randomly chosen valid answer (``NO_ANSWER`` when the data has
                no answers) and ``image_id`` is the image file name up to its
                first dot
        """
        record = self.vqa[idx]
        source_path = record['image_path']

        # Keep only what follows the sixth "/" of the stored path and re-root
        # it under ``image_root``.
        local_path = self.image_root + source_path.split("/", 6)[-1]
        with Image.open(local_path) as raw:
            image = raw.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        image_id = source_path.split("/")[-1].split(".")[0]

        qst_vocab = self.qst_vocab
        token_ids = (
            [qst_vocab.word2idx('<start>')]
            + [qst_vocab.word2idx(w) for w in record['question_tokens']]
            + [qst_vocab.word2idx('<end>')]
        )
        qst2idc = torch.from_numpy(np.array(token_ids)).clone()

        # Previously ``ans2idx`` was only bound inside the ``if`` and the
        # return raised UnboundLocalError for answer-less data.
        ans2idx = self.NO_ANSWER
        if self.load_ans:
            ans_vocab = self.ans_vocab
            ans2idc = [ans_vocab.word2idx(w) for w in record['valid_answers']]
            ans2idx = np.random.choice(ans2idc)

        return image, qst2idc, ans2idx, image_id

    def __len__(self):
        """Return the number of records."""
        return len(self.vqa)

def collate_fn(data):
    """Create a padded mini-batch from (image, caption, image_id) samples.

    A custom collate_fn for torch.utils.data.DataLoader is necessary for
    padding captions of different lengths.

    :param data: list; (image, caption, image_id) tuples
            - image: tensor;    all images must share one shape
            - caption: tensor;  1-D, length_caption entries
            - image_id: any;    passed through untouched

    Return: mini-batch, ordered by decreasing caption length
    :return images: tensor;     batch_size x <image shape>
    :return padded_captions: long tensor;   batch_size x max caption length,
            zero-padded on the right
    :return caption_lengths: long tensor;   lengths of actual captions (without padding)
    :return image_id: tuple;    image ids in the same order as the other outputs
    """

    # Sort by caption length. sorted() leaves the caller's list in its
    # original order, whereas list.sort() reordered it as a side effect.
    ordered = sorted(data, key=lambda sample: len(sample[1]), reverse=True)
    images, captions, image_id = zip(*ordered)

    # Merge image tensors (stack)
    images = torch.stack(images, 0)

    caption_lengths = [len(caption) for caption in captions]

    # zero-matrix num_captions x caption_max_length; remaining zeros are padding
    padded_captions = torch.zeros(len(captions), max(caption_lengths), dtype=torch.long)
    for ix, (caption, end) in enumerate(zip(captions, caption_lengths)):
        padded_captions[ix, :end] = caption[:end]

    return images, padded_captions, torch.tensor(caption_lengths), image_id

def collate_fn_VQA(data):
    """Create a padded mini-batch from (image, question, answer, image_id) samples.

    A custom collate_fn for torch.utils.data.DataLoader is necessary for
    padding questions of different lengths.

    :param data: list; (image, caption, answer, image_id) tuples
            - image: tensor;    all images must share one shape
            - caption: tensor;  1-D, length_caption entries (the question)
            - answer: number;   answer index
            - image_id: any;    passed through untouched

    Return: mini-batch, ordered by decreasing caption length
    :return images: tensor;     batch_size x <image shape>
    :return padded_captions: long tensor;   batch_size x max caption length,
            zero-padded on the right
    :return anss: tensor;       batch_size answer indices
    :return caption_lengths: long tensor;   lengths of actual captions (without padding)
    :return image_id: tuple;    image ids in the same order as the other outputs
    """

    # Sort by caption length. sorted() leaves the caller's list in its
    # original order, whereas list.sort() reordered it as a side effect.
    ordered = sorted(data, key=lambda sample: len(sample[1]), reverse=True)
    images, captions, anss, image_id = zip(*ordered)

    # Merge image tensors (stack)
    images = torch.stack(images, 0)

    caption_lengths = [len(caption) for caption in captions]

    # zero-matrix num_captions x caption_max_length; remaining zeros are padding
    padded_captions = torch.zeros(len(captions), max(caption_lengths), dtype=torch.long)
    for ix, (caption, end) in enumerate(zip(captions, caption_lengths)):
        padded_captions[ix, :end] = caption[:end]

    return images, padded_captions, torch.tensor(list(anss)), torch.tensor(caption_lengths), image_id

def get_loader(sim_PATH, images_PATHs, batch_size, img_batch_size, num_workers, test=False, feat_PATH=None):
    """Build the fine-tuning loader and a plain image loader over the same paths.

    :param sim_PATH: path forwarded to ``FineTuningDataset``
    :param images_PATHs: indexable collection of image file paths
    :param batch_size: batch size of the fine-tuning loader
    :param img_batch_size: batch size of the image-only loader
    :param num_workers: worker count used by both loaders
    :param test: when truthy the fine-tuning loader is not shuffled
    :param feat_PATH: optional path to a ``.npy`` array loaded with
            ``np.load`` and attached to the dataset as ``feat``, which
            ``FineTuningDataset.__getitem__`` reads for every sample

    :return data_loader: loader over ``FineTuningDataset``
    :return img_loader: unshuffled loader over ``FineTuningImageLoader``
    Both loaders drop the last incomplete batch.
    """

    # NOTE(review): the random flip is applied even when ``test`` is set —
    # kept as-is, confirm this is intended.
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])

    dataset = FineTuningDataset(sim_PATH, images_PATHs, train_transform)
    # Without this the dataset has no ``feat`` array and the first batch
    # fails with AttributeError.
    if feat_PATH is not None:
        dataset.feat = np.load(feat_PATH)

    # The two original branches differed only in ``shuffle``.
    data_loader = torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=not test,
            num_workers=num_workers,
            drop_last=True)

    # Deterministic preprocessing (no flip) for the image-only loader.
    plain_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])

    # The image loader was built identically for both values of ``test``.
    img_dataset = FineTuningImageLoader(images_PATHs, plain_transform)
    img_loader = torch.utils.data.DataLoader(
            dataset=img_dataset,
            batch_size=img_batch_size,
            shuffle=False,
            num_workers=num_workers,
            drop_last=True)

    return data_loader, img_loader


def get_loader_test(images_PATHs, img_batch_size, idxs, num_workers):
    """Build an unshuffled loader over the images selected by ``idxs``.

    :param images_PATHs: indexable collection of image file paths
    :param img_batch_size: batch size of the returned loader
    :param idxs: positions into ``images_PATHs`` to iterate over
    :param num_workers: worker count of the returned loader

    :return img_loader: loader over ``FineTuningImageLoaderTest``; the last
            incomplete batch is kept
    """

    # Resize -> tensor -> per-channel normalisation, no augmentation.
    preprocessing_steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
    preprocessing = transforms.Compose(preprocessing_steps)

    subset = FineTuningImageLoaderTest(images_PATHs, idxs, preprocessing)

    loader_options = dict(
        dataset=subset,
        batch_size=img_batch_size,
        shuffle=False,
        num_workers=num_workers,
        drop_last=False,
    )
    return torch.utils.data.DataLoader(**loader_options)


def get_vqa_loader(input_dir, input_vqa_train, input_vqa_valid, max_qst_length, max_num_ans, batch_size, num_workers):
    """Build the train/valid VQA data loaders.

    :param input_dir: directory forwarded to ``VqaDataset``
    :param input_vqa_train: file name of the training records
    :param input_vqa_valid: file name of the validation records
    :param max_qst_length: forwarded to ``VqaDataset``
    :param max_num_ans: forwarded to ``VqaDataset``
    :param batch_size: batch size of both loaders
    :param num_workers: worker count of both loaders

    :return data_loader: dict with keys ``'train'`` and ``'valid'``; both
            loaders shuffle, drop the last incomplete batch and batch with
            ``collate_fn_VQA``
    """

    phases = ['train', 'valid']
    vqa_files = {'train': input_vqa_train, 'valid': input_vqa_valid}

    # A standalone Compose used to be built here and was immediately
    # overwritten by this per-phase dict; the dead assignment is gone.
    # NOTE(review): 'valid' also gets the random flip, shuffling and
    # drop_last — kept as-is, confirm this is intended.
    transform = {
        phase: transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
            ])
        for phase in phases}

    vqa_dataset = {
        phase: VqaDataset(
            input_dir=input_dir,
            input_vqa=vqa_files[phase],
            max_qst_length=max_qst_length,
            max_num_ans=max_num_ans,
            transform=transform[phase])
        for phase in phases}

    data_loader = {
        phase: torch.utils.data.DataLoader(
            dataset=vqa_dataset[phase],
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            collate_fn=collate_fn_VQA,
            drop_last=True)
        for phase in phases}

    return data_loader