import os
import torch
import torchvision.datasets as datasets
import re


def pretify_classname(classname):
    """Convert a CamelCase class name into a lower-case, spaced phrase.

    Words are extracted with a regex that matches either a capital letter
    followed by lower-case letters, or a run of capital letters; anything
    the pattern does not match is discarded. The words are lower-cased and
    joined with single spaces. When the resulting phrase ends in "al", the
    suffix " area" is appended.
    """
    words = re.findall(r"[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))", classname)
    phrase = " ".join(word.lower() for word in words)
    return phrase + " area" if phrase.endswith("al") else phrase


class EuroSATBase:
    """Datasets, data loaders and prompt class names for EuroSAT splits.

    The data is read with ``ImageFolder`` from
    ``<location>/EuroSAT_splits/train`` and
    ``<location>/EuroSAT_splits/<test_split>``.

    Attributes set by ``__init__``:
        train_dataset / train_loader: training images; the loader shuffles.
        test_dataset: every image of the evaluation split.
        test_loader: the whole evaluation split in dataset order when
            ``subset_ratio == 1``, otherwise a shuffled loader over a random
            subset of it.
        test_loader_shuffle: shuffled loader over the whole evaluation split.
        classnames: descriptive class names, indexed by class id.
    """

    def __init__(
        self,
        preprocess,
        test_split,
        location="~/datasets",
        batch_size=32,
        num_workers=16,
        persistent_workers=False,
        subset_ratio=1,
    ):
        """Build the datasets, the loaders and the class-name list.

        Args:
            preprocess: transform handed to both ``ImageFolder`` datasets.
            test_split: sub-directory of ``EuroSAT_splits`` used for
                evaluation.
            location: directory that contains ``EuroSAT_splits``.
            batch_size: batch size of every loader.
            num_workers: worker count of every loader.
            persistent_workers: passed to ``test_loader_shuffle`` only.
            subset_ratio: fraction of the evaluation split, in ``(0, 1]``,
                served by ``test_loader``. ``1`` means the full split.

        Raises:
            ValueError: if ``subset_ratio`` is outside ``(0, 1]``.
            KeyError: if a prettified folder name has no entry in the
                class-name mapping below.
        """
        if not 0 < subset_ratio <= 1:
            raise ValueError(
                f"subset_ratio must be in (0, 1], got {subset_ratio!r}"
            )

        # Data loading code
        traindir = os.path.join(location, "EuroSAT_splits", "train")
        testdir = os.path.join(location, "EuroSAT_splits", test_split)

        self.train_dataset = datasets.ImageFolder(
            traindir, transform=preprocess)
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            shuffle=True,
            batch_size=batch_size,
            num_workers=num_workers,
        )

        self.test_dataset = datasets.ImageFolder(testdir, transform=preprocess)
        if subset_ratio != 1:
            # Imported here so the block stays self-contained; the module
            # header does not import ``random``.
            import random

            dataset_size = len(self.test_dataset)
            # Honour the requested ratio (previously fixed at 10%) while
            # keeping at least one sample so the loader is never empty.
            subset_size = min(
                dataset_size, max(1, int(subset_ratio * dataset_size))
            )
            # Indices are drawn only on this branch, so the full-split path
            # no longer consumes the global ``random`` state.
            subset_indices = random.sample(range(dataset_size), subset_size)
            test_subset = torch.utils.data.Subset(
                self.test_dataset, subset_indices
            )
            self.test_loader = torch.utils.data.DataLoader(
                test_subset, batch_size=batch_size, num_workers=num_workers, shuffle=True
            )
        else:
            self.test_loader = torch.utils.data.DataLoader(
                self.test_dataset, batch_size=batch_size, num_workers=num_workers
            )
        self.test_loader_shuffle = torch.utils.data.DataLoader(
            self.test_dataset,
            shuffle=True,
            batch_size=batch_size,
            num_workers=num_workers,
            persistent_workers=persistent_workers,
        )

        # Maps the prettified folder names to the longer descriptive names
        # exposed through ``classnames``.
        ours_to_open_ai = {
            "annual crop": "annual crop land",
            "forest": "forest",
            "herbaceous vegetation": "brushland or shrubland",
            "highway": "highway or road",
            "industrial area": "industrial buildings or commercial buildings",
            "pasture": "pasture land",
            "permanent crop": "permanent crop land",
            "residential area": "residential buildings or homes or apartments",
            "river": "river",
            "sea lake": "lake or sea",
        }
        idx_to_class = {
            idx: name for name, idx in self.train_dataset.class_to_idx.items()
        }
        # Ordered by class index so that position i names class i.
        self.classnames = [
            ours_to_open_ai[
                pretify_classname(idx_to_class[i].replace("_", " "))
            ]
            for i in range(len(idx_to_class))
        ]


class EuroSAT(EuroSATBase):
    """EuroSAT loaders that use the ``test`` split for evaluation."""

    def __init__(
        self,
        preprocess,
        location="~/datasets",
        batch_size=32,
        num_workers=16,
        persistent_workers=False,
        subset_ratio=1,
    ):
        """Initialise ``EuroSATBase`` with ``test_split="test"``.

        Every argument is forwarded to ``EuroSATBase.__init__``, including
        ``persistent_workers`` and ``subset_ratio``, which were previously
        accepted here but silently dropped.
        """
        super().__init__(
            preprocess,
            "test",
            location=location,
            batch_size=batch_size,
            num_workers=num_workers,
            persistent_workers=persistent_workers,
            subset_ratio=subset_ratio,
        )


class EuroSATVal(EuroSATBase):
    """EuroSAT loaders that use the ``val`` split for evaluation."""

    def __init__(
        self,
        preprocess,
        location="~/datasets",
        batch_size=32,
        num_workers=16,
        persistent_workers=False,
        subset_ratio=1,
    ):
        """Initialise ``EuroSATBase`` with ``test_split="val"``.

        Every argument is forwarded to ``EuroSATBase.__init__``, including
        ``persistent_workers`` and ``subset_ratio``, which were previously
        accepted here but silently dropped.
        """
        super().__init__(
            preprocess,
            "val",
            location=location,
            batch_size=batch_size,
            num_workers=num_workers,
            persistent_workers=persistent_workers,
            subset_ratio=subset_ratio,
        )
