import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split
from ucimlrepo import fetch_ucirepo 

# Download dataset id 165 from the UCI ML repository.
concrete_compressive_strength = fetch_ucirepo(id=165)

# Feature and target tables (as pandas dataframes).
X, y = (
    concrete_compressive_strength.data.features,
    concrete_compressive_strength.data.targets,
)

# Uncomment to inspect the dataset metadata / variable information.
# print(concrete_compressive_strength.metadata)
# print(concrete_compressive_strength.variables)

# Copy both tables into float32 tensors; torch.tensor always copies, so the
# in-place normalization below never touches the dataframes.
features_tensor = torch.tensor(X.to_numpy(), dtype=torch.float32)
labels_tensor = torch.tensor(y.to_numpy(), dtype=torch.float32)

# Standardize every feature column: subtract the per-column mean and divide
# by the per-column standard deviation (both reduced over the rows, dim=0).
# NOTE(review): these statistics are computed over all rows, i.e. before the
# train/test split performed further down in this file, so held-out rows
# contribute to the normalization -- confirm this is acceptable.
mean = torch.mean(features_tensor, dim=0)
std = torch.std(features_tensor, dim=0)
features_tensor.sub_(mean).div_(std)

# The Dataset wrapper and DataLoaders below exist to provide batching and random shuffling.

class ConcreteDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label row.

    Sample ``i`` is the tuple ``(data_tensor[i], labels_tensor[i])``; the
    dataset length is the length of ``data_tensor``.
    """

    def __init__(self, data_tensor: torch.Tensor, labels_tensor: torch.Tensor) -> None:
        """
        Args:
            data_tensor (torch.Tensor): Tensor containing feature data.
            labels_tensor (torch.Tensor): Tensor containing labels.

        Raises:
            ValueError: If the two tensors do not hold the same number of
                samples along their first dimension.
        """
        # __len__ only looks at the features, so a mismatch would otherwise
        # surface later as an IndexError mid-epoch (labels too short) or as
        # silently ignored labels (labels too long). Fail early instead.
        n_samples, n_labels = len(data_tensor), len(labels_tensor)
        if n_samples != n_labels:
            raise ValueError(
                f"data_tensor has {n_samples} samples but labels_tensor has "
                f"{n_labels}; both must have the same length"
            )
        self.data_tensor = data_tensor
        self.labels_tensor = labels_tensor

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.data_tensor)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.data_tensor[idx], self.labels_tensor[idx]
    
# Wrap the feature and label tensors so DataLoader can index them as samples.
concrete_dataset = ConcreteDataset(features_tensor, labels_tensor)

# 80/20 train/test partition. The test set takes the remainder so the two
# sizes always sum to the dataset length despite the int() truncation.
train_size = int(0.8 * len(concrete_dataset))
test_size = len(concrete_dataset) - train_size

# Draw the partition from a dedicated, seeded generator. Without it the
# membership of the train/test sets changes on every import, so a model
# trained in one process could be evaluated on its own training rows in
# another. A private generator also leaves the global RNG state untouched.
SPLIT_SEED = 42
train_dataset, test_dataset = random_split(
    concrete_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps runs comparable.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# `from concrete_dataset import *` will only expose these two objects;
# other kinds of import statements are not affected.
__all__ = ['train_loader', 'test_loader']