import os
import os.path as osp
import shutil
import torch
from torch_geometric.data import InMemoryDataset, download_url, extract_zip
from torch_geometric.io import read_tu_data
# from torch_geometric.read import read_tu_data
import pdb
import pruning

class TUDatasetExt(InMemoryDataset):
    
    # url = 'https://ls11-www.cs.uni-dortmund.de/people/morris/' \
    #       'graphkerneldatasets'
    url = 'https://ls11-www.cs.tu-dortmund.de/people/morris/' \
          'graphkerneldatasets'
    def __init__(self,
                 root,
                 name,
                 transform=None,
                 pre_transform=None,
                 pre_filter=None,
                 use_node_attr=False,
                 processed_filename='data.pt',
                 pruning_percent=0):
        
        self.name = name
        self.pruning_percent = pruning_percent
        self.processed_filename = processed_filename
        super(TUDatasetExt, self).__init__(root, transform, pre_transform, pre_filter)

        if self.pruning_percent > 0:
            self.pruned_data_path = self.processed_paths[0][:-3] + "_" + str(pruning_percent * 100) + self.processed_paths[0][-3:]
            if not os.path.exists(self.pruned_data_path):
                self.process()
            self.data, self.slices = torch.load(self.pruned_data_path)
        else:
            self.data, self.slices = torch.load(self.processed_paths[0])
        if self.data.x is not None and not use_node_attr:
            self.data.x = self.data.x[:, self.num_node_attributes:]

    @property
    def num_node_labels(self):
        if self.data.x is None:
            return 0

        for i in range(self.data.x.size(1)):
            if self.data.x[:, i:].sum().item() == self.data.x.size(0):
                return self.data.x.size(1) - i

        return 0

    @property
    def num_node_attributes(self):
        if self.data.x is None:
            return 0

        return self.data.x.size(1) - self.num_node_labels

    @property
    def raw_file_names(self):
        names = ['A', 'graph_indicator']
        return ['{}_{}.txt'.format(self.name, name) for name in names]

    @property
    def processed_file_names(self):
        return self.processed_filename

    def download(self):
        path = download_url('{}/{}.zip'.format(self.url, self.name), self.root)
        extract_zip(path, self.root)
        os.unlink(path)
        shutil.rmtree(self.raw_dir)
        os.rename(osp.join(self.root, self.name), self.raw_dir)

    def process(self):

        self.data, self.slices = read_tu_data(self.raw_dir, self.name)

        if self.pre_filter is not None:
            data_list = [self.get(idx) for idx in range(len(self))]
            data_list = [data for data in data_list if self.pre_filter(data)]
            pdb.set_trace()
            self.data, self.slices = self.collate(data_list)

        if self.pre_transform is not None:
            data_list = [self.get(idx) for idx in range(len(self))]
            data_list = [self.pre_transform(data) for data in data_list]
            
        if self.pruning_percent > 0:
            data_list = pruning.random_pruning(data_list, self.pruning_percent)
            self.data, self.slices = self.collate(data_list)
            torch.save((self.data, self.slices), self.pruned_data_path)
        else:
            self.data, self.slices = self.collate(data_list)
            torch.save((self.data, self.slices), self.processed_paths[0])

    def __repr__(self):
        return '{}({})'.format(self.name, len(self))






# class TUDatasetExt(TUDataset):
#     r"""A variety of graph kernel benchmark datasets, *.e.g.* "IMDB-BINARY",
#     "REDDIT-BINARY" or "PROTEINS", collected from the `TU Dortmund University
#     <http://graphkernels.cs.tu-dortmund.de>`_.

#     Args:
#         root (string): Root directory where the dataset should be saved.
#         name (string): The `name <http://graphkernels.cs.tu-dortmund.de>`_ of
#             the dataset.
#         transform (callable, optional): A function/transform that takes in an
#             :obj:`torch_geometric.data.Data` object and returns a transformed
#             version. The data object will be transformed before every access.
#             (default: :obj:`None`)
#         pre_transform (callable, optional): A function/transform that takes in
#             an :obj:`torch_geometric.data.Data` object and returns a
#             transformed version. The data object will be transformed before
#             being saved to disk. (default: :obj:`None`)
#         pre_filter (callable, optional): A function that takes in an
#             :obj:`torch_geometric.data.Data` object and returns a boolean
#             value, indicating whether the data object should be included in the
#             final dataset. (default: :obj:`None`)
#         use_node_attr (bool, optional): If :obj:`True`, the dataset will
#             contain additional continuous node features (if present).
#             (default: :obj:`False`)
#     """

#     url = 'https://ls11-www.cs.tu-dortmund.de/people/morris/' \
#           'graphkerneldatasets'

#     def __init__(self,
#                  root,
#                  name,
#                  transform=None,
#                  pre_transform=None,
#                  pre_filter=None,
#                  use_node_attr=False,
#                  processed_filename='data.pt'):
#         self.processed_filename = processed_filename
#         pdb.set_trace()
#         super(TUDatasetExt, self).__init__(root, name, transform, pre_transform,
#                                            pre_filter, use_node_attr)

#     @property
#     def processed_file_names(self):
#         return self.processed_filename