import os
from typing import Callable, List, Optional

import numpy as np
import torch
from torch_geometric.data import InMemoryDataset

from temporal_graph.data import TemporalData


class DBLP(InMemoryDataset):
    """DBLP temporal graph dataset built from manually downloaded raw files.

    The raw directory (``self.raw_dir``) is expected to contain:

    * ``dblp.txt``: one edge per line, as three whitespace-separated fields
      ``<src> <dst> <time>`` (integer node ids, numeric timestamp).
    * ``node2label.txt``: one ``<node> <label>`` integer pair per line.
    * ``dblp.npy`` (optional): a node feature array, transposed on load.

    Processing sorts the edges by timestamp, re-encodes the labels as
    contiguous ids, and stores a single :class:`TemporalData` object.

    Note:
        Nodes that do not appear in ``node2label.txt`` keep the label ``0``,
        which cannot be told apart from the first encoded class.

    Args:
        root: Root directory of the dataset; forwarded to
            :class:`InMemoryDataset`.
        transform: Optional callable forwarded to :class:`InMemoryDataset`.
        pre_transform: Optional callable applied to the data object once in
            :meth:`process`, before it is saved to disk.
        force_reload: Forwarded to :class:`InMemoryDataset`.
    """

    url = ("https://www.dropbox.com/sh/palzyh5box1uc1v/"
           "AACSLHB7PChT-ruN-rksZTCYa?dl=0")

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        pre_transform: Optional[Callable] = None,
        force_reload: bool = False,
    ):
        super().__init__(root, transform, pre_transform,
                         force_reload=force_reload)
        self.load(self.processed_paths[0], data_cls=TemporalData)

    def download(self):
        """Always raise: the raw files have to be fetched by hand.

        Raises:
            RuntimeError: Unconditionally, telling the user which files to
                download from :attr:`url` and where to place them.
        """
        raise RuntimeError(
            f"Dataset not found. Please download '{self.raw_file_names}' from "
            f"'{self.url}' and move it to '{self.raw_dir}'")

    @property
    def raw_file_names(self) -> List[str]:
        """Names of the mandatory raw files inside ``self.raw_dir``."""
        return ['dblp.txt', 'node2label.txt']

    @property
    def processed_file_names(self) -> str:
        """Name of the processed file written by :meth:`process`."""
        return 'data.pt'

    def process(self):
        """Parse the raw files and save one :class:`TemporalData` object.

        Raises:
            ValueError: If ``dblp.txt`` contains no edges, or if a non-blank
                line in a raw file does not have the expected field count.
        """
        src, dst, t = [], [], []
        edge_path = os.path.join(self.raw_dir, 'dblp.txt')
        with open(edge_path, encoding='utf-8') as f:
            for line in f:
                fields = line.split()
                if not fields:  # Tolerate blank lines.
                    continue
                u, v, stamp = fields
                src.append(int(u))
                dst.append(int(v))
                t.append(float(stamp))

        if not src:
            raise ValueError(f"No edges found in '{edge_path}'")

        # Node ids are used directly as indices, so the node count is the
        # largest id seen on either end of an edge, plus one.
        num_nodes = max(max(src), max(dst)) + 1
        src = torch.tensor(src, dtype=torch.long)
        dst = torch.tensor(dst, dtype=torch.long)
        t = torch.tensor(t, dtype=torch.float)

        # Order edges chronologically. A stable sort keeps the file order for
        # edges sharing a timestamp, which makes the result deterministic.
        t, perm = torch.sort(t, stable=True)
        src = src[perm]
        dst = dst[perm]

        nodes = []
        labels = []
        label_path = os.path.join(self.raw_dir, 'node2label.txt')
        with open(label_path, encoding='utf-8') as f:
            for line in f:
                fields = line.split()
                if not fields:  # Tolerate blank lines.
                    continue
                node, label = fields
                nodes.append(int(node))
                labels.append(int(label))

        # Map the raw labels to contiguous ids following their sorted order.
        # `return_inverse` yields, per entry, the index of its label among
        # the sorted distinct labels, so no extra dependency is needed.
        _, encoded = np.unique(np.asarray(labels, dtype=np.int64),
                               return_inverse=True)
        y = torch.zeros(num_nodes, dtype=torch.long)
        # An explicit long index tensor also covers an empty label file.
        node_index = torch.tensor(nodes, dtype=torch.long)
        y[node_index] = torch.as_tensor(encoded, dtype=torch.long)

        feature_path = os.path.join(self.raw_dir, 'dblp.npy')
        if os.path.exists(feature_path):
            print('Loading processed node features...')
            x = np.load(feature_path)
            # NOTE(review): the transpose suggests the array is stored as
            # (num_features, num_nodes) -- confirm against the raw file.
            x = torch.tensor(x).to(torch.float).transpose(0, 1).contiguous()
        else:
            x = None

        data = TemporalData(src=src, dst=dst, t=t, y=y, x=x,
                            num_nodes=num_nodes)
        data = data if self.pre_transform is None else self.pre_transform(data)
        self.save([data], self.processed_paths[0])
