# generate application 1 dataset
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import InMemoryDataset
from torch_geometric.data import Data
import pickle
from pathlib import Path
import yaml
import re
import itertools
from torch_geometric.data import DataLoader
from utils import get_diracs
from tqdm import tqdm
from gurobipy import * 
import gurobipy as gp
from gurobipy import GRB
from torch_geometric.datasets import TUDataset
import pulp
import networkx as nx
from torch_geometric.utils import erdos_renyi_graph, to_networkx, from_networkx
import random
from random import choice
from torch_geometric.utils import degree, remove_self_loops
from tqdm import tqdm
import time

def greedy_poor(data):
    """Build a maximal independent set by picking nodes uniformly at random.

    Repeatedly draws a random node among those still in the graph, adds it to
    the set and removes it together with all of its neighbours. Any node that
    ends up without a surviving edge is added to the set immediately, since
    it cannot conflict with anything. A graph without edges therefore yields
    a mask of all ones.

    Args:
        data: Graph object exposing ``edge_index`` (a ``2 x E`` integer tensor
            of node-index pairs) and ``num_nodes``.

    Returns:
        Float tensor of length ``num_nodes`` holding 1 for selected nodes and
        0 for the others.
    """
    edge_index = data.edge_index
    num_nodes = data.num_nodes
    src, dst = edge_index[0], edge_index[1]

    chosen_mask = torch.zeros(num_nodes)
    # removed[v] is True once v was selected or dropped as a neighbour.
    removed = torch.zeros(num_nodes, dtype=torch.bool)
    # edge_alive[e] is True while neither endpoint of edge e was removed.
    edge_alive = torch.ones(edge_index.shape[1], dtype=torch.bool)

    while True:
        # Count surviving edge endpoints per node. Both rows are counted so a
        # node is only treated as isolated when no edge touches it at all.
        incident = torch.bincount(
            torch.cat((src[edge_alive], dst[edge_alive])), minlength=num_nodes
        )
        isolated = (incident == 0) & ~removed
        chosen_mask[isolated] = 1
        removed |= isolated
        if not bool(edge_alive.any()):
            break

        # Every node that is not removed still has at least one edge here.
        candidates = (~removed).nonzero(as_tuple=False).reshape(-1).numpy()
        picked = int(np.random.choice(candidates))
        chosen_mask[picked] = 1

        # Closed neighbourhood of the picked node in the remaining graph.
        touching = edge_alive & ((src == picked) | (dst == picked))
        closed = torch.zeros(num_nodes, dtype=torch.bool)
        closed[picked] = True
        closed[src[touching]] = True
        closed[dst[touching]] = True
        removed |= closed
        # Drop every edge that touches the closed neighbourhood.
        edge_alive &= ~(closed[src] | closed[dst])

    # Sanity check: no original edge may have both endpoints selected.
    penalty = (chosen_mask[src] * chosen_mask[dst]).sum()
    if penalty > 0:
        print('wrong')
    return chosen_mask

def greedy(data):
    """Build a maximal independent set with the minimum-degree greedy rule.

    Repeatedly selects the node with the fewest surviving edges, adds it to
    the set and removes it together with all of its neighbours. Any node that
    ends up without a surviving edge is added to the set immediately, since
    it cannot conflict with anything. A graph without edges therefore yields
    a mask of all ones.

    Ties between nodes of equal degree are resolved by ``torch.argmin``.

    Args:
        data: Graph object exposing ``edge_index`` (a ``2 x E`` integer tensor
            of node-index pairs) and ``num_nodes``.

    Returns:
        Float tensor of length ``num_nodes`` holding 1 for selected nodes and
        0 for the others.
    """
    edge_index = data.edge_index
    num_nodes = data.num_nodes
    src, dst = edge_index[0], edge_index[1]

    chosen_mask = torch.zeros(num_nodes)
    # removed[v] is True once v was selected or dropped as a neighbour.
    removed = torch.zeros(num_nodes, dtype=torch.bool)
    # edge_alive[e] is True while neither endpoint of edge e was removed.
    edge_alive = torch.ones(edge_index.shape[1], dtype=torch.bool)

    while True:
        # Count surviving edge endpoints per node. Both rows are counted so a
        # node is only treated as isolated when no edge touches it at all;
        # for a symmetric edge list this is twice the degree, which leaves
        # the ordering between nodes unchanged.
        incident = torch.bincount(
            torch.cat((src[edge_alive], dst[edge_alive])), minlength=num_nodes
        )
        isolated = (incident == 0) & ~removed
        chosen_mask[isolated] = 1
        removed |= isolated
        if not bool(edge_alive.any()):
            break

        # Push removed nodes above every real count so argmin skips them.
        ranked = incident.masked_fill(removed, int(incident.max()) + 1)
        picked = int(torch.argmin(ranked))
        chosen_mask[picked] = 1

        # Closed neighbourhood of the picked node in the remaining graph.
        touching = edge_alive & ((src == picked) | (dst == picked))
        closed = torch.zeros(num_nodes, dtype=torch.bool)
        closed[picked] = True
        closed[src[touching]] = True
        closed[dst[touching]] = True
        removed |= closed
        # Drop every edge that touches the closed neighbourhood.
        edge_alive &= ~(closed[src] | closed[dst])

    # Sanity check: no original edge may have both endpoints selected.
    penalty = (chosen_mask[src] * chosen_mask[dst]).sum()
    if penalty > 0:
        print('wrong')
    return chosen_mask

def generate_graph(n, d=None, p=None, graph_type='reg', random_seed=0):
    """
    Helper function to generate a NetworkX random graph of specified type,
    given specified parameters (e.g. d-regular, d=3). Must provide one of
    d or p, d with graph_type='reg', and p with graph_type in ['prob', 'erdos'].
    Input:
        n: Problem size
        d: [Optional] Degree of each node in graph
        p: [Optional] Probability of edge between two nodes
        graph_type: Specifies graph type to generate
        random_seed: Seed value for random generator
    Output:
        nx_graph: NetworkX graph whose nodes are the integers 0..n-1, inserted
            in ascending order
    Raises:
        ValueError: if the parameter required by graph_type (d or p) is None
        NotImplementedError: if graph_type is not one of the handled values
    """
    if graph_type == 'reg':
        if d is None:
            raise ValueError("graph_type='reg' requires the degree d")
        print(f'Generating d-regular graph with n={n}, d={d}, seed={random_seed}')
        nx_temp = nx.random_regular_graph(d=d, n=n, seed=random_seed)
    elif graph_type == 'prob':
        if p is None:
            raise ValueError("graph_type='prob' requires the edge probability p")
        print(f'Generating p-probabilistic graph with n={n}, p={p}, seed={random_seed}')
        nx_temp = nx.fast_gnp_random_graph(n, p, seed=random_seed)
    elif graph_type == 'erdos':
        if p is None:
            raise ValueError("graph_type='erdos' requires the edge probability p")
        print(f'Generating erdos-renyi graph with n={n}, p={p}, seed={random_seed}')
        nx_temp = nx.erdos_renyi_graph(n, p, seed=random_seed)
    else:
        raise NotImplementedError(f'!! Graph type {graph_type} not handled !!')

    # Networkx does not enforce node order by default
    nx_temp = nx.convert_node_labels_to_integers(nx_temp)
    # Rebuild the graph with nodes inserted in sorted order so downstream
    # code sees a deterministic node ordering. OrderedGraph no longer exists
    # in NetworkX 3.x; the plain Graph keeps insertion order there, so fall
    # back to it when the ordered class is unavailable.
    graph_cls = getattr(nx, 'OrderedGraph', nx.Graph)
    nx_graph = graph_cls()
    nx_graph.add_nodes_from(sorted(nx_temp.nodes()))
    nx_graph.add_edges_from(nx_temp.edges)
    return nx_graph


class REGULAR_test(InMemoryDataset):
    """In-memory dataset of random regular graphs labelled by two greedy heuristics.

    Each stored graph carries the selection mask of ``greedy`` as node
    features ``x`` and the selection mask of ``greedy_poor`` as
    ``node_feature_ga``, together with the sizes of both selections.

    Args:
        config: Configuration dictionary; ``config['data_dir']`` is used as
            the dataset root directory.
    """

    def __init__(self, config:dict):
        self.config = config
        self.data_path = Path(config['data_dir'])
        super(REGULAR_test, self).__init__(root=self.data_path)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """No raw input files are needed; the graphs are generated on the fly."""
        return []

    @property
    def processed_file_names(self):
        """Name of the single file that holds the collated dataset."""
        return ['data.pt']

    def download(self):
        """Nothing to download to ``self.raw_dir``."""
        pass

    def get_idx_split(self, split_type = 'Random'):
        """Return the index split of the dataset.

        Args:
            split_type: Accepted for interface compatibility; currently unused.

        Returns:
            Dictionary with a single key ``'train'`` mapping to a long tensor
            that enumerates every graph in the dataset.
        """
        # Derive the indices from the actual dataset size so they can never
        # point past the last stored graph.
        train_idx = np.arange(len(self))
        return {'train':torch.tensor(train_idx,dtype = torch.long)}

    def process(self):
        """Generate the graphs, label them, and save the collated result.

        For every size in ``n_list`` this builds 20 random ``d_``-regular
        graphs, runs ``greedy`` and ``greedy_poor`` on each, prints the time
        spent in every stage, and writes the collated data to
        ``self.processed_paths[0]``.
        """
        # for each task, it's a separate dataset
        data_list = []
        #n_list = [1000000]
        n_list = [100]
        d_ = 20
        # alternative ratio values left by the original author:
        #[0.45537, 0.38443, 0.33567, 0.28521, 0.19732, 0.11079]
        ratio_ = 0.19732
        for n_ in tqdm(n_list):
            for i in tqdm(range(20)):
                this_start_t = time.time()
                # Stored as ``max_set`` on every graph of this size.
                nodes_upper_bound = ratio_ * n_
                nx_graph = generate_graph(n = n_, d = d_, p = None, graph_type = 'reg', random_seed = None)
                generate_graph_t = time.time()
                print("generating the graph takes:"+str(generate_graph_t - this_start_t))
                data = from_networkx(nx_graph)
                transfer_data_t = time.time()
                print("transfer from networkx to pyg:"+str(transfer_data_t - generate_graph_t))
                node_feature = greedy(data)
                greedy_t = time.time()
                print("finish the degree greedy algorithm:"+str(greedy_t - transfer_data_t))
                greedy_num = torch.sum(node_feature)
                node_feature_ga = greedy_poor(data)
                greedy_poor_t = time.time()
                print("finish poor greedy:"+str(greedy_poor_t - greedy_t))
                ga_num = torch.sum(node_feature_ga)
                # NOTE(review): greedy_num is stored as a Python number while
                # ga_num stays a 0-dim tensor; kept as-is because downstream
                # consumers are not visible here.
                final_data = Data(x = node_feature, edge_index = data.edge_index, max_set = nodes_upper_bound, greedy_num = greedy_num.item() ,node_feature_ga = node_feature_ga, ga_num = ga_num)
                data_list.append(final_data)
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

        #import pdb; pdb.set_trace()
# 85.24 / 130.21
if __name__ == '__main__':
    import os
    # Build the dataset once for every config file found in ./configs.
    configs = Path('./configs')
    for cfg in configs.iterdir():
        # Match on the file name so the filter does not depend on the OS
        # path separator, and skip anything that is not a regular file.
        if cfg.is_file() and cfg.name.startswith("config"):
            # Close the file handle as soon as the YAML has been parsed.
            with cfg.open('r') as cfg_file:
                cfg_dict = yaml.safe_load(cfg_file)
            dataset = REGULAR_test(cfg_dict['test_20_102'])
