import numpy as np
from collections import Counter
import umap
from random import randint
from sklearn.decomposition import PCA
import pynndescent
from plots import *


def distance(v1, v2):
    """Return the norm of ``v1 - v2`` as computed by ``np.linalg.norm``.

    With the default arguments used here this is the Euclidean (2-norm)
    distance for 1-D vectors. Both arguments must support ``-`` with each
    other (e.g. NumPy arrays of matching shape).
    """
    difference = v1 - v2
    return np.linalg.norm(difference)

def set_frac(set1, set2, sizes):
    """Return the number of elements common to both inputs divided by ``sizes``.

    ``set1`` and ``set2`` may be any iterables of hashable items; duplicates
    are ignored because the overlap is computed on sets.
    """
    shared = set(set1) & set(set2)
    return len(shared) / sizes


# Import the project-local `embedding` module as `embed`, then immediately
# reload it and rebind `embed` to the module object importlib.reload returns.
# NOTE(review): presumably so that edits to embedding.py are picked up when this
# file is re-executed in a long-lived interactive session — confirm it is still
# needed, since it re-runs the module's top-level code on every import.
import importlib
import embedding as embed
embed = importlib.reload(embed)



#Generate directed  K-NN graph




#Two block simulations
def distribute_edges(sets, probabilities):
    """Draw one random vertex: choose a block by weight, then a vertex in it.

    Parameters
    ----------
    sets : sequence of int
        Cumulative block boundaries. Block ``i`` owns the vertex ids
        ``sets[i]`` through ``sets[i + 1] - 1`` inclusive, so ``sets`` needs
        one more entry than ``probabilities``.
    probabilities : sequence of float
        Non-negative weight of each block. The weights are used relative to
        their total, so a row that does not sum exactly to 1 (including one
        that is off only by floating-point rounding) still always yields a
        block instead of failing.

    Returns
    -------
    int
        A vertex id drawn uniformly from the chosen block.

    Raises
    ------
    ValueError
        If ``probabilities`` is empty, contains a negative entry, or has no
        positive total mass.
    """
    weights = np.asarray(probabilities, dtype=float).ravel()
    if weights.size == 0 or np.any(weights < 0):
        raise ValueError("probabilities must be a non-empty sequence of non-negative weights")

    cumulative = np.cumsum(weights)
    total = cumulative[-1]
    if not total > 0:
        raise ValueError("probabilities must have a positive total")

    # Draw over the actual total mass rather than a fixed [0, 1) so the toss
    # cannot land beyond the last interval when the weights sum to < 1.
    toss = np.random.uniform(0, total)

    # side="right" gives the i with cumulative[i-1] <= toss < cumulative[i],
    # i.e. the same half-open intervals as a linear scan, and never selects a
    # zero-weight block.
    pos = int(np.searchsorted(cumulative, toss, side="right"))
    if pos >= weights.size:
        # Only reachable if rounding makes toss == total; fall back to the
        # last block that actually carries weight.
        pos = int(np.flatnonzero(weights > 0)[-1])

    # randint is inclusive on both ends, hence the -1 on the upper boundary.
    return randint(sets[pos], sets[pos + 1] - 1)





def graph_simulation(sizes, prob_mat, kchoice=20):
    """Simulate a directed block-structured graph with fixed out-degree.

    Every vertex ``u`` in block ``i`` receives ``kchoice`` distinct
    out-neighbours (never ``u`` itself), each drawn with
    ``distribute_edges`` using row ``i`` of ``prob_mat`` as block weights.

    Parameters
    ----------
    sizes : sequence of int
        Number of vertices in each block; vertices are numbered consecutively
        block by block starting at 0.
    prob_mat : 2-D array
        Indexed as ``prob_mat[i, j]``: weight with which a vertex of block
        ``i`` picks a neighbour from block ``j``.
    kchoice : int, optional
        Target out-degree per vertex (default 20). It is capped at the number
        of distinct vertices reachable from the block, so that small graphs
        terminate instead of looping forever waiting for neighbours that do
        not exist.

    Returns
    -------
    edge_list : list of (int, int)
        Directed edges ``(u, v)`` in the order they were first drawn.
    vlist : list of int
        All vertex ids ``0 .. sum(sizes) - 1``.
    label : list of int
        Block index of every vertex.
    label : list of int
        The same list object again (kept for the existing 4-tuple interface).
    """
    n_c = len(sizes)
    n = sum(sizes)

    # size1[i] is the first vertex id of block i; size1[-1] == n.
    size1 = [0]
    for block_size in sizes:
        size1.append(size1[-1] + block_size)

    label = [ell for ell, block_size in enumerate(sizes) for _ in range(block_size)]

    edge_list = []
    for i in range(n_c):
        row = prob_mat[i, :]

        # Count vertices this block can actually draw: everything in blocks
        # with positive weight, minus the source vertex itself when its own
        # block is among them.
        reachable = sum(sizes[j] for j in range(n_c) if prob_mat[i, j] > 0)
        if prob_mat[i, i] > 0:
            reachable -= 1
        target = min(kchoice, max(reachable, 0))

        for u in range(size1[i], size1[i + 1]):
            chosen = set()
            while len(chosen) < target:
                v = distribute_edges(size1, row)
                # Skip self-loops and repeats; only new neighbours count.
                if u != v and v not in chosen:
                    chosen.add(v)
                    edge_list.append((u, v))

    vlist = list(range(n))

    return edge_list, vlist, label, label



    










#Geometric simulations
def geometric_simulations_gmm(n_cluster,d,center,sizes,alpha_spread):
    """Sample a Gaussian-mixture point cloud, build its graph and plot it.

    Each cluster ``j`` contributes two isotropic Gaussian components that
    share the mean ``center[j]``: ``sizes[j, 0]`` points with covariance
    ``alpha_spread[j, 0] * I`` and ``sizes[j, 1]`` points with covariance
    ``alpha_spread[j, 1] * I``. The points are then randomly permuted.

    Parameters
    ----------
    n_cluster : int
        Number of clusters.
    d : int
        Dimension of the points (size of the identity covariance).
    center : indexable
        ``center[j]`` is the mean of cluster ``j``.
    sizes : 2-D integer array
        Indexed as ``sizes[j, k]`` for ``k`` in {0, 1}: points per component.
    alpha_spread : 2-D array
        Indexed as ``alpha_spread[j, k]``: variance scale per component.

    Returns
    -------
    edge_list, vlist
        Whatever ``embed.dir_KNN_graph(X, 20, 0)`` returns.
        NOTE(review): presumably a directed 20-nearest-neighbour graph —
        confirm against the ``embedding`` module.
    X : ndarray
        The shuffled sample points, one per row.
    label : ndarray
        Cluster index ``j`` of each row of ``X``.
    sub_label : ndarray
        Component index ``2 * j + k`` of each row of ``X``.

    Side effects: prints the requested point count, the shape of ``X`` and the
    per-component counts, fits a UMAP embedding of ``X`` and passes it to
    ``showPlot``.
    """
    print("number of points aimed to be added=",sum(sum(sizes)))

    label = []
    sub_label = []
    X = []
    for j in range(n_cluster):
        for k in range(2):
            # The covariance is the same for every draw of this component.
            covariance = alpha_spread[j, k] * np.identity(d)
            component_id = 2 * j + k
            for _ in range(sizes[j, k]):
                X.append(np.random.multivariate_normal(center[j], covariance))
                sub_label.append(component_id)
                label.append(j)

    X = np.array(X)

    # Apply one shared permutation so points and both label arrays stay aligned.
    shuffle = np.random.permutation(X.shape[0])
    X = X[shuffle, :]
    label = np.array(label)[shuffle]
    sub_label = np.array(sub_label)[shuffle]

    print(X.shape)
    print(Counter(sub_label))

    edge_list, vlist = embed.dir_KNN_graph(X, 20, 0)

    reducer = umap.UMAP()
    embedding = reducer.fit_transform(X)
    showPlot(embedding, np.array(sub_label), 'Initial shape')

    return edge_list, vlist, X, label, sub_label





