from pathlib import Path
import numpy as np
import scipy.sparse
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import quantile_transform

from SEPAL.dataloader import DataLoader
from SEPAL.knowledge_graph import KnowledgeGraph
from SEPAL.utils import create_graph


# Third ancestor of this file, i.e. two levels above the directory that
# contains the script (presumably the SEPAL project root -- confirm).
_script_path = Path(__file__).absolute()
sepal_dir = _script_path.parents[2]


# Graph shared by every diffusion experiment below, built by the project
# helper from the dataset name "mini_yago3".
graph = create_graph("mini_yago3")




#### -------------        Diffusion        -------------



### With right-normalized adjacency matrix

# Normalize adjacency to ensure mass conservation: each diffusion step below
# multiplies x by this operator (self-loops included).
diffusion_matrix = graph.get_right_normalized_adjacency(self_loops=True)

# Initial state: all the mass (num_entities units) sits on a single entity
# drawn uniformly at random; every other entity starts at zero.
x = np.zeros(graph.num_entities)
diffuser = random.randint(0, graph.num_entities-1)
x[diffuser] = graph.num_entities

print("Diffuser has degree ", int(graph.degrees[diffuser]))

# Ten diffusion steps. `@` is the explicit matrix-vector product: with `*`
# the meaning depends on the operand type (matrix product for legacy
# scipy.sparse matrices, element-wise for sparse/dense arrays).
# NOTE(review): assumes the operator is a scipy.sparse object -- confirm.
for _ in range(10):
    x = diffusion_matrix @ x


# Distribution of the diffused values, share of entities holding a strictly
# positive value, and degree of the entity holding the largest value.
plt.hist(x, bins=500)
print(f"{(x > 0).sum() / x.shape[0]:.1%} of all entities have been reached")
print("Entity with highest value has degree", int(graph.degrees[np.argmax(x)]))




### With graph laplacian --> numerically unstable

# Initial state: all the mass (num_entities units) sits on a single entity
# drawn uniformly at random; every other entity starts at zero.
x = np.zeros(graph.num_entities)
diffuser = random.randint(0, graph.num_entities-1)
x[diffuser] = graph.num_entities

print("Diffuser has degree ", int(graph.degrees[diffuser]))
c = 1e-4
# The scaled Laplacian is identical at every step, so it is fetched and
# scaled once instead of on each of the 100 iterations.
# NOTE(review): assumes get_laplacian_matrix() returns the same matrix on
# every call and is a scipy.sparse object -- confirm.
scaled_laplacian = c * graph.get_laplacian_matrix()
for _ in range(100):
    # Update x <- x - c * L x; `@` is the explicit matrix-vector product.
    x = x - scaled_laplacian @ x
    print("Entity with highest value has degree", int(graph.degrees[np.argmax(x)]))
    print("Highest value:", x.max())
    # True while the total mass stays within 1 of its initial value.
    print("Numeric stability?", np.abs(x.sum()-graph.num_entities) < 1)


# Distribution of the diffused values, share of entities holding a strictly
# positive value, and degree of the entity holding the largest value.
plt.hist(x, bins=500)
print(f"{(x > 0).sum() / x.shape[0]:.1%} of all entities have been reached")
print("Entity with highest value has degree", int(graph.degrees[np.argmax(x)]))




### With symmetrically normalized graph laplacian --> no mass conservation

# Initial state: all the mass (num_entities units) sits on a single entity
# drawn uniformly at random; every other entity starts at zero.
x = np.zeros(graph.num_entities)
diffuser = random.randint(0, graph.num_entities-1)
x[diffuser] = graph.num_entities

print("Diffuser has degree ", int(graph.degrees[diffuser]))
c = 0.5
# The scaled Laplacian is identical at every step, so it is fetched and
# scaled once instead of on each of the 100 iterations.
# NOTE(review): assumes get_symmetric_normalized_laplacian_matrix() returns
# the same matrix on every call and is a scipy.sparse object -- confirm.
scaled_laplacian = c * graph.get_symmetric_normalized_laplacian_matrix()
for _ in range(100):
    # Update x <- x - c * L x; `@` is the explicit matrix-vector product.
    x = x - scaled_laplacian @ x
    print("Entity with highest value has degree", int(graph.degrees[np.argmax(x)]))
    print("Highest value:", x.max())
    # Total mass relative to the num_entities units injected at the start.
    print(f"Proportion of initial mass: {x.sum()/graph.num_entities:.1%}")


# Distribution of the diffused values, share of entities holding a strictly
# positive value, and degree of the entity holding the largest value.
plt.hist(x, bins=500)
print(f"{(x > 0).sum() / x.shape[0]:.1%} of all entities have been reached")
print("Entity with highest value has degree", int(graph.degrees[np.argmax(x)]))




### With right normalized graph laplacian

# Initial state: all the mass (num_entities units) sits on a single entity
# drawn uniformly at random; every other entity starts at zero.
x = np.zeros(graph.num_entities)
diffuser = random.randint(0, graph.num_entities-1)
x[diffuser] = graph.num_entities

print("Diffuser has degree ", int(graph.degrees[diffuser]))
c = .9
# The scaled Laplacian is identical at every step, so it is fetched and
# scaled once instead of on each of the 100 iterations.
# NOTE(review): assumes get_right_normalized_laplacian_matrix() returns the
# same matrix on every call and is a scipy.sparse object -- confirm.
scaled_laplacian = c * graph.get_right_normalized_laplacian_matrix()
for _ in range(100):
    # Update x <- x - c * L x; `@` is the explicit matrix-vector product.
    x = x - scaled_laplacian @ x
    print("Entity with highest value has degree", int(graph.degrees[np.argmax(x)]))
    print("Highest value:", x.max())
    # True while the total mass stays within 1 of its initial value.
    print("Numeric stability?", np.abs(x.sum()-graph.num_entities) < 1)


# Distribution of the diffused values, share of entities holding a strictly
# positive value, and degree of the entity holding the largest value.
plt.hist(x, bins=500)
print(f"{(x > 0).sum() / x.shape[0]:.1%} of all entities have been reached")
print("Entity with highest value has degree", int(graph.degrees[np.argmax(x)]))