from collections import defaultdict

import numpy as np
from scipy.sparse import csc_matrix

# Undirected graph stored as vertex -> set of neighbouring vertices.
# Sets absorb repeated edges on their own, so no membership test is needed
# before adding.
adjacency_list = defaultdict(set)

# Stream the edge list one line at a time rather than loading the whole file
# into memory first. Only the first two tab-separated fields of each line
# (the two endpoint ids) are used; any further fields are ignored.
with open('real_network_data/flickr-growth-sorted.txt', 'r') as file:
    for line in file:
        stripped = line.strip()
        if not stripped:
            continue  # blank line: nothing to parse

        parts = stripped.split('\t')
        vertex1 = int(parts[0])
        vertex2 = int(parts[1])

        # Record both directions so the graph is undirected.
        adjacency_list[vertex1].add(vertex2)
        adjacency_list[vertex2].add(vertex1)

# Each edge is stored once per endpoint, hence the division by two.
# NOTE(review): a self-loop (vertex1 == vertex2) is stored only once, so this
# count assumes the input contains none -- confirm against the data set.
total_edges = sum(len(neighbors) for neighbors in adjacency_list.values()) // 2

print("Total number of edges:", total_edges, flush=True)

# Degree histogram: degree_count[d] is the number of vertices that have
# exactly d neighbours.
degree_count = defaultdict(int)
for neighbors in adjacency_list.values():
    degree_count[len(neighbors)] += 1

# Dense form of the histogram, indexed by degree from 0 up to the maximum.
# .get() is used instead of degree_count[degree] because indexing a
# defaultdict would insert a zero entry for every degree that never occurs.
max_degree = max(degree_count)
degree_array = [degree_count.get(degree, 0) for degree in range(max_degree + 1)]

print("Degree array:", degree_array, flush=True)

# N_1 (G): number of vertices whose degree is at least one. Index 0 of the
# degree histogram is left out of the sum.
N_1G = sum(degree_array[1:])

# N_p (G): every degree bucket weighted by p * (1 - (1 - p)^degree).
# NOTE(review): presumably the expected count under sampling with
# probability p -- confirm against the reference for this estimator.
p = 0.75
N_pG = 0
for degree, count in enumerate(degree_array[1:], start=1):
    N_pG += p * count * (1 - (1-p)**degree)

# Sigma estimate derived from the log-ratio of the two counts.
log_gap = np.log(N_1G) - np.log(N_pG)
est_sigma = log_gap/(- np.log(p)) - 1

print("The estimator for sigma is:", est_sigma, flush=True)

# The matrix must be large enough to index the highest vertex id directly.
max_vertex = max(adjacency_list)
matrix_size = max_vertex + 1

# Coordinate lists for the sparse matrix: one (row, col) pair per stored
# neighbour entry, i.e. two entries per undirected edge.
rows = []
cols = []
for vertex, neighbors in adjacency_list.items():
    rows.extend([vertex] * len(neighbors))
    cols.extend(neighbors)

# Every stored entry has weight one.
weights = np.ones(len(rows))
adj_matrix = csc_matrix((weights, (rows, cols)), shape=(matrix_size, matrix_size))

# Finally, we have to delete all zero rows and columns from the csc matrix,
# which yields the matrix adj_matrix_clean
def remove_zero_rows(x):
    """Return ``x`` restricted to the rows that hold a nonzero entry.

    ``x`` is a scipy sparse matrix. The surviving rows keep their original
    relative order and the number of columns is unchanged.
    """
    row_of_each_entry = x.nonzero()[0]
    # np.unique yields the sorted, de-duplicated row indices.
    rows_to_keep = np.unique(row_of_each_entry)
    return x[rows_to_keep]


def remove_zero_columns(x):
    """Return ``x`` restricted to the columns that hold a nonzero entry.

    ``x`` is a scipy sparse matrix. The surviving columns keep their original
    relative order and the number of rows is unchanged.
    """
    col_of_each_entry = x.nonzero()[1]
    # np.unique yields the sorted, de-duplicated column indices.
    cols_to_keep = np.unique(col_of_each_entry)
    return x[:, cols_to_keep]


# Prune the all-zero rows first, then the all-zero columns of the result.
row_pruned_matrix = remove_zero_rows(adj_matrix)
adj_matrix_clean = remove_zero_columns(row_pruned_matrix)
