# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Import third-party graph/numerical libraries and stdlib math helpers
import networkx as nx
import numpy as np
from math import log2


def compute_entropy(values):
    """Return the Shannon entropy (in bits) of *values* normalised to sum to one.

    The input is converted to a float array and divided by its sum; entries
    whose normalised share is not strictly positive are ignored. A zero sum
    (which includes empty input) yields 0.0.
    """
    weights = np.asarray(values, dtype=float)
    mass = weights.sum()
    if mass == 0:
        return 0.0
    shares = weights / mass
    # Drop non-positive shares so log2 is only evaluated where it is defined.
    positive = shares[shares > 0]
    return float(-np.sum(positive * np.log2(positive)))


def gini_coefficient(x):
    """Return the Gini coefficient of a 1-D collection of non-negative values.

    The values are sorted ascending and the coefficient is derived from the
    sum of the running totals relative to the grand total. Empty input or an
    all-zero total yields 0.0.
    """
    ordered = np.sort(np.asarray(x, dtype=float))
    count = ordered.size
    if count == 0:
        return 0.0
    running = np.cumsum(ordered)
    grand_total = running[-1]
    if grand_total == 0:
        return 0.0
    # Sum of cumulative shares of the sorted values.
    cumulative_share = running.sum() / grand_total
    return float((count + 1 - 2 * cumulative_share) / count)


def centrality_stats(G):
    """Summarise weighted closeness centrality of the depot and the other nodes.

    Closeness is computed with the ``'weight'`` edge attribute as distance.
    Node ``0`` is treated as the depot; every other node counts as a customer.

    Returns:
        A tuple ``(depot_centrality, customer_mean, customer_std)``. The depot
        value defaults to 0.0 when node 0 is absent, and the customer
        statistics are 0.0 when there are no other nodes.
    """
    scores = nx.closeness_centrality(G, distance='weight')
    depot_score = scores.get(0, 0.0)
    others = [score for node, score in scores.items() if node != 0]
    if others:
        return depot_score, float(np.mean(others)), float(np.std(others))
    return depot_score, 0.0, 0.0


def skewness(values):
    """Return the population skewness (third standardised moment) of *values*.

    Yields 0.0 when there are fewer than two values or when the population
    standard deviation is zero.
    """
    data = np.asarray(values, dtype=float)
    if data.size < 2:
        return 0.0
    sigma = data.std(ddof=0)
    if sigma == 0:
        return 0.0
    centered = data - data.mean()
    third_moment = (centered ** 3).mean()
    return float(third_moment / sigma ** 3)


def kurtosis(values):
    """Return the population kurtosis (fourth standardised moment) of *values*.

    This is the non-excess form (no 3 is subtracted). Yields 0.0 when there
    are fewer than two values or when the population standard deviation is
    zero.
    """
    data = np.asarray(values, dtype=float)
    if data.size < 2:
        return 0.0
    sigma = data.std(ddof=0)
    if sigma == 0:
        return 0.0
    centered = data - data.mean()
    fourth_moment = (centered ** 4).mean()
    return float(fourth_moment / sigma ** 4)


def _basic_stats(arr):
    """Return ``(mean, std, min, max)`` of *arr* as floats, or zeros if it is empty."""
    if not arr.size:
        return 0.0, 0.0, 0.0, 0.0
    return (
        float(arr.mean()),
        float(arr.std(ddof=0)),
        float(arr.min()),
        float(arr.max()),
    )


def main():
    """Extract 50 numeric features from a routing instance and emit them.

    The instance dictionary returned by ``input_data()`` is read for the keys
    ``'N'``, ``'Capacity'``, ``'Demand'`` and ``'Distance'`` (the latter as a
    flat sequence reshaped to ``(N + 1, N + 1)``, node 0 being the depot).
    Demand, distance, graph and workload statistics are computed and handed
    to ``output_results()`` together with a ``README`` description.

    Degenerate inputs (missing keys, zero capacity, empty arrays) fall back
    to 0.0 for the affected features rather than raising.
    """
    instance_data = input_data()

    # Basic parameters
    N = int(instance_data.get('N', 0))
    capacity = float(instance_data.get('Capacity', 0))
    demand = np.array(instance_data.get('Demand', []), dtype=float)
    dist_flat = np.array(instance_data.get('Distance', []), dtype=float)

    n_nodes = N + 1  # including depot

    # Build distance matrix
    if dist_flat.size != n_nodes * n_nodes:
        # Best-effort fallback: np.resize truncates, or repeats the data
        # cyclically, to reach the required length (it does not zero-pad).
        dist_flat = np.resize(dist_flat, n_nodes * n_nodes)
    dist_matrix = dist_flat.reshape((n_nodes, n_nodes))

    # Build a complete weighted undirected graph from the upper triangle.
    G = nx.Graph()
    G.add_nodes_from(range(n_nodes))
    G.add_weighted_edges_from(
        (i, j, dist_matrix[i, j])
        for i in range(n_nodes)
        for j in range(i + 1, n_nodes)
    )

    # Demand statistics
    total_demand = float(demand.sum()) if demand.size else 0.0
    avg_demand, std_demand, min_demand, max_demand = _basic_stats(demand)
    demand_range = max_demand - min_demand
    demand_skew = skewness(demand)
    demand_kurt = kurtosis(demand)
    demand_entropy = compute_entropy(demand)
    demand_gini = gini_coefficient(demand)

    demand_to_capacity_ratio = total_demand / capacity if capacity else 0.0
    avg_demand_to_capacity_ratio = avg_demand / capacity if capacity else 0.0
    max_demand_to_capacity_ratio = max_demand / capacity if capacity else 0.0
    pct_customers_high_demand = (
        float((demand > 0.2 * capacity).sum()) / N if N else 0.0
    )

    # Route estimates
    estimated_n_routes = int(np.ceil(total_demand / capacity)) if capacity else 0
    avg_customers_per_route = N / estimated_n_routes if estimated_n_routes else 0.0
    # Spare capacity averaged over the estimated fleet, so the value matches
    # the "per vehicle" name instead of reporting the fleet-wide total.
    if estimated_n_routes:
        capacity_slack_per_vehicle = (
            (estimated_n_routes * capacity) - total_demand
        ) / estimated_n_routes
    else:
        capacity_slack_per_vehicle = 0.0

    # Distance statistics (pairwise excluding diagonal, upper triangle only)
    pairwise_distances = dist_matrix[np.triu_indices(n_nodes, k=1)]
    has_pairs = bool(pairwise_distances.size)

    (
        avg_pairwise_distance,
        std_pairwise_distance,
        min_pairwise_distance,
        max_pairwise_distance,
    ) = _basic_stats(pairwise_distances)
    distance_variance = float(pairwise_distances.var(ddof=0)) if has_pairs else 0.0
    distance_cv = (
        std_pairwise_distance / avg_pairwise_distance if avg_pairwise_distance else 0.0
    )

    if has_pairs:
        distance_q1, distance_median, distance_q3 = (
            float(q) for q in np.percentile(pairwise_distances, [25, 50, 75])
        )
    else:
        distance_q1 = distance_median = distance_q3 = 0.0

    distance_skew = skewness(pairwise_distances)
    distance_kurt = kurtosis(pairwise_distances)
    distance_entropy = compute_entropy(pairwise_distances)

    if has_pairs:
        n_pairs = pairwise_distances.size
        pct_short_edges = (
            float((pairwise_distances < avg_pairwise_distance).sum()) / n_pairs
        )
        long_threshold = avg_pairwise_distance + std_pairwise_distance
        pct_long_edges = float((pairwise_distances > long_threshold).sum()) / n_pairs
    else:
        pct_short_edges = 0.0
        pct_long_edges = 0.0

    # Distances to depot (node 0)
    distances_to_depot = dist_matrix[0, 1:]
    (
        avg_distance_to_depot,
        std_distance_to_depot,
        min_distance_to_depot,
        max_distance_to_depot,
    ) = _basic_stats(distances_to_depot)

    # Correlation between demand and distance to depot. np.corrcoef raises on
    # vectors of different length and yields NaN when either vector is
    # constant or has a single element, so those cases fall back to 0.0.
    # NOTE(review): assumes 'Demand' holds one entry per customer (length N);
    # confirm against the instance schema.
    if (
        demand.size == distances_to_depot.size
        and demand.size >= 2
        and std_demand > 0
        and std_distance_to_depot > 0
    ):
        demand_distance_corr = float(np.corrcoef(demand, distances_to_depot)[0, 1])
    else:
        demand_distance_corr = 0.0

    # Graph based metrics
    num_edges = G.number_of_edges()
    possible_edges = n_nodes * (n_nodes - 1) / 2
    graph_density = num_edges / possible_edges if possible_edges else 0.0

    # MST
    mst = nx.minimum_spanning_tree(G, weight='weight')
    mst_edges = [d['weight'] for _, _, d in mst.edges(data=True)]
    mst_total_length = float(sum(mst_edges))
    mst_avg_edge_length = float(np.mean(mst_edges)) if mst_edges else 0.0
    mst_std_edge_length = float(np.std(mst_edges, ddof=0)) if mst_edges else 0.0

    # Weighted shortest paths; keep each unordered pair once (j > i).
    all_shortest = [
        length
        for i, targets in nx.all_pairs_dijkstra_path_length(G, weight='weight')
        for j, length in targets.items()
        if j > i
    ]
    if all_shortest:
        weighted_diameter = float(max(all_shortest))
        graph_avg_shortest = float(np.mean(all_shortest))
    else:
        weighted_diameter = 0.0
        graph_avg_shortest = 0.0

    clustering_coeff = float(nx.average_clustering(G))

    depot_cent, avg_customer_cent, customer_cent_std = centrality_stats(G)

    # Estimated total distance simple heuristic: two times avg depot distance per route
    estimated_total_distance = float(estimated_n_routes * 2 * avg_distance_to_depot)
    distance_per_customer_ratio = float(pairwise_distances.sum()) / N if N else 0.0

    # Build results dictionary with README first and exactly 50 additional parameters
    readme_text = (
        "This analysis constructs a weighted complete graph with the depot plus N customer "
        "nodes where edge weights equal the provided distance matrix. Using NetworkX we derive "
        "structural indicators such as density, clustering coefficient, minimum-spanning-tree "
        "statistics, weighted diameter and closeness centrality. Statistical features of the "
        "customer demand vector (moments, Gini, entropy) quantify heterogeneity that affects "
        "capacity reasoning. Pairwise and depot distance distributions supply dispersion and "
        "entropy measures that influence search neighbourhood size. We also compute simple "
        "workload estimates, e.g., minimal vehicle count given total demand and capacity. "
        "Correlating customer demand with distance to the depot captures structure exploited by "
        "routing heuristics. Edge percentiles and coefficient of variation summarise spatial "
        "skew, while MST length and edge variance proxy for overall spread. All 50 numeric "
        "characteristics cover size (n_customers), resource tightness (demand-to-capacity ratios), "
        "spatial layout, topological complexity and heterogeneity. These dimensions are known to "
        "strongly influence branching behaviour, constraint propagation and the effectiveness of "
        "cuts in VRP models, allowing the automatic configurator to relate observed performance to "
        "instance structure and pick the most beneficial search and decomposition parameters."
    )

    results = {
        "README": readme_text,
        "n_customers": N,
        "vehicle_capacity": capacity,
        "total_demand": total_demand,
        "avg_demand": avg_demand,
        "std_demand": std_demand,
        "min_demand": min_demand,
        "max_demand": max_demand,
        "demand_range": demand_range,
        "demand_skewness": demand_skew,
        "demand_kurtosis": demand_kurt,
        "demand_entropy": demand_entropy,
        "demand_gini_index": demand_gini,
        "demand_to_capacity_ratio": demand_to_capacity_ratio,
        "avg_demand_to_capacity_ratio": avg_demand_to_capacity_ratio,
        "max_demand_to_capacity_ratio": max_demand_to_capacity_ratio,
        "pct_customers_high_demand": pct_customers_high_demand,
        "estimated_n_routes": estimated_n_routes,
        "avg_customers_per_route": avg_customers_per_route,
        "capacity_slack_per_vehicle": capacity_slack_per_vehicle,
        "avg_distance_to_depot": avg_distance_to_depot,
        "std_distance_to_depot": std_distance_to_depot,
        "min_distance_to_depot": min_distance_to_depot,
        "max_distance_to_depot": max_distance_to_depot,
        "avg_pairwise_distance": avg_pairwise_distance,
        "std_pairwise_distance": std_pairwise_distance,
        "min_pairwise_distance": min_pairwise_distance,
        "max_pairwise_distance": max_pairwise_distance,
        "distance_variance": distance_variance,
        "distance_coefficient_variation": distance_cv,
        "distance_q1": distance_q1,
        "distance_median": distance_median,
        "distance_q3": distance_q3,
        "distance_skewness": distance_skew,
        "distance_kurtosis": distance_kurt,
        "distance_entropy": distance_entropy,
        "pct_short_edges": pct_short_edges,
        "pct_long_edges": pct_long_edges,
        "mst_total_length": mst_total_length,
        "mst_avg_edge_length": mst_avg_edge_length,
        "mst_std_edge_length": mst_std_edge_length,
        "graph_density": graph_density,
        "weighted_graph_diameter": weighted_diameter,
        "graph_avg_shortest_path_length": graph_avg_shortest,
        "graph_clustering_coefficient": clustering_coeff,
        "depot_closeness_centrality": depot_cent,
        "avg_customer_closeness_centrality": avg_customer_cent,
        "customer_centrality_std": customer_cent_std,
        "demand_distance_correlation": demand_distance_corr,
        "estimated_total_distance": estimated_total_distance,
        "distance_per_customer_ratio": distance_per_customer_ratio,
    }

    # Return the results using the helper function
    output_results(results)


# Run the feature extraction only when executed as a script, not on import.
if __name__ == "__main__":
    main()
