# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Third-party libraries: graph analysis (networkx) and numerics (numpy)
import networkx as nx
import numpy as np


def safe_div(num, den):
    """Divide ``num`` by ``den``, yielding 0.0 for any falsy denominator.

    A falsy ``den`` (0, 0.0, None, ...) short-circuits to 0.0 so callers
    never see a ZeroDivisionError; otherwise the plain quotient is returned.
    """
    if not den:
        return 0.0
    return num / den


def compute_skewness(data):
    """Return the Fisher-Pearson coefficient of skewness of *data*.

    The coefficient is the third central moment divided by the cube of the
    population standard deviation (``ddof=0``).

    Args:
        data: Array-like of numbers. It is converted to a float array and
            flattened, so lists and tuples are accepted as well as arrays.

    Returns:
        The skewness as a Python float. 0.0 is returned when there are fewer
        than three values or when the values have no spread.
    """
    values = np.asarray(data, dtype=float).ravel()
    if values.size < 3:
        return 0.0
    # Constant data must be detected exactly: the mean of identical values
    # can be off by one rounding step (e.g. three copies of 0.1), which
    # leaves a tiny non-zero std and would produce a bogus skewness of +/-1.
    if values.max() == values.min():
        return 0.0
    std = values.std(ddof=0)
    if std == 0:
        return 0.0
    centered = values - values.mean()
    return float((centered ** 3).mean() / (std ** 3))


def compute_kurtosis(data):
    """Return the excess kurtosis (kurtosis - 3) of *data*.

    The kurtosis is the fourth central moment divided by the fourth power of
    the population standard deviation (``ddof=0``); 3.0 is subtracted from it.

    Args:
        data: Array-like of numbers. It is converted to a float array and
            flattened, so lists and tuples are accepted as well as arrays.

    Returns:
        The excess kurtosis as a Python float. 0.0 is returned when there are
        fewer than four values or when the values have no spread.
    """
    values = np.asarray(data, dtype=float).ravel()
    if values.size < 4:
        return 0.0
    # Constant data must be detected exactly: rounding in the mean of
    # identical values can leave a tiny non-zero std, which would produce a
    # bogus excess kurtosis of -2 instead of the intended 0.
    if values.max() == values.min():
        return 0.0
    std = values.std(ddof=0)
    if std == 0:
        return 0.0
    centered = values - values.mean()
    return float((centered ** 4).mean() / (std ** 4) - 3.0)


def analyze_instance(instance):
    """Compute 50 numeric characteristics of a VRP instance.

    The instance is treated as a complete undirected weighted graph in which
    node 0 is the depot and nodes 1..N are the customers. Only the upper
    triangle of the distance matrix is used for the graph edges.

    Args:
        instance: Mapping read with ``.get``. Keys used are ``"N"`` (number
            of customers), ``"Capacity"`` (vehicle capacity), ``"Demand"``
            (sequence of demands) and ``"Distance"`` (flattened row-major
            ``(N + 1) x (N + 1)`` distance matrix). Missing keys default to
            0 or an empty sequence.

    Returns:
        A dict holding a ``"README"`` description plus the entries
        ``"characteristic_1"`` through ``"characteristic_50"``.
    """

    # ------------------------------------------------------------------
    # 1. Basic data extraction
    # ------------------------------------------------------------------
    N = int(instance.get("N", 0))  # number of customers
    capacity = float(instance.get("Capacity", 0))
    demand = np.array(instance.get("Demand", []), dtype=float)
    distance_flat = np.array(instance.get("Distance", []), dtype=float)

    # Sanity checks & shape construction
    n_nodes = N + 1  # include depot
    expected_size = n_nodes * n_nodes
    if distance_flat.size != expected_size:
        # Best effort on malformed data: np.resize truncates an over-long
        # array and fills an under-long one by cyclically repeating its
        # values (an empty array becomes all zeros).
        distance_flat = np.resize(distance_flat, expected_size)

    distance_matrix = distance_flat.reshape((n_nodes, n_nodes))

    # ------------------------------------------------------------------
    # 2. Demand statistics
    # ------------------------------------------------------------------
    # Guard on the array itself as well as on N: min()/max() raise on an
    # empty array, which would otherwise abort the whole analysis when N > 0
    # but the demand list is missing.
    has_demand = bool(N) and demand.size > 0

    total_demand = float(demand.sum())
    avg_demand = float(demand.mean()) if has_demand else 0.0
    std_demand = float(demand.std(ddof=0)) if has_demand else 0.0
    min_demand = float(demand.min()) if has_demand else 0.0
    max_demand = float(demand.max()) if has_demand else 0.0
    skew_demand = compute_skewness(demand) if has_demand else 0.0
    kurt_demand = compute_kurtosis(demand) if has_demand else 0.0
    var_demand = float(demand.var(ddof=0)) if has_demand else 0.0
    coeff_var_demand = safe_div(std_demand, avg_demand)

    demand_to_capacity_ratio = safe_div(total_demand, capacity)
    avg_demand_to_capacity_ratio = safe_div(avg_demand, capacity)
    max_demand_to_capacity_ratio = safe_div(max_demand, capacity)

    # Share of customers whose demand exceeds half a vehicle load
    pct_customers_high_demand = float(safe_div(np.sum(demand > 0.5 * capacity), N))

    # ------------------------------------------------------------------
    # 3. Simple routing heuristics
    # ------------------------------------------------------------------
    # ceil(total demand / capacity) is a lower bound on the number of routes
    estimated_n_routes = int(np.ceil(safe_div(total_demand, capacity))) if capacity else 0
    capacity_utilization = safe_div(total_demand, estimated_n_routes * capacity) if estimated_n_routes else 0.0
    avg_customers_per_route = safe_div(N, estimated_n_routes)

    # ------------------------------------------------------------------
    # 4. Distance statistics (to depot and pairwise)
    # ------------------------------------------------------------------
    dist_to_depot = distance_matrix[0, 1:]
    avg_dist_depot = float(dist_to_depot.mean()) if N else 0.0
    std_dist_depot = float(dist_to_depot.std(ddof=0)) if N else 0.0
    min_dist_depot = float(dist_to_depot.min()) if N else 0.0
    max_dist_depot = float(dist_to_depot.max()) if N else 0.0

    # Pairwise customer-customer distances (exclude depot and diagonal)
    if N > 1:
        cust_pairwise = distance_matrix[1:, 1:]
        off_diagonal = ~np.eye(N, dtype=bool)
        pairwise_vals = cust_pairwise[off_diagonal]
    else:
        pairwise_vals = np.array([])

    avg_pairwise = float(pairwise_vals.mean()) if pairwise_vals.size else 0.0
    std_pairwise = float(pairwise_vals.std(ddof=0)) if pairwise_vals.size else 0.0
    min_pairwise = float(pairwise_vals.min()) if pairwise_vals.size else 0.0
    max_pairwise = float(pairwise_vals.max()) if pairwise_vals.size else 0.0

    # Overall distance distribution stats (every matrix entry, diagonal included)
    all_distances = distance_matrix.flatten()
    std_distance_overall = float(all_distances.std(ddof=0))
    skew_distance = compute_skewness(all_distances)
    kurt_distance = compute_kurtosis(all_distances)

    # ------------------------------------------------------------------
    # 5. Graph-based analysis
    # ------------------------------------------------------------------
    G = nx.Graph()
    G.add_nodes_from(range(n_nodes))
    # One edge per unordered node pair, weighted by the upper-triangle entry.
    # triu_indices yields the pairs in the same row-major order as a nested
    # i < j loop; tolist() hands plain Python ints/floats to NetworkX.
    rows, cols = np.triu_indices(n_nodes, k=1)
    G.add_weighted_edges_from(
        zip(rows.tolist(), cols.tolist(), distance_matrix[rows, cols].tolist())
    )

    graph_density = nx.density(G)
    graph_avg_degree = float(np.mean([deg for _, deg in G.degree()]))

    # Both path metrics are best effort: any failure falls back to 0.0 so
    # the remaining characteristics are still produced.
    try:
        graph_diameter = nx.diameter(G)  # hop-count (unweighted) diameter
    except Exception:
        graph_diameter = 0.0
    try:
        graph_avg_shortest_path = nx.average_shortest_path_length(G, weight='weight')
    except Exception:
        graph_avg_shortest_path = 0.0

    clustering_coeff = nx.average_clustering(G)

    closeness_centrality = nx.closeness_centrality(G, distance='weight')
    depot_centrality = float(closeness_centrality.get(0, 0.0))
    customer_centralities = [v for k, v in closeness_centrality.items() if k != 0]
    avg_customer_cc = float(np.mean(customer_centralities)) if customer_centralities else 0.0
    std_customer_cc = float(np.std(customer_centralities, ddof=0)) if customer_centralities else 0.0

    betweenness = nx.betweenness_centrality(G, weight='weight')
    depot_bc = float(betweenness.get(0, 0.0))
    customer_bcs = [v for k, v in betweenness.items() if k != 0]
    avg_customer_bc = float(np.mean(customer_bcs)) if customer_bcs else 0.0
    std_customer_bc = float(np.std(customer_bcs, ddof=0)) if customer_bcs else 0.0

    # Minimum Spanning Tree metrics
    mst = nx.minimum_spanning_tree(G, weight='weight')
    mst_edge_weights = [d['weight'] for _, _, d in mst.edges(data=True)]
    mst_total = float(sum(mst_edge_weights))
    mst_avg_edge = float(np.mean(mst_edge_weights)) if mst_edge_weights else 0.0
    mst_std_edge = float(np.std(mst_edge_weights, ddof=0)) if mst_edge_weights else 0.0

    # ------------------------------------------------------------------
    # 6. Model size estimations (variables & constraints)
    # ------------------------------------------------------------------
    binary_vars = (n_nodes) * (n_nodes)  # x[i,j] for 0..N,0..N
    int_vars = N  # u[i]
    total_vars = binary_vars + int_vars

    total_constraints = (2 * N) + 2 + (N * N) + N  # derived from model description
    var_to_con_ratio = safe_div(total_vars, total_constraints)
    arc_var_ratio = safe_div(binary_vars, total_constraints)

    # Ratio of avg customer-customer distance to avg depot distance
    spatial_dispersion_ratio = safe_div(avg_pairwise, avg_dist_depot)

    # ------------------------------------------------------------------
    # 7. Assemble results dictionary (ensuring exactly 50 characteristics)
    # ------------------------------------------------------------------
    results = {
        "README": (
            "The VRP instance is interpreted as a fully connected weighted graph where node 0 is the depot and "
            "nodes 1..N are customers. Using the flattened distance matrix we reconstructed an undirected graph "
            "in NetworkX and computed global properties (density, diameter, average weighted path length, clustering). "
            "We also calculated node-level centrality metrics to capture how central the depot is relative to customers, "
            "as routing difficulty often depends on depot accessibility. Demand statistics (mean, variance, skewness, "
            "kurtosis) were extracted to reflect load variability, while ratios to vehicle capacity highlight loading "
            "tightness. Simple heuristics such as the minimum number of routes and capacity utilisation give an early "
            "estimate of solution structure. Distance features include depot-to-customer and customer-to-customer "
            "statistics, overall edge-weight distribution, and MST metrics which approximate network backbone length. "
            "Finally, counts of MiniZinc decision variables and constraints produce a crude size measure of the CP model, "
            "and ratios such as variables per constraint estimate propagation workload. Collectively, these 50 numeric "
            "characteristics summarise size, load distribution, spatial structure and graph complexity, providing a rich "
            "feature vector for downstream algorithm selection and parameter tuning."),

        # Demand/size related characteristics
        "characteristic_1": N,  # number of customers
        "characteristic_2": capacity,  # vehicle capacity
        "characteristic_3": total_demand,  # total customer demand
        "characteristic_4": avg_demand,  # average demand
        "characteristic_5": std_demand,  # demand std dev
        "characteristic_6": min_demand,  # min demand
        "characteristic_7": max_demand,  # max demand
        "characteristic_8": skew_demand,  # demand skewness
        "characteristic_9": kurt_demand,  # demand kurtosis
        "characteristic_10": demand_to_capacity_ratio,  # total demand / capacity
        "characteristic_11": avg_demand_to_capacity_ratio,  # avg demand / capacity
        "characteristic_12": max_demand_to_capacity_ratio,  # max demand / capacity
        "characteristic_13": pct_customers_high_demand,  # pct demand > 0.5*cap
        "characteristic_14": estimated_n_routes,  # min vehicles
        "characteristic_15": capacity_utilization,  # utilisation estimate
        "characteristic_16": avg_customers_per_route,  # customers per route
        # Distance features to depot
        "characteristic_17": avg_dist_depot,  # avg distance to depot
        "characteristic_18": std_dist_depot,  # std distance to depot
        "characteristic_19": min_dist_depot,  # min distance to depot
        "characteristic_20": max_dist_depot,  # max distance to depot
        # Customer pairwise distance features
        "characteristic_21": avg_pairwise,  # avg customer distance
        "characteristic_22": std_pairwise,  # std customer distance
        "characteristic_23": min_pairwise,  # min customer distance
        "characteristic_24": max_pairwise,  # max customer distance
        # Graph wide metrics
        "characteristic_25": graph_density,  # graph density
        "characteristic_26": graph_avg_degree,  # avg degree
        "characteristic_27": graph_diameter,  # diameter (unweighted)
        "characteristic_28": graph_avg_shortest_path,  # avg weighted path length
        "characteristic_29": clustering_coeff,  # global clustering
        # Centrality metrics
        "characteristic_30": depot_centrality,  # depot closeness
        "characteristic_31": avg_customer_cc,  # avg customer closeness
        "characteristic_32": std_customer_cc,  # std customer closeness
        "characteristic_33": depot_bc,  # depot betweenness
        "characteristic_34": avg_customer_bc,  # avg customer betweenness
        "characteristic_35": std_customer_bc,  # std customer betweenness
        # MST metrics
        "characteristic_36": mst_total,  # total MST length
        "characteristic_37": mst_avg_edge,  # avg MST edge length
        "characteristic_38": mst_std_edge,  # std MST edge length
        # Overall distance distribution
        "characteristic_39": std_distance_overall,  # std of all distances
        "characteristic_40": skew_distance,  # skewness of distances
        "characteristic_41": kurt_distance,  # kurtosis of distances
        # Model size metrics
        "characteristic_42": binary_vars,  # number of binary vars
        "characteristic_43": int_vars,  # number of int vars
        "characteristic_44": total_vars,  # total vars
        "characteristic_45": total_constraints,  # constraint count
        "characteristic_46": var_to_con_ratio,  # vars / constraints
        "characteristic_47": arc_var_ratio,  # binary var / constraints
        # Demand variance
        "characteristic_48": var_demand,  # demand variance
        "characteristic_49": coeff_var_demand,  # coeff of variation
        # Spare characteristic: ratio of avg pairwise dist to avg depot dist
        "characteristic_50": spatial_dispersion_ratio,  # spatial dispersion ratio
    }

    return results


def main():
    """Read an instance, analyse it and emit the resulting report.

    The instance comes from ``input_data()`` and the report goes to
    ``output_results()``. If the analysis raises, a minimal report carrying
    the error text is emitted instead so the output step always runs.
    """
    instance = input_data()

    try:
        report = analyze_instance(instance)
    except Exception as exc:
        # Top-level boundary: turn any analysis failure into an error report
        report = {
            "README": "Error encountered during analysis.",
            "error": str(exc),
        }

    output_results(report)


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
