# Import helper functions for input/output
# IMPORTANT: You MUST import these functions at the beginning of your script
# DO NOT define your own versions of these functions!
from lmtune_helpers import input_data, output_results

# Import standard libraries for data analysis
import networkx as nx
import numpy as np


def _standardized_moments(values, mean, std):
    """Return ``(skewness, excess kurtosis)`` of *values*.

    Both are population (biased) moments computed from the supplied
    *mean* and *std*. When *std* is not positive the pair ``(0.0, 0.0)``
    is returned instead of dividing by zero.
    """
    if std > 0:
        centered = values - mean
        skew = (centered**3).mean() / (std**3)
        kurt = (centered**4).mean() / (std**4) - 3
        return skew, kurt
    return 0.0, 0.0


def _value_entropy(values):
    """Return the Shannon entropy (in bits) of the distinct values in *values*."""
    _, counts = np.unique(values, return_counts=True)
    total = counts.sum()
    probs = counts / total if total > 0 else counts
    # The small epsilon keeps log2 finite; it is part of the feature definition.
    return -np.sum(probs * np.log2(probs + 1e-12))


def main():
    """Extract instance characteristics from the problem data.

    Reads the instance through ``input_data()``, derives 50 numeric
    features from the ``N``, ``Capacity``, ``Demand`` and ``Distance``
    entries, and hands a dict to ``output_results()``. The dict always
    contains ``README``; on success it also contains
    ``characteristic_1`` .. ``characteristic_50``, and if anything raises
    during extraction it contains ``error`` with the exception message
    instead of the characteristics.
    """

    # Get the instance data using the helper function
    instance_data = input_data()

    # Initialize the results with README and empty placeholders
    results = {
        "README": (
            "This instance was analyzed by constructing a VRP graph where nodes "
            "represent the depot (node 0) and customers (nodes 1..N). We reshaped "
            "the flattened distance matrix into an (N+1)x(N+1) array and built an "
            "undirected weighted graph using NetworkX. Demand and distance arrays "
            "were processed with NumPy to compute statistical moments (mean, std, "
            "skewness, kurtosis), distribution measures (entropy, Gini index), "
            "and ratios relative to capacity. Graph structural metrics include "
            "density, degree statistics, clustering, transitivity, shortest path "
            "lengths, diameter, connectivity, and minimum spanning tree weights. "
            "Centrality measures (degree, closeness, betweenness) were computed "
            "for the depot and aggregated over customers to capture network roles. "
            "Route-related proxies like demand-to-capacity ratios, customer demand "
            "percentiles, and spatial variability (distance-to-depot statistics) "
            "are provided. This comprehensive 50-dimensional feature vector captures "
            "problem size, constraint tightness, spatial structure, and network "
            "complexity, supporting solver parameter tuning and performance prediction."
        )
    }

    try:
        # Basic instance parameters
        n = int(instance_data.get('N', 0))
        capacity = float(instance_data.get('Capacity', 0))
        demands = np.array(instance_data.get('Demand', []), dtype=float)
        dist_flat = np.array(instance_data.get('Distance', []), dtype=float)

        # Derived parameters
        size = n + 1  # including depot
        # Guard on the array itself rather than on N: the two can disagree,
        # and min()/max() of an empty array raise.
        demand_count = demands.size
        has_demands = demand_count > 0
        total_demand = demands.sum()
        avg_demand = demands.mean() if has_demands else 0.0
        std_demand = demands.std(ddof=0) if has_demands else 0.0
        min_demand = demands.min() if has_demands else 0.0
        max_demand = demands.max() if has_demands else 0.0

        # Skewness and kurtosis for demand
        skew_demand, kurt_demand = _standardized_moments(
            demands, avg_demand, std_demand
        )

        # Demand entropy (Shannon) and Gini index
        demand_entropy = _value_entropy(demands)
        # Gini index = mean absolute difference / (2 * mean). The normaliser
        # must use the number of entries that were actually differenced.
        diff_matrix = np.abs(demands[:, None] - demands[None, :])
        demand_gini = (
            diff_matrix.sum() / (2 * demand_count**2 * avg_demand)
            if avg_demand > 0 else 0.0
        )

        # Distance matrix and stats
        dist_matrix = dist_flat.reshape((size, size))
        dists = dist_flat[dist_flat > 0]  # exclude self
        has_dists = dists.size > 0
        total_dist_sum = dists.sum()
        avg_dist = dists.mean() if has_dists else 0.0
        std_dist = dists.std(ddof=0) if has_dists else 0.0
        min_dist = dists.min() if has_dists else 0.0
        max_dist = dists.max() if has_dists else 0.0

        # Skewness, kurtosis and entropy for distances
        skew_dist, kurt_dist = _standardized_moments(dists, avg_dist, std_dist)
        dist_entropy = _value_entropy(dists)

        # Build graph: one undirected edge per node pair, weighted by the
        # upper-triangle entry of the distance matrix.
        G = nx.Graph()
        G.add_nodes_from(range(size))
        for i in range(size):
            for j in range(i + 1, size):
                G.add_edge(i, j, weight=dist_matrix[i, j])

        # Graph statistics
        density = nx.density(G)
        degrees = np.array([d for _, d in G.degree()])
        avg_deg = degrees.mean() if degrees.size > 0 else 0.0
        std_deg = degrees.std(ddof=0) if degrees.size > 0 else 0.0
        # Weighted clustering is expensive, so compute it once and derive
        # both the average and the variance from the per-node values. The
        # plain sum/len matches what nx.average_clustering does internally.
        clustering_by_node = nx.clustering(G, weight='weight')
        avg_clust = sum(clustering_by_node.values()) / len(clustering_by_node)
        transitivity = nx.transitivity(G)
        try:
            avg_shortest = nx.average_shortest_path_length(G, weight='weight')
            # NOTE: diameter is hop-based (no weight passed), unlike avg_shortest.
            diameter = nx.diameter(G)
        except nx.NetworkXError:
            avg_shortest = float('inf')
            diameter = float('inf')
        components = list(nx.connected_components(G))
        num_comp = len(components)

        # Minimum spanning tree
        T = nx.minimum_spanning_tree(G, weight='weight')
        mst_weight = sum(d['weight'] for _, _, d in T.edges(data=True))
        mst_avg_edge = mst_weight / (size - 1) if size > 1 else 0.0

        # Centrality measures: depot value plus mean/max over customers
        deg_cent = nx.degree_centrality(G)
        clos_cent = nx.closeness_centrality(G, distance='weight')
        betw_cent = nx.betweenness_centrality(G, weight='weight')
        depot_deg_cent = deg_cent.get(0, 0.0)
        cust_deg = [deg_cent[i] for i in range(1, size)] if n > 0 else [0.0]
        avg_deg_cent_c = np.mean(cust_deg)
        max_deg_cent_c = np.max(cust_deg)
        cust_close = [clos_cent[i] for i in range(1, size)] if n > 0 else [0.0]
        avg_close_c = np.mean(cust_close)
        max_close_c = np.max(cust_close)
        cust_betw = [betw_cent[i] for i in range(1, size)] if n > 0 else [0.0]
        avg_betw_c = np.mean(cust_betw)
        max_betw_c = np.max(cust_betw)

        # Demand/capacity ratios
        demand_cap_ratio = total_demand / (capacity * n) if capacity * n > 0 else 0.0
        avg_demand_ratio = avg_demand / capacity if capacity > 0 else 0.0
        max_demand_ratio = max_demand / capacity if capacity > 0 else 0.0
        pct_high = np.sum(demands > 0.75 * capacity) / n if n > 0 else 0.0
        pct_low = np.sum(demands < 0.25 * capacity) / n if n > 0 else 0.0

        # Distance metrics relative to depot (row 0, excluding the depot itself)
        depot_dists = dist_matrix[0, 1:]
        has_depot_dists = depot_dists.size > 0
        avg_depot_dist = depot_dists.mean() if has_depot_dists else 0.0
        std_depot_dist = depot_dists.std(ddof=0) if has_depot_dists else 0.0
        min_depot_dist = depot_dists.min() if has_depot_dists else 0.0
        max_depot_dist = depot_dists.max() if has_depot_dists else 0.0
        spatial_var = std_depot_dist

        # Coefficients of variation
        cov_pairwise = std_dist / avg_dist if avg_dist > 0 else 0.0
        cov_demand = std_demand / avg_demand if avg_demand > 0 else 0.0
        # Clustering variance
        local_clust = np.array(list(clustering_by_node.values()))
        clust_var = local_clust.var() if local_clust.size > 0 else 0.0

        # Populate 50 characteristics; the position in this list defines
        # the characteristic number, so the order must not change.
        features = [
            n, size, capacity, total_demand, avg_demand, std_demand, min_demand, max_demand,
            skew_demand, kurt_demand, demand_entropy, demand_gini, total_dist_sum,
            avg_dist, std_dist, min_dist, max_dist, skew_dist, kurt_dist, dist_entropy,
            density, avg_deg, std_deg, diameter, avg_clust, transitivity,
            avg_shortest, num_comp, mst_weight, mst_avg_edge, depot_deg_cent,
            avg_deg_cent_c, max_deg_cent_c, avg_close_c, max_close_c, avg_betw_c,
            max_betw_c, demand_cap_ratio, avg_demand_ratio, max_demand_ratio,
            pct_high, pct_low, spatial_var, avg_depot_dist, std_depot_dist,
            min_depot_dist, max_depot_dist, cov_pairwise, cov_demand, clust_var
        ]
        for idx, val in enumerate(features, start=1):
            results[f'characteristic_{idx}'] = float(val)

    except Exception as e:
        # Top-level boundary: report the failure through the results dict
        # rather than crashing, so output_results() is always called.
        results['error'] = str(e)

    # Return the results using the helper function
    output_results(results)

# Run the feature extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
