# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Import third-party libraries for graph and numerical analysis
import networkx as nx
import numpy as np


def compute_skewness(data, mean_val, std_val):
    """Return the moment-based skewness of *data*.

    Evaluates ``mean((data - mean_val) ** 3) / std_val ** 3``, i.e. the third
    standardized moment with no small-sample bias correction. The mean and
    standard deviation are taken from the caller rather than recomputed.

    Args:
        data: Array-like of numeric values (list, tuple or NumPy array).
        mean_val: Mean of *data*.
        std_val: Standard deviation of *data*.

    Returns:
        The skewness as a built-in ``float``. ``0.0`` is returned when
        *data* is empty or *std_val* is zero, because the ratio is undefined
        in both cases.
    """
    # Coerce once so plain Python sequences support the arithmetic below.
    values = np.asarray(data, dtype=float)
    if values.size == 0 or std_val == 0:
        return 0.0
    third_moment = np.mean((values - mean_val) ** 3)
    return float(third_moment / (std_val ** 3))


def compute_kurtosis(data, mean_val, std_val):
    """Return the moment-based kurtosis of *data*.

    Evaluates ``mean((data - mean_val) ** 4) / std_val ** 4``, i.e. the
    fourth standardized moment. This is the raw kurtosis: 3 is NOT
    subtracted, so the value is not "excess" kurtosis. No small-sample bias
    correction is applied. The mean and standard deviation are taken from
    the caller rather than recomputed.

    Args:
        data: Array-like of numeric values (list, tuple or NumPy array).
        mean_val: Mean of *data*.
        std_val: Standard deviation of *data*.

    Returns:
        The kurtosis as a built-in ``float``. ``0.0`` is returned when
        *data* is empty or *std_val* is zero, because the ratio is undefined
        in both cases.
    """
    # Coerce once so plain Python sequences support the arithmetic below.
    values = np.asarray(data, dtype=float)
    if values.size == 0 or std_val == 0:
        return 0.0
    fourth_moment = np.mean((values - mean_val) ** 4)
    return float(fourth_moment / (std_val ** 4))


def compute_entropy(data_array, bins=10):
    """Return the Shannon entropy (in nats) of a histogram of *data_array*.

    The data are binned with ``np.histogram`` and the bin counts are
    normalised to probabilities ``p_i = count_i / total``. The entropy is
    ``-sum(p_i * log(p_i))`` over the non-empty bins.

    Raw counts are used instead of ``density=True``: density values are
    divided by the bin width, so they are not probabilities and would make
    the result depend on the scale of the data (and go negative for
    tightly concentrated samples).

    Args:
        data_array: Array-like of numeric values.
        bins: Bin specification forwarded to ``np.histogram``.

    Returns:
        The entropy as a built-in ``float`` in ``[0, log(number of bins)]``.
        ``0.0`` is returned for empty input.
    """
    counts, _ = np.histogram(np.asarray(data_array, dtype=float), bins=bins)
    total = counts.sum()
    if total == 0:
        # No observations at all: nothing to normalise.
        return 0.0
    probabilities = counts[counts > 0] / total
    entropy = -float(np.sum(probabilities * np.log(probabilities)))
    # Adding 0.0 turns the "-0.0" produced by a single occupied bin into 0.0.
    return entropy + 0.0


def main():
    """Extract instance characteristics from the VRP problem instance.

    Reads the instance through ``input_data()`` (keys used: ``'N'``,
    ``'Capacity'``, ``'Demand'`` and ``'Distance'``), derives demand,
    distance, graph, minimum-spanning-tree, entropy and ratio statistics,
    and passes a dictionary with a ``"README"`` entry plus
    ``"characteristic_1"`` .. ``"characteristic_50"`` to ``output_results()``.

    Any exception raised while computing the features is caught and reported
    as ``{"error": <message>}`` instead, so ``output_results()`` is always
    called exactly once.
    """
    results = {}

    try:
        # Get instance data from helper function (no file I/O required)
        instance_data = input_data()

        # Extract basic parameters
        n = instance_data.get('N', 0)  # number of customer nodes
        capacity = instance_data.get('Capacity', 0)
        demand_list = instance_data.get('Demand', [])
        distance_flat = instance_data.get('Distance', [])

        # Demand statistics; every value falls back to 0.0 for an empty demand list
        demands = np.array(demand_list, dtype=float)
        total_demand = float(np.sum(demands)) if demands.size > 0 else 0.0
        avg_demand = float(np.mean(demands)) if demands.size > 0 else 0.0
        std_demand = float(np.std(demands)) if demands.size > 0 else 0.0
        min_demand = float(np.min(demands)) if demands.size > 0 else 0.0
        max_demand = float(np.max(demands)) if demands.size > 0 else 0.0
        demand_skewness = compute_skewness(demands, avg_demand, std_demand) if demands.size > 0 else 0.0
        demand_kurtosis = compute_kurtosis(demands, avg_demand, std_demand) if demands.size > 0 else 0.0

        # Reconstruct the square distance matrix from the flattened list
        num_nodes = n + 1  # including depot
        if len(distance_flat) != num_nodes * num_nodes:
            raise ValueError('Distance array length does not match expected size (N+1)^2.')
        distance_matrix = np.array(distance_flat, dtype=float).reshape((num_nodes, num_nodes))
        # Mean, std and max are taken over the full matrix, diagonal included
        avg_distance = float(np.mean(distance_matrix))
        std_distance = float(np.std(distance_matrix))
        # Ignore the diagonal (self-distances) when taking the minimum
        mask = ~np.eye(num_nodes, dtype=bool)
        non_diag_distances = distance_matrix[mask]
        min_distance = float(np.min(non_diag_distances)) if non_diag_distances.size > 0 else 0.0
        max_distance = float(np.max(distance_matrix))

        # Distance from depot (node 0) to customers
        depot_distances = distance_matrix[0, 1:]
        avg_distance_to_depot = float(np.mean(depot_distances)) if depot_distances.size > 0 else 0.0
        std_distance_to_depot = float(np.std(depot_distances)) if depot_distances.size > 0 else 0.0
        min_distance_to_depot = float(np.min(depot_distances)) if depot_distances.size > 0 else 0.0
        max_distance_to_depot = float(np.max(depot_distances)) if depot_distances.size > 0 else 0.0

        # Pairwise distances: strict upper triangle only, so each unordered
        # pair is counted once using the entry distance_matrix[i, j] with i < j
        indices = np.triu_indices(num_nodes, k=1)
        pairwise_distances = distance_matrix[indices]
        avg_pairwise_distance = float(np.mean(pairwise_distances)) if pairwise_distances.size > 0 else 0.0
        std_pairwise_distance = float(np.std(pairwise_distances)) if pairwise_distances.size > 0 else 0.0
        min_pairwise_distance = float(np.min(pairwise_distances)) if pairwise_distances.size > 0 else 0.0
        max_pairwise_distance = float(np.max(pairwise_distances)) if pairwise_distances.size > 0 else 0.0

        # Build a complete graph using NetworkX with nodes 0..n and edge weights from distance matrix
        G = nx.Graph()
        for i in range(num_nodes):
            G.add_node(i)
        for i in range(num_nodes):
            for j in range(i+1, num_nodes):
                G.add_edge(i, j, weight=distance_matrix[i, j])

        graph_avg_degree = float(np.mean([d for _, d in G.degree()]))
        graph_density = nx.density(G)
        graph_clustering = nx.average_clustering(G)
        # For diameter and average shortest path, use unweighted graph (as the structure is complete, these are trivial)
        H = nx.complete_graph(num_nodes)
        graph_diameter = nx.diameter(H)
        graph_avg_shortest_path = nx.average_shortest_path_length(H)
        depot_centrality = nx.degree_centrality(G)[0]  # centrality of depot

        # Compute Minimum Spanning Tree (MST) and its statistics
        MST = nx.minimum_spanning_tree(G, weight='weight')
        mst_edges = [d['weight'] for (u, v, d) in MST.edges(data=True)]
        mst_total_length = float(np.sum(mst_edges)) if mst_edges else 0.0
        mst_std_edge_length = float(np.std(mst_edges)) if mst_edges else 0.0

        # Entropy measures for distances and demands (using 10 bins)
        distance_entropy = compute_entropy(pairwise_distances, bins=10)
        demand_entropy = compute_entropy(demands, bins=10) if demands.size > 0 else 0.0

        # Correlation between depot distances and demands for customers.
        # np.corrcoef divides by the standard deviation of each series, so a
        # constant series would produce NaN; require both spreads to be
        # positive and report 0.0 otherwise.
        # NOTE(review): when the two arrays differ in length (e.g. if 'Demand'
        # also carries a depot entry) this silently falls back to 0.0 —
        # confirm the input schema.
        if (
            depot_distances.size == demands.size
            and demands.size > 1
            and std_distance_to_depot > 0
            and std_demand > 0
        ):
            corr_matrix = np.corrcoef(depot_distances, demands)
            distance_demand_correlation = float(corr_matrix[0, 1])
        else:
            distance_demand_correlation = 0.0

        # Ratio features; each is 0.0 when its denominator is zero
        capacity_utilization_estimate = total_demand / capacity if capacity != 0 else 0.0
        # Share of customers whose demand is strictly above the mean demand
        pct_customers_high_demand = float(np.sum(demands > avg_demand) / len(demands)) if demands.size > 0 else 0.0
        std_demand_ratio = std_demand / avg_demand if avg_demand != 0 else 0.0
        avg_demand_to_capacity_ratio = avg_demand / capacity if capacity != 0 else 0.0
        max_demand_to_capacity_ratio = max_demand / capacity if capacity != 0 else 0.0
        min_demand_to_capacity_ratio = min_demand / capacity if capacity != 0 else 0.0

        # Dummy values for spatial and clustering properties not available in VRP (since no coordinates provided)
        customer_x_std = 0.0
        customer_y_std = 0.0
        spatial_entropy = 0.0
        spatial_gini_index = 0.0
        kmeans_inertia = 0.0
        silhouette_score = 0.0
        n_clusters_kmeans = 0
        avg_cluster_size = 0.0
        cluster_size_std = 0.0
        inter_cluster_distance = 0.0

        # Additional measure: distance per customer ratio (average depot distance divided by number of customers)
        distance_per_customer_ratio = avg_distance_to_depot / n if n != 0 else 0.0

        # Assemble the results dictionary with exactly 50 characteristic parameters
        results = {
            "README": "This VRP instance was analyzed by first extracting the basic parameters from the JSON input, including the number of customer nodes, vehicle capacity, demands, and the flattened distance matrix. The distance matrix was reshaped into a 2D array to compute comprehensive statistics such as average, standard deviation, minimum and maximum distances across all node pairs, and specifically from the depot to each customer. Statistical properties of the customer demands including total, mean, standard deviation, skewness, and kurtosis were calculated to understand demand distribution. A complete weighted graph was constructed using NetworkX, from which graph-based metrics such as average degree, density, clustering coefficient, diameter, average shortest path, and depot centrality were extracted. A minimum spanning tree was computed to estimate the underlying connectivity and edge length variability. Additionally, entropy metrics for both distances and demands were derived to capture distribution randomness, and a correlation between depot distances and demands was measured. Further, ratios including demand-to-capacity and various relative measures were calculated to assess load distribution, while placeholders were provided for spatial and clustering analyses not applicable due to lack of coordinates. This detailed multi-faceted analysis provides 50 distinct technical indicators that reflect both the statistical and structural complexities of the VRP instance, offering insights that may inform solver configuration and optimization strategies.",
            "characteristic_1": float(n),
            "characteristic_2": float(num_nodes),
            "characteristic_3": float(capacity),
            "characteristic_4": total_demand,
            "characteristic_5": avg_demand,
            "characteristic_6": std_demand,
            "characteristic_7": min_demand,
            "characteristic_8": max_demand,
            "characteristic_9": demand_skewness,
            "characteristic_10": demand_kurtosis,
            "characteristic_11": avg_distance,
            "characteristic_12": std_distance,
            "characteristic_13": min_distance,
            "characteristic_14": max_distance,
            "characteristic_15": avg_distance_to_depot,
            "characteristic_16": std_distance_to_depot,
            "characteristic_17": min_distance_to_depot,
            "characteristic_18": max_distance_to_depot,
            "characteristic_19": avg_pairwise_distance,
            "characteristic_20": std_pairwise_distance,
            "characteristic_21": min_pairwise_distance,
            "characteristic_22": max_pairwise_distance,
            "characteristic_23": graph_avg_degree,
            "characteristic_24": graph_density,
            "characteristic_25": graph_clustering,
            "characteristic_26": float(graph_diameter),
            "characteristic_27": graph_avg_shortest_path,
            "characteristic_28": depot_centrality,
            "characteristic_29": mst_total_length,
            "characteristic_30": mst_std_edge_length,
            "characteristic_31": distance_entropy,
            "characteristic_32": demand_entropy,
            "characteristic_33": distance_demand_correlation,
            "characteristic_34": capacity_utilization_estimate,
            "characteristic_35": pct_customers_high_demand,
            "characteristic_36": std_demand_ratio,
            "characteristic_37": avg_demand_to_capacity_ratio,
            "characteristic_38": max_demand_to_capacity_ratio,
            "characteristic_39": min_demand_to_capacity_ratio,
            "characteristic_40": customer_x_std,
            "characteristic_41": customer_y_std,
            "characteristic_42": spatial_entropy,
            "characteristic_43": spatial_gini_index,
            "characteristic_44": kmeans_inertia,
            "characteristic_45": silhouette_score,
            "characteristic_46": float(n_clusters_kmeans),
            "characteristic_47": avg_cluster_size,
            "characteristic_48": cluster_size_std,
            "characteristic_49": inter_cluster_distance,
            "characteristic_50": distance_per_customer_ratio
        }

    except Exception as e:
        # Top-level boundary: report the failure through the normal output
        # channel instead of crashing, replacing any partial results.
        results = {"error": str(e)}

    # Output the results using the provided helper function
    output_results(results)


# Run the feature extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
