# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Import third-party (networkx, numpy) and standard-library (math) modules used for the analysis
import networkx as nx
import numpy as np
import math


# Free-text description of the extraction method, emitted verbatim under the "README" key.
_README = "This instance was analyzed by extracting statistical and structural properties from the input VRP instance described in JSON format. First, the basic parameters like the number of customer nodes (N), vehicle capacity, and customer demands were identified. The demand array was analyzed to compute summary statistics including total demand, mean, standard deviation, minimum, maximum, skewness, kurtosis, variance, and coefficients of variation. The flattened distance matrix was reshaped into a square matrix representing distances among the depot and customers. Off-diagonal elements were used to calculate pairwise distance statistics such as mean, standard deviation, minimum, maximum, median, variance, and related ratios. A graph was constructed using NetworkX where nodes represent the depot and customers, and weighted edges represent distances. Based on this graph, characteristics such as graph density, average degree, connectivity, and a minimum spanning tree (MST) were computed to assess structural connectivity. Additional VRP-specific features were derived, including estimated number of routes based on capacity constraints, capacity utilization, and heuristic estimates of route lengths. These 50 parameters collectively capture information about problem size, data distribution, network structure, and intrinsic complexity, all of which can help in tuning solver parameters for optimal performance. The extraction methodology combines numerical statistical analysis with graph-theoretic measures to provide a thorough profile of the problem instance."

# Number of numeric "characteristic_<k>" entries written to the output.
_N_CHARACTERISTICS = 50


def _basic_stats(values):
    """Return ``(mean, std, min, max)`` of a NumPy array, or four zeros if it is empty."""
    if values.size == 0:
        return 0, 0, 0, 0
    return np.mean(values), np.std(values), np.min(values), np.max(values)


def _median_or_zero(values):
    """Return the median of a NumPy array as a float, or 0 if it is empty."""
    return float(np.median(values)) if values.size > 0 else 0


def _skewness_kurtosis(values, mean, std):
    """Return the mean cubed and mean fourth-power z-score of ``values``.

    These are the population skewness and the (non-excess) kurtosis.
    Both are reported as 0 when ``std`` is not positive, which also covers
    empty and constant inputs and avoids dividing by zero.
    """
    if std > 0:
        z_scores = (values - mean) / std
        return np.mean(z_scores ** 3), np.mean(z_scores ** 4)
    return 0, 0


def _ratio_or_zero(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is zero."""
    return numerator / denominator if denominator != 0 else 0


def _graph_metrics(distance_mat):
    """Compute graph and MST metrics for the complete graph over all nodes.

    Args:
        distance_mat: Square NumPy array of pairwise distances.

    Returns:
        Tuple ``(density, avg_degree, mst_total_length, mst_avg_edge_length,
        mst_std_edge_length, mst_diameter)``.
    """
    n_nodes = distance_mat.shape[0]
    graph = nx.Graph()
    graph.add_nodes_from(range(n_nodes))
    # Only the upper triangle is read: edge (i, j) with i < j is weighted by
    # distance_mat[i, j]; distance_mat[j, i] is never consulted.
    graph.add_weighted_edges_from(
        (i, j, distance_mat[i, j])
        for i in range(n_nodes)
        for j in range(i + 1, n_nodes)
    )

    density = nx.density(graph)
    avg_degree = np.mean([degree for _, degree in graph.degree()])

    mst = nx.minimum_spanning_tree(graph, weight='weight')
    mst_weights = [weight for _, _, weight in mst.edges(data='weight')]
    mst_total_length = sum(mst_weights)  # sum([]) is 0 for a single-node graph
    mst_avg_edge_length = mst_total_length / (n_nodes - 1) if n_nodes > 1 else 0
    mst_std_edge_length = np.std(mst_weights) if mst_weights else 0

    # diameter() is called without a weight argument, so this is NetworkX's
    # default (unweighted, hop-count) diameter of the MST, used as a proxy
    # for the overall spread of the instance.
    mst_diameter = nx.diameter(mst) if nx.is_connected(mst) else 0

    return (density, avg_degree, mst_total_length, mst_avg_edge_length,
            mst_std_edge_length, mst_diameter)


def main():
    """Extract 50 numeric characteristics from a VRP instance and emit them.

    The instance is obtained from ``input_data()`` and is read through the
    keys ``'N'`` (number of customers), ``'Capacity'``, ``'Demand'`` and
    ``'Distance'``. ``'Distance'`` must be a flat sequence of
    ``(N + 1) ** 2`` values; it is reshaped to a square matrix in which
    node 0 is treated as the depot.

    The result dictionary always contains ``"README"`` and
    ``"characteristic_1"`` .. ``"characteristic_50"``. If anything raises
    during extraction, every characteristic keeps its ``0.0`` placeholder
    and the exception text is stored under ``"error"``. The dictionary is
    handed to ``output_results()`` in either case.
    """
    # Start from zeroed placeholders so the output schema is stable on failure.
    results = {"README": _README}
    for index in range(1, _N_CHARACTERISTICS + 1):
        results["characteristic_{}".format(index)] = 0.0

    try:
        instance_data = input_data()

        # Basic parameters.
        n_customers = instance_data.get('N', 0)
        capacity = instance_data.get('Capacity', 0)
        demand_list = instance_data.get('Demand', [])
        distance_flat = instance_data.get('Distance', [])
        n_total_nodes = n_customers + 1  # customers plus the depot

        # Demand statistics (a missing or empty demand list yields all zeros).
        demand = np.asarray(demand_list if demand_list else [])
        total_demand = np.sum(demand) if demand.size > 0 else 0
        avg_demand, std_demand, min_demand, max_demand = _basic_stats(demand)
        variance_demand = std_demand ** 2
        median_demand = _median_or_zero(demand)
        demand_skewness, demand_kurtosis = _skewness_kurtosis(
            demand, avg_demand, std_demand)

        # Distance matrix: validate the flat length before reshaping.
        if len(distance_flat) != n_total_nodes * n_total_nodes:
            raise ValueError("Distance matrix size does not match expected dimensions.")
        distance_mat = np.array(distance_flat).reshape((n_total_nodes, n_total_nodes))

        # Pairwise distances: every matrix entry except the diagonal.
        off_diag_distances = distance_mat[~np.eye(n_total_nodes, dtype=bool)]
        (avg_pairwise_distance, std_pairwise_distance,
         min_pairwise_distance, max_pairwise_distance) = _basic_stats(off_diag_distances)
        variance_pairwise_distance = std_pairwise_distance ** 2
        median_pairwise_distance = _median_or_zero(off_diag_distances)

        # Distances from the depot (row 0) to each customer.
        depot_distances = distance_mat[0, 1:]
        (avg_distance_to_depot, std_distance_to_depot,
         min_distance_to_depot, max_distance_to_depot) = _basic_stats(depot_distances)
        variance_depot_distance = std_distance_to_depot ** 2
        depot_skewness, depot_kurtosis = _skewness_kurtosis(
            depot_distances, avg_distance_to_depot, std_distance_to_depot)

        # Graph and minimum-spanning-tree metrics.
        (graph_density, graph_avg_degree, mst_total_length,
         mst_avg_edge_length, mst_std_edge_length,
         graph_diameter) = _graph_metrics(distance_mat)
        # Every node of a complete graph is adjacent to all the others.
        complete_graph_degree = n_total_nodes - 1

        # VRP-specific estimates: lower bound on routes from total demand.
        estimated_n_routes = math.ceil(total_demand / capacity) if capacity > 0 else 0
        avg_customers_per_route = (
            n_customers / estimated_n_routes if estimated_n_routes > 0 else 0)
        capacity_utilization_estimate = (
            total_demand / (estimated_n_routes * capacity)
            if (estimated_n_routes > 0 and capacity > 0) else 0)
        # Heuristic: MST length plus one average depot leg per route, per route.
        avg_route_distance_estimate = (
            (mst_total_length + (avg_distance_to_depot * estimated_n_routes))
            / estimated_n_routes
            if estimated_n_routes > 0 else 0)

        # Coefficients of variation and other ratios.
        coef_var_demand = _ratio_or_zero(std_demand, avg_demand)
        coef_var_pairwise_distance = _ratio_or_zero(
            std_pairwise_distance, avg_pairwise_distance)
        ratio_min_to_avg_demand = _ratio_or_zero(min_demand, avg_demand)
        ratio_max_to_avg_demand = _ratio_or_zero(max_demand, avg_demand)
        ratio_min_to_max_distance = _ratio_or_zero(
            min_pairwise_distance, max_pairwise_distance)

        # Ordered table: position k (1-based) becomes "characteristic_<k>".
        characteristic_values = (
            n_customers,                      # 1: number of customers
            n_total_nodes,                    # 2: customers + depot
            capacity,                         # 3: vehicle capacity
            total_demand,                     # 4
            avg_demand,                       # 5
            std_demand,                       # 6
            min_demand,                       # 7
            max_demand,                       # 8
            demand_skewness,                  # 9
            demand_kurtosis,                  # 10
            avg_pairwise_distance,            # 11
            std_pairwise_distance,            # 12
            min_pairwise_distance,            # 13
            max_pairwise_distance,            # 14
            avg_distance_to_depot,            # 15
            std_distance_to_depot,            # 16
            min_distance_to_depot,            # 17
            max_distance_to_depot,            # 18
            graph_density,                    # 19
            graph_avg_degree,                 # 20
            complete_graph_degree,            # 21: connectivity of a complete graph
            graph_diameter,                   # 22: diameter of the MST
            avg_pairwise_distance,            # 23: reused as avg shortest path (same as 11)
            mst_total_length,                 # 24
            mst_avg_edge_length,              # 25
            mst_std_edge_length,              # 26
            estimated_n_routes,               # 27
            avg_customers_per_route,          # 28
            capacity_utilization_estimate,    # 29
            complete_graph_degree,            # 30: depot degree in the complete graph
            max_distance_to_depot,            # 31: depot eccentricity (same as 18)
            complete_graph_degree,            # 32: customer average degree
            avg_pairwise_distance,            # 33: duplicate of avg shortest path (same as 11)
            avg_distance_to_depot,            # 34: avg depot-customer distance (same as 15)
            variance_pairwise_distance,       # 35
            variance_demand,                  # 36
            max_demand - min_demand,          # 37: demand range
            median_demand,                    # 38
            median_pairwise_distance,         # 39
            np.sum(off_diag_distances),       # 40: sum of all off-diagonal distances
            avg_route_distance_estimate,      # 41
            variance_depot_distance,          # 42
            depot_skewness,                   # 43
            depot_kurtosis,                   # 44
            coef_var_demand,                  # 45
            coef_var_pairwise_distance,       # 46
            ratio_min_to_avg_demand,          # 47
            ratio_max_to_avg_demand,          # 48
            ratio_min_to_max_distance,        # 49
            coef_var_pairwise_distance,       # 50: std/mean distance (same as 46)
        )
        # Cast to plain Python floats so NumPy scalars never reach the output.
        for index, value in enumerate(characteristic_values, start=1):
            results["characteristic_{}".format(index)] = float(value)

    except Exception as e:
        # Top-level boundary: report the failure in-band instead of crashing,
        # leaving the zeroed placeholders in place.
        results["error"] = str(e)

    # Return the results using the helper function
    output_results(results)


# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
