# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Import third-party libraries for graph and numerical analysis
import networkx as nx
import numpy as np


def _finite_or_zero(value):
    """Return ``value`` as a float, or 0.0 when it is NaN or infinite."""
    number = float(value)
    return number if np.isfinite(number) else 0.0


def _mean_std(values):
    """Return ``(mean, std)`` of a NumPy array, or ``(0.0, 0.0)`` when it is empty."""
    if values.size == 0:
        return 0.0, 0.0
    return float(np.mean(values)), float(np.std(values))


def main():
    """Extract instance characteristics from the problem data.

    Reads the instance through ``input_data()``, computes 50 numeric
    characteristics (model dimensions, distance-matrix statistics and metrics
    of the complete weighted graph built over the characters) and passes them,
    together with a README string, to ``output_results()``.

    Quantities that are mathematically undefined for the instance at hand
    (statistics of an empty matrix, metrics of a graph without nodes, degree
    assortativity of a graph whose nodes all share one degree) are reported
    as 0.0 rather than NaN or an exception.
    """
    # Get the instance data using the helper function
    instance_data = input_data()

    # Extract basic parameters
    nb = instance_data.get('nbCharacter', 0)
    length = instance_data.get('codeWordLength', 0)
    num_codes = instance_data.get('numOfCodeWords', 0)
    min_dist = instance_data.get('minDist', 0)
    max_dist = instance_data.get('maxDist', 0)
    dist_list = instance_data.get('dist', [])

    # Convert distance matrix to NumPy array and flatten it for statistics
    dist_mat = np.array(dist_list, dtype=float)
    all_vals = dist_mat.flatten()

    # Diagonal and off-diagonal values. An empty ``dist`` list becomes a 1-D
    # array that cannot be indexed with a 2-D mask, so it is handled apart.
    # A non-empty matrix whose shape disagrees with ``nb`` still raises
    # IndexError here.
    if dist_mat.ndim == 2 and dist_mat.size > 0:
        diag_vals = np.diag(dist_mat)
        offdiag_vals = dist_mat[~np.eye(nb, dtype=bool)]
    else:
        diag_vals = np.array([])
        offdiag_vals = np.array([])

    # Basic statistics on the whole distance matrix
    mean_all, std_all = _mean_std(all_vals)
    if all_vals.size > 0:
        min_all = np.min(all_vals)
        max_all = np.max(all_vals)
        median_all = np.median(all_vals)
    else:
        min_all = max_all = median_all = 0.0
    range_all = max_all - min_all
    unique_vals = np.unique(all_vals).size
    prop_zero_off = np.sum(offdiag_vals == 0) / offdiag_vals.size if offdiag_vals.size > 0 else 0.0
    prop_diag_nonzero = np.sum(diag_vals != 0) / diag_vals.size if diag_vals.size > 0 else 0.0

    # Skewness and excess kurtosis from standardized values; both are
    # undefined (reported as 0.0) when every entry is identical.
    if std_all > 0:
        z_scores = (all_vals - mean_all) / std_all
        skew_all = np.mean(z_scores ** 3)
        kurt_all = np.mean(z_scores ** 4) - 3
    else:
        skew_all = 0.0
        kurt_all = 0.0

    # Off-diagonal and diagonal statistics
    mean_off, std_off = _mean_std(offdiag_vals)
    diag_sum = np.sum(diag_vals)
    diag_mean, diag_std = _mean_std(diag_vals)

    # Ratio metrics (0.0 whenever the denominator is zero)
    coef_var_all = std_all / mean_all if mean_all != 0 else 0.0
    coef_var_off = std_off / mean_off if mean_off != 0 else 0.0
    ratio_off_diag_mean = mean_off / diag_mean if diag_mean != 0 else 0.0

    # Constraint model metrics
    # NOTE(review): variables are counted as nb * length; if the model has one
    # variable per codeword position this should presumably be
    # num_codes * length -- confirm against the model before changing.
    num_variables = nb * length
    num_constraints = num_codes * (num_codes - 1) // 2
    # NOTE(review): float(nb ** length) below raises OverflowError once the
    # code space exceeds the float range (about 1.8e308).
    code_space_size = nb ** length if nb > 0 else 0
    relative_code_space = num_codes / code_space_size if code_space_size != 0 else 0.0

    # Build the complete weighted graph of characters (one edge per pair i < j)
    G = nx.Graph()
    G.add_nodes_from(range(nb))
    for i in range(nb):
        for j in range(i + 1, nb):
            G.add_edge(i, j, weight=float(dist_mat[i, j]))

    # Graph metrics
    graph_nodes = G.number_of_nodes()
    graph_edges = G.number_of_edges()
    graph_density = nx.density(G)

    # Clustering and connectivity. These are skipped for a graph without
    # nodes, where networkx raises instead of returning a value.
    has_nodes = graph_nodes > 0
    graph_clustering = nx.average_clustering(G, weight=None) if has_nodes else 0.0
    graph_trans = nx.transitivity(G) if has_nodes else 0.0
    is_connected = 1 if has_nodes and nx.is_connected(G) else 0

    # Weighted all-pairs shortest path lengths, each unordered pair counted once
    path_lens = dict(nx.all_pairs_dijkstra_path_length(G, weight='weight'))
    sp_vals = [
        d
        for u, targets in path_lens.items()
        for v, d in targets.items()
        if u < v
    ]
    avg_sp = float(np.mean(sp_vals)) if sp_vals else 0.0
    diam = float(np.max(sp_vals)) if sp_vals else 0.0

    # Centrality measures
    deg_cent = np.array(list(nx.degree_centrality(G).values()))
    close_cent = np.array(list(nx.closeness_centrality(G, distance=None).values()))
    between_cent = np.array(list(nx.betweenness_centrality(G, weight='weight').values()))
    try:
        eig_cent = np.array(list(nx.eigenvector_centrality_numpy(G, weight='weight').values()))
    except Exception:
        # Best effort: fall back to zeros when the eigen-solver cannot run
        eig_cent = np.zeros(nb)

    deg_mean, deg_std = _mean_std(deg_cent)
    close_mean, close_std = _mean_std(close_cent)
    between_mean, between_std = _mean_std(between_cent)
    eig_mean, eig_std = _mean_std(eig_cent)

    # Edge weight distribution relative to the mean of the whole matrix
    edges_weights = np.array([w for _, _, w in G.edges(data='weight')])
    if edges_weights.size > 0:
        prop_lt_mean = np.sum(edges_weights < mean_all) / edges_weights.size
        prop_gt_mean = np.sum(edges_weights > mean_all) / edges_weights.size
    else:
        prop_lt_mean = 0.0
        prop_gt_mean = 0.0

    # Degree assortativity is a correlation between the degrees at both ends
    # of each edge. In the complete graph built above every node has the same
    # degree, so the correlation is 0/0; report 0.0 instead of NaN.
    assortativity = 0.0
    if graph_edges > 0:
        try:
            assortativity = _finite_or_zero(nx.degree_assortativity_coefficient(G))
        except (ValueError, ZeroDivisionError, FloatingPointError, nx.NetworkXException):
            assortativity = 0.0

    # Assemble results dictionary
    results = {
        "README": (
            "This instance was analyzed by loading the parameters via the helper function and converting the distance matrix into a weighted graph of characters. "
            "We computed basic model metrics such as the number of variables and constraints, and the size of the search space. "
            "Statistical properties of the distance matrix (mean, std, skewness, kurtosis, uniqueness, and off‐diagonal versus diagonal comparisons) reveal cost variability. "
            "Graph‐based features (density, clustering, transitivity, connectivity, shortest path statistics, and centrality measures) capture structural complexity and symmetry in the character interactions. "
            "These 50 standardized characteristics reflect both the constraint model dimensions and the underlying weighted connectivity patterns, providing a comprehensive feature set for solver parameter tuning."
        ),
        "characteristic_1": float(nb),                        # number of characters
        "characteristic_2": float(length),                    # code word length
        "characteristic_3": float(num_codes),                 # number of codewords
        "characteristic_4": float(min_dist),                  # minimum distance constraint
        "characteristic_5": float(max_dist),                  # maximum distance constraint
        "characteristic_6": float(num_variables),             # total decision variables
        "characteristic_7": float(num_constraints),           # total pairwise constraints
        "characteristic_8": float(code_space_size),           # size of full code space
        "characteristic_9": float(relative_code_space),       # ratio of requested codes to space
        "characteristic_10": float(mean_all),                 # mean of all distances
        "characteristic_11": float(std_all),                  # std of all distances
        "characteristic_12": float(min_all),                  # min distance in matrix
        "characteristic_13": float(max_all),                  # max distance in matrix
        "characteristic_14": float(median_all),               # median of all distances
        "characteristic_15": float(unique_vals),              # unique distance values count
        "characteristic_16": float(prop_zero_off),            # proportion off-diagonal zeros
        "characteristic_17": float(prop_diag_nonzero),        # proportion diagonal nonzeros
        "characteristic_18": float(range_all),                # range of distances
        "characteristic_19": float(skew_all),                 # skewness of distance distribution
        "characteristic_20": float(kurt_all),                 # kurtosis of distance distribution
        "characteristic_21": float(graph_nodes),              # graph node count
        "characteristic_22": float(graph_edges),              # graph edge count
        "characteristic_23": float(graph_density),            # graph density
        "characteristic_24": float(graph_clustering),         # average clustering coefficient
        "characteristic_25": float(graph_trans),              # global transitivity
        "characteristic_26": float(is_connected),             # connectivity flag (1/0)
        "characteristic_27": float(avg_sp),                   # average shortest path length
        "characteristic_28": float(diam),                     # graph diameter
        "characteristic_29": deg_mean,                        # degree centrality mean
        "characteristic_30": deg_std,                         # degree centrality std
        "characteristic_31": close_mean,                      # closeness centrality mean
        "characteristic_32": close_std,                       # closeness centrality std
        "characteristic_33": between_mean,                    # betweenness centrality mean
        "characteristic_34": between_std,                     # betweenness centrality std
        "characteristic_35": eig_mean,                        # eigenvector centrality mean
        "characteristic_36": eig_std,                         # eigenvector centrality std
        "characteristic_37": float(prop_lt_mean),             # edge weight < mean proportion
        "characteristic_38": float(prop_gt_mean),             # edge weight > mean proportion
        "characteristic_39": float(mean_off),                 # mean off-diagonal distance
        "characteristic_40": float(std_off),                  # std off-diagonal distance
        "characteristic_41": float(diag_sum),                 # sum of diagonal distances
        "characteristic_42": float(diag_mean),                # mean of diagonal distances
        "characteristic_43": float(diag_std),                 # std of diagonal distances
        "characteristic_44": float(ratio_off_diag_mean),      # off/diag mean ratio
        "characteristic_45": float(coef_var_all),             # coeff var of all distances
        "characteristic_46": float(coef_var_off),             # coeff var of off-diagonal
        "characteristic_47": float(diag_std / diag_mean) if diag_mean != 0 else 0.0,  # coeff var diag
        "characteristic_48": float(graph_density * graph_clustering),  # combined density/clustering
        "characteristic_49": float(num_constraints / code_space_size) if code_space_size != 0 else 0.0,  # constraint/space density
        "characteristic_50": assortativity,                   # degree assortativity
    }

    # Return the results using the helper function
    output_results(results)


# Run the extraction only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
