# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Import third-party libraries for graph and numerical analysis
import networkx as nx
import numpy as np

def _shannon_entropy(counts, total):
    """Return the Shannon entropy (in bits) of a frequency table.

    Args:
        counts: NumPy array of occurrence counts, one per distinct value.
        total: Total number of observations the counts were taken over.

    Returns:
        The entropy as a non-negative float; 0.0 when ``total`` is not positive.
    """
    if total <= 0:
        return 0.0
    probs = counts / float(total)
    # With a single distinct value the sum is 0.0 and its negation is -0.0;
    # clamp so a negative zero is never reported.
    return max(0.0, float(-np.sum(probs * np.log2(probs))))


def _gini_coefficient(values, mean_value):
    """Return the Gini coefficient of a 1-D array of values.

    Uses the sorted-rank identity
    ``sum_i (2*i - n - 1) * x_(i) / (n**2 * mean)`` (ranks ``i`` are 1-based),
    which equals ``mean(|x_i - x_j|) / (2 * mean)`` without building the
    n-by-n matrix of pairwise differences.

    Args:
        values: 1-D NumPy array of values.
        mean_value: Precomputed arithmetic mean of ``values``.

    Returns:
        The Gini coefficient as a float; 0.0 when the array is empty or the
        mean is not strictly positive.
    """
    count = values.size
    if count == 0 or not mean_value > 0:
        return 0.0
    ordered = np.sort(values)
    ranks = np.arange(1, count + 1)
    weighted = np.sum((2 * ranks - count - 1) * ordered)
    return float(weighted / (float(count) * count * mean_value))


def main():
    """Extract instance characteristics from the problem data.

    Reads the instance dictionary returned by ``input_data()``, derives 50
    numeric characteristics (distance-value statistics, distance-matrix
    structure, metrics of the graph whose edges are the positive distances,
    and problem-size ratios) and hands them, together with a README string,
    to ``output_results()``.
    """
    # Get the instance data using the helper function
    instance_data = input_data()

    # Parse basic instance parameters (missing keys fall back to 0 / empty)
    nb_char = instance_data.get('nbCharacter', 0)
    code_len = instance_data.get('codeWordLength', 0)
    num_codes = instance_data.get('numOfCodeWords', 0)
    maxDist = instance_data.get('maxDist', 0)
    minDist = instance_data.get('minDist', 0)
    dist_matrix = instance_data.get('dist', [])

    # Convert distance matrix to a numpy array; statistics are taken over
    # every entry of the matrix, zeros included.
    dist_arr = np.array(dist_matrix, dtype=float)
    data = dist_arr.flatten()
    n = data.size

    # Compute basic statistics
    mean_dist = float(np.mean(data)) if n > 0 else 0.0
    std_dist = float(np.std(data)) if n > 0 else 0.0
    min_dist_val = float(np.min(data)) if n > 0 else 0.0
    max_dist_val = float(np.max(data)) if n > 0 else 0.0
    median_dist = float(np.median(data)) if n > 0 else 0.0
    # Skewness and excess kurtosis (population moments)
    if std_dist > 0:
        centered = data - mean_dist
        skewness = float(np.mean(centered**3) / (std_dist**3))
        kurtosis = float(np.mean(centered**4) / (std_dist**4) - 3)
    else:
        skewness = 0.0
        kurtosis = 0.0

    # Distance value frequencies for entropy
    unique_vals, counts = np.unique(data, return_counts=True)
    entropy = _shannon_entropy(counts, n)

    # Gini coefficient
    gini_coeff = _gini_coefficient(data, mean_dist)

    # Distance matrix structure: symmetry and diagonal checks are only
    # meaningful on a non-empty square matrix, so anything else reports zeros
    # instead of raising.
    is_square = (
        n > 0
        and dist_arr.ndim == 2
        and dist_arr.shape[0] == dist_arr.shape[1]
    )
    if nb_char > 0 and is_square:
        transposed = dist_arr.T
        max_sym_diff = float(np.max(np.abs(dist_arr - transposed)))
        # Each asymmetric pair (i, j) is counted at both (i, j) and (j, i).
        sym_violations = float(np.count_nonzero(dist_arr != transposed) // 2)
        diag_vals = np.diag(dist_arr)
    else:
        max_sym_diff = 0.0
        sym_violations = 0.0
        diag_vals = np.array([])
    diag_zero_count = int(np.sum(diag_vals == 0))
    diag_zero_ratio = float(diag_zero_count) / nb_char if nb_char > 0 else 0.0

    # Unique rows analysis
    unique_rows = len({tuple(row) for row in dist_matrix})
    row_uniques = [len(set(row)) for row in dist_matrix]
    has_rows = nb_char > 0 and bool(row_uniques)
    row_unique_mean = float(np.mean(row_uniques)) if has_rows else 0.0
    row_unique_std = float(np.std(row_uniques)) if has_rows else 0.0

    # Build graph of characters where edges exist if distance > 0
    G = nx.Graph()
    G.add_nodes_from(range(nb_char))
    # Only visit cells that exist, in case the matrix is smaller than
    # nbCharacter claims.
    if dist_arr.ndim == 2:
        usable = min(nb_char, dist_arr.shape[0], dist_arr.shape[1])
    else:
        usable = 0
    for i in range(usable):
        for j in range(i + 1, usable):
            w = dist_arr[i, j]
            if w > 0:
                G.add_edge(i, j, weight=w)

    # Graph metrics
    graph_nodes = nb_char
    graph_edges = G.number_of_edges()
    graph_density = float(nx.density(G)) if graph_nodes > 1 else 0.0
    degrees = np.array([d for _, d in G.degree()]) if graph_nodes > 0 else np.array([])
    graph_avg_degree = float(np.mean(degrees)) if degrees.size > 0 else 0.0
    graph_degree_std = float(np.std(degrees)) if degrees.size > 0 else 0.0
    graph_clustering = float(nx.average_clustering(G)) if graph_nodes > 0 else 0.0
    # Connected components (enumerated once and reused below)
    components = list(nx.connected_components(G))
    n_components = len(components)
    largest_nodes = max(components, key=len) if components else set()
    largest_comp = len(largest_nodes)
    # Diameter and average shortest path on largest component; both stay 0.0
    # if either computation is rejected by networkx.
    graph_diameter = 0.0
    graph_avg_sp = 0.0
    if largest_comp > 1:
        comp_sub = G.subgraph(largest_nodes)
        try:
            diameter = float(nx.diameter(comp_sub))
            avg_sp = float(nx.average_shortest_path_length(comp_sub))
        except nx.NetworkXException:
            pass
        else:
            graph_diameter = diameter
            graph_avg_sp = avg_sp
    # Edge weight stats
    weights = np.array([d['weight'] for _, _, d in G.edges(data=True)])
    w_mean = float(np.mean(weights)) if weights.size > 0 else 0.0
    w_std = float(np.std(weights)) if weights.size > 0 else 0.0
    w_min = float(np.min(weights)) if weights.size > 0 else 0.0
    w_max = float(np.max(weights)) if weights.size > 0 else 0.0

    # Problem-specific derived metrics
    total_variables = num_codes * code_len
    # k*(k-1) is always even, so floor division is exact and avoids the -0.0
    # that true division yields for k == 0.
    num_constraints = num_codes * (num_codes - 1) // 2
    constraint_arity = code_len
    matrix_size = nb_char * nb_char
    zero_vals = int(np.sum(data == 0))
    zero_ratio = float(zero_vals) / n if n > 0 else 0.0
    unique_val_count = unique_vals.size
    # Potential max codeword distance
    max_pairwise = code_len * max_dist_val
    maxDist_ratio = float(maxDist) / max_pairwise if max_pairwise > 0 else 0.0
    minDist_ratio = float(minDist) / max_pairwise if max_pairwise > 0 else 0.0
    dist_range_ratio = (max_dist_val - min_dist_val) / max_dist_val if max_dist_val > 0 else 0.0

    # Compile results with exactly 50 characteristics
    results = {
        "README": (
            "This instance was analyzed by converting the provided distance matrix into a numeric array, "
            "then computing statistical descriptors of the distances such as mean, standard deviation, min, max, "
            "median, skewness, kurtosis, entropy, and Gini coefficient. We also performed structural analysis of the distance matrix by checking symmetry properties, diagonal zeros, and unique row patterns. To capture relationships among characters, we constructed a weighted graph where nodes represent characters and edges represent positive distances. Graph metrics including density, average degree, clustering coefficient, connectivity, diameter, and shortest paths were computed. Additional problem-specific features such as variable count, constraint count, constraint arity, and ratios of required distances to theoretical maximum distances were derived. These 50 characteristics summarize both statistical distribution, matrix structure, graph topology, and problem scale, offering a comprehensive descriptor set for solver configuration."
        ),
        "characteristic_1": float(nb_char),                 # nbCharacter
        "characteristic_2": float(code_len),                # codeWordLength
        "characteristic_3": float(num_codes),               # numOfCodeWords
        "characteristic_4": float(maxDist),                 # maxDist
        "characteristic_5": float(minDist),                 # minDist
        "characteristic_6": float(total_variables),         # total decision variables
        "characteristic_7": float(num_constraints),         # number of constraints
        "characteristic_8": float(constraint_arity),        # arity of each constraint
        "characteristic_9": float(matrix_size),             # total entries in distance matrix
        "characteristic_10": float(zero_vals),              # count of zero distances
        "characteristic_11": float(zero_ratio),             # ratio of zero distances
        "characteristic_12": float(unique_val_count),       # number of unique distance values
        "characteristic_13": mean_dist,                     # mean distance
        "characteristic_14": std_dist,                      # std of distances
        "characteristic_15": min_dist_val,                  # min distance value
        "characteristic_16": max_dist_val,                  # max distance value
        "characteristic_17": median_dist,                   # median distance value
        "characteristic_18": skewness,                      # skewness of distance distribution
        "characteristic_19": kurtosis,                      # kurtosis of distance distribution
        "characteristic_20": entropy,                       # entropy of distance distribution
        "characteristic_21": gini_coeff,                    # Gini coefficient of distances
        "characteristic_22": max_sym_diff,                  # max abs symmetry difference
        "characteristic_23": sym_violations,                # symmetry violations count
        "characteristic_24": float(diag_zero_count),        # diagonal zeros count
        "characteristic_25": diag_zero_ratio,               # diagonal zero ratio
        "characteristic_26": float(unique_rows),            # unique row patterns
        "characteristic_27": row_unique_mean,               # avg unique per row
        "characteristic_28": row_unique_std,                # std unique per row
        "characteristic_29": float(graph_nodes),            # graph nodes
        "characteristic_30": float(graph_edges),            # graph edges
        "characteristic_31": graph_density,                 # graph density
        "characteristic_32": graph_avg_degree,              # avg degree
        "characteristic_33": graph_degree_std,              # degree std
        "characteristic_34": graph_clustering,              # clustering coeff
        "characteristic_35": float(n_components),           # connected components
        "characteristic_36": float(largest_comp),           # largest component size
        "characteristic_37": graph_diameter,                # graph diameter
        "characteristic_38": graph_avg_sp,                  # avg shortest path
        "characteristic_39": w_mean,                        # mean edge weight
        "characteristic_40": w_std,                         # std edge weight
        "characteristic_41": w_min,                         # min edge weight
        "characteristic_42": w_max,                         # max edge weight
        "characteristic_43": float(dist_range_ratio),       # normalized distance range
        "characteristic_44": float(maxDist_ratio),          # ratio of maxDist to potential max
        "characteristic_45": float(minDist_ratio),          # ratio of minDist to potential max
        "characteristic_46": float(row_uniques[0]) if row_uniques else 0.0,   # first row variety
        "characteristic_47": float(row_uniques[-1]) if row_uniques else 0.0,  # last row variety
        "characteristic_48": float(mean_dist / max_dist_val) if max_dist_val > 0 else 0.0,  # mean/max
        "characteristic_49": float(std_dist / mean_dist) if mean_dist > 0 else 0.0,  # std/mean
        "characteristic_50": float(gini_coeff * entropy),   # product of gini and entropy
    }

    # Return the results using the helper function
    output_results(results)

# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
