# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Import third-party libraries for graph and numerical analysis
import networkx as nx
import numpy as np

def main():
    """Extract instance characteristics from the FLECC problem data.

    Reads the instance through ``input_data()``, derives 50 numeric features
    (distance-matrix statistics, problem-size counts, normalized distance
    bounds, constraint-graph metrics, domain and constraint ratios) and hands
    a result dictionary to ``output_results()``.

    The result dictionary holds a ``"README"`` text plus the keys
    ``"characteristic_1"`` .. ``"characteristic_50"``, all stored as floats.
    If any step of the feature computation raises, the dictionary is instead
    ``{"README": "", "error": <exception message>}``. Exceptions raised by
    ``input_data()`` or ``output_results()`` themselves are not caught here.
    """
    # Get the instance data using the helper function.
    # NOTE(review): only ``.get(key, default)`` is used below, so this is
    # presumably a dict-like mapping -- confirm against lmtune_helpers.
    instance_data = input_data()

    try:
        # Extract basic parameters (missing keys fall back to 0 / empty).
        c = int(instance_data.get('nbCharacter', 0))
        L = int(instance_data.get('codeWordLength', 0))
        M = int(instance_data.get('numOfCodeWords', 0))
        maxDist_instance = float(instance_data.get('maxDist', 0))
        minDist_instance = float(instance_data.get('minDist', 0))
        dist_matrix = np.array(instance_data.get('dist', []), dtype=float)

        # Matrix statistics: only computed when the matrix holds exactly
        # c * c entries; otherwise every statistic defaults to zero.
        mat_size = c * c
        if mat_size > 0 and dist_matrix.size == mat_size:
            n_zero = int(np.sum(dist_matrix == 0))
            prop_zero = n_zero / mat_size
            mat_min = float(dist_matrix.min())
            mat_max = float(dist_matrix.max())
            n_max_mat = int(np.sum(dist_matrix == mat_max))
            prop_max_mat = n_max_mat / mat_size
            mat_mean = float(dist_matrix.mean())
            mat_std = float(dist_matrix.std(ddof=0))
            mat_median = float(np.median(dist_matrix))
            if mat_std > 0:
                # Population skewness and excess kurtosis (hence the "- 3").
                centered = dist_matrix - mat_mean
                mat_skew = float(np.mean(centered**3) / (mat_std**3))
                mat_kurt = float(np.mean(centered**4) / (mat_std**4) - 3)
            else:
                # Constant matrix: the standardized moments are undefined.
                mat_skew = 0.0
                mat_kurt = 0.0
        else:
            # Default if matrix is missing or mis-sized
            n_zero = prop_zero = mat_min = mat_max = n_max_mat = prop_max_mat = 0.0
            mat_mean = mat_std = mat_median = mat_skew = mat_kurt = 0.0

        # Core counts
        n_variables = M * L
        n_pair_constraints = M * (M - 1) // 2
        n_binary_constraints = int(L * n_pair_constraints)

        # Distance bounds normalized by L * (largest matrix entry); guarded
        # so a zero length or all-zero matrix cannot divide by zero.
        dist_diff = maxDist_instance - minDist_instance
        if L > 0 and mat_max > 0:
            scale = L * mat_max
            minDist_norm = minDist_instance / scale
            maxDist_norm = maxDist_instance / scale
            dist_diff_norm = dist_diff / scale
        else:
            minDist_norm = maxDist_norm = dist_diff_norm = 0.0
        minDist_per_pos = minDist_instance / L if L > 0 else 0.0
        maxDist_per_pos = maxDist_instance / L if L > 0 else 0.0

        # Build constraint graph: nodes are (codeword, position); an edge
        # joins the same position k of every pair of distinct codewords.
        G = nx.Graph()
        G.add_nodes_from((i, k) for i in range(M) for k in range(L))
        G.add_edges_from(
            ((i, k), (j, k))
            for i in range(M)
            for j in range(i + 1, M)
            for k in range(L)
        )

        # Graph properties
        graph_n_nodes = G.number_of_nodes()
        graph_n_edges = G.number_of_edges()
        graph_density = nx.density(G) if graph_n_nodes > 1 else 0.0
        comps = list(nx.connected_components(G))
        graph_n_components = len(comps)
        graph_max_comp_size = int(max((len(comp) for comp in comps), default=0))
        graph_avg_clustering = nx.average_clustering(G) if graph_n_nodes > 0 else 0.0

        # Diameter and average shortest path are computed per connected
        # component; single-node components contribute zeros.
        diameters = []
        avg_sps = []
        for comp in comps:
            subG = G.subgraph(comp)
            if subG.number_of_nodes() > 1:
                try:
                    diameters.append(nx.diameter(subG))
                    avg_sps.append(nx.average_shortest_path_length(subG))
                except Exception:
                    # Best-effort fallback kept from the original code: a
                    # failing component is recorded as zero.
                    diameters.append(0)
                    avg_sps.append(0.0)
            else:
                diameters.append(0)
                avg_sps.append(0.0)
        # Largest diameter over components; unweighted mean of the
        # per-component average path lengths.
        graph_diameter = int(max(diameters, default=0))
        graph_avg_shortest_path = float(np.mean(avg_sps)) if avg_sps else 0.0

        # Degree and centrality stats
        degrees = np.array([d for _, d in G.degree()]) if graph_n_nodes > 0 else np.array([])
        graph_avg_degree = float(degrees.mean()) if degrees.size > 0 else 0.0
        deg_cent_vals = np.array(list(nx.degree_centrality(G).values())) if graph_n_nodes > 0 else np.array([])
        graph_degree_centrality_avg = float(deg_cent_vals.mean()) if deg_cent_vals.size > 0 else 0.0
        graph_degree_centrality_std = float(deg_cent_vals.std()) if deg_cent_vals.size > 0 else 0.0
        clust_vals = np.array(list(nx.clustering(G).values())) if graph_n_nodes > 0 else np.array([])
        graph_clustering_variance = float(clust_vals.var()) if clust_vals.size > 0 else 0.0

        # Domain metrics (uniform domains): every variable has c values.
        total_domain_size = n_variables * c
        avg_domain_size = float(c)
        min_domain_size = float(c)
        max_domain_size = float(c)
        domain_size_variance = 0.0

        # Constraint participation per variable. Clamped at zero so that
        # M == 0 does not report -1, matching n_lex_constraints below.
        avg_constraints_per_var = float(max(0, M - 1))
        std_constraints_per_var = 0.0

        # Constraint density: binary constraints over all variable pairs.
        binary_constraint_density = (n_binary_constraints / (n_variables * (n_variables - 1) / 2)) if n_variables > 1 else 0.0

        # Lexicographic symmetry-breaking constraints
        n_lex_constraints = max(0, M - 1)
        ratio_lex_to_pair_constraints = (n_lex_constraints / n_pair_constraints) if n_pair_constraints > 0 else 0.0
        ratio_lex_to_binary_constraints = (n_lex_constraints / n_binary_constraints) if n_binary_constraints > 0 else 0.0

        # Number of unordered character pairs
        distinct_char_pairs = float(c * (c - 1) / 2)

        # Prepare result dictionary. Every fragment except the last ends
        # with a space: adjacent literals are concatenated verbatim, so a
        # missing space would fuse two words in the emitted text.
        readme_text = (
            "This analysis proceeds by extracting both matrix-based and graph-based features to "
            "comprehensively characterize the FLECC instance’s complexity. We first read the custom "
            "character distance matrix and compute descriptive statistics (minimum, maximum, mean, "
            "standard deviation, median, skewness, kurtosis) to capture the distributional structure of "
            "symbol distances. We count zero-cost self-distances and maximal distances to quantify "
            "alphabet homogeneity. Next, we quantify core problem parameters: alphabet size, codeword "
            "length, number of codewords, total variables, and counts of high-level pairwise and binary "
            "constraints. We model the implicit constraint graph where each variable (a symbol position in "
            "a codeword) is a node and edges link variables constrained by Hamming distance (aligned "
            "positions across distinct codewords). On this graph, we compute node and edge counts, density, "
            "clustering coefficients, connected component sizes, diameters, average shortest path lengths, "
            "degree distributions, and centrality measures to capture connectivity patterns and constraint "
            "tightness. We also derive domain metrics (uniform domain sizes) and constraint participation "
            "per variable, lexicographic symmetry-breaking constraint counts, and normalized tightness "
            "ratios by comparing min/max distance constraints to the maximum possible per-position symbol "
            "distance. These 50 characteristics collectively reflect size, structural complexity, data "
            "distribution, and scaling properties crucial for guiding solver parameter tuning."
        )
        results = {
            "README": readme_text,
            "characteristic_1": float(c),
            "characteristic_2": float(L),
            "characteristic_3": float(M),
            "characteristic_4": float(n_variables),
            "characteristic_5": float(n_pair_constraints),
            "characteristic_6": float(n_binary_constraints),
            "characteristic_7": float(mat_size),
            "characteristic_8": float(n_zero),
            "characteristic_9": float(prop_zero),
            "characteristic_10": float(n_max_mat),
            "characteristic_11": float(prop_max_mat),
            "characteristic_12": float(mat_min),
            "characteristic_13": float(mat_max),
            "characteristic_14": float(mat_mean),
            "characteristic_15": float(mat_std),
            "characteristic_16": float(mat_median),
            "characteristic_17": float(mat_skew),
            "characteristic_18": float(mat_kurt),
            "characteristic_19": float(minDist_instance),
            "characteristic_20": float(maxDist_instance),
            "characteristic_21": float(minDist_norm),
            "characteristic_22": float(maxDist_norm),
            "characteristic_23": float(dist_diff_norm),
            "characteristic_24": float(dist_diff),
            "characteristic_25": float(minDist_per_pos),
            "characteristic_26": float(maxDist_per_pos),
            "characteristic_27": float(graph_n_nodes),
            "characteristic_28": float(graph_n_edges),
            "characteristic_29": float(graph_density),
            "characteristic_30": float(graph_n_components),
            "characteristic_31": float(graph_max_comp_size),
            "characteristic_32": float(graph_avg_clustering),
            "characteristic_33": float(graph_diameter),
            "characteristic_34": float(graph_avg_shortest_path),
            "characteristic_35": float(graph_avg_degree),
            "characteristic_36": float(graph_degree_centrality_avg),
            "characteristic_37": float(graph_degree_centrality_std),
            "characteristic_38": float(graph_clustering_variance),
            "characteristic_39": float(total_domain_size),
            "characteristic_40": float(avg_domain_size),
            "characteristic_41": float(min_domain_size),
            "characteristic_42": float(max_domain_size),
            "characteristic_43": float(domain_size_variance),
            "characteristic_44": float(avg_constraints_per_var),
            "characteristic_45": float(std_constraints_per_var),
            "characteristic_46": float(binary_constraint_density),
            "characteristic_47": float(n_lex_constraints),
            "characteristic_48": float(ratio_lex_to_pair_constraints),
            "characteristic_49": float(ratio_lex_to_binary_constraints),
            "characteristic_50": float(distinct_char_pairs),
        }
    except Exception as e:
        # Top-level boundary: report the failure through the normal output
        # channel instead of crashing.
        results = {"README": "", "error": str(e)}

    # Return the results using the helper function
    output_results(results)

# Run the feature extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
