# Import helper functions for input/output
from lmtune_helpers import input_data, output_results

# Import third-party libraries for data analysis
import networkx as nx
import numpy as np


def compute_skewness(data):
    """Return the population skewness of a 1D numpy array.

    The value is the mean cubed deviation from the mean divided by the cube
    of the population standard deviation. Inputs with fewer than three
    elements, or with zero standard deviation, yield ``0.0``.
    """
    count = len(data)
    # Too few samples for a meaningful third moment.
    if count < 3:
        return 0.0

    center = np.mean(data)
    spread = np.std(data)
    # A constant sample has no spread; avoid dividing by zero.
    if spread == 0:
        return 0.0

    cubed_deviation_sum = np.sum((data - center) ** 3)
    return cubed_deviation_sum / count / (spread ** 3)


def main():
    """Extract 50 instance characteristics from a FLECC problem instance.

    The instance is obtained from ``input_data()``. A results dictionary with
    the keys ``README`` and ``characteristic_1`` .. ``characteristic_50``
    (all defaulting to ``0.0``) is filled in and passed to
    ``output_results()``.

    Any exception raised while extracting features is recorded as a string
    under ``results["error"]``; the characteristics computed before the
    failure (defaults otherwise) are still emitted.
    """
    # README first, then the 50 characteristics in numeric order.
    results = {"README": ""}
    results.update(
        {"characteristic_{}".format(i): 0.0 for i in range(1, 51)}
    )

    try:
        # Get the instance data using the helper function (no file I/O here)
        instance_data = input_data()

        nb_char = instance_data.get('nbCharacter', 0)
        code_word_length = instance_data.get('codeWordLength', 0)
        num_code_words = instance_data.get('numOfCodeWords', 0)
        max_dist = instance_data.get('maxDist', 0)
        min_dist = instance_data.get('minDist', 0)
        dist_arr = np.array(instance_data.get('dist', []), dtype=float)

        # Only off-diagonal entries are used for the distance statistics.
        if nb_char > 1:
            # Fail with a clear message instead of an opaque numpy indexing
            # error when the matrix does not match the declared alphabet size.
            if dist_arr.shape != (nb_char, nb_char):
                raise ValueError(
                    "dist matrix has shape {}, expected ({}, {})".format(
                        dist_arr.shape, nb_char, nb_char
                    )
                )
            off_diag_values = dist_arr[~np.eye(nb_char, dtype=bool)]
        else:
            off_diag_values = np.array([])
        has_off_diag = off_diag_values.size > 0

        # Characteristics 1-5: basic instance parameters
        results["characteristic_1"] = float(nb_char)            # nbCharacter
        results["characteristic_2"] = float(code_word_length)   # codeWordLength
        results["characteristic_3"] = float(num_code_words)     # numOfCodeWords
        results["characteristic_4"] = float(max_dist)           # maxDist
        results["characteristic_5"] = float(min_dist)           # minDist

        # Characteristics 6-12: statistics of the off-diagonal distances
        if has_off_diag:
            avg_off = float(np.mean(off_diag_values))
            std_off = float(np.std(off_diag_values))
            min_off = float(np.min(off_diag_values))
            max_off = float(np.max(off_diag_values))
            range_off = max_off - min_off
            median_off = float(np.median(off_diag_values))
            skew_off = float(compute_skewness(off_diag_values))
        else:
            avg_off = std_off = min_off = max_off = 0.0
            range_off = median_off = skew_off = 0.0

        results["characteristic_6"] = avg_off      # Average off-diagonal distance
        results["characteristic_7"] = std_off      # Std dev of off-diagonal distances
        results["characteristic_8"] = min_off      # Minimum off-diagonal distance
        results["characteristic_9"] = max_off      # Maximum off-diagonal distance
        results["characteristic_10"] = range_off   # Range of off-diagonal distances
        results["characteristic_11"] = median_off  # Median of off-diagonal distances
        results["characteristic_12"] = skew_off    # Skewness of off-diagonal distances

        # Characteristics 13-16: error detection/correction capabilities
        results["characteristic_13"] = float(min_dist - 1)         # minDist - 1
        results["characteristic_14"] = float((min_dist - 1) // 2)  # floor((minDist-1)/2)
        results["characteristic_15"] = (
            float(min_dist) / code_word_length if code_word_length else 0.0
        )  # Ratio minDist/codeWordLength
        results["characteristic_16"] = (
            float(max_dist) / code_word_length if code_word_length else 0.0
        )  # Ratio maxDist/codeWordLength

        # Characteristics 17-20: additional statistics of the distance matrix
        total_off_count = off_diag_values.size
        # Average and std dev normalised by maxDist; reused for 19, 20, 27, 28.
        norm_avg_off = avg_off / max_dist if max_dist else 0.0
        norm_std_off = std_off / max_dist if max_dist else 0.0
        results["characteristic_17"] = (
            float(np.sum(off_diag_values)) if has_off_diag else 0.0
        )  # Total sum of off-diagonal distances
        results["characteristic_18"] = float(total_off_count)  # Count of off-diagonal entries
        results["characteristic_19"] = norm_avg_off  # Normalized average distance
        results["characteristic_20"] = norm_avg_off  # Duplicate of 19 (kept for compatibility)

        # Characteristics 21-26: combinatorial measures
        results["characteristic_21"] = float(nb_char * code_word_length)
        results["characteristic_22"] = float(num_code_words * code_word_length)
        results["characteristic_23"] = (
            float(num_code_words) / nb_char if nb_char else 0.0
        )
        results["characteristic_24"] = float(nb_char ** 2)
        results["characteristic_25"] = float(code_word_length ** 2)
        results["characteristic_26"] = float(num_code_words ** 2)

        # Characteristics 27-28: distance metrics normalized by maxDist
        results["characteristic_27"] = norm_avg_off
        results["characteristic_28"] = norm_std_off

        # Characteristics 29-34: matrix structural properties
        # NOTE(review): symmetry is assumed here, not verified against dist.
        results["characteristic_29"] = 1.0
        # With no off-diagonal entries this is an empty selection, so the
        # count of unique non-zero distances is 0 (not 1).
        nonzero_off = off_diag_values[off_diag_values > 0]
        results["characteristic_30"] = float(np.unique(nonzero_off).size)
        # NOTE(review): these two repeat the off-diagonal min/max; zeros are
        # not filtered out even though they were labelled "non-zero".
        results["characteristic_31"] = min_off
        results["characteristic_32"] = max_off
        results["characteristic_33"] = float(np.sum(off_diag_values == min_off))  # Occurrences of min_off
        results["characteristic_34"] = float(np.sum(off_diag_values == max_off))  # Occurrences of max_off

        # Characteristics 35-37: statistics over all matrix entries (diagonal
        # included). Skipped for an empty matrix, where numpy would yield NaN.
        if dist_arr.size > 0:
            results["characteristic_35"] = float(np.mean(dist_arr))
            results["characteristic_36"] = float(np.std(dist_arr))
            results["characteristic_37"] = float(np.median(dist_arr))

        # Characteristics 38-40: ratios and derived measures
        results["characteristic_38"] = (
            float(num_code_words) / (nb_char * code_word_length)
            if nb_char and code_word_length else 0.0
        )
        if has_off_diag:
            variance_off = np.var(off_diag_values)
            mean_off = np.mean(off_diag_values)
            results["characteristic_39"] = float(variance_off)  # Variance of off-diagonals
            if mean_off != 0:
                # Variance normalized by the squared mean
                results["characteristic_40"] = float(variance_off / (mean_off ** 2))

        # Graph-based analysis: complete undirected graph whose nodes are the
        # characters and whose edge weights come from the upper triangle of
        # the distance matrix.
        G = nx.Graph()
        G.add_nodes_from(range(nb_char))
        G.add_weighted_edges_from(
            (i, j, dist_arr[i, j])
            for i in range(nb_char)
            for j in range(i + 1, nb_char)
        )

        # Characteristics 41-43: weighted degree statistics
        weighted_degrees = np.array([deg for _, deg in G.degree(weight='weight')])
        if weighted_degrees.size > 0:
            avg_wdeg = float(np.mean(weighted_degrees))
            max_wdeg = float(np.max(weighted_degrees))
            min_wdeg = float(np.min(weighted_degrees))
        else:
            avg_wdeg = max_wdeg = min_wdeg = 0.0
        results["characteristic_41"] = avg_wdeg  # Average node weighted degree
        results["characteristic_42"] = max_wdeg  # Maximum node weighted degree
        results["characteristic_43"] = min_wdeg  # Minimum node weighted degree

        # Characteristics 44-45: all-pairs shortest paths (Floyd-Warshall).
        # The zero self-distances are part of the returned lengths and are
        # therefore included in the average.
        all_pairs = dict(nx.floyd_warshall(G, weight='weight'))
        sp_lengths = [
            length
            for targets in all_pairs.values()
            for length in targets.values()
        ]
        results["characteristic_44"] = float(np.mean(sp_lengths)) if sp_lengths else 0.0  # Average shortest path length
        results["characteristic_45"] = float(np.max(sp_lengths)) if sp_lengths else 0.0   # Graph diameter

        # Average weighted degree relative to maxDist * (nbCharacter - 1);
        # reported under both 46 and 49.
        norm_wdeg = (
            avg_wdeg / (max_dist * (nb_char - 1))
            if nb_char > 1 and max_dist else 0.0
        )
        results["characteristic_46"] = norm_wdeg

        # Variance of weighted degrees
        results["characteristic_47"] = (
            float(np.var(weighted_degrees)) if weighted_degrees.size > 0 else 0.0
        )

        # Sum of all weighted degrees
        results["characteristic_48"] = float(np.sum(weighted_degrees))

        # Normalized average weighted degree (same value as 46)
        results["characteristic_49"] = norm_wdeg

        # Composite measure: (numOfCodeWords * codeWordLength) / nbCharacter
        results["characteristic_50"] = (
            float((num_code_words * code_word_length) / nb_char) if nb_char else 0.0
        )

        # README text describing the analysis
        readme_text = (
            "This instance of the Fixed Length Error Correcting Codes (FLECC) problem was analyzed by extracting both basic and advanced structural characteristics. "
            "The analysis began with fundamental parameters such as the number of available characters (nbCharacter), the length of each codeword (codeWordLength), and the number of codewords to be constructed (numOfCodeWords), along with the minimum and maximum distance constraints. "
            "A detailed statistical analysis of the custom distance matrix was performed by computing the average, standard deviation, minimum, maximum, range, median, and skewness of the off-diagonal elements, which represent the distances between distinct characters. "
            "Furthermore, derived measures such as the error detection capability (minDist - 1) and error correction capability (floor((minDist - 1)/2)) were computed to capture the problem’s inherent error correcting features. "
            "Combinatorial aspects were assessed by calculating products and ratios of key parameters, providing insights into the problem size and distribution. "
            "In addition, a complete graph was constructed using NetworkX where nodes depicted characters and weighted edges corresponded to the distances between them. Graph metrics including average, maximum, and minimum weighted degrees, average shortest path length, and graph diameter were also evaluated to capture connectivity patterns. "
            "Overall, this comprehensive analysis combining statistical, combinatorial, and graph-theoretic measures offers a robust set of fifty characteristics that detail the instance’s structure and complexity, assisting in the determination of optimal solver parameters."
        )
        results["README"] = readme_text

    except Exception as e:
        # Top-level boundary: report the failure alongside whatever was
        # computed instead of crashing without any output.
        results["error"] = str(e)

    # Return the results using the helper function
    output_results(results)


# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
