#!/usr/bin/env python3
"""
Advanced Feature Extractor for Constraint Problem Graphs

This module extracts sophisticated graph-theoretic features that are meaningful
for constraint problem analysis and algorithm selection. Each feature is designed
to capture specific aspects of problem structure that influence solver behavior.

Features are modular - new ones can be added by creating a method with the
@feature decorator and following the naming convention extract_feature_*.
"""

import networkx as nx
import numpy as np
from scipy import stats, sparse
from scipy.linalg import eigvalsh
import warnings
import json
from collections import defaultdict, Counter
from functools import wraps
from pathlib import Path

warnings.filterwarnings('ignore')


def feature(description):
    """Build a decorator that tags a function as a feature extractor.

    Args:
        description: Explanatory text stored on the decorated callable as
            its ``description`` attribute.

    Returns:
        A decorator. The callable it produces forwards every call to the
        original function and carries ``is_feature = True`` together with
        ``description``.
    """
    def decorator(func):
        # Tag the undecorated function first: functools.wraps copies the
        # function's __dict__, so both markers also appear on the wrapper.
        func.description = description
        func.is_feature = True

        def passthrough(*args, **kwargs):
            return func(*args, **kwargs)

        return wraps(func)(passthrough)
    return decorator


class GraphFeatureExtractor:
    """Extract advanced features from constraint problem graphs."""
    
    def __init__(self, graph):
        """
        Initialize with a NetworkX graph.
        
        Args:
            graph: NetworkX graph following our schema (nodes have type/weight, edges have weight)
        """
        self.G = graph
        self.features = {}
        self.feature_descriptions = {}
        
        # Cache commonly used node sets
        self.type0_nodes = [n for n in self.G.nodes() if self.G.nodes[n].get('type', -1) == 0]
        self.type1_nodes = [n for n in self.G.nodes() if self.G.nodes[n].get('type', -1) == 1]
        self.type2_nodes = [n for n in self.G.nodes() if self.G.nodes[n].get('type', -1) == 2]
        
        # Cache weight dictionaries for efficiency
        self.node_weights = {n: self.G.nodes[n].get('weight', 0.5) for n in self.G.nodes()}
        self.edge_weights = {(u, v): self.G[u][v].get('weight', 0.5) for u, v in self.G.edges()}
    
    @feature("""
    Weighted Constraint Propagation Strength measures how effectively constraints can propagate 
    information through the problem structure. It computes the average weighted path strength 
    between all pairs of variable nodes (type 0), where path strength is the product of edge 
    weights along the path. High values indicate strong constraint propagation, suggesting the 
    problem may be easier for propagation-based solvers. Low values suggest weak propagation, 
    indicating local search or decomposition methods might be more effective. The measure accounts 
    for both graph connectivity and edge weights, making it sensitive to constraint tightness.
    """)
    def extract_feature_propagation_strength(self):
        """Compute weighted constraint propagation strength."""
        if len(self.type0_nodes) < 2:
            return 0.0
        
        # Sample pairs for large graphs
        sample_size = min(50, len(self.type0_nodes))
        sampled_vars = np.random.choice(self.type0_nodes, sample_size, replace=False)
        
        propagation_scores = []
        for i, v1 in enumerate(sampled_vars):
            for v2 in sampled_vars[i+1:]:
                try:
                    # Find shortest weighted path
                    path_length = nx.shortest_path_length(self.G, v1, v2, weight='weight')
                    # Convert distance to strength (inverse relationship)
                    strength = 1.0 / (1.0 + path_length)
                    propagation_scores.append(strength)
                except nx.NetworkXNoPath:
                    propagation_scores.append(0.0)
        
        return np.mean(propagation_scores) if propagation_scores else 0.0
    
    @feature("""
    Variable Bottleneck Score identifies critical variables that form bottlenecks in the constraint 
    network. It uses weighted betweenness centrality on type-0 (variable) nodes, normalized by node 
    weight to account for variable importance. High bottleneck scores indicate variables that lie on 
    many shortest paths between other variables, making them critical for solution construction. 
    Problems with high maximum bottleneck scores often benefit from variable ordering heuristics that 
    prioritize these bottleneck variables. The variance in bottleneck scores indicates whether the 
    problem has a few critical variables or many equally important ones, influencing whether adaptive 
    or static search strategies work better.
    """)
    def extract_feature_variable_bottleneck(self):
        """Compute variable bottleneck score based on weighted centrality."""
        if not self.type0_nodes:
            return {'max': 0.0, 'mean': 0.0, 'variance': 0.0}
        
        # Compute weighted betweenness for variable nodes
        var_subgraph = self.G.subgraph(self.type0_nodes)
        if var_subgraph.number_of_edges() == 0:
            return {'max': 0.0, 'mean': 0.0, 'variance': 0.0}
        
        betweenness = nx.betweenness_centrality(var_subgraph, weight='weight', normalized=True)
        
        # Weight by node importance
        weighted_scores = [betweenness.get(n, 0) * self.node_weights[n] for n in self.type0_nodes]
        
        return {
            'max': max(weighted_scores) if weighted_scores else 0.0,
            'mean': np.mean(weighted_scores) if weighted_scores else 0.0,
            'variance': np.var(weighted_scores) if weighted_scores else 0.0
        }
    
    @feature("""
    Constraint Clustering Coefficient measures how tightly constraint nodes (type 1) cluster together 
    in the graph, indicating the level of constraint interaction and problem decomposability. It 
    computes the weighted clustering coefficient for constraint nodes, where weights represent 
    constraint tightness. High clustering among constraints suggests the problem has tightly 
    interrelated constraint groups that may be solved as subproblems. Low clustering indicates 
    constraints are more uniformly distributed, suggesting monolithic solving approaches. This 
    feature is particularly useful for identifying problems amenable to decomposition methods or 
    portfolio approaches that exploit problem structure.
    """)
    def extract_feature_constraint_clustering(self):
        """Compute clustering coefficient for constraint nodes."""
        if not self.type1_nodes:
            return 0.0
        
        # Get subgraph of constraint nodes and their connections
        clustering_scores = []
        for node in self.type1_nodes:
            neighbors = list(self.G.neighbors(node))
            if len(neighbors) < 2:
                clustering_scores.append(0.0)
                continue
            
            # Count triangles weighted by edge weights
            triangles = 0
            possible_triangles = 0
            for i, n1 in enumerate(neighbors):
                for n2 in neighbors[i+1:]:
                    possible_triangles += 1
                    if self.G.has_edge(n1, n2):
                        # Weight triangle by all three edges
                        w1 = self.G[node][n1].get('weight', 0.5)
                        w2 = self.G[node][n2].get('weight', 0.5)
                        w3 = self.G[n1][n2].get('weight', 0.5)
                        triangles += (w1 * w2 * w3) ** (1/3)  # Geometric mean
            
            clustering = triangles / possible_triangles if possible_triangles > 0 else 0
            clustering_scores.append(clustering * self.node_weights[node])
        
        return np.mean(clustering_scores) if clustering_scores else 0.0
    
    @feature("""
    Weighted Path Diversity measures the availability of alternative solution paths in the constraint 
    network. It samples pairs of variable nodes and counts the number of edge-disjoint paths between 
    them, weighted by path quality (product of edge weights). High diversity indicates multiple ways 
    to satisfy constraints, suggesting the problem has flexibility and may be easier to solve. Low 
    diversity indicates rigid constraint structure with few alternatives, often characterizing harder 
    problems. This metric helps identify whether diversification strategies like restarts or parallel 
    search would be beneficial, as problems with high path diversity offer more opportunities for 
    finding alternative solutions.
    """)
    def extract_feature_path_diversity(self):
        """Compute weighted diversity of paths between variables."""
        if len(self.type0_nodes) < 2:
            return 0.0
        
        # Sample variable pairs
        sample_size = min(30, len(self.type0_nodes))
        sampled = np.random.choice(self.type0_nodes, sample_size, replace=False)
        
        diversity_scores = []
        for i, v1 in enumerate(sampled):
            for v2 in sampled[i+1:]:
                try:
                    # Count edge-disjoint paths (approximation)
                    paths = list(nx.edge_disjoint_paths(self.G, v1, v2))
                    
                    # Weight by path quality
                    path_weights = []
                    for path in paths[:5]:  # Limit to first 5 paths
                        path_weight = 1.0
                        for j in range(len(path)-1):
                            edge_weight = self.G[path[j]][path[j+1]].get('weight', 0.5)
                            path_weight *= edge_weight
                        path_weights.append(path_weight)
                    
                    diversity = len(paths) * np.mean(path_weights) if path_weights else 0
                    diversity_scores.append(min(diversity, 1.0))  # Normalize
                except:
                    diversity_scores.append(0.0)
        
        return np.mean(diversity_scores) if diversity_scores else 0.0
    
    @feature("""
    Core-Periphery Structure Score measures the extent to which the constraint graph exhibits a 
    core-periphery organization, where a dense core of highly connected nodes is surrounded by a 
    sparse periphery. This structure is common in real-world constraint problems where critical 
    variables and constraints form a tightly connected core. The score uses weighted k-core 
    decomposition to identify the core and measures the weight concentration in core nodes. High 
    scores indicate clear core-periphery structure, suggesting that focusing search on core variables 
    first may be effective. Low scores indicate more uniform structure, suggesting breadth-first or 
    randomized strategies may work better.
    """)
    def extract_feature_core_periphery(self):
        """Compute core-periphery structure score."""
        if self.G.number_of_nodes() < 4:
            return 0.0
        
        # Compute weighted degree for all nodes
        weighted_degrees = {}
        for node in self.G.nodes():
            weight_sum = sum(self.G[node][nbr].get('weight', 0.5) for nbr in self.G.neighbors(node))
            weighted_degrees[node] = weight_sum * self.node_weights[node]
        
        # Identify core (top 20% by weighted degree)
        sorted_nodes = sorted(weighted_degrees.items(), key=lambda x: x[1], reverse=True)
        core_size = max(1, len(sorted_nodes) // 5)
        core_nodes = set([node for node, _ in sorted_nodes[:core_size]])
        periphery_nodes = set(self.G.nodes()) - core_nodes
        
        if not periphery_nodes:
            return 0.0
        
        # Measure weight concentration in core
        core_weight = sum(self.node_weights[n] for n in core_nodes)
        total_weight = sum(self.node_weights.values())
        
        # Measure edge density difference
        core_subgraph = self.G.subgraph(core_nodes)
        core_density = nx.density(core_subgraph) if len(core_nodes) > 1 else 0
        
        periphery_subgraph = self.G.subgraph(periphery_nodes)
        periphery_density = nx.density(periphery_subgraph) if len(periphery_nodes) > 1 else 0
        
        # Combine metrics
        weight_concentration = core_weight / total_weight
        density_ratio = (core_density / (periphery_density + 0.01))  # Avoid division by zero
        
        return min(1.0, weight_concentration * (density_ratio ** 0.5))
    
    @feature("""
    Constraint Tightness Distribution analyzes the statistical distribution of constraint weights 
    (type 1 nodes), providing insights into problem heterogeneity. It computes mean, variance, 
    skewness, and kurtosis of constraint weights. Uniform distributions (low variance, near-zero 
    skewness) suggest homogeneous problems that may respond well to uniform strategies. Skewed 
    distributions indicate a mix of easy and hard constraints, suggesting adaptive strategies that 
    handle different constraint types differently. High kurtosis indicates extreme constraints 
    (very tight or very loose), which often benefit from preprocessing to identify and handle 
    these special cases. This feature helps select between uniform versus adaptive solving strategies.
    """)
    def extract_feature_tightness_distribution(self):
        """Analyze statistical distribution of constraint tightness."""
        if not self.type1_nodes:
            return {'mean': 0.5, 'variance': 0.0, 'skewness': 0.0, 'kurtosis': 0.0}
        
        constraint_weights = [self.node_weights[n] for n in self.type1_nodes]
        
        return {
            'mean': np.mean(constraint_weights),
            'variance': np.var(constraint_weights),
            'skewness': stats.skew(constraint_weights) if len(constraint_weights) > 2 else 0.0,
            'kurtosis': stats.kurtosis(constraint_weights) if len(constraint_weights) > 3 else 0.0
        }
    
    @feature("""
    Variable-Constraint Bipartite Modularity measures how well the graph separates into variable 
    and constraint communities, indicating problem structure clarity. Using the natural bipartite 
    structure of constraint problems (variables connected to constraints), it computes the modularity 
    of this partition weighted by node and edge weights. High modularity indicates clear separation 
    between variable and constraint layers with minimal cross-connections, suggesting structured 
    problems amenable to level-based or phase-based solving. Low modularity indicates heavy 
    interconnection between all components, suggesting need for global solving strategies. This 
    helps identify whether local or global consistency techniques would be more effective.
    """)
    def extract_feature_bipartite_modularity(self):
        """Compute modularity of variable-constraint bipartite structure."""
        if not self.type0_nodes or not self.type1_nodes:
            return 0.0
        
        # Create partition: type 0 in one community, type 1 in another
        partition = []
        partition.append(set(self.type0_nodes))
        partition.append(set(self.type1_nodes))
        
        # Add type 2 nodes to the partition they're most connected to
        for node in self.type2_nodes:
            type0_weight = sum(self.G[node][nbr].get('weight', 0.5) 
                             for nbr in self.G.neighbors(node) if nbr in self.type0_nodes)
            type1_weight = sum(self.G[node][nbr].get('weight', 0.5) 
                             for nbr in self.G.neighbors(node) if nbr in self.type1_nodes)
            
            if type0_weight > type1_weight:
                partition[0].add(node)
            else:
                partition[1].add(node)
        
        # Compute weighted modularity
        try:
            modularity = nx.algorithms.community.modularity(self.G, partition, weight='weight')
        except:
            modularity = 0.0
        
        return modularity
    
    @feature("""
    Weighted Eccentricity Variance measures the variability in weighted eccentricity across nodes, 
    where eccentricity is the maximum weighted distance from a node to any other node. High variance 
    indicates uneven problem structure with some variables being much more central than others, 
    suggesting variable ordering heuristics based on centrality would be effective. Low variance 
    indicates uniform structure where all variables are similarly positioned, suggesting random or 
    round-robin strategies might work as well as sophisticated heuristics. The weighted version 
    accounts for both graph topology and constraint/variable importance, providing a more nuanced 
    measure than pure topological eccentricity.
    """)
    def extract_feature_eccentricity_variance(self):
        """Compute variance in weighted eccentricity."""
        if not nx.is_connected(self.G):
            # Use largest connected component
            largest_cc = max(nx.connected_components(self.G), key=len)
            if len(largest_cc) < 2:
                return 0.0
            subgraph = self.G.subgraph(largest_cc)
        else:
            subgraph = self.G
        
        # Sample nodes for large graphs
        sample_size = min(50, subgraph.number_of_nodes())
        sampled_nodes = np.random.choice(list(subgraph.nodes()), sample_size, replace=False)
        
        eccentricities = []
        for node in sampled_nodes:
            # Compute weighted eccentricity
            lengths = nx.single_source_dijkstra_path_length(subgraph, node, weight='weight')
            if lengths:
                max_dist = max(lengths.values())
                # Weight by node importance
                weighted_ecc = max_dist / (self.node_weights[node] + 0.1)
                eccentricities.append(weighted_ecc)
        
        return np.var(eccentricities) if eccentricities else 0.0
    
    @feature("""
    Critical Edge Ratio identifies the proportion of edges whose removal would significantly impact 
    graph connectivity, specifically bridge edges and near-bridge edges weighted by their importance. 
    Critical edges represent constraints that cannot be easily bypassed or substituted. High ratios 
    indicate fragile problem structure where many constraints are essential, suggesting careful 
    handling and possibly exact methods. Low ratios indicate robust structure with redundancy, 
    suggesting local search or approximation methods could work well. The measure considers both 
    pure bridges and edges with high betweenness centrality, weighted by edge importance to identify 
    truly critical constraint relationships.
    """)
    def extract_feature_critical_edges(self):
        """Compute ratio of critical edges (bridges and high-betweenness edges)."""
        if self.G.number_of_edges() == 0:
            return 0.0
        
        # Find bridge edges
        bridges = set(nx.bridges(self.G)) if nx.is_connected(self.G) else set()
        
        # Find high-betweenness edges (approximation for large graphs)
        if self.G.number_of_edges() < 1000:
            edge_betweenness = nx.edge_betweenness_centrality(self.G, weight='weight', normalized=True)
            threshold = np.percentile(list(edge_betweenness.values()), 90) if edge_betweenness else 0
            high_betweenness = set(e for e, b in edge_betweenness.items() if b > threshold)
        else:
            high_betweenness = set()
        
        critical_edges = bridges | high_betweenness
        
        # Weight by edge importance
        critical_weight = sum(self.edge_weights.get((u, v), self.edge_weights.get((v, u), 0.5))
                            for u, v in critical_edges)
        total_weight = sum(self.edge_weights.values())
        
        return critical_weight / total_weight if total_weight > 0 else 0.0
    
    @feature("""
    Constraint Overlap Density measures how much constraints share variables, computed as the 
    average number of common neighbors between constraint nodes weighted by constraint importance. 
    High overlap indicates constraints are tightly interrelated, making the problem harder as 
    satisfying one constraint affects many others. This suggests need for global consistency 
    techniques and careful propagation. Low overlap indicates more independent constraints that 
    can potentially be satisfied separately, suggesting decomposition or parallel solving strategies. 
    The weighted version prioritizes overlap between important constraints, providing better 
    prediction of propagation complexity than simple overlap counts.
    """)
    def extract_feature_constraint_overlap(self):
        """Measure how much constraints share variables."""
        if len(self.type1_nodes) < 2:
            return 0.0
        
        overlap_scores = []
        
        # Sample constraint pairs for large graphs
        sample_size = min(50, len(self.type1_nodes))
        sampled_constraints = np.random.choice(self.type1_nodes, sample_size, replace=False)
        
        for i, c1 in enumerate(sampled_constraints):
            c1_neighbors = set(self.G.neighbors(c1))
            c1_weight = self.node_weights[c1]
            
            for c2 in sampled_constraints[i+1:]:
                c2_neighbors = set(self.G.neighbors(c2))
                c2_weight = self.node_weights[c2]
                
                # Common neighbors (shared variables)
                common = c1_neighbors & c2_neighbors
                if not common:
                    continue
                
                # Weight by constraint importance and overlap size
                overlap = len(common) / len(c1_neighbors | c2_neighbors)
                weighted_overlap = overlap * (c1_weight * c2_weight) ** 0.5
                overlap_scores.append(weighted_overlap)
        
        return np.mean(overlap_scores) if overlap_scores else 0.0
    
    @feature("""
    Weighted Assortativity measures the tendency of nodes to connect to other nodes with similar 
    weight values, indicating whether hard constraints connect to other hard constraints or to easy 
    ones. Positive assortativity means similar-weight nodes cluster together, suggesting the problem 
    has distinct easy and hard regions that could be tackled separately. Negative assortativity means 
    high-weight nodes connect to low-weight ones, indicating mixed difficulty throughout the problem. 
    Near-zero assortativity suggests random mixing. This helps determine whether to use uniform 
    strategies or adaptive ones that handle different regions differently, and whether problem 
    decomposition along difficulty lines would be beneficial.
    """)
    def extract_feature_weighted_assortativity(self):
        """Compute assortativity based on node weights."""
        if self.G.number_of_edges() == 0:
            return 0.0
        
        # Create node attribute dictionary for weights
        nx.set_node_attributes(self.G, self.node_weights, 'weight_attr')
        
        try:
            assortativity = nx.numeric_assortativity_coefficient(self.G, 'weight_attr', weight='weight')
        except:
            assortativity = 0.0
        
        return assortativity
    
    @feature("""
    Spectral Gap measures the difference between the two largest eigenvalues of the weighted graph 
    Laplacian, providing insight into graph connectivity and expansion properties. Large spectral 
    gaps indicate good expansion and mixing properties, suggesting the problem has no bottlenecks 
    and information can flow freely - often easier for propagation-based solvers. Small gaps indicate 
    poor expansion, bottlenecks, or near-decomposability into separate components, suggesting the 
    problem might benefit from decomposition methods or specialized handling of bottlenecks. The 
    weighted version accounts for edge strengths, providing a more accurate measure of information 
    flow in the constraint network than the unweighted spectral gap.
    """)
    def extract_feature_spectral_gap(self):
        """Compute spectral gap from weighted Laplacian eigenvalues."""
        if self.G.number_of_nodes() < 3:
            return 0.0
        
        try:
            # Get largest connected component for meaningful spectral analysis
            if not nx.is_connected(self.G):
                largest_cc = max(nx.connected_components(self.G), key=len)
                subgraph = self.G.subgraph(largest_cc)
            else:
                subgraph = self.G
            
            if subgraph.number_of_nodes() < 3:
                return 0.0
            
            # Compute weighted Laplacian
            L = nx.laplacian_matrix(subgraph, weight='weight').astype(float)
            
            # Get eigenvalues (only need top 2)
            if subgraph.number_of_nodes() > 100:
                # Use sparse computation for large graphs
                from scipy.sparse.linalg import eigsh
                eigenvalues = eigsh(L, k=min(3, subgraph.number_of_nodes()-1), 
                                   which='SM', return_eigenvectors=False)
                eigenvalues = sorted(eigenvalues)
            else:
                eigenvalues = eigvalsh(L.todense())
            
            # Spectral gap is difference between second and first non-zero eigenvalue
            # First eigenvalue is always 0 for connected graphs
            non_zero_eigs = [e for e in eigenvalues if e > 1e-10]
            if len(non_zero_eigs) >= 2:
                return non_zero_eigs[1] - non_zero_eigs[0]
            elif len(non_zero_eigs) == 1:
                return non_zero_eigs[0]
            else:
                return 0.0
        except:
            return 0.0
    
    @feature("""
    Weighted Rich-Club Coefficient measures whether high-degree nodes (hubs) preferentially connect 
    to each other, forming a "rich club" that dominates problem structure. High coefficients indicate 
    presence of a hub-based architecture where key variables or constraints form a tightly connected 
    core. This suggests focusing on these hubs first in search, as they heavily influence the rest 
    of the problem. Low coefficients indicate more democratic structure without dominant hubs. The 
    weighted version considers both node degree and edge weights, identifying truly influential hubs 
    rather than just highly connected nodes. This helps identify whether hub-focused or uniform 
    strategies would be more effective.
    """)
    def extract_feature_rich_club(self):
        """Compute weighted rich-club coefficient."""
        if self.G.number_of_nodes() < 4:
            return 0.0
        
        # Compute weighted degree
        weighted_degrees = {}
        for node in self.G.nodes():
            w_degree = sum(self.G[node][nbr].get('weight', 0.5) for nbr in self.G.neighbors(node))
            weighted_degrees[node] = w_degree * self.node_weights[node]
        
        # Identify rich nodes (top 20%)
        sorted_nodes = sorted(weighted_degrees.items(), key=lambda x: x[1], reverse=True)
        rich_size = max(2, len(sorted_nodes) // 5)
        rich_nodes = set([node for node, _ in sorted_nodes[:rich_size]])
        
        if len(rich_nodes) < 2:
            return 0.0
        
        # Compute edge density within rich club
        rich_subgraph = self.G.subgraph(rich_nodes)
        actual_edges = rich_subgraph.number_of_edges()
        possible_edges = len(rich_nodes) * (len(rich_nodes) - 1) / 2
        
        if possible_edges == 0:
            return 0.0
        
        # Weight by edge strengths within rich club
        rich_edge_weight = sum(rich_subgraph[u][v].get('weight', 0.5) 
                              for u, v in rich_subgraph.edges())
        
        # Normalized rich-club coefficient
        density = actual_edges / possible_edges
        avg_weight = rich_edge_weight / max(actual_edges, 1)
        
        return density * avg_weight
    
    @feature("""
    Constraint Hierarchical Structure measures the hierarchical organization of constraints using 
    flow hierarchy analysis. High hierarchy indicates constraints can be organized into levels with 
    clear dependencies, suggesting level-based or topological solving approaches. Low hierarchy 
    indicates cyclic dependencies and feedback loops, requiring iterative or fixpoint-based solving. 
    The measure uses weighted flow to identify dominant directions in the constraint graph, with 
    higher weights indicating stronger dependencies. This helps determine whether forward-chaining, 
    backward-chaining, or iterative propagation strategies would be most effective, and whether the 
    problem admits a natural solving order.
    """)
    def extract_feature_hierarchical_structure(self):
        """Measure hierarchical structure in constraint dependencies."""
        if not self.type1_nodes or len(self.type1_nodes) < 2:
            return 0.0
        
        # Create directed version based on node weights (higher weight = higher level)
        DG = nx.DiGraph()
        
        # Add edges directed from lower to higher weight nodes
        for u, v in self.G.edges():
            u_weight = self.node_weights[u]
            v_weight = self.node_weights[v]
            edge_weight = self.G[u][v].get('weight', 0.5)
            
            if u_weight < v_weight:
                DG.add_edge(u, v, weight=edge_weight)
            elif v_weight < u_weight:
                DG.add_edge(v, u, weight=edge_weight)
            else:
                # Equal weight - add both directions with half weight
                DG.add_edge(u, v, weight=edge_weight * 0.5)
                DG.add_edge(v, u, weight=edge_weight * 0.5)
        
        # Measure hierarchy using flow
        if DG.number_of_edges() == 0:
            return 0.0
        
        # Compute strongly connected components
        sccs = list(nx.strongly_connected_components(DG))
        
        # Hierarchy score based on SCC structure
        n_sccs = len(sccs)
        max_scc_size = max(len(scc) for scc in sccs)
        
        # Perfect hierarchy = many small SCCs, poor hierarchy = one large SCC
        hierarchy_score = (n_sccs - 1) / max(self.G.number_of_nodes() - 1, 1)
        size_penalty = max_scc_size / self.G.number_of_nodes()
        
        return hierarchy_score * (1 - size_penalty)
    
    @feature("""
    Problem Phase Transition Indicator estimates proximity to phase transition region where problems 
    shift from under-constrained to over-constrained. It analyzes the balance between constraints 
    and variables, weighted by their importance and connectivity. Values near 0.5 indicate problems 
    near the phase transition, typically the hardest instances requiring sophisticated solving. Values 
    near 0 indicate under-constrained problems (many solutions, easy to find one), while values near 
    1 indicate over-constrained problems (few or no solutions, easy to prove unsatisfiability). This 
    helps select between satisfaction-focused strategies (for under-constrained) versus 
    conflict-focused strategies (for over-constrained).
    """)
    def extract_feature_phase_transition(self):
        """Estimate proximity to phase transition region."""
        if not self.type0_nodes or not self.type1_nodes:
            return 0.5
        
        # Constraint-to-variable ratio
        base_ratio = len(self.type1_nodes) / len(self.type0_nodes)
        
        # Weight by average constraint tightness
        avg_constraint_weight = np.mean([self.node_weights[n] for n in self.type1_nodes])
        
        # Weight by average variable flexibility (inverse of weight)
        avg_variable_flex = np.mean([1 - self.node_weights[n] for n in self.type0_nodes])
        
        # Combine metrics (normalized to [0,1])
        weighted_ratio = base_ratio * avg_constraint_weight / (avg_variable_flex + 0.1)
        
        # Map to [0,1] with sigmoid-like function
        phase_indicator = 1 / (1 + np.exp(-2 * (weighted_ratio - 1)))
        
        return phase_indicator
    
    def extract_all_features(self):
        """
        Extract all registered features.
        
        Returns:
            dict: All features with their values and descriptions
        """
        # Find all methods with @feature decorator
        feature_methods = []
        for method_name in dir(self):
            if method_name.startswith('extract_feature_'):
                method = getattr(self, method_name)
                if hasattr(method, 'is_feature') and method.is_feature:
                    feature_methods.append((method_name, method))
        
        # Extract each feature
        for method_name, method in feature_methods:
            feature_name = method_name.replace('extract_feature_', '')
            try:
                value = method()
                
                # Handle dict returns (multi-value features)
                if isinstance(value, dict):
                    for k, v in value.items():
                        self.features[f'{feature_name}_{k}'] = v
                        self.feature_descriptions[f'{feature_name}_{k}'] = method.description.strip()
                else:
                    self.features[feature_name] = value
                    self.feature_descriptions[feature_name] = method.description.strip()
            except Exception as e:
                print(f"Warning: Failed to extract {feature_name}: {e}")
                self.features[feature_name] = 0.0
                self.feature_descriptions[feature_name] = method.description.strip()
        
        # Add basic statistics
        self._add_basic_features()
        
        return self.features
    
    def _add_basic_features(self):
        """
        Record size, density and per-type node counts with their descriptions.

        Writes six entries (``n_nodes``, ``n_edges``, ``density``,
        ``n_type0``, ``n_type1``, ``n_type2``) into ``self.features`` and a
        matching human-readable description for each into
        ``self.feature_descriptions``.
        """
        graph = self.G

        self.features.update({
            'n_nodes': graph.number_of_nodes(),
            'n_edges': graph.number_of_edges(),
            'density': nx.density(graph),
            'n_type0': len(self.type0_nodes),
            'n_type1': len(self.type1_nodes),
            'n_type2': len(self.type2_nodes),
        })

        self.feature_descriptions.update({
            'n_nodes': "Total number of nodes in the graph",
            'n_edges': "Total number of edges in the graph",
            'density': "Graph density (edges / possible edges)",
            'n_type0': "Number of variable nodes (type 0)",
            'n_type1': "Number of constraint nodes (type 1)",
            'n_type2': "Number of resource/objective nodes (type 2)",
        })
    
    def save_features(self, output_path):
        """
        Save features to JSON file with descriptions.
        
        Args:
            output_path: Path for output JSON file
        """
        output = {
            'features': self.features,
            'descriptions': self.feature_descriptions,
            'metadata': {
                'graph_file': str(output_path),
                'num_features': len(self.features),
                'extraction_method': 'GraphFeatureExtractor'
            }
        }
        
        with open(output_path, 'w') as f:
            json.dump(output, f, indent=2)
    
    def get_feature_vector(self):
        """
        Get features as numpy array in consistent order.
        
        Returns:
            np.array: Feature vector
        """
        if not self.features:
            self.extract_all_features()
        
        # Sort features by name for consistency
        sorted_features = sorted(self.features.items())
        return np.array([v for k, v in sorted_features])
    
    def get_feature_names(self):
        """
        Get feature names in same order as feature vector.
        
        Returns:
            list: Feature names
        """
        if not self.features:
            self.extract_all_features()
        
        return sorted(self.features.keys())
    
    def print_feature_summary(self):
        """Print a summary of extracted features with descriptions."""
        if not self.features:
            self.extract_all_features()
        
        print("=" * 80)
        print("ADVANCED FEATURE EXTRACTION SUMMARY")
        print("=" * 80)
        print(f"Total features extracted: {len(self.features)}\n")
        
        # Group features by category
        categories = defaultdict(list)
        for name, value in sorted(self.features.items()):
            if '_' in name:
                category = name.split('_')[0]
            else:
                category = 'basic'
            categories[category].append((name, value))
        
        for category, features in sorted(categories.items()):
            print(f"\n{category.upper()} Features:")
            print("-" * 40)
            for name, value in features:
                if isinstance(value, float):
                    print(f"  {name:30} = {value:8.4f}")
                else:
                    print(f"  {name:30} = {value}")
                
                # Print description if available
                if name in self.feature_descriptions:
                    desc = self.feature_descriptions[name]
                    # Wrap description text
                    words = desc.split()
                    lines = []
                    current_line = []
                    current_length = 0
                    
                    for word in words:
                        if current_length + len(word) + 1 > 70:
                            lines.append(' '.join(current_line))
                            current_line = [word]
                            current_length = len(word)
                        else:
                            current_line.append(word)
                            current_length += len(word) + 1
                    
                    if current_line:
                        lines.append(' '.join(current_line))
                    
                    for line in lines:
                        print(f"    {line}")
                    print()


def test_advanced_features():
    """
    Demo run: build a small sample graph, extract features, print and save them.

    The sample graph has five type-0 nodes, three type-1 nodes and one
    type-2 node joined by ten weighted edges. Results are printed to stdout
    and written to ``test_features.json`` in the current working directory.
    """
    graph = nx.Graph()

    # Variable nodes (type 0) with increasing weights.
    graph.add_nodes_from(
        (f'var_{i}', {'type': 0, 'weight': 0.5 + i*0.1}) for i in range(5)
    )

    # Constraint nodes (type 1).
    graph.add_nodes_from(
        (f'cons_{i}', {'type': 1, 'weight': 0.7 + i*0.1}) for i in range(3)
    )

    # Single resource node (type 2).
    graph.add_node('resource', type=2, weight=0.8)

    # (u, v, weight) triples; the third item is stored as the 'weight' attribute.
    graph.add_weighted_edges_from([
        ('var_0', 'cons_0', 0.9),
        ('var_1', 'cons_0', 0.8),
        ('var_2', 'cons_1', 0.7),
        ('var_3', 'cons_1', 0.6),
        ('var_4', 'cons_2', 0.9),
        ('var_0', 'cons_2', 0.5),
        ('cons_0', 'resource', 0.8),
        ('cons_1', 'resource', 0.7),
        ('var_0', 'var_1', 0.4),
        ('var_2', 'var_3', 0.4),
    ])

    extractor = GraphFeatureExtractor(graph)
    extractor.extract_all_features()

    extractor.print_feature_summary()

    extractor.save_features('test_features.json')
    print("\nFeatures saved to test_features.json")


# Script entry point: running this module directly executes the demo, which
# prints a feature summary and writes test_features.json to the working directory.
if __name__ == "__main__":
    test_advanced_features()