#!/usr/bin/env python3
"""
Graph converter for monitor_1id problem.
Converter created with subagent_prompt.md v_02

This problem is about network monitoring where monitors must be placed on network nodes
to ensure all nodes can be uniquely identified through measurement paths.
Key challenges: minimizing monitors while ensuring 1-identifiability constraint and coverage.
"""

import json
import math
import sys
from itertools import combinations, islice
from pathlib import Path

import networkx as nx


def _pair_route_endpoints(routes_ends):
    """Group a flat ``[s1, e1, s2, e2, ...]`` list into ``(start, end)`` tuples.

    A trailing unpaired element (odd-length input) is ignored.
    """
    return list(zip(routes_ends[0::2], routes_ends[1::2]))


def _index_routes_by_endpoint(route_pairs):
    """Map each endpoint value to the set of route indices touching it."""
    routes_at = {}
    for route_idx, (start, end) in enumerate(route_pairs):
        routes_at.setdefault(start, set()).add(route_idx)
        routes_at.setdefault(end, set()).add(route_idx)
    return routes_at


def _is_network_node(node, n):
    """Return True when *node* is a valid 1-based network node id."""
    return 1 <= node <= n


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the monitor placement problem.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys read (all optional):
            'n' (node count), 'r' (route count), 'routes_ends' (flat list of
            route endpoints, consumed two at a time), 'bi_comp' (iterable of
            node collections), 'leaf_nodes' (collection of node ids).

    Returns:
        networkx.Graph whose vertices carry ``type`` and ``weight`` attributes
        and whose edges carry a ``weight`` attribute.

    Strategy: variable nodes plus explicit constraint nodes
    - Network nodes ``node_<i>`` (type 0) weighted by leaf status, route
      involvement and biconnected-component membership
    - Route constraint nodes ``route_constraint_<k>`` (type 1)
    - Coverage constraint nodes ``coverage_<i>`` (type 1)
    - 1-identifiability constraint nodes ``distinguish_<a>_<b>`` (type 1)
    - Biconnected component constraint nodes ``bicomp_<k>`` (type 1)
    - Edges connect nodes to constraints they participate in

    Note: the result is not strictly bipartite. Coverage and distinguish
    constraints are also linked to route constraints, and nodes of larger
    biconnected components are linked to each other by conflict edges.
    """
    n = json_data.get('n', 0)
    r = json_data.get('r', 0)
    routes_ends = json_data.get('routes_ends', [])
    bi_comp = json_data.get('bi_comp', [])
    leaf_nodes = json_data.get('leaf_nodes', [])

    route_pairs = _pair_route_endpoints(routes_ends)
    # Built once so per-node route involvement is a lookup, not a full scan.
    routes_at = _index_routes_by_endpoint(route_pairs)
    no_routes = frozenset()

    leaf_set = set(leaf_nodes)
    bicomp_members = {member for comp in bi_comp for member in comp}

    G = nx.Graph()

    # Network node variables (type 0).
    for node in range(1, n + 1):
        weight = 0.3  # base weight for being a network node
        if node in leaf_set:
            weight += 0.4
        if r > 0:
            # Share of all routes that start or end at this node.
            route_involvement = len(routes_at.get(node, no_routes)) / r
            weight += route_involvement * 0.3
        if node in bicomp_members:
            weight += 0.2
        G.add_node(f'node_{node}', type=0, weight=min(weight, 1.0))

    # Route endpoint constraint nodes (type 1).
    max_connections = max(r - 1, 1)  # avoid division by zero
    for route_idx, (start, end) in enumerate(route_pairs):
        # Number of *other* routes sharing each endpoint.
        shared_start = len(routes_at[start]) - 1
        shared_end = len(routes_at[end]) - 1
        # Clamped so the weight stays within [0.6, 1.0] even if 'r' does not
        # match the actual number of routes; a no-op for consistent data.
        connectivity = min(
            (shared_start + shared_end) / (2 * max_connections), 1.0
        )
        tightness = 0.6 + connectivity * 0.4

        constraint_id = f'route_constraint_{route_idx}'
        G.add_node(constraint_id, type=1, weight=tightness)

        # Only link endpoints that exist as network nodes. add_edge would
        # otherwise silently create a vertex without type/weight attributes.
        for endpoint in (start, end):
            if _is_network_node(endpoint, n):
                G.add_edge(f'node_{endpoint}', constraint_id, weight=0.9)

    # Coverage constraint nodes (type 1), one per network node.
    coverage_scale = max(r // 4, 1)
    for node in range(1, n + 1):
        node_routes = routes_at.get(node, no_routes)
        if node_routes:
            # Fewer endpoint routes -> higher scarcity -> heavier constraint.
            scarcity = 1.0 - min(len(node_routes) / coverage_scale, 1.0)
        else:
            scarcity = 1.0
        coverage_tightness = 0.4 + scarcity * 0.6

        constraint_id = f'coverage_{node}'
        G.add_node(constraint_id, type=1, weight=coverage_tightness)
        G.add_edge(f'node_{node}', constraint_id, weight=0.8)

        # Weaker links to the routes that have this node as an endpoint.
        for route_idx in sorted(node_routes):
            G.add_edge(
                constraint_id, f'route_constraint_{route_idx}', weight=0.3
            )

    # 1-identifiability constraint nodes (type 1) for node pairs.
    # Pairs are taken in lexicographic order and capped to avoid explosion on
    # large networks. The historical cap check ran after insertion, so up to
    # cap + 1 pairs are emitted; that count is preserved for stable output.
    pair_cap = min(n * 5, 200)
    all_pairs = combinations(range(1, n + 1), 2)
    for node_a, node_b in islice(all_pairs, max(pair_cap + 1, 0)):
        routes_a = routes_at.get(node_a, no_routes)
        routes_b = routes_at.get(node_b, no_routes)

        if routes_a or routes_b:
            # Jaccard-style overlap of the two nodes' endpoint routes.
            similarity = len(routes_a & routes_b) / max(
                len(routes_a | routes_b), 1
            )
        else:
            similarity = 1.0  # no routes at either node
        difficulty = 0.3 + similarity * 0.7

        constraint_id = f'distinguish_{node_a}_{node_b}'
        G.add_node(constraint_id, type=1, weight=difficulty)
        G.add_edge(f'node_{node_a}', constraint_id, weight=0.7)
        G.add_edge(f'node_{node_b}', constraint_id, weight=0.7)

        # Routes touching exactly one of the two nodes (symmetric difference).
        for route_idx in sorted(routes_a ^ routes_b):
            G.add_edge(
                constraint_id, f'route_constraint_{route_idx}', weight=0.4
            )

    # Biconnected component constraint nodes (type 1).
    bicomp_scale = max(n // 4, 1)
    for comp_idx, comp in enumerate(bi_comp):
        if not comp:  # skip empty components
            continue

        size_factor = min(len(comp) / bicomp_scale, 1.0)
        comp_tightness = 0.5 + size_factor * 0.5

        constraint_id = f'bicomp_{comp_idx}'
        G.add_node(constraint_id, type=1, weight=comp_tightness)

        for node in comp:
            if _is_network_node(node, n):
                G.add_edge(f'node_{node}', constraint_id, weight=0.6)

    # Conflict edges between nodes of the same larger component.
    for comp in bi_comp:
        if len(comp) < 4:
            continue
        comp_nodes = [node for node in comp if _is_network_node(node, n)]
        # Smaller component -> stronger conflict; never below 0.2.
        conflict_strength = max(0.2, 1.0 - len(comp_nodes) / 10)
        # Only the first six valid members are pairwise linked.
        for node_a, node_b in combinations(comp_nodes[:6], 2):
            G.add_edge(
                f'node_{node_a}', f'node_{node_b}', weight=conflict_strength
            )

    return G


def main():
    """Command-line entry point: load the JSON instance and build its graph.

    Expects exactly three arguments: the .mzn path, the .dzn path and the
    JSON path. Only the JSON file is opened; the .mzn path is forwarded to
    build_graph and the .dzn path is accepted but not used. Prints a usage
    message and exits with status 1 on a wrong argument count, otherwise
    prints the node and edge counts of the built graph.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()