#!/usr/bin/env python3
"""
Graph converter for model_2 problem.
Created using subagent_prompt.md version: v_02

This problem is about compiler instruction selection optimization using the UNISON approach.
It involves selecting instruction patterns (matches) to cover all operations while minimizing 
latency cost subject to register allocation, control flow, and data dependency constraints.

Key challenges: Complex interdependencies between instruction selection, register allocation,
and block scheduling. The problem requires coordinating many types of constraints including
dominance, location sharing, and block succession constraints.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _complete_tuples(flat, width):
    """Split a flat array into consecutive ``width``-sized tuples.

    A trailing partial tuple is dropped. A list of rows (list of lists) is
    flattened row-major first, so both flat and 2-D encodings are accepted.
    ``None`` is treated as an empty array.
    """
    flat = list(flat or [])
    if flat and all(isinstance(row, (list, tuple)) for row in flat):
        flat = [value for row in flat for value in row]
    usable = len(flat) - len(flat) % width
    return [tuple(flat[i:i + width]) for i in range(0, usable, width)]


def _expand_set(members):
    """Return the integer members of one set as a list.

    Accepts a plain sequence, a single integer, ``None``, or the MiniZinc JSON
    set encoding ``{"set": [...]}`` in which a two-element list ``[lo, hi]``
    stands for the inclusive range ``lo..hi``.
    """
    if isinstance(members, dict):
        members = members.get('set', [])
    if isinstance(members, int):
        return [members]
    expanded = []
    for item in members or []:
        if isinstance(item, (list, tuple)):
            if len(item) == 2:
                expanded.extend(range(item[0], item[1] + 1))
        else:
            expanded.append(item)
    return expanded


def _per_match_sets(raw, num_matches):
    """Normalize a per-match collection of index sets.

    Returns a list of length ``num_matches`` whose entry ``m`` lists the
    indices associated with match ``m``. ``raw`` may be a list indexed by
    match position, or a dict keyed by match id; dict keys may be ints or
    strings (``json.load`` always produces string keys).
    """
    result = [[] for _ in range(num_matches)]
    if isinstance(raw, dict):
        for key, members in raw.items():
            try:
                match_id = int(key)
            except (TypeError, ValueError):
                continue
            if 0 <= match_id < num_matches:
                result[match_id] = _expand_set(members)
    elif isinstance(raw, (list, tuple)):
        # NOTE(review): assumes list position m corresponds to match id m
        # (0-based), matching how match ids are used everywhere else here.
        for match_id, members in enumerate(raw[:num_matches]):
            result[match_id] = _expand_set(members)
    return result


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the instruction selection problem instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys read:
            numOperationsInFunction, numDataInFunction, numMatches,
            numBlocksInFunction, numLocations, locDomain, sameLoc, inBlock,
            inBlockSucc, nonCopyMatches, operationsCoveredByMatch,
            dataDefinedByMatch. Missing keys default to 0 / empty.

    Returns:
        networkx.Graph whose nodes carry ``type`` and ``weight`` attributes
        and whose edges carry a ``weight`` attribute.

    Node types:
    - type 0 (variables): ``op_*``, ``data_*``, ``match_*``
    - type 1 (constraints): ``coverage_*``, ``definition_*``, ``same_loc_*``,
      ``in_block_*``, ``block_succ_*``, ``loc_domain_*``
    - type 2 (resources): ``loc_*``, ``block_*``

    Flat array layouts (as consumed below):
    - locDomain: (match, data, low, high) tuples
    - sameLoc: (match, data1, data2) tuples
    - inBlock: (match, block) tuples
    - inBlockSucc: (match, block1, block2) tuples

    The graph is mostly variable/resource <-> constraint, but match-match
    "contention" edges are added for heavily shared locations, so it is not
    strictly bipartite.
    """
    from collections import Counter

    def in_range(index, limit):
        # Reject negative ids too: add_edge would otherwise silently create
        # attribute-less nodes such as 'loc_-1'.
        return 0 <= index < limit

    # Extract problem dimensions
    num_ops = json_data.get('numOperationsInFunction', 0) or 0
    num_data = json_data.get('numDataInFunction', 0) or 0
    num_matches = json_data.get('numMatches', 0) or 0
    num_blocks = json_data.get('numBlocksInFunction', 0) or 0
    num_locations = json_data.get('numLocations', 0) or 0

    # Parse the flat constraint arrays once into complete tuples
    loc_domains = _complete_tuples(json_data.get('locDomain'), 4)
    same_locs = _complete_tuples(json_data.get('sameLoc'), 3)
    in_blocks = _complete_tuples(json_data.get('inBlock'), 2)
    in_block_succs = _complete_tuples(json_data.get('inBlockSucc'), 3)
    non_copy_matches = set(_expand_set(json_data.get('nonCopyMatches')))

    ops_by_match = _per_match_sets(
        json_data.get('operationsCoveredByMatch'), num_matches)
    data_by_match = _per_match_sets(
        json_data.get('dataDefinedByMatch'), num_matches)

    # Number of distinct matches that can cover each op / define each datum
    covering = Counter(op for members in ops_by_match for op in set(members))
    defining = Counter(d for members in data_by_match for d in set(members))

    # Shared normalisation scales (never below 1 to avoid division blow-ups)
    tenth_of_matches = max(num_matches / 10, 1)
    fifth_of_matches = max(num_matches / 5, 1)

    G = nx.Graph()

    # === Variable nodes (type 0) ===

    # Operations: fewer covering matches -> more critical
    for op in range(num_ops):
        criticality = 1.0 / max(covering[op], 1)
        G.add_node(f'op_{op}', type=0, weight=min(criticality, 1.0))

    # Data: heavier if it appears as the data field of any locDomain tuple
    constrained_data = {data_id for _, data_id, _, _ in loc_domains}
    for d in range(num_data):
        complexity = 0.8 if d in constrained_data else 0.4
        G.add_node(f'data_{d}', type=0, weight=complexity)

    # Matches: non-copy matches start heavier; each constraint tuple that
    # names the match adds 0.1, capped at +0.3
    constraints_per_match = Counter(
        entry[0]
        for group in (same_locs, in_blocks, in_block_succs)
        for entry in group
    )
    for m in range(num_matches):
        base_weight = 0.7 if m in non_copy_matches else 0.3
        complexity_bonus = min(constraints_per_match[m] * 0.1, 0.3)
        G.add_node(f'match_{m}', type=0,
                   weight=min(base_weight + complexity_bonus, 1.0))

    # === Resource nodes (type 2) ===

    # Locations: every locDomain range containing the location contributes
    # 1/range_size, so locations inside many narrow domains are scarcer
    for loc in range(num_locations):
        pressure = sum(
            1.0 / max(high - low + 1, 1)
            for _, _, low, high in loc_domains
            if low <= loc <= high
        )
        scarcity = min(pressure / tenth_of_matches, 1.0)
        G.add_node(f'loc_{loc}', type=2, weight=max(scarcity, 0.3))

    # Blocks: weighted by how often they appear as the block of an inBlock
    # tuple or as the first block of an inBlockSucc tuple
    block_refs = Counter(block_id for _, block_id in in_blocks)
    block_refs.update(block1 for _, block1, _ in in_block_succs)
    for b in range(num_blocks):
        importance = min(block_refs[b] / fifth_of_matches, 1.0)
        G.add_node(f'block_{b}', type=2, weight=max(importance, 0.4))

    # === Constraint nodes (type 1) and their edges ===

    # Coverage constraints - one per operation
    for op in range(num_ops):
        tightness = 1.0 - min(covering[op] / tenth_of_matches, 0.8)
        G.add_node(f'coverage_{op}', type=1, weight=max(tightness, 0.3))
        G.add_edge(f'op_{op}', f'coverage_{op}', weight=1.0)

    # Definition constraints - one per data value
    for d in range(num_data):
        tightness = 1.0 - min(defining[d] / tenth_of_matches, 0.8)
        G.add_node(f'definition_{d}', type=1, weight=max(tightness, 0.3))
        G.add_edge(f'data_{d}', f'definition_{d}', weight=1.0)

    # Matches -> coverage constraints of the operations they can cover
    for m, members in enumerate(ops_by_match):
        for op in members:
            if in_range(op, num_ops):
                G.add_edge(f'match_{m}', f'coverage_{op}', weight=0.7)

    # Matches -> definition constraints of the data they define
    for m, members in enumerate(data_by_match):
        for d in members:
            if in_range(d, num_data):
                G.add_edge(f'match_{m}', f'definition_{d}', weight=0.7)

    # Location sharing constraints (sameLoc)
    for match_id, data1, data2 in same_locs:
        constraint_node = f'same_loc_{match_id}_{data1}_{data2}'
        G.add_node(constraint_node, type=1, weight=0.8)
        if in_range(match_id, num_matches):
            G.add_edge(f'match_{match_id}', constraint_node, weight=0.8)
        if in_range(data1, num_data):
            G.add_edge(f'data_{data1}', constraint_node, weight=0.6)
        if in_range(data2, num_data):
            G.add_edge(f'data_{data2}', constraint_node, weight=0.6)

    # Block placement constraints (inBlock)
    for match_id, block_id in in_blocks:
        constraint_node = f'in_block_{match_id}_{block_id}'
        G.add_node(constraint_node, type=1, weight=0.6)
        if in_range(match_id, num_matches):
            G.add_edge(f'match_{match_id}', constraint_node, weight=0.7)
        if in_range(block_id, num_blocks):
            G.add_edge(f'block_{block_id}', constraint_node, weight=0.5)

    # Block succession constraints (inBlockSucc)
    for match_id, block1, block2 in in_block_succs:
        constraint_node = f'block_succ_{match_id}_{block1}_{block2}'
        G.add_node(constraint_node, type=1, weight=0.9)
        if in_range(match_id, num_matches):
            G.add_edge(f'match_{match_id}', constraint_node, weight=0.9)
        if in_range(block1, num_blocks):
            G.add_edge(f'block_{block1}', constraint_node, weight=0.7)
        if in_range(block2, num_blocks):
            G.add_edge(f'block_{block2}', constraint_node, weight=0.7)

    # Location domain constraints; also record which matches use each location
    location_users = {}
    for match_id, data_id, low, high in loc_domains:
        constraint_node = f'loc_domain_{match_id}_{data_id}'
        domain_size = max(high - low + 1, 1)
        # Smaller domains are tighter constraints
        tightness = 1.0 - min(domain_size / max(num_locations, 1), 0.8)
        G.add_node(constraint_node, type=1,
                   weight=min(max(tightness, 0.4), 1.0))
        if in_range(match_id, num_matches):
            G.add_edge(f'match_{match_id}', constraint_node, weight=0.6)
        if in_range(data_id, num_data):
            G.add_edge(f'data_{data_id}', constraint_node, weight=0.5)
        # Clamp the range to existing location nodes on both ends
        for loc in range(max(low, 0), min(high + 1, num_locations)):
            G.add_edge(f'loc_{loc}', constraint_node, weight=0.4)
            location_users.setdefault(loc, []).append(match_id)

    # === Resource contention edges ===
    # Locations used by more than a tenth of all matches link their first few
    # users pairwise. Only the first six users are considered to bound the
    # number of edges; a later location overwrites an earlier edge weight.
    if num_matches > 0:
        for users in location_users.values():
            if len(users) <= num_matches / 10:
                continue
            contention = min(len(users) / (num_matches / 5), 1.0)
            candidates = users[:6]
            for i, m1 in enumerate(candidates[:5]):
                for m2 in candidates[i + 1:]:
                    # A match listed twice for one location must not
                    # produce a self-loop.
                    if (m1 != m2 and in_range(m1, num_matches)
                            and in_range(m2, num_matches)):
                        G.add_edge(f'match_{m1}', f'match_{m2}',
                                   weight=contention * 0.8)

    return G


def main():
    """Command-line entry point: load the JSON instance and build its graph.

    Expects exactly three arguments: the .mzn path, the .dzn path and the
    JSON path. Only the JSON file is opened; the .mzn path is forwarded to
    ``build_graph`` and the .dzn path is accepted but unused. On a wrong
    argument count the usage line is printed and the process exits with
    status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = cli_args

    # Parse the instance data
    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    # The graph is only summarised here; callers wanting the graph itself
    # use build_graph directly.
    graph = build_graph(mzn_file, json_data)
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")

# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()