#!/usr/bin/env python3
"""
Graph converter for Unison problem.
Created using subagent_prompt.md version: v_02

This problem is about compiler optimization, specifically instruction selection, 
register allocation, and instruction scheduling. The Unison compiler generates 
constraint models for optimizing low-level code generation.

Key challenges: Register conflicts, resource constraints, instruction dependencies,
scheduling conflicts, and complex resource utilization patterns.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the Unison compiler optimization problem instance.

    Args:
        mzn_file: Path to .mzn file. Accepted for interface compatibility
            only; it is not read.
        json_data: Dict containing parsed DZN data. Missing keys fall back
            to 0 (MAXO, MAXP, MAXT), 60 (MAXC) or an empty list.

    Returns:
        An undirected ``networkx.Graph``. Every node has ``type`` and
        ``weight`` attributes; every edge has a ``weight`` attribute.

    Nodes:
        - ``op_<o>``, ``operand_<p>``, ``temp_<t>`` (type 0, variable-like)
        - ``resource_<r>``, ``adj_constraint_<i>``,
          ``preassign_constraint_<i>`` (type 1, constraint-like)
        - ``bb_<b>`` (type 2, resource-like)

    Edges:
        - operation -> basic block containing it
        - operand -> adjacency / preassignment constraint it takes part in
        - operation -> resource it consumes
        - operand <-> operand for adjacency pairs
        - temporary <-> nearby temporaries of the same basic block

    Notes:
        ``bb_ops`` and ``bb_temps`` are read as flat lists of block start
        indices, and ``adj_from`` / ``adj_to`` as parallel lists; entries
        of the longer adjacency list without a partner are ignored.
    """
    # Stride of res_con per resource.
    # NOTE(review): 21 was inferred from observed data ("res_con appears to
    # be flattened"); confirm against the model before relying on it.
    ops_per_res = 21
    # Caps that keep the number of constraint nodes / conflict edges small.
    max_adj_constraints = 10
    max_preassign_constraints = 5
    max_conflict_pairs = 50
    max_temps_per_block = 20
    max_conflicts_per_temp = 3

    # Extract key parameters
    max_o = json_data.get('MAXO', 0)  # operations
    max_p = json_data.get('MAXP', 0)  # operands
    max_t = json_data.get('MAXT', 0)  # temporaries
    max_c = json_data.get('MAXC', 60)  # max cycles

    # Basic block information
    bb_ops = json_data.get('bb_ops', [])
    bb_temps = json_data.get('bb_temps', [])
    bb_frequency = json_data.get('bb_frequency', [])
    bb_maxcycle = json_data.get('bb_maxcycle', [])

    # Adjacency information (key structural data)
    adj_from = json_data.get('adj_from', [])
    adj_to = json_data.get('adj_to', [])

    # Resource information
    res_cap = json_data.get('res_cap', [])
    res_con = json_data.get('res_con', [])

    # Preassignment constraints
    preassign_operand = json_data.get('preassign_operand', [])

    # Derived lookups, built once so the per-node loops stay cheap.
    adj_pairs = list(zip(adj_from, adj_to))
    adj_members = set(adj_from) | set(adj_to)
    preassigned = set(preassign_operand)
    op_blocks = list(zip(bb_ops, bb_ops[1:]))      # [start, end) per block
    temp_blocks = list(zip(bb_temps, bb_temps[1:]))  # [start, end) per block
    n_adj_from = max(len(adj_from), 1)
    n_adj_to = max(len(adj_to), 1)
    n_preassign = max(len(preassign_operand), 1)
    # Fall back to 1 when there are no frequencies or they are all zero,
    # so the normalisations below never divide by zero.
    max_freq = max(bb_frequency, default=0) or 1

    G = nx.Graph()

    # Add operation nodes (type 0 - variable-like: instruction selection decisions)
    for o in range(max_o + 1):
        # Weight by the relative frequency of the block the operation is in;
        # 0.3 when no block (with a known frequency) contains it.
        freq_weight = 0.3
        for bb_idx, (bb_start, bb_end) in enumerate(op_blocks):
            if bb_start <= o < bb_end and bb_idx < len(bb_frequency):
                freq_weight = bb_frequency[bb_idx] / max_freq
                break

        # Indices that appear in the adjacency lists get a 1.5x boost.
        adj_factor = 1.5 if o in adj_members else 1.0

        # Clamp the boosted product (not the factor alone, which would
        # cancel the boost) so the weight stays within [0.2, 1.0].
        weight = 0.2 + 0.8 * math.sqrt(min(freq_weight * adj_factor, 1.0))
        G.add_node(f'op_{o}', type=0, weight=weight)

    # Add operand nodes (type 0 - variable-like: register allocation decisions)
    for p in range(max_p + 1):
        preassign_factor = 1.0 if p in preassigned else 0.5
        adj_factor = 1.0 if p in adj_members else 0.5
        weight = 0.2 + 0.8 * math.sqrt(preassign_factor * adj_factor)
        G.add_node(f'operand_{p}', type=0, weight=weight)

    # Add temporary nodes (type 0 - variable-like: lifetime and register decisions)
    for t in range(max_t + 1):
        # Rough approximation: temporaries inside any block range count as
        # more constrained than those outside every range.
        in_block = any(lo <= t < hi for lo, hi in temp_blocks)
        constraint_involvement = 0.7 if in_block else 0.3
        weight = 0.3 + 0.7 * constraint_involvement
        G.add_node(f'temp_{t}', type=0, weight=weight)

    # Add basic block nodes (type 2 - resource-like: code organization units)
    for bb, frequency in enumerate(bb_frequency):
        freq_importance = frequency / max_freq
        max_cycles = bb_maxcycle[bb] if bb < len(bb_maxcycle) else max_c
        cycle_pressure = max_cycles / max_c if max_c > 0 else 0.5

        # Weight grows with (relative frequency x cycle bound relative to MAXC).
        weight = 0.3 + 0.7 * math.exp(-1.5 * (1.0 - freq_importance * cycle_pressure))
        G.add_node(f'bb_{bb}', type=2, weight=weight)

    # Add resource constraint nodes (type 1 - constraint-like)
    for res, capacity in enumerate(res_cap):
        # Tightness = mean consumption in this resource's slice of res_con
        # relative to its capacity; 0.5 when the slice is empty.
        start_idx = res * ops_per_res
        consumptions = res_con[start_idx:start_idx + ops_per_res]
        if consumptions:
            avg_consumption = sum(consumptions) / len(consumptions)
            tightness = avg_consumption / max(capacity, 1)
        else:
            tightness = 0.5

        weight = 0.4 + 0.6 * min(tightness, 1.0)
        G.add_node(f'resource_{res}', type=1, weight=weight)

    # Add adjacency constraint nodes (type 1) - only the first few pairs
    for i, (from_op, to_op) in enumerate(adj_pairs[:max_adj_constraints]):
        # Lower operand indices yield a higher weight.
        if max_p > 0:
            criticality = 1.0 - min((from_op + to_op) / (2 * max_p), 1.0)
        else:
            criticality = 0.5
        weight = 0.5 + 0.5 * criticality
        G.add_node(f'adj_constraint_{i}', type=1, weight=weight)

    # Add preassignment constraint nodes (type 1) - only the first few
    for i in range(min(len(preassign_operand), max_preassign_constraints)):
        weight = 0.8 + 0.2 * (i / n_preassign)
        G.add_node(f'preassign_constraint_{i}', type=1, weight=weight)

    # Add edges for basic block containment (operations to basic blocks)
    for bb, (bb_start, bb_end) in enumerate(op_blocks):
        bb_node = f'bb_{bb}'
        if bb_node not in G:
            continue
        span = max(bb_end - bb_start, 1)
        for o in range(bb_start, min(bb_end, max_o + 1)):
            op_node = f'op_{o}'
            if op_node not in G:
                continue
            # Triangle profile over the relative position in the block:
            # 0.5 at the first operation, rising to 1.0 at the midpoint.
            # NOTE(review): an earlier comment said boundaries should be the
            # critical ones; the formula favours the middle - confirm intent.
            op_pos = (o - bb_start) / span
            centrality = 1.0 - abs(op_pos - 0.5) * 2
            G.add_edge(op_node, bb_node, weight=0.5 + 0.5 * centrality)

    # Add edges for adjacency relationships (bipartite: operands to adjacency constraints)
    for i, (from_op, to_op) in enumerate(adj_pairs[:max_adj_constraints]):
        constraint_node = f'adj_constraint_{i}'
        from_node = f'operand_{from_op}'
        to_node = f'operand_{to_op}'

        # Membership in G already implies the index is within 0..MAXP.
        if from_node in G:
            weight = 0.7 + 0.3 * (1.0 - i / n_adj_from)
            G.add_edge(from_node, constraint_node, weight=weight)

        if to_node in G:
            weight = 0.7 + 0.3 * (1.0 - i / n_adj_to)
            G.add_edge(to_node, constraint_node, weight=weight)

    # Add edges for preassignment constraints (bipartite: operands to preassign constraints)
    for i, operand in enumerate(preassign_operand[:max_preassign_constraints]):
        operand_node = f'operand_{operand}'
        if operand_node in G:
            weight = 0.8 + 0.2 * (1.0 - i / n_preassign)
            G.add_edge(operand_node, f'preassign_constraint_{i}', weight=weight)

    # Add edges for resource consumption (operations to resource constraints)
    # Entry k of a resource's slice is attributed to operation k.
    ops_considered = max(min(ops_per_res, max_o + 1), 0)
    for res, capacity in enumerate(res_cap):
        resource_node = f'resource_{res}'
        start_idx = res * ops_per_res
        for o, consumed in enumerate(res_con[start_idx:start_idx + ops_considered]):
            if consumed > 0:
                utilization = consumed / max(capacity, 1)
                weight = min(0.3 + 0.7 * utilization, 1.0)
                G.add_edge(f'op_{o}', resource_node, weight=weight)

    # Add conflict edges between operands of the first adjacency pairs.
    # Iterating over zipped pairs avoids indexing past the shorter list.
    for i, (from_op, to_op) in enumerate(adj_pairs[:max_conflict_pairs]):
        from_node = f'operand_{from_op}'
        to_node = f'operand_{to_op}'
        if from_node in G and to_node in G:
            weight = 0.4 + 0.6 * (1.0 - i / n_adj_from)
            G.add_edge(from_node, to_node, weight=weight)

    # Add temporary conflicts within basic blocks (limited to avoid excessive edges)
    for bb_start_temp, bb_end_temp in temp_blocks:
        temps_in_bb = range(bb_start_temp, min(bb_end_temp, max_t + 1))
        n_temps = max(len(temps_in_bb), 1)
        for i, t1 in enumerate(temps_in_bb[:max_temps_per_block]):
            # Earlier temporaries in the block get heavier conflict edges.
            weight = 0.3 + 0.7 * (1.0 - (i / n_temps))
            for t2 in temps_in_bb[i + 1:i + 1 + max_conflicts_per_temp]:
                if f'temp_{t1}' in G and f'temp_{t2}' in G:
                    G.add_edge(f'temp_{t1}', f'temp_{t2}', weight=weight)

    return G


def main():
    """
    Command-line entry point: build the graph for one instance and report its size.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Only the JSON file is read; the .mzn path is passed through to
    build_graph and the .dzn path is accepted but unused.

    On a wrong argument count the usage line is written to stderr and the
    process exits with status 1.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>",
              file=sys.stderr)
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # Load JSON data; decode as UTF-8 explicitly rather than relying on the
    # platform's locale-dependent default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, so that
# importing the module (e.g. to call build_graph directly) has no side effects.
if __name__ == "__main__":
    main()