#!/usr/bin/env python3
"""
Graph converter for evmopt-generic problem.
Created using subagent_prompt.md version: v_02

This problem is about EVM (Ethereum Virtual Machine) super-compilation optimization.
Key challenges: Minimizing the number of operations while respecting stack constraints,
operation bounds, and gas/size costs. Different operations have different gas costs,
sizes, and timing bounds, creating complex optimization trade-offs.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _positive_cost(values, index):
    """Return ``values[index]`` when it exists and is positive, otherwise 1.

    The fallback of 1 keeps the weight formula well defined for operations
    whose cost entry is missing, zero or negative.
    """
    if index < len(values) and values[index] > 0:
        return values[index]
    return 1


def _add_operation_nodes(G, prefix, ops, gas, sizes, max_gas, max_size,
                         size_factor=1.0):
    """Add one type-0 node per operation, weighted by its gas and size cost.

    The weight decays exponentially with the normalised gas cost (factor 3)
    and the normalised size cost (factor ``size_factor``), so expensive
    operations receive lower weights. Every operation gets a node, even when
    its cost entry is missing, so that later edges never create a node
    without ``type``/``weight`` attributes.
    """
    for i, op in enumerate(ops):
        gas_cost = _positive_cost(gas, i)
        size_cost = _positive_cost(sizes, i)
        weight = (math.exp(-3.0 * gas_cost / max_gas)
                  * math.exp(-size_factor * size_cost / max_size))
        G.add_node(f'{prefix}_{op}', type=0, weight=weight)


def _add_bound_constraints(G, prefix, ops, lower, upper, log_horizon):
    """Add a type-1 timing-window node for each operation with ``lb < ub``.

    Tightness is ``1 - log(window + 1) / log_horizon``: a smaller window gives
    a higher weight. The same value is used for the node and for the edge to
    the operation it constrains.
    """
    for i, op in enumerate(ops):
        if i >= len(lower) or i >= len(upper):
            continue
        lb, ub = lower[i], upper[i]
        # NOTE(review): windows with lb == ub get no constraint node, which
        # matches the previous behaviour - confirm this is intended.
        if lb >= ub:
            continue
        window_size = max(ub - lb, 1)
        # Clamp at 0 so a window wider than the step horizon cannot yield a
        # negative weight.
        tightness = max(0.0, 1.0 - math.log(window_size + 1) / log_horizon)
        constraint = f'{prefix}_bound_{op}'
        G.add_node(constraint, type=1, weight=tightness)
        G.add_edge(f'{prefix}_{op}', constraint, weight=tightness)


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the EVM optimization problem instance.

    Args:
        mzn_file: Path to .mzn file (unused; kept for interface compatibility)
        json_data: Dict containing parsed DZN data

    Returns:
        networkx.Graph in which every node carries a ``type`` attribute
        (0 = operation, 1 = constraint) and a ``weight`` attribute, and every
        edge carries a ``weight`` attribute.

    Strategy: Model the problem as a bipartite graph where:
    - Variable nodes represent operations that can be performed
      (ZEROARY, UNARY, BINARY, PUSH, STOR)
    - Constraint nodes represent timing bounds, the stack limit, the step
      budget, the gas budget and precedence pairs
    - Edge weights reflect operation costs and constraint tightness
    """
    G = nx.Graph()

    # Problem parameters
    s = json_data.get('s', 1)  # max steps
    n = json_data.get('n', 1)  # max stack size

    # Operation arrays
    zeroops = json_data.get('ZEROARYOP', [])
    unaryops = json_data.get('UNARYOP', [])
    binaryops = json_data.get('BINARYOP', [])
    pushops = json_data.get('PUSHOP', [])
    storops = json_data.get('STOROP', [])

    # Cost arrays
    zerogas = json_data.get('zerogas', [])
    zerosz = json_data.get('zerosz', [])
    ungas = json_data.get('ungas', [])
    unsz = json_data.get('unsz', [])
    bingas = json_data.get('bingas', [])
    binsz = json_data.get('binsz', [])
    pushgas = json_data.get('pushgas', [])
    pushsz = json_data.get('pushsz', [])
    storgas = [3] * len(storops)  # Store ops have fixed gas cost of 3

    # Normalisation constants; forced positive so the divisions below are safe
    # even when every listed cost is zero.
    all_gas = zerogas + ungas + bingas + pushgas + storgas
    max_gas = max(all_gas) if all_gas else 1
    if max_gas <= 0:
        max_gas = 1

    all_sizes = zerosz + unsz + binsz + pushsz + [1] * len(storops)
    max_size = max(all_sizes) if all_sizes else 1
    if max_size <= 0:
        max_size = 1

    # Variable nodes: operations that can be performed
    _add_operation_nodes(G, 'zero', zeroops, zerogas, zerosz, max_gas, max_size)
    _add_operation_nodes(G, 'unary', unaryops, ungas, unsz, max_gas, max_size)
    _add_operation_nodes(G, 'binary', binaryops, bingas, binsz, max_gas, max_size)
    # Push operations are penalised twice as strongly for their size.
    _add_operation_nodes(G, 'push', pushops, pushgas, pushsz, max_gas, max_size,
                         size_factor=2.0)
    # Store operations: fixed gas cost of 3 plus a 0.5 penalty factor.
    stor_weight = math.exp(-3.0 * 3 / max_gas) * 0.5
    for op in storops:
        G.add_node(f'stor_{op}', type=0, weight=stor_weight)

    # Constraint nodes: timing windows per operation.
    # max(s, 1) keeps the logarithm strictly positive when s <= 0.
    log_horizon = math.log(max(s, 1) + 1)
    bound_groups = (
        ('zero', zeroops, 'zerolb', 'zeroub'),
        ('unary', unaryops, 'unlb', 'unub'),
        ('binary', binaryops, 'binlb', 'binub'),
        ('push', pushops, 'pushlb', 'pushub'),
        ('stor', storops, 'storlb', 'storub'),
    )
    for prefix, ops, lb_key, ub_key in bound_groups:
        _add_bound_constraints(G, prefix, ops,
                               json_data.get(lb_key, []),
                               json_data.get(ub_key, []),
                               log_horizon)

    # Stack size constraint: more operations relative to stack = tighter
    stack_pressure = (len(zeroops) + len(unaryops) + len(pushops)) / max(n, 1)
    G.add_node('stack_limit', type=1, weight=min(1.0, stack_pressure / 2.0))
    stack_groups = (
        ('zero', zeroops, 0.7),
        ('push', pushops, 0.8),
        ('stor', storops, 0.6),
    )
    for prefix, ops, edge_weight in stack_groups:
        for op in ops:
            G.add_edge(f'{prefix}_{op}', 'stack_limit', weight=edge_weight)

    # Step budget constraint: every operation is connected to it
    all_groups = (
        ('zero', zeroops),
        ('unary', unaryops),
        ('binary', binaryops),
        ('push', pushops),
        ('stor', storops),
    )
    total_ops = sum(len(ops) for _, ops in all_groups)
    step_pressure = total_ops / max(s, 1)
    G.add_node('step_budget', type=1, weight=min(1.0, step_pressure))
    for prefix, ops in all_groups:
        for op in ops:
            G.add_edge(f'{prefix}_{op}', 'step_budget', weight=0.5)

    # Gas budget constraint: only created if there are expensive operations
    if max_gas > 10:
        G.add_node('gas_budget', type=1, weight=0.8)
        # all_gas is non-empty here, otherwise max_gas would be 1.
        avg_gas = sum(all_gas) / len(all_gas)
        gas_groups = (
            ('zero', zeroops, zerogas),
            ('unary', unaryops, ungas),
            ('binary', binaryops, bingas),
            ('push', pushops, pushgas),
        )
        for prefix, ops, gas in gas_groups:
            # zip stops at the shorter list, so ops without a gas entry
            # are skipped.
            for op, cost in zip(ops, gas):
                if cost > avg_gas:
                    G.add_edge(f'{prefix}_{op}', 'gas_budget',
                               weight=cost / max_gas)

    # Precedence constraints from the before/after arrays.
    # Map each operation's printed form to its node name(s) so a constraint
    # can be linked to the real operation nodes rather than a placeholder.
    op_nodes = {}
    for prefix, ops in all_groups:
        for op in ops:
            op_nodes.setdefault(str(op), []).append(f'{prefix}_{op}')

    before_ops = json_data.get('before', [])
    after_ops = json_data.get('after', [])
    for i, (before_op, after_op) in enumerate(zip(before_ops, after_ops)):
        constraint = f'precedence_{i}'
        # This is a hard constraint, so high weight
        G.add_node(constraint, type=1, weight=1.0)
        # Entries that match no known operation are skipped, so no node
        # without attributes is ever created.
        for op in (before_op, after_op):
            for node in op_nodes.get(str(op), ()):
                G.add_edge(constraint, node, weight=1.0)

    return G


def main():
    """Command-line entry point.

    Expects exactly three arguments (mzn file, dzn file, json file). Loads
    the JSON file, builds the graph from it and prints the node and edge
    counts. Prints a usage line and exits with status 1 on a wrong argument
    count.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The dzn path is accepted for a uniform command line but is not read.
    mzn_file, _dzn_file, json_file = cli_args

    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    graph = build_graph(mzn_file, json_data)

    # The graph itself is only summarised here.
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the converter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()