#!/usr/bin/env python3
"""
Graph converter for Linear-to-Program problem.
# Converter created with subagent_prompt.md v_02

This problem is about finding shortest programs to compute linear combinations
using only binary plus and unary minus operations. The program must work 
correctly on a set of examples. 

Key challenges: 
- Program synthesis with limited operations (only + and -)
- Must satisfy multiple examples simultaneously  
- Optimization for program length while maintaining correctness
- Complex dependency structure between program lines
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _param_weight(p, M, Smax, RP):
    """Return the node weight (capped at 1.0) for input parameter ``p``.

    With several examples and a complete ``RP`` table the weight is the
    standard deviation of the parameter's values across examples, scaled
    by 1/100. Otherwise it falls back to the magnitude of ``RP[p]`` scaled
    by 1/50, or to a neutral 0.3 when ``RP`` has no entry for ``p``.
    """
    if Smax > 1 and len(RP) >= Smax * M:
        # RP is read as a flat, example-major table: value of parameter p in
        # example s sits at index s * M + p. The length check above guarantees
        # all Smax rows are present.
        values = [RP[s * M + p] for s in range(Smax)]
        mean = sum(values) / len(values)  # computed once, not per element
        variance = sum((v - mean) ** 2 for v in values) / len(values)
        return min(math.sqrt(variance) / 100.0, 1.0)

    # Single example or limited data - use absolute value
    if p < len(RP):
        return min(abs(RP[p]) / 50.0, 1.0)
    return 0.3


def _example_weight(s, M, RP):
    """Return the node weight (capped at 1.0) for example ``s``.

    The weight is the summed magnitude of the example's parameter values
    divided by ``M * 50``. A neutral 0.5 is used when ``RP`` holds no data
    for the example or when there are no parameters (``M <= 0``), which
    would otherwise divide by zero.
    """
    start = s * M
    if M <= 0 or start >= len(RP):
        return 0.5
    # Slicing tolerates a truncated final row, like the per-index bound check
    magnitude = sum(abs(v) for v in RP[start:start + M])
    return min(magnitude / (M * 50.0), 1.0)


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the linear-to-program instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; it is not read)
        json_data: Dict containing parsed DZN data. Keys read, with defaults:
            'M' (1)     number of input parameters
            'N' (10)    maximum number of program lines
            'Np' (5)    number of plus operations
            'Smax' (1)  number of examples
            'RP' ([])   example parameter values, indexed as RP[s * M + p]

    Returns:
        networkx.Graph whose nodes carry 'type' and 'weight' attributes and
        whose edges carry a 'weight' attribute.

    Strategy: Model the program synthesis problem as a mostly bipartite graph
    - Variable nodes (type 0): `param_{p}` inputs and `line_{r}` program lines
    - Constraint nodes (type 1): `order_constraint_{r}`, `plus_symmetry`,
      `minus_symmetry`, and `example_{s}`
    - Resource nodes (type 2): `plus_operations`, `minus_operations`,
      `program_length`
    Variable nodes connect to constraint/resource nodes; the only exception is
    a handful of direct line-to-line "conflict" edges among the first three
    program lines.

    Lines 0..Np-1 are treated as plus operations, the remaining N-Np lines as
    minus operations. N is clamped to be non-negative and Np to the range
    [0, N]: an out-of-range Np would otherwise make add_edge() silently create
    `line_*` nodes without 'type'/'weight' attributes, and N == 0 would divide
    by zero.
    """

    # Extract problem parameters
    M = json_data.get('M', 1)                    # number of input parameters
    N = max(json_data.get('N', 10), 0)           # maximum number of program lines
    Np = min(max(json_data.get('Np', 5), 0), N)  # number of plus operations
    Smax = json_data.get('Smax', 1)              # number of examples
    RP = json_data.get('RP', [])                 # example parameter values

    Nm = N - Np  # number of minus operations (>= 0 after clamping)

    G = nx.Graph()

    # === VARIABLE NODES (Type 0) ===

    # Input parameter nodes - weighted by value spread across examples
    for p in range(M):
        G.add_node(f'param_{p}', type=0, weight=_param_weight(p, M, Smax, RP))

    # Program line nodes - weighted by position criticality
    for r in range(N):
        # Earlier lines are more critical (affect more subsequent lines)
        position_weight = 1.0 - (r / N) * 0.5  # Decreases from 1.0 towards 0.5

        # Plus operations are more constrained (need two inputs)
        operation_weight = 0.8 if r < Np else 0.6

        G.add_node(f'line_{r}', type=0, weight=position_weight * operation_weight)

    # === CONSTRAINT NODES (Type 1) ===

    # Ordering constraints - ensure program lines respect dependencies
    for r in range(N):
        # Plus operations need 2 inputs, minus operations need 1
        scope = 2 if r < Np else 1

        G.add_node(f'order_constraint_{r}', type=1, weight=scope / 2.0)

        # Connect to the program line
        G.add_edge(f'line_{r}', f'order_constraint_{r}', weight=0.9)

        # Connect to input dependencies (approximated): the first `scope`
        # variables in the order params-then-lines, limited to those that
        # precede line r (hence dep - M < r, so no self reference).
        for dep in range(min(scope, M + r)):
            if dep < M:
                G.add_edge(f'param_{dep}', f'order_constraint_{r}', weight=0.7)
            else:
                G.add_edge(f'line_{dep - M}', f'order_constraint_{r}', weight=0.8)

    # Symmetry breaking constraints for equivalent operations
    if Np > 1:
        # More plus ops = tighter constraint
        G.add_node('plus_symmetry', type=1, weight=min(Np / 10.0, 1.0))
        for r in range(Np - 1):
            G.add_edge(f'line_{r}', 'plus_symmetry', weight=0.6)

    if Nm > 1:
        G.add_node('minus_symmetry', type=1, weight=min(Nm / 10.0, 1.0))
        for r in range(Np, N - 1):
            G.add_edge(f'line_{r}', 'minus_symmetry', weight=0.6)

    # Example consistency constraints - one per example
    for s in range(Smax):
        G.add_node(f'example_{s}', type=1, weight=_example_weight(s, M, RP))

        # Connect to all program lines (they must be consistent with this example)
        for r in range(N):
            consistency_weight = 0.7 + 0.2 * (r / N)  # Later lines more critical
            G.add_edge(f'line_{r}', f'example_{s}', weight=consistency_weight)

        # Connect to input parameters
        for p in range(M):
            G.add_edge(f'param_{p}', f'example_{s}', weight=0.8)

    # === RESOURCE NODES (Type 2) ===

    # Plus operation resource - weight is the share of lines that are plus ops
    if Np > 0:
        G.add_node('plus_operations', type=2, weight=Np / N)
        for r in range(Np):
            G.add_edge(f'line_{r}', 'plus_operations', weight=1.0)

    # Minus operation resource - weight is the share of lines that are minus ops
    if Nm > 0:
        G.add_node('minus_operations', type=2, weight=Nm / N)
        for r in range(Np, N):
            G.add_edge(f'line_{r}', 'minus_operations', weight=1.0)

    # Program length resource - represents the objective to minimize
    length_pressure = min(N / 20.0, 1.0)  # Longer max programs = more pressure
    G.add_node('program_length', type=2, weight=length_pressure)

    # Connect all program lines to length resource
    for r in range(N):
        # Weight by position - later positions contribute more to length
        G.add_edge(f'line_{r}', 'program_length', weight=(r + 1) / N)

    # === ADDITIONAL COMPLEXITY EDGES ===

    # Add conflict edges between operations competing for early positions.
    # Only applied when at least three lines exist; pairs (0,1), (0,2), (1,2).
    if N > 2:
        for r1 in range(3):
            for r2 in range(r1 + 1, 3):
                # Early positions are valuable - create competition
                conflict_weight = math.exp(-2.0 * abs(r1 - r2))  # Exponential decay
                G.add_edge(f'line_{r1}', f'line_{r2}', weight=conflict_weight)

    return G


def main():
    """Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments: the .mzn path, the .dzn path and the
    .json path. Only the JSON file is opened here; the .mzn path is passed
    through to build_graph() and the .dzn path is accepted but not used.
    Prints a usage message and exits with status 1 on a wrong argument count;
    otherwise prints the node and edge counts of the resulting graph.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn argument is kept for interface compatibility but is not read.
    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # Load JSON data. JSON interchange text is UTF-8, so decode it explicitly
    # rather than relying on the platform's locale-dependent default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# Run the command-line entry point only when this file is executed as a
# script, so importing the module (e.g. to call build_graph) has no side effects.
if __name__ == "__main__":
    main()