#!/usr/bin/env python3
"""
Graph converter for de Bruijn binary sequences problem.
Created using subagent_prompt.md version: v_02

This problem is about finding de Bruijn sequences - cyclic sequences where 
every n-length string over an alphabet appears exactly once as a substring.
Key challenges: exponential sequence length, complex overlapping constraints,
and global cardinality requirements.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build the graph representation of a de Bruijn sequence instance.

    The graph contains three kinds of nodes, distinguished by their ``type``
    attribute, each carrying a ``weight`` attribute:

    - type 0: one ``pos_{i}`` node per sequence position and one
      ``bit_{i}_{j}`` node per digit of each position's n-digit representation
    - type 1: constraint nodes (``alldiff_constraint``, ``overlap_{i}_{k}``,
      ``tonum_{i}``, ``gcc_balance``/``gcc_approx``, ``symmetry_break``)
    - type 2: one ``symbol_{s}`` node per alphabet symbol

    Note that the graph is not strictly bipartite: the leading digit nodes
    are linked both to the symbol nodes and to the cardinality constraint,
    which is itself linked to the symbol nodes.

    Args:
        mzn_file: Path to the .mzn file. Accepted for interface
            compatibility only; it is not read here.
        json_data: Dict with the instance parameters. ``base`` (alphabet
            size, default 2) and ``n`` (substring length, default 3) are
            read from it.

    Returns:
        networkx.Graph with ``base ** n`` position nodes plus the digit,
        symbol and constraint nodes described above.

    Raises:
        TypeError: If ``base`` or ``n`` is not an integer.
        ValueError: If ``base < 2`` or ``n < 1``. Such values previously
            either failed with an opaque ZeroDivisionError or produced a
            graph containing attribute-less nodes.
    """
    base = json_data.get('base', 2)
    n = json_data.get('n', 3)

    # Fail fast on degenerate parameters: every weight formula below divides
    # by n, by base or by (base**n - 1).
    for label, value in (('base', base), ('n', n)):
        if not isinstance(value, int):
            raise TypeError(f"'{label}' must be an integer, got {value!r}")
    if base < 2:
        raise ValueError(f"'base' must be at least 2, got {base}")
    if n < 1:
        raise ValueError(f"'n' must be at least 1, got {n}")

    # Sequence length. Integer exponentiation is exact, whereas
    # int(math.pow(...)) goes through a float and truncates.
    m = base ** n

    G = nx.Graph()

    # Position nodes (type 0). The base weight is 1.0 at the middle index and
    # falls linearly to 0.0 at index 0; positions within n of either end
    # (the cyclic wraparound window) get a +0.3 boost, capped at 1.0.
    half = m // 2
    for i in range(m):
        criticality = 1.0 - abs(i - half) / half
        if i < n or i >= m - n:
            criticality = min(criticality + 0.3, 1.0)
        G.add_node(f'pos_{i}', type=0, weight=criticality)

    # Digit nodes (type 0): earlier digits are weighted as more significant.
    digit_significance = [(n - j) / n for j in range(n)]
    for i in range(m):
        for j, significance in enumerate(digit_significance):
            G.add_node(f'bit_{i}_{j}', type=0, weight=significance)

    # Alphabet symbol nodes (type 2), weighted by the inverse of the expected
    # number of occurrences per symbol (m / base), capped at 1.0.
    symbol_scarcity = min(1.0 / (m / base), 1.0)
    for symbol in range(base):
        G.add_node(f'symbol_{symbol}', type=2, weight=symbol_scarcity)

    # All-different constraint (type 1) over every position; its weight
    # approaches 1.0 as the number of positions grows.
    G.add_node('alldiff_constraint', type=1, weight=1.0 - 1.0 / m)
    alldiff_edge_weight = 1.0 / m
    for i in range(m):
        G.add_edge(f'pos_{i}', 'alldiff_constraint', weight=alldiff_edge_weight)

    # Overlap constraints (type 1): the last n-1 digits of position i are
    # tied to the first n-1 digits of its cyclic successor.
    overlap_weight = (n - 1) / n
    for i in range(m):
        next_i = (i + 1) % m  # wraparound for the cyclic sequence
        constraint_id = f'overlap_{i}_{next_i}'
        G.add_node(constraint_id, type=1, weight=overlap_weight)
        for j in range(1, n):
            G.add_edge(f'bit_{i}_{j}', constraint_id, weight=1.0)
        for j in range(n - 1):
            G.add_edge(f'bit_{next_i}_{j}', constraint_id, weight=1.0)

    # Number conversion constraints (type 1) linking each position to its
    # digits. Digit edges are weighted by place value over (base**n - 1).
    conversion_weight = min(math.log(base) / math.log(2), 1.0)
    place_weights = [base ** (n - 1 - j) / (m - 1) for j in range(n)]
    for i in range(m):
        tonum_id = f'tonum_{i}'
        G.add_node(tonum_id, type=1, weight=conversion_weight)
        G.add_edge(f'pos_{i}', tonum_id, weight=0.8)
        for j, place_weight in enumerate(place_weights):
            G.add_edge(f'bit_{i}_{j}', tonum_id, weight=place_weight)

    # Global cardinality constraint (type 1).
    if m % base == 0:
        # Exact balance between symbols is possible.
        gcc_id = 'gcc_balance'
        gcc_weight = 1.0 - 1.0 / (base * 2)
        gcc_symbol_edge = 1.0
        gcc_digit_edge = 1.0 / m
    else:
        # base**n with n >= 1 is always a multiple of base, so this branch is
        # not expected to run; it is retained as a defensive fallback.
        gcc_id = 'gcc_approx'
        gcc_weight = 0.7
        gcc_symbol_edge = 0.8
        gcc_digit_edge = 0.8 / m
    G.add_node(gcc_id, type=1, weight=gcc_weight)
    for symbol in range(base):
        G.add_edge(f'symbol_{symbol}', gcc_id, weight=gcc_symbol_edge)
    for i in range(m):
        G.add_edge(f'bit_{i}_0', gcc_id, weight=gcc_digit_edge)

    # Symmetry breaking constraint (type 1): strongly tied to the first
    # position, weakly to all others.
    G.add_node('symmetry_break', type=1, weight=0.6)
    G.add_edge('pos_0', 'symmetry_break', weight=1.0)
    symmetry_edge_weight = 1.0 / m
    for i in range(1, m):
        G.add_edge(f'pos_{i}', 'symmetry_break', weight=symmetry_edge_weight)

    # Link the leading digit of every position to each alphabet symbol, with
    # a weight that decays exponentially in the symbol value (1.0 for 0).
    symbol_edge_weights = [
        math.exp(-0.5 * symbol) if symbol > 0 else 1.0
        for symbol in range(base)
    ]
    for i in range(m):
        for symbol, symbol_weight in enumerate(symbol_edge_weights):
            G.add_edge(f'bit_{i}_0', f'symbol_{symbol}', weight=symbol_weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments (the .mzn path, the .dzn path and the
    JSON instance path), loads the JSON instance, builds the graph and
    prints its node and edge counts. Prints a usage line and exits with
    status 1 when the argument count is wrong.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is part of the command line but is not used here.
    mzn_path, _dzn_path, json_path = cli_args

    with open(json_path, 'r') as handle:
        instance = json.load(handle)

    # build_graph returns the graph for direct feature extraction.
    G = build_graph(mzn_path, instance)

    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, so that
# importing the module (e.g. to call build_graph directly) has no side effects.
if __name__ == "__main__":
    main()