#!/usr/bin/env python3
"""
Graph converter for Golomb Ruler problem.
Created using subagent_prompt.md version: v_02

This problem is about finding a set of m marks on a ruler such that no two pairs 
of marks are the same distance apart. The objective is to minimize the length 
of the ruler (position of the last mark).

Key challenges: 
- All pairwise differences must be distinct (alldifferent constraint)
- Combinatorial explosion as m increases (O(m^2) differences to track)
- Complex interaction between mark positions and difference constraints
"""

import json
import math
import sys
from itertools import combinations
from pathlib import Path

import networkx as nx


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of a Golomb Ruler problem instance.

    Args:
        mzn_file: Path to the .mzn model file. Accepted for interface
            compatibility only; it is not read by this function.
        json_data: Dict of instance data. Only the key 'm' (number of marks)
            is read; it defaults to 3 when absent.

    Returns:
        A networkx.Graph whose nodes carry `type` and `weight` attributes and
        whose edges carry a `weight` attribute.

    Node families:
    - mark_{i}       (type 0): the m mark positions on the ruler
    - diff_{i}_{j}   (type 0): the m(m-1)/2 pairwise differences, i < j
    - order_{i}      (type 1): ordering constraint between mark i and mark i+1
    - alldifferent   (type 1): all differences must be distinct
    - def_{i}_{j}    (type 1): definition diff[i,j] = mark[j] - mark[i]
    - symmetry_break (type 1): symmetry breaking constraint

    Edges connect variables to the constraints they participate in. On top of
    that, direct diff-diff "conflict" edges are added between differences that
    share a mark or span the same index gap, so the resulting graph is not
    strictly bipartite.
    """
    m = json_data.get('m', 3)

    # Every mark pair (i, j) with i < j, in lexicographic order. This single
    # list drives all per-difference nodes and edges below.
    pairs = [(i, j) for i in range(m) for j in range(i + 1, m)]
    n_differences = (m * (m - 1)) // 2

    G = nx.Graph()

    # Mark variable nodes (type 0) - decision variables for ruler positions.
    for i in range(m):
        if i == 0:
            # First mark fixed at 0, but still influences differences
            importance = 0.8
        elif i == m - 1:
            # Last mark determines objective value
            importance = 1.0
        else:
            # Middle marks: weight grows with index distance from the center
            centrality = abs(i - m / 2) / (m / 2)
            importance = 0.4 + 0.4 * centrality
        G.add_node(f'mark_{i}', type=0, weight=importance)

    # Difference variable nodes (type 0). The weight decays exponentially with
    # the index gap: smaller gaps are treated as harder to keep distinct.
    for i, j in pairs:
        G.add_node(f'diff_{i}_{j}', type=0, weight=math.exp(-0.5 * (j - i)))

    # Ordering constraint nodes (type 1) - mark[i] < mark[i+1].
    # Later ordering constraints get a larger weight.
    for i in range(m - 1):
        G.add_node(f'order_{i}', type=1, weight=(i + 1) / m)

    # Alldifferent constraint node (type 1). The weight grows logarithmically
    # with the number of differences and is capped at 1.0. With fewer than two
    # marks there are no differences, and log(0) is undefined, so the weight
    # falls back to 0.0 instead of raising ValueError.
    if n_differences > 0:
        alldiff_complexity = min(1.0, math.log(n_differences) / math.log(100))
    else:
        alldiff_complexity = 0.0
    G.add_node('alldifferent', type=1, weight=alldiff_complexity)

    # Difference definition constraint nodes (type 1).
    for i, j in pairs:
        G.add_node(f'def_{i}_{j}', type=1, weight=0.6)

    # Symmetry breaking constraint (type 1)
    G.add_node('symmetry_break', type=1, weight=0.4)

    # Each ordering constraint involves two adjacent marks.
    for i in range(m - 1):
        G.add_edge(f'mark_{i}', f'order_{i}', weight=0.8)
        G.add_edge(f'mark_{i+1}', f'order_{i}', weight=0.8)

    # Per-difference participation edges. Since j - i >= 1, both computed
    # weights already lie in (0, 1], so no clamping is needed.
    for i, j in pairs:
        gap = j - i
        # Closer marks have stronger coupling to their definition constraint
        participation_strength = 1.0 / math.sqrt(gap)
        G.add_edge(f'mark_{i}', f'def_{i}_{j}', weight=participation_strength)
        G.add_edge(f'mark_{j}', f'def_{i}_{j}', weight=participation_strength)
        G.add_edge(f'diff_{i}_{j}', f'def_{i}_{j}', weight=1.0)
        # Shorter gaps weigh more heavily on the alldifferent constraint
        G.add_edge(f'diff_{i}_{j}', 'alldifferent', weight=1.0 / gap)

    # Symmetry breaking constraint involves first and last differences.
    # Skipped for m <= 2, where those are not two distinct differences.
    if m > 2:
        G.add_edge('diff_0_1', 'symmetry_break', weight=0.8)
        G.add_edge(f'diff_{m-2}_{m-1}', 'symmetry_break', weight=0.8)

    # Conflict edges between differences that are likely to interfere.
    # The graph is undirected and the rule is symmetric, so each unordered
    # pair of differences is visited exactly once.
    for (i1, j1), (i2, j2) in combinations(pairs, 2):
        if {i1, j1} & {i2, j2}:
            # The two differences share a mark
            G.add_edge(f'diff_{i1}_{j1}', f'diff_{i2}_{j2}', weight=0.7)
        elif j1 - i1 == j2 - i2:
            # Same gap size - high conflict potential
            G.add_edge(f'diff_{i1}_{j1}', f'diff_{i2}_{j2}', weight=0.5)

    return G


def main():
    """
    Command-line entry point: load instance data from JSON and build its graph.

    Expects exactly three positional arguments:
    <mzn_file> <dzn_file> <json_file>. Only the JSON file is opened here; the
    mzn path is forwarded to build_graph and the dzn path is accepted but not
    used. When the argument count is wrong, a usage line is printed and the
    process exits with status 1. On success a one-line summary with the node
    and edge counts is printed.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The dzn path is part of the expected command line but is never read.
    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # Read the JSON as UTF-8 explicitly rather than relying on the
    # platform-dependent default text encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()