#!/usr/bin/env python3
"""
Graph converter for Nonogram (non_1) problem.
Converter created with subagent_prompt.md v_02

This problem is about solving Nonogram puzzles (Paint by Numbers).
Key challenges: Grid filling based on row/column clues, constraint satisfaction
where sequences of consecutive filled cells must match given patterns.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _parse_clues(flat_clues, num_lines, maxlen):
    """Split a flat, padded clue array into one list of clue values per line.

    Line ``i`` owns the slots ``[i * maxlen, (i + 1) * maxlen)``. Negative
    entries are dropped, and slots past the end of ``flat_clues`` are ignored.
    ``maxlen`` must be non-negative.
    """
    return [
        [value for value in flat_clues[i * maxlen:(i + 1) * maxlen] if value >= 0]
        for i in range(num_lines)
    ]


def _count_segments(clue):
    """Return the number of strictly positive entries (filled segments) in a clue."""
    return sum(1 for value in clue if value > 0)


def _constraint_weight(clue, line_length, maxlen):
    """Return the node weight of a row/column constraint, capped at 1.0.

    The weight averages the fraction of the line that must be filled and a
    log-scaled segment count. A line of non-positive length gets 0.5.
    """
    if line_length <= 0:
        return 0.5

    fill_ratio = sum(value for value in clue if value > 0) / line_length
    if maxlen > 0:
        segment_complexity = (
            math.log(_count_segments(clue) + 1) / math.log(maxlen + 1)
        )
    else:
        # With no clue slots, log(maxlen + 1) == 0 and the ratio is undefined;
        # there are no segments, so the complexity contribution is zero.
        segment_complexity = 0.0
    return min(0.5 * fill_ratio + 0.5 * segment_complexity, 1.0)


def _add_conflict_edges(G, cell_names, num_segments, maxlen):
    """Connect each cell of a line to at most the next three cells on that line.

    Edges are only added for lines with two or more segments, and only when
    the exponentially decayed weight exceeds 0.1.
    """
    if num_segments < 2:  # A single segment creates no interdependence edges
        return

    # num_segments >= 2 implies maxlen >= 2, because a clue holds at most
    # maxlen entries, so this division is safe.
    density = num_segments / maxlen
    line_length = len(cell_names)
    for first in range(line_length):
        for second in range(first + 1, min(first + 4, line_length)):
            conflict_weight = density * math.exp(-0.5 * (second - first))
            if conflict_weight > 0.1:
                G.add_edge(cell_names[first], cell_names[second],
                           weight=min(conflict_weight, 1.0))


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the Nonogram instance.

    Args:
        mzn_file: Path to .mzn file (for reference; not read here)
        json_data: Dict containing parsed DZN data. The keys read are
            'X' (columns), 'Y' (rows), 'maxlen' (clue slots per line) and
            the flat clue arrays 'rows' and 'cols'. Missing keys default to
            0 / empty list.

    Returns:
        networkx.Graph with
        - 'cell_{r}_{c}' nodes (type 0), weighted by centrality and by the
          number of segments in the cell's row and column clues
        - 'row_constraint_{r}' / 'col_constraint_{c}' nodes (type 1),
          weighted by fill ratio and segment count
        - cell-to-constraint edges weighted by the constraint's segment count
        - cell-to-cell edges between nearby cells on lines with 2+ segments

    All node and edge weights are capped at 1.0. An instance with
    ``maxlen == 0`` (no clue slots) is handled without raising.
    """
    X = json_data.get('X', 0)  # columns
    Y = json_data.get('Y', 0)  # rows
    # Clamp so a zero/negative slot count cannot reach math.log or slicing.
    maxlen = max(json_data.get('maxlen', 0), 0)
    rows_data = json_data.get('rows', [])
    cols_data = json_data.get('cols', [])

    G = nx.Graph()

    row_clues = _parse_clues(rows_data, Y, maxlen)
    col_clues = _parse_clues(cols_data, X, maxlen)

    # Segment counts are reused by every weighting step below, so compute
    # them once per line instead of once per cell.
    row_segments = [_count_segments(clue) for clue in row_clues]
    col_segments = [_count_segments(clue) for clue in col_clues]

    # Add grid cell nodes (type 0) with centrality and constraint-based weights
    center_r, center_c = Y // 2, X // 2
    max_distance = max(center_r + center_c, 1)
    max_complexity = max(2 * maxlen, 1)  # theoretical maximum segment total
    for r in range(Y):
        for c in range(X):
            distance_from_center = abs(r - center_r) + abs(c - center_c)
            centrality = 1.0 - (distance_from_center / max_distance)

            total_complexity = row_segments[r] + col_segments[c]
            complexity_weight = math.sqrt(total_complexity / max_complexity)
            weight = 0.3 * centrality + 0.7 * complexity_weight

            G.add_node(f'cell_{r}_{c}', type=0, weight=min(weight, 1.0))

    # Add row and column constraint nodes (type 1)
    for r in range(Y):
        G.add_node(f'row_constraint_{r}', type=1,
                   weight=_constraint_weight(row_clues[r], X, maxlen))
    for c in range(X):
        G.add_node(f'col_constraint_{c}', type=1,
                   weight=_constraint_weight(col_clues[c], Y, maxlen))

    # Add bipartite edges: cells to their row/column constraints.
    # More segments in a clue give the constraint a stronger edge weight.
    slot_norm = max(maxlen, 1)
    row_edge_weights = [min(math.sqrt(n / slot_norm), 1.0) for n in row_segments]
    col_edge_weights = [min(math.sqrt(n / slot_norm), 1.0) for n in col_segments]
    for r in range(Y):
        for c in range(X):
            G.add_edge(f'cell_{r}_{c}', f'row_constraint_{r}',
                       weight=row_edge_weights[r])
            G.add_edge(f'cell_{r}_{c}', f'col_constraint_{c}',
                       weight=col_edge_weights[c])

    # Add conflict edges between nearby cells in the same row/column when the
    # clue has several segments, to capture interdependent decisions.
    for r in range(Y):
        _add_conflict_edges(G, [f'cell_{r}_{c}' for c in range(X)],
                            row_segments[r], maxlen)
    for c in range(X):
        _add_conflict_edges(G, [f'cell_{r}_{c}' for r in range(Y)],
                            col_segments[c], maxlen)

    return G


def main():
    """Command-line entry point: build the graph and print its size.

    Expects exactly three positional arguments
    (<mzn_file> <dzn_file> <json_file>). With any other argument count it
    prints a usage line and exits with status 1. The dzn path is accepted
    but not read; the instance data comes from the JSON file.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_path, _dzn_path, json_path = cli_args

    # Load the parsed instance data
    with open(json_path) as handle:
        instance = json.load(handle)

    graph = build_graph(mzn_path, instance)

    # The graph itself is returned by build_graph; here we only report its size
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")

# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()