#!/usr/bin/env python3
"""
Graph converter for Sudoku Fixed problem.
Created using subagent_prompt.md version: v_01

This problem is about solving standard Sudoku puzzles with fixed given cells.
Key challenges: 
- Number of givens (fewer givens = harder)
- Distribution of givens across regions
- Constraint tightness based on region occupancy
- Symmetry and pattern structure
"""

import sys
import json
import networkx as nx
from pathlib import Path
import math


def _estimate_given_positions(n, num_givens):
    """Choose which cells to treat as given, scanning the grid row-major.

    A cell qualifies when its distance to the nearest grid edge, divided by
    n // 2, is below 0.6. At most num_givens cells are selected.
    """
    positions = set()
    for r in range(n):
        for c in range(n):
            if len(positions) >= num_givens:
                # Quota reached: no later cell can be selected.
                return positions
            edge_bias = min(r, c, n - 1 - r, n - 1 - c) / (n // 2) if n > 1 else 0.5
            if edge_bias < 0.6:
                positions.add((r, c))
    return positions


def build_graph(mzn_file, json_data):
    """
    Build a weighted constraint graph for a Sudoku instance.

    Args:
        mzn_file: Path to .mzn file (kept for interface compatibility; it is
            not read by this function)
        json_data: Dict containing parsed DZN data. Reads 'n' (grid side
            length, default 9) and 'x' (given values, default empty list).

    Returns:
        A networkx Graph containing:
        - Variable nodes (type 0) 'cell_{r}_{c}': weight is
          0.7 + 0.3 * centrality for cells treated as given and
          0.3 + 0.4 * centrality otherwise, where centrality is 1 minus the
          normalised Manhattan distance from the grid centre.
        - Constraint nodes (type 1) 'row_{r}', 'col_{c}', 'region_{i}_{j}':
          weight is 1 minus the fraction of the unit's cells treated as given.
        - Edges of weight 1.0 from every cell to its row, column and region.
        - Conflict edges of weight 0.2 / (1 + Manhattan distance) between every
          pair of non-given cells in the same region. Because of these
          cell-to-cell edges the result is not strictly bipartite.

    NOTE(review): only the *number* of entries in 'x' is used. The positions
    of the givens are not taken from the data; they are assigned by the
    edge-biased heuristic in _estimate_given_positions. If 'x' actually holds
    the full grid (with blanks encoded as zeros) the count is too high --
    confirm against the instance files.
    """
    n = json_data.get('n', 9)
    given_values = json_data.get('x', [])

    # Region side length (assumes square regions)
    reg = int(math.ceil(math.sqrt(n)))
    # Number of region bands per axis that actually contain cells. For a
    # perfect-square n this equals reg; otherwise the last band is partial.
    bands = -(-n // reg) if reg > 0 else 0

    G = nx.Graph()

    given_positions = _estimate_given_positions(n, len(given_values))

    # Loop-invariant geometry for the centrality weight.
    center = (n - 1) / 2
    max_distance = n - 1  # Maximum Manhattan distance from the center

    # Variable nodes (type 0): grid cells
    for r in range(n):
        for c in range(n):
            distance_from_center = abs(r - center) + abs(c - center)
            centrality = 1.0 - (distance_from_center / max_distance) if max_distance > 0 else 1.0

            if (r, c) in given_positions:
                # Given cells have higher weight as they constrain the solution
                weight = 0.7 + 0.3 * centrality
            else:
                weight = 0.3 + 0.4 * centrality

            G.add_node(f'cell_{r}_{c}', type=0, weight=weight)

    # Constraint nodes (type 1). The weight grows as the share of given cells
    # in the unit shrinks.

    # Row constraints
    for r in range(n):
        row_givens = sum(1 for c in range(n) if (r, c) in given_positions)
        G.add_node(f'row_{r}', type=1, weight=1.0 - (row_givens / n))

    # Column constraints
    for c in range(n):
        col_givens = sum(1 for r in range(n) if (r, c) in given_positions)
        G.add_node(f'col_{c}', type=1, weight=1.0 - (col_givens / n))

    # Region constraints: one node per region that contains at least one
    # cell, so every region referenced by an edge below has type/weight
    # attributes (add_edge would otherwise create it bare).
    for reg_r in range(bands):
        for reg_c in range(bands):
            rows = range(reg_r * reg, min((reg_r + 1) * reg, n))
            cols = range(reg_c * reg, min((reg_c + 1) * reg, n))
            region_size = len(rows) * len(cols)
            region_givens = sum(
                1 for r in rows for c in cols if (r, c) in given_positions
            )
            tightness = 1.0 - (region_givens / region_size)
            G.add_node(f'region_{reg_r}_{reg_c}', type=1, weight=tightness)

    # Cell-to-constraint edges: each cell joins its row, column and region
    for r in range(n):
        for c in range(n):
            cell_id = f'cell_{r}_{c}'
            G.add_edge(cell_id, f'row_{r}', weight=1.0)
            G.add_edge(cell_id, f'col_{c}', weight=1.0)
            G.add_edge(cell_id, f'region_{r // reg}_{c // reg}', weight=1.0)

    # Conflict edges between non-given cells that share a region
    for r in range(n):
        for c in range(n):
            if (r, c) in given_positions:
                continue
            cell_id = f'cell_{r}_{c}'
            reg_r, reg_c = r // reg, c // reg
            for r2 in range(reg_r * reg, min((reg_r + 1) * reg, n)):
                for c2 in range(reg_c * reg, min((reg_c + 1) * reg, n)):
                    # The graph is undirected and the weight is symmetric, so
                    # each pair is added once, from its row-major-earlier cell.
                    if (r2, c2) <= (r, c) or (r2, c2) in given_positions:
                        continue
                    distance = abs(r - r2) + abs(c - c2)
                    G.add_edge(cell_id, f'cell_{r2}_{c2}', weight=0.2 / (1 + distance))

    return G


def main():
    """Command-line entry point.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Prints a usage line and exits with status 1 otherwise. The JSON file is
    parsed and passed to build_graph together with the .mzn path; the .dzn
    path is accepted but not used. A one-line node/edge summary is printed.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = cli_args

    # Parse the instance data
    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    # The graph is only summarised here; callers needing it use build_graph
    graph = build_graph(mzn_file, json_data)
    node_total = graph.number_of_nodes()
    edge_total = graph.number_of_edges()
    print(f"Graph built: {node_total} nodes, {edge_total} edges")


# Run the command-line entry point only when this file is executed as a
# script, so importing the module (e.g. to call build_graph) has no side effects.
if __name__ == "__main__":
    main()