#!/usr/bin/env python3
"""
Graph converter for Hitori problem.
Created using subagent_prompt.md version: v_02

Hitori is a logic puzzle where you mark cells black in an n×n grid to satisfy:
1. Each row/column has all different non-black numbers (alldifferent constraint)
2. No two adjacent cells can both be black (adjacency constraint)
3. All non-black cells form a connected component (connectivity constraint)
4. Maximize sum of values in black cells (optimization objective)

Key challenges: Complex interaction between connectivity, alldifferent, and adjacency constraints.
The difficulty comes from balancing high-value cells against constraint satisfaction.
"""

import json
import math
import sys
from collections import Counter
from itertools import combinations
from pathlib import Path

import networkx as nx


def _add_alldiff_constraint(G, constraint_id, cell_ids, values):
    """Add one alldifferent constraint node and connect its cells to it.

    Args:
        G: Graph under construction; mutated in place.
        constraint_id: Node name for the constraint.
        cell_ids: Node names of the cells in the row/column, in order.
        values: Clue values of those cells, aligned with ``cell_ids``.
            Must be non-empty.
    """
    size = len(values)
    occurrences = Counter(values)  # one pass instead of list.count per cell

    # Fewer distinct values means more duplicates, i.e. a tighter constraint.
    tightness = 1.0 - (len(occurrences) / size)
    G.add_node(constraint_id, type=1, weight=tightness)

    # Edge weight is the fraction of the line occupied by this cell's value.
    for cell, value in zip(cell_ids, values):
        G.add_edge(cell, constraint_id, weight=occurrences[value] / size)


def _add_adjacency_constraint(G, constraint_id, cell_a, cell_b, value_a, value_b):
    """Add a "not both black" constraint node between two neighbouring cells.

    ``value_a`` / ``value_b`` are the cells' clue values already normalised by
    the maximum clue; the constraint weight is their mean.
    """
    G.add_node(constraint_id, type=1, weight=(value_a + value_b) / 2.0)
    G.add_edge(cell_a, constraint_id, weight=value_a)
    G.add_edge(cell_b, constraint_id, weight=value_b)


def _add_conflict_edges(G, cell_ids, values, max_value):
    """Add a direct cell-cell edge for every pair of equal values in one line."""
    for (cell_a, val_a), (cell_b, val_b) in combinations(zip(cell_ids, values), 2):
        if val_a == val_b:
            G.add_edge(cell_a, cell_b, weight=val_a / max_value)


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the Hitori problem instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Reads ``'n'`` (grid side,
            default 0) and ``'clue'`` (flat row-major list of n*n values,
            default empty; missing entries fall back to 1).

    Returns:
        networkx.Graph whose nodes carry ``type`` and ``weight`` attributes
        and whose edges carry a ``weight`` attribute.

    Strategy: Create a cell/constraint graph with:
    - Cell nodes (type 0): Each grid position with weight based on value and centrality
    - Constraint nodes (type 1):
      * Row alldifferent constraints (one per row)
      * Column alldifferent constraints (one per column)
      * Adjacency constraints (one per adjacent pair)
      * Connectivity constraint (global)
    - Cell-constraint edges model constraint participation
    - Cell-cell edges link equal values sharing a row or column, so the
      resulting graph is NOT strictly bipartite
    """
    n = json_data.get('n', 0)
    clue_flat = json_data.get('clue', [])

    # Convert flat array to 2D grid (row-major order); pad short input with 1.
    clue = [
        [clue_flat[r * n + c] if r * n + c < len(clue_flat) else 1
         for c in range(n)]
        for r in range(n)
    ]
    cell_id = [[f'cell_{r}_{c}' for c in range(n)] for r in range(n)]

    # Normalisation constant for clue values. Guard against a non-positive
    # maximum so the divisions below cannot raise ZeroDivisionError.
    max_value = max(clue_flat, default=1)
    if max_value <= 0:
        max_value = 1
    normalised = [[value / max_value for value in row] for row in clue]

    G = nx.Graph()

    # Cell nodes (type 0) - decision variables
    # Centrality is measured from the geometric centre (n-1)/2 so that it is
    # symmetric and stays within [0, 1] for even n as well as odd n (the corner
    # is exactly n-1 Manhattan steps from the centre in both cases).
    center = (n - 1) / 2
    max_distance = n - 1
    for r in range(n):
        for c in range(n):
            if max_distance > 0:
                center_distance = abs(r - center) + abs(c - center)
                centrality = 1.0 - (center_distance / max_distance)
            else:
                centrality = 1.0

            # Saturating boost for high values plus a linear centrality term.
            weight = (0.7 * (1.0 - math.exp(-2.0 * normalised[r][c]))
                      + 0.3 * centrality)
            G.add_node(cell_id[r][c], type=0, weight=weight)

    # Row alldifferent constraints (type 1)
    for r in range(n):
        _add_alldiff_constraint(G, f'row_alldiff_{r}', cell_id[r], clue[r])

    # Column alldifferent constraints (type 1)
    columns = [
        ([cell_id[r][c] for r in range(n)], [clue[r][c] for r in range(n)])
        for c in range(n)
    ]
    for c, (col_ids, col_values) in enumerate(columns):
        _add_alldiff_constraint(G, f'col_alldiff_{c}', col_ids, col_values)

    # Adjacency constraints (type 1) - no two adjacent cells can both be black
    for r in range(n):
        for c in range(n):
            # Right neighbor
            if c < n - 1:
                _add_adjacency_constraint(
                    G, f'adj_{r}_{c}_right',
                    cell_id[r][c], cell_id[r][c + 1],
                    normalised[r][c], normalised[r][c + 1],
                )
            # Down neighbor
            if r < n - 1:
                _add_adjacency_constraint(
                    G, f'adj_{r}_{c}_down',
                    cell_id[r][c], cell_id[r + 1][c],
                    normalised[r][c], normalised[r + 1][c],
                )

    # Global connectivity constraint (type 1)
    # All non-black cells must form connected component
    # Weight grows with grid size and saturates towards 1.
    connectivity_weight = 1.0 - math.exp(-n / 10.0)
    G.add_node('connectivity', type=1, weight=connectivity_weight)

    # Connect all cells to the connectivity constraint; the edge weight is 1.0
    # on the boundary and decreases with the distance to the nearest border.
    max_boundary = n // 2
    for r in range(n):
        for c in range(n):
            boundary_distance = min(r, c, n - 1 - r, n - 1 - c)
            if max_boundary > 0:
                criticality = 1.0 - (boundary_distance / max_boundary)
            else:
                criticality = 1.0
            G.add_edge(cell_id[r][c], 'connectivity', weight=criticality)

    # Conflict edges between cells with the same value in the same row/column:
    # alldifferent forces all but one of such duplicates to be black.
    for r in range(n):
        _add_conflict_edges(G, cell_id[r], clue[r], max_value)
    for col_ids, col_values in columns:
        _add_conflict_edges(G, col_ids, col_values, max_value)

    return G


def main():
    """Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Only the JSON file is read here; the .mzn path is forwarded to
    build_graph and the .dzn path is accepted but not used.
    Prints the usage line and exits with status 1 on a wrong argument count.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file = sys.argv[1]
    json_file = sys.argv[3]

    # Load JSON data. JSON is UTF-8 by specification, so do not depend on the
    # platform's locale-default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Only a summary is printed; the graph itself is not written anywhere.
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()