#!/usr/bin/env python3
"""
Graph converter for Kakuro problem.
Created using subagent_prompt.md version: v_02

This problem is about filling a crossword-like grid with digits 1-9 such that:
- Each "word" (horizontal or vertical sequence) uses distinct digits
- Each word sums to a specified target value
Key challenges: constraint interdependencies, digit conflicts, sum feasibility
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _clue_weight(clue_len, clue_sum):
    """
    Score how hard a single clue is, from its word length and target sum.

    The score is the average of two terms:
    - tightness: 1.0 when the sum sits on (or outside) the smallest/largest
      sum reachable with ``clue_len`` distinct digits 1-9, decaying
      exponentially towards roughly 0.27 for a sum in the middle of the range
    - scope: ``clue_len / 9`` capped at 1.0 (longer words weigh more)

    Args:
        clue_len: Number of cells in the word
        clue_sum: Target sum of the word

    Returns:
        Float node weight for the clue
    """
    # Minimum possible sum for length k is 1+2+...+k = k(k+1)/2
    min_sum = clue_len * (clue_len + 1) // 2
    # Maximum possible sum for length k is (10-k)+...+9 = k(19-k)/2.
    # Words longer than 9 cannot use distinct digits; falling back to clue_sum
    # makes every branch below resolve to the "fully tight" value.
    max_sum = clue_len * (19 - clue_len) // 2 if clue_len <= 9 else clue_sum

    if max_sum > min_sum and min_sum < clue_sum < max_sum:
        range_pos = (clue_sum - min_sum) / (max_sum - min_sum)
        # Distance to the nearest extreme, in (0, 0.5]. Using this (rather than
        # the distance to the midpoint) makes the score approach 1.0 near the
        # extremes, so it joins the boundary case below without a jump.
        edge_distance = min(range_pos, 1.0 - range_pos)
        tightness = 0.2 + 0.8 * math.exp(-5 * edge_distance)
    else:
        # Degenerate range, or a sum on/outside the feasible bounds
        tightness = 1.0

    scope_weight = min(clue_len / 9.0, 1.0)
    return 0.5 * tightness + 0.5 * scope_weight


def _add_clue_nodes(G, grid, h, w, clues, n_clues, prefix, vertical):
    """
    Add one constraint node (type 1) per clue and link it to its cells.

    Args:
        G: Graph to extend in place; cell nodes must already be present
        grid: h x w nested list, 1 marks a fillable cell
        h: Number of grid rows
        w: Number of grid columns
        clues: Flat list read as 4 ints per clue: row, column, length, sum
            (row/column are treated as 1-based)
        n_clues: Number of clues to read from ``clues``
        prefix: Node name prefix, e.g. 'h_clue' yields 'h_clue_0', 'h_clue_1', ...
        vertical: True if the word runs downwards from the clue cell,
            False if it runs to the right
    """
    for i in range(n_clues):
        base = i * 4
        if base + 3 >= len(clues):
            # Incomplete record: skipped silently, as are all later ones
            continue

        clue_r = clues[base] - 1  # Convert to 0-based
        clue_c = clues[base + 1] - 1
        clue_len = clues[base + 2]
        clue_sum = clues[base + 3]

        clue_node = f'{prefix}_{i}'
        G.add_node(clue_node, type=1, weight=_clue_weight(clue_len, clue_sum))

        # Largest value min(j + 1, clue_len - j) can take; normalises the
        # position factor so the middle cell(s) of the word get exactly 1.0
        half_len = (clue_len + 1) // 2

        for j in range(clue_len):
            # The word starts in the cell right after the clue cell
            if vertical:
                cell_r, cell_c = clue_r + 1 + j, clue_c
            else:
                cell_r, cell_c = clue_r, clue_c + 1 + j

            if not (0 <= cell_r < h and 0 <= cell_c < w):
                continue
            if grid[cell_r][cell_c] != 1:
                continue

            # Edge weight reflects participation strength: 1.0 in the middle
            # of the word, smaller towards either end
            position_factor = min(j + 1, clue_len - j) / half_len
            edge_weight = 0.5 + 0.5 * position_factor
            G.add_edge(f'cell_{cell_r}_{cell_c}', clue_node, weight=edge_weight)


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the kakuro puzzle instance.

    Args:
        mzn_file: Path to .mzn file (for reference; not read here)
        json_data: Dict containing parsed DZN data. Keys read:
            'h', 'w' (grid rows/columns), 'grid_data' (flat row-major list,
            1 = fillable cell), 'h_clue' / 'v_clue' (flat lists, 4 ints per
            clue: row, column, length, sum) and 'n_h' / 'n_v' (clue counts).
            Missing keys default to 0 / empty list.

    Returns:
        networkx.Graph whose nodes and edges all carry a 'weight' attribute
        and whose nodes carry a 'type' attribute (0 = cell, 1 = clue).

    Strategy: Bipartite model with cells as variables and clue constraints as explicit nodes
    - Cell nodes (type 0): Grid positions that need to be filled
    - Clue constraint nodes (type 1): Each horizontal/vertical clue becomes a separate constraint
    - Edges connect cells to the clues they participate in
    - Extra cell-cell edges connect cells that belong to the same clue
    - Weights reflect constraint difficulty and cell importance
    """
    h = json_data.get('h', 0)  # rows
    w = json_data.get('w', 0)  # columns
    grid_data = json_data.get('grid_data', [])
    h_clue = json_data.get('h_clue', [])  # horizontal clues
    v_clue = json_data.get('v_clue', [])  # vertical clues
    n_h = json_data.get('n_h', 0)
    n_v = json_data.get('n_v', 0)

    G = nx.Graph()

    # Convert flat grid_data to 2D for easier access; positions past the end
    # of grid_data are treated as non-fillable (0)
    grid = [
        [
            grid_data[r * w + c] if r * w + c < len(grid_data) else 0
            for c in range(w)
        ]
        for r in range(h)
    ]

    # Add cell nodes (type 0) for fillable positions
    for r in range(h):
        for c in range(w):
            if grid[r][c] != 1:
                continue
            # Weight grows with closeness (Manhattan distance) to the grid
            # centre; cells in the centre tend to be more constrained
            centrality = 1.0 - (abs(r - h // 2) + abs(c - w // 2)) / max(h + w, 1)
            G.add_node(f'cell_{r}_{c}', type=0, weight=0.3 + 0.7 * centrality)

    # Add clue constraint nodes (type 1) and their cell-clue edges
    _add_clue_nodes(G, grid, h, w, h_clue, n_h, 'h_clue', vertical=False)
    _add_clue_nodes(G, grid, h, w, v_clue, n_v, 'v_clue', vertical=True)

    # Collect, for every cell, the clue nodes it is attached to. This must be
    # done before any cell-cell edge is added so the sets hold clues only.
    cell_constraints = {
        node: {nbr for nbr in G.neighbors(node) if G.nodes[nbr]['type'] == 1}
        for node, data in G.nodes(data=True)
        if data['type'] == 0
    }

    # Add conflict edges between cells that share at least one clue
    cells = list(cell_constraints)
    for idx, cell1 in enumerate(cells):
        own = cell_constraints[cell1]
        if not own:
            continue  # a cell without clues cannot share one
        for cell2 in cells[idx + 1:]:
            shared_constraints = own & cell_constraints[cell2]
            if not shared_constraints:
                continue

            # Weight by the shared clues' difficulty and by how much of
            # cell1's clue set is shared.
            # NOTE(review): the overlap term is normalised by cell1 only, so
            # it depends on node insertion order; kept as-is.
            avg_difficulty = (
                sum(G.nodes[clue]['weight'] for clue in shared_constraints)
                / len(shared_constraints)
            )
            overlap_strength = len(shared_constraints) / len(own)
            conflict_weight = 0.3 * overlap_strength + 0.7 * avg_difficulty
            G.add_edge(cell1, cell2, weight=min(conflict_weight, 1.0))

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments (<mzn_file> <dzn_file> <json_file>);
    otherwise prints a usage line and exits with status 1. Loads the JSON
    file, builds the graph and prints its node and edge counts.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is accepted for interface compatibility but not used here
    mzn_path, _dzn_path, json_path = cli_args

    # Load JSON data
    with open(json_path, 'r') as handle:
        instance = json.load(handle)

    # Build graph; it is returned by build_graph for direct feature extraction
    graph = build_graph(mzn_path, instance)

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()