#!/usr/bin/env python3
"""
Graph converter for Crossword Optimization problem.
Created using subagent_prompt.md version: v_02

This problem is about filling a crossword grid with words from dictionaries
to maximize the total value of letters used (each letter has a Scrabble-like point value).
Key challenges: word choice conflicts, letter sharing between intersecting words, dictionary size variation.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _clue_intersection(r1, c1, d1, len1, r2, c2, d2, len2):
    """Return the (row, col) cell shared by two clues, or None if they do not cross.

    Each clue is described by its start row, start column, a truthy "down"
    flag and its length. Only clues with different orientations are
    considered; two clues with the same orientation never intersect here.
    """
    if d1 == d2:
        return None
    if d1:
        # First clue runs down, second runs across.
        if r1 <= r2 < r1 + len1 and c2 <= c1 < c2 + len2:
            return (r2, c1)
    else:
        # First clue runs across, second runs down.
        if r2 <= r1 < r2 + len2 and c1 <= c2 < c1 + len1:
            return (r1, c2)
    return None


def _cell_weight(r, c, height, width):
    """Return the weight of grid cell (r, c): 70% centrality, 30% positional term."""
    centrality = 1.0 - (abs(r - height // 2) + abs(c - width // 2)) / (height + width)
    # Deterministic position-dependent term in [0, 1] to avoid uniform weights.
    positional_value = (1 + math.sin(r * c * 0.5)) * 0.5
    return centrality * 0.7 + positional_value * 0.3


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the crossword optimization problem.

    Args:
        mzn_file: Path to .mzn file (for reference only; it is not read)
        json_data: Dict containing parsed DZN data. Keys read: 'width',
            'height', 'number_of_clues', 'startrow', 'startcol', 'down',
            'leng', 'grid' and 'words1' .. 'words45'.

    Returns:
        networkx.Graph whose nodes carry 'type' and 'weight' attributes and
        whose edges carry a 'weight' attribute.

    Node kinds:
    - clue_{i} (Type 0): one per clue slot, weighted by placement difficulty
    - dict_constraint_{length} (Type 1): one per word length with a non-empty dictionary
    - intersection_{r}_{c} (Type 1): one per cell where a down and an across clue cross
    - cell_{r}_{c} (Type 2): grid cells

    Edge kinds: clue-dictionary, clue-cell, clue-intersection and clue-clue
    conflict edges (so the graph is mostly, but not strictly, bipartite).
    """
    width = json_data.get('width', 5)
    height = json_data.get('height', 5)
    number_of_clues = json_data.get('number_of_clues', 0)

    # Per-clue geometry
    startrow = json_data.get('startrow', [])
    startcol = json_data.get('startcol', [])
    down = json_data.get('down', [])
    leng = json_data.get('leng', [])

    # Dictionary size per word length (lengths 1..45)
    word_counts = {n: json_data.get(f'words{n}', 0) for n in range(1, 46)}
    max_word_count = max(word_counts.values())

    # Longest clue, used as a normaliser. Falls back to 1 for an empty or
    # all-zero length list so the divisions below cannot divide by zero.
    max_clue_length = max(leng, default=1) or 1

    # Clues that have a length, and clues that have full geometry.
    clues_with_length = range(min(number_of_clues, len(leng)))
    clues_with_geometry = range(
        min(number_of_clues, len(startrow), len(startcol), len(down), len(leng))
    )

    # Every crossing pair (i, j, cell) with i < j, in (i, j) order. Computed
    # once and reused for both the intersection nodes and their edges.
    crossings = []
    for i in clues_with_geometry:
        for j in range(i + 1, len(clues_with_geometry)):
            cell = _clue_intersection(
                startrow[i], startcol[i], down[i], leng[i],
                startrow[j], startcol[j], down[j], leng[j],
            )
            if cell is not None:
                crossings.append((i, j, cell))

    G = nx.Graph()

    # === NODE CREATION ===

    # 1. Clue slots (Type 0): longer words and scarcer dictionaries are harder
    for i in clues_with_length:
        length = leng[i]
        available_words = word_counts.get(length, 0)

        length_difficulty = length / max_clue_length
        scarcity_difficulty = 1.0 - min(available_words / max(max_word_count, 1), 1.0)

        # Combine factors with non-linear weighting
        difficulty = math.sqrt(length_difficulty * 0.6 + scarcity_difficulty * 0.4)
        G.add_node(f'clue_{i}', type=0, weight=difficulty)

    # 2. Dictionary constraints (Type 1): fewer words = tighter constraint
    for length, count in word_counts.items():
        if count > 0:
            # count > 0 implies max_word_count > 0, so the division is safe
            G.add_node(f'dict_constraint_{length}', type=1,
                       weight=1.0 - count / max_word_count)

    # 3. Grid cells (Type 2)
    grid = json_data.get('grid', [])
    if not grid:
        # If grid is missing from JSON, assume all cells are available
        for r in range(height):
            for c in range(width):
                G.add_node(f'cell_{r}_{c}', type=2,
                           weight=_cell_weight(r, c, height, width))

    # 4. Cell intersection constraints (Type 1): one node per distinct crossing
    # cell. Each cell is registered once, so the weight is a fixed 0.7
    # (0.4 base + 0.3 for the single crossing pair).
    seen_cells = set()
    for _, _, cell in crossings:
        if cell not in seen_cells:
            seen_cells.add(cell)
            G.add_node(f'intersection_{cell[0]}_{cell[1]}', type=1, weight=0.7)

    # === EDGE CREATION ===

    # 1. Clue-to-dictionary edges: clue needs a word from the matching dictionary
    for i in clues_with_length:
        available_words = word_counts.get(leng[i], 0)
        if available_words > 0:
            G.add_edge(f'clue_{i}', f'dict_constraint_{leng[i]}',
                       weight=1.0 - available_words / max_word_count)

    # 2. Clue-to-cell edges (clue occupies specific cells)
    # NOTE(review): cells are indexed from 0 here; if startrow/startcol are
    # 1-based in the source data, clue cells are shifted by one — confirm.
    for i in clues_with_geometry:
        r, c, is_down, length = startrow[i], startcol[i], down[i], leng[i]
        mid_pos = length // 2

        for pos in range(length):
            cell_r, cell_c = (r + pos, c) if is_down else (r, c + pos)
            if not (0 <= cell_r < height and 0 <= cell_c < width):
                continue

            cell_node = f'cell_{cell_r}_{cell_c}'
            if cell_node not in G:
                # Happens when 'grid' was supplied and step 3 was skipped:
                # add the node explicitly so it carries type/weight instead
                # of being created attribute-less by add_edge.
                G.add_node(cell_node, type=2,
                           weight=_cell_weight(cell_r, cell_c, height, width))

            # Middle positions of a word are weighted as more critical
            position_importance = 1.0 - abs(pos - mid_pos) / max(mid_pos, 1)
            G.add_edge(f'clue_{i}', cell_node,
                       weight=position_importance * 0.7 + 0.3)

    # 3. Intersection constraint edges: both crossing clues attach to the node
    for i, j, cell in crossings:
        constraint_node = f'intersection_{cell[0]}_{cell[1]}'
        # Longer words have more potential conflicts
        if max_clue_length > 0:
            conflict_potential = (leng[i] + leng[j]) / (2 * max_clue_length)
        else:
            conflict_potential = 0.5

        G.add_edge(f'clue_{i}', constraint_node, weight=conflict_potential)
        G.add_edge(f'clue_{j}', constraint_node, weight=conflict_potential)

    # 4. Conflict edges between clues that compete for words of the same length
    for i in clues_with_length:
        available_i = word_counts.get(leng[i], 0)
        if available_i <= 0:
            continue

        # Conflict strength based on scarcity; only significant ones are kept
        conflict_weight = (1.0 - available_i / max_word_count) * 0.6
        if not conflict_weight > 0.3:
            continue

        for j in range(i + 1, len(clues_with_length)):
            if leng[j] == leng[i]:
                G.add_edge(f'clue_{i}', f'clue_{j}', weight=conflict_weight)

    return G


def main():
    """Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Prints a usage message and exits with status 1 otherwise. On success,
    prints the node and edge counts of the graph.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The DZN path is part of the command line but is not read here; the
    # instance data comes from the JSON file.
    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # Read as UTF-8 explicitly rather than relying on the platform default.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()