#!/usr/bin/env python3
"""
Graph converter for pentominoes-int problem.
Created using subagent_prompt.md version: v_02

This problem is about placing pentomino tiles (5-unit shapes) on a rectangular board.
Each tile has multiple orientations and positions represented by regular expressions.
Key challenges: complex spatial constraints, tile orientation conflicts, board boundary constraints.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


# Number of integers stored per tile in the flat ``tiles`` array:
# [q, s, fstart, fend, dstart].  Only ``q`` (number of DFA states) is used here.
_TILE_RECORD_LEN = 5


def _flatten_tile_records(tiles_data):
    """Return ``tiles_data`` as one flat list.

    A flat list is returned unchanged.  If any element is itself a list or
    tuple (e.g. the 2-D ``tiles`` array was exported as nested rows -- an
    assumption about the upstream JSON export, not something this file shows),
    the rows are concatenated in order so the flat indexing below still works.
    """
    if not any(isinstance(item, (list, tuple)) for item in tiles_data):
        return tiles_data
    flat = []
    for item in tiles_data:
        if isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            flat.append(item)
    return flat


def _position_weight(row, col, width, height, max_edge_distance):
    """Return the node weight for the board cell at (row, col)."""
    # Last-column cells are treated as terminators and pinned to the maximum
    # weight, overriding the distance-based value.
    if col == width - 1:
        return 1.0
    if max_edge_distance <= 0:
        return 0.5
    edge_distance = min(row, height - 1 - row, col, width - 1 - col)
    centrality = edge_distance / max_edge_distance
    # Exponential in centrality: border cells get the lowest value
    # (0.3 + 0.7 * e^-2), the most central cells approach 1.0.
    return 0.3 + 0.7 * math.exp(-2.0 * (1.0 - centrality))


def _add_position_nodes(G, width, height):
    """Add one type-0 node ``pos_<index>`` per board cell (row-major order)."""
    max_edge_distance = min(width // 2, height // 2)
    for pos in range(width * height):
        row, col = divmod(pos, width)
        weight = _position_weight(row, col, width, height, max_edge_distance)
        G.add_node(f'pos_{pos}', type=0, weight=weight)


def _add_tile_nodes(G, ntiles):
    """Add one type-2 node ``tile_<id>`` per tile."""
    for tile_id in range(ntiles):
        # The tile index is used as a stand-in for shape complexity; the
        # actual shape data is not inspected.
        complexity = (tile_id + 1) / ntiles
        tile_weight = 0.4 + 0.6 * math.sqrt(complexity)
        G.add_node(f'tile_{tile_id}', type=2, weight=tile_weight)


def _placement_edge_weights(width, height):
    """Return ``(node_name, weight)`` for every non-terminator board cell.

    The weight depends only on the cell, not on the tile, so it is computed
    once and reused for every tile's constraint node.
    """
    weights = []
    for pos in range(width * height):
        row, col = divmod(pos, width)
        if col == width - 1:
            continue  # terminator column is never linked to a tile constraint
        # Room to the right (excluding the terminator column) and below,
        # capped at 5 because a pentomino spans at most 5 cells.
        available_space = min(width - col - 1, height - row, 5)
        space_ratio = available_space / 5.0
        weights.append((f'pos_{pos}', 0.3 + 0.7 * (1.0 - space_ratio)))
    return weights


def _add_regular_constraints(G, ntiles, tiles_data, width, height):
    """Add one type-1 ``regular_constraint_<id>`` node per tile with a full record.

    Each constraint node is linked to its tile (weight 0.9) and to every
    non-terminator board cell.  Tiles whose 5-value record is missing or
    truncated get no constraint node.
    """
    if len(tiles_data) < _TILE_RECORD_LEN:
        return  # not even tile 0 has a complete record

    # Largest state count over all tile records; used to normalise ``q``.
    max_states = max(tiles_data[:ntiles * _TILE_RECORD_LEN:_TILE_RECORD_LEN])
    log_base = math.log(5)
    position_edges = _placement_edge_weights(width, height)

    for tile_id in range(ntiles):
        start_idx = tile_id * _TILE_RECORD_LEN
        if start_idx + _TILE_RECORD_LEN > len(tiles_data):
            continue
        q = tiles_data[start_idx]  # number of DFA states for this tile

        if max_states > 0:
            state_complexity = q / max_states
            # Logarithmic scaling: maps q/max_states in [0, 1] onto [0.5, 1.0].
            constraint_weight = 0.5 + 0.5 * math.log(1 + 4 * state_complexity) / log_base
        else:
            constraint_weight = 0.5

        constraint_node = f'regular_constraint_{tile_id}'
        G.add_node(constraint_node, type=1, weight=constraint_weight)
        G.add_edge(f'tile_{tile_id}', constraint_node, weight=0.9)
        for pos_node, edge_weight in position_edges:
            G.add_edge(pos_node, constraint_node, weight=edge_weight)


def _add_adjacency_constraints(G, width, height):
    """Add one type-1 ``adjacency_<n>`` node per 2x2 block of playable cells.

    A block is anchored at every cell that has a row below it and a
    non-terminator column to its right; blocks are numbered in row-major
    order of their anchor cell.
    """
    max_cluster_centrality = min(width // 2 - 1, height // 2 - 1)
    adjacency_id = 0
    for row in range(height - 1):
        for col in range(width - 2):
            cluster_centrality = min(row, height - 2 - row, col, width - 3 - col)
            if max_cluster_centrality > 0:
                cluster_weight = 0.4 + 0.6 * (cluster_centrality / max_cluster_centrality)
            else:
                cluster_weight = 0.5

            constraint_node = f'adjacency_{adjacency_id}'
            G.add_node(constraint_node, type=1, weight=cluster_weight)

            # The loop bounds guarantee all four cells are on the board and
            # outside the terminator column, so no further check is needed.
            pos = row * width + col
            for p in (pos, pos + 1, pos + width, pos + width + 1):
                G.add_edge(f'pos_{p}', constraint_node, weight=0.6)

            adjacency_id += 1


def _add_boundary_constraints(G, width, height):
    """Add type-1 boundary nodes for the top, bottom and left board edges.

    No right-boundary nodes are created; the last column is the terminator
    column and is excluded from the top/bottom loops as well.
    """
    for col in range(width - 1):  # exclude terminator column
        top_node = f'top_boundary_{col}'
        G.add_node(top_node, type=1, weight=0.8)
        G.add_edge(f'pos_{col}', top_node, weight=0.7)

        bottom_node = f'bottom_boundary_{col}'
        G.add_node(bottom_node, type=1, weight=0.8)
        bottom_pos = (height - 1) * width + col
        G.add_edge(f'pos_{bottom_pos}', bottom_node, weight=0.7)

    for row in range(height):
        left_node = f'left_boundary_{row}'
        G.add_node(left_node, type=1, weight=0.8)
        G.add_edge(f'pos_{row * width}', left_node, weight=0.7)


def _add_coverage_constraint(G, ntiles):
    """Add the single type-1 ``global_coverage`` node linked to every tile."""
    coverage_node = 'global_coverage'
    # Grows linearly with the tile count and saturates at 1.0 (from 15 tiles).
    coverage_weight = 0.3 + 0.7 * (ntiles / 15.0)
    G.add_node(coverage_node, type=1, weight=min(coverage_weight, 1.0))
    for tile_id in range(ntiles):
        G.add_edge(f'tile_{tile_id}', coverage_node, weight=0.5)


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the pentominoes placement problem.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'width',
            'height', 'ntiles' and 'tiles' (missing keys default to 0 / []).

    Returns:
        networkx.Graph whose nodes carry 'type' and 'weight' attributes and
        whose edges carry a 'weight' attribute. If width, height or ntiles is
        zero or negative, a graph with the single node 'empty' is returned.

    Strategy: every edge joins a constraint node to a position or tile node
    - Board positions as type 0 nodes (one per cell, including the last column)
    - Tiles as type 2 nodes (resources - each tile used exactly once)
    - Constraints as type 1 nodes: one regular-expression constraint per tile,
      one adjacency constraint per 2x2 block of playable cells, top/bottom/left
      boundary constraints, and one global coverage constraint
    - Edge weights based on spatial difficulty and constraint tightness
    """
    width = json_data.get('width', 0)
    height = json_data.get('height', 0)
    ntiles = json_data.get('ntiles', 0)
    # ``or []`` also covers an explicit null for 'tiles' in the JSON.
    tiles_data = _flatten_tile_records(json_data.get('tiles') or [])

    G = nx.Graph()

    if width <= 0 or height <= 0 or ntiles <= 0:
        # Create minimal graph for empty (or nonsensical) instances
        G.add_node('empty', type=0, weight=0.5)
        return G

    # Insertion order below is kept stable: positions, tiles, per-tile regular
    # constraints, adjacency constraints, boundaries, global coverage.
    _add_position_nodes(G, width, height)
    _add_tile_nodes(G, ntiles)
    _add_regular_constraints(G, ntiles, tiles_data, width, height)
    _add_adjacency_constraints(G, width, height)
    _add_boundary_constraints(G, width, height)
    _add_coverage_constraint(G, ntiles)

    return G


def main():
    """Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Prints a usage message and exits with status 1 otherwise. Only the JSON
    file is read; the .mzn path is forwarded to build_graph and the .dzn path
    is accepted but not used.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # Read as UTF-8 explicitly rather than relying on the platform's default
    # text encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()