#!/usr/bin/env python3
"""
Graph converter for OPD (Optimal Pairwise Difference) problem.
Created using subagent_prompt.md version: v_02

This problem is about finding a binary matrix of v rows and b columns such that
each row sums to r, and the dot product between any pair of distinct rows is minimal.
Key challenges: Balancing row constraints while minimizing pairwise overlaps.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of an OPD problem instance.

    Args:
        mzn_file: Path to the .mzn file. Accepted for interface
            compatibility only; it is not read by this function.
        json_data: Dict containing the parsed instance data. The keys read
            are 'v' (number of rows), 'b' (number of columns) and 'r'
            (required row sum); a missing key is treated as 0.

    Returns:
        An undirected networkx Graph. The graph is empty when any of v, b
        or r is zero, negative or missing. Otherwise it contains:

        - row_{i}       (type 0): one node per row
        - col_{j}       (type 2): one node per column
        - rowsum_{i}    (type 1): one "row sums to r" constraint per row
        - overlap_{i}_{j} (type 1): one constraint per unordered row pair

        with edges row-rowsum, row-col (complete bipartite), row-overlap
        (both rows of the pair) and, when two rows are forced to overlap
        strongly (2*r > b and the forced-overlap ratio exceeds 0.3),
        direct row-row conflict edges. Every node and edge carries a
        'weight' attribute; node weights are clamped to at most 1.0.
    """
    v = json_data.get('v', 0)  # number of rows
    b = json_data.get('b', 0)  # number of columns
    r = json_data.get('r', 0)  # row sum

    G = nx.Graph()

    # Non-positive parameters do not describe a matrix. Rejecting negatives
    # as well as zeros avoids negative weights and row-less graphs.
    if v <= 0 or b <= 0 or r <= 0:
        return G

    # ------------------------------------------------------------------
    # All weights depend only on (v, b, r), never on the individual row,
    # column or pair, so each one is computed exactly once up front.
    # ------------------------------------------------------------------
    density = r / b                    # fraction of a row that is filled
    total_demand = v * r               # total number of 1s in the matrix
    demand_per_column = total_demand / b

    # Rows and their sum constraints share the same pressure measure:
    # twice the density, capped at 1.
    row_weight = min(density * 2, 1.0)
    rowsum_weight = row_weight

    # Column scarcity: average demand on a column relative to the row count.
    col_weight = min(demand_per_column / v, 1.0)

    # Row-column edge weight: average fill ratio of a matrix cell.
    cell_weight = min(total_demand / (b * v), 1.0)

    # Pairwise overlap constraint weight: forced-overlap ratio when two
    # rows cannot be disjoint (2*r > b), otherwise a floor of 0.1.
    rows_must_overlap = 2 * r > b
    forced_overlap = (2 * r - b) / b
    pair_weight = forced_overlap if rows_must_overlap else 0.1
    pair_weight = max(0.1, min(pair_weight, 1.0))

    # Row-to-overlap edge weight. For positive r and b the ratio
    # r / min(r, b) is never below 1, so this evaluates to 1.0.
    max_possible_overlap = min(r, b)
    membership_weight = min(r / max_possible_overlap, 1.0)

    row_names = [f'row_{i}' for i in range(v)]
    col_names = [f'col_{j}' for j in range(b)]
    # Unordered row pairs (i < j), enumerated once and reused below.
    row_pairs = [(i, j) for i in range(v) for j in range(i + 1, v)]

    # ---- nodes (insertion order: rows, columns, row sums, overlaps) ----
    for row in row_names:
        G.add_node(row, type=0, weight=row_weight)

    for col in col_names:
        G.add_node(col, type=2, weight=col_weight)

    for i in range(v):
        G.add_node(f'rowsum_{i}', type=1, weight=rowsum_weight)

    for i, j in row_pairs:
        G.add_node(f'overlap_{i}_{j}', type=1, weight=pair_weight)

    # ---- edges ----
    # Each row participates in its own sum constraint.
    for i, row in enumerate(row_names):
        G.add_edge(row, f'rowsum_{i}', weight=1.0)

    # Every row may place a 1 in every column.
    for row in row_names:
        for col in col_names:
            G.add_edge(row, col, weight=cell_weight)

    # Both rows of a pair participate in that pair's overlap constraint.
    for i, j in row_pairs:
        overlap_node = f'overlap_{i}_{j}'
        G.add_edge(row_names[i], overlap_node, weight=membership_weight)
        G.add_edge(row_names[j], overlap_node, weight=membership_weight)

    # Direct row-row conflict edges, only when overlap is forced and the
    # forced-overlap ratio is significant (> 0.3).
    if rows_must_overlap:
        conflict_weight = min(forced_overlap, 1.0)
        if conflict_weight > 0.3:
            for i, j in row_pairs:
                G.add_edge(row_names[i], row_names[j], weight=conflict_weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Loads the JSON instance data, builds the graph with build_graph and
    prints its node and edge counts. Prints a usage message and exits with
    status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # argv[2] (the .dzn file) is required on the command line but is not
    # read; the instance data comes from the JSON file.
    mzn_file = sys.argv[1]
    json_file = sys.argv[3]

    # Load JSON data. The encoding is given explicitly so decoding does
    # not depend on the platform's locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


if __name__ == "__main__":
    main()