#!/usr/bin/env python3
"""
Graph converter for Market Split problem.
Created using subagent_prompt.md version: v_02

This problem is about finding a binary assignment to variables that satisfies linear equality constraints.
Each constraint is of the form: sum(a[i,j] * x[j]) = target[i]
Key challenges: Tight equality constraints with binary variables, balancing multiple competing constraints
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the market split problem instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys used:
            'm' - number of constraints (rows)
            'n' - number of binary variables
            'a' - flattened row-major m x (n+1) matrix; the last entry of
                  each row is that constraint's target value

    Returns:
        networkx.Graph with
        - nodes 'var_{j}' (type=0) for each variable x[j]
        - nodes 'constraint_{i}' (type=1) for each equality constraint
        - weighted edges var <-> constraint for every non-zero coefficient
        - weighted var <-> var "conflict" edges between the largest
          positive coefficients of oversubscribed constraints
        An empty graph is returned when the dimensions are not
        non-negative integers or do not match the length of 'a'.
    """
    m = json_data.get('m', 0)  # number of constraints
    n = json_data.get('n', 0)  # number of variables
    a_flat = json_data.get('a', [])  # flattened coefficient matrix

    # Fallback for malformed data: dimensions must be non-negative integers
    # and the flat array must hold exactly m rows of n coefficients + target.
    # (A negative n could otherwise satisfy the length check and crash later.)
    if not isinstance(m, int) or not isinstance(n, int) or m < 0 or n < 0:
        return nx.Graph()
    if len(a_flat) != m * (n + 1):
        return nx.Graph()

    # Reshape flat array into an m x (n+1) matrix
    width = n + 1
    a = [a_flat[i * width:(i + 1) * width] for i in range(m)]

    G = nx.Graph()

    # NOTE(review): this maximum is taken over the whole flat array, so it
    # also includes the target column, not only the coefficients. Kept as-is
    # to preserve the existing weights - confirm whether that is intended.
    max_coeff = max(abs(val) for val in a_flat) if a_flat else 1

    # Variable nodes (type 0): weight = sum of absolute coefficients across
    # all constraints, normalised by max_coeff * m and capped at 1.0.
    # The normaliser is zero when there are no constraints (m == 0) or when
    # every entry is zero; use the neutral 0.5 instead of dividing by zero.
    involvement_scale = max_coeff * m
    for j in range(n):
        total_involvement = sum(abs(row[j]) for row in a)
        if involvement_scale > 0:
            weight = min(total_involvement / involvement_scale, 1.0)
        else:
            weight = 0.5
        G.add_node(f'var_{j}', type=0, weight=weight)

    # Constraint nodes (type 1): weight combines tightness and scope
    for i, row in enumerate(a):
        target = row[n]  # target value is in last column
        coeffs = row[:n]  # coefficients for variables

        # Tightness: how close the target is to the sum of the positive
        # coefficients, clamped to [0.1, 1.0]; 0.5 when there are none.
        pos_sum = sum(c for c in coeffs if c > 0)
        if pos_sum > 0:
            tightness = 1.0 - abs(target - pos_sum) / pos_sum
            tightness = max(0.1, min(tightness, 1.0))
        else:
            tightness = 0.5

        # Scope: fraction of variables with a non-zero coefficient
        scope = sum(1 for c in coeffs if c != 0)
        scope_weight = scope / n if n > 0 else 0.5

        final_weight = 0.7 * tightness + 0.3 * scope_weight
        G.add_node(f'constraint_{i}', type=1, weight=final_weight)

    # Bipartite edges: variable participation in constraints
    for i, row in enumerate(a):
        for j in range(n):
            coeff = row[j]
            if coeff != 0:  # Only connect if variable participates
                # A non-zero coefficient implies max_coeff > 0, so the
                # division is safe. The square root is a non-linear scaling
                # of the relative coefficient magnitude.
                edge_weight = math.sqrt(abs(coeff) / max_coeff)
                G.add_edge(f'var_{j}', f'constraint_{i}', weight=edge_weight)

    # Conflict edges between variables that compete in oversubscribed
    # constraints (positive coefficients sum to more than 1.2 * target)
    for row in a:
        target = row[n]
        pos_vars = [(j, c) for j, c in enumerate(row[:n]) if c > 0]
        total_pos = sum(c for _, c in pos_vars)
        if total_pos <= target * 1.2:
            continue

        # Only the four largest positive coefficients are considered
        # (stable sort: ties keep variable-index order).
        top = sorted(pos_vars, key=lambda item: item[1], reverse=True)[:4]
        for idx1, (j1, c1) in enumerate(top):
            for j2, c2 in top[idx1 + 1:]:
                # Pair alone already covers more than 80% of the target
                if c1 + c2 > target * 0.8:
                    conflict_strength = min((c1 + c2) / total_pos, 1.0)
                    # Exponential scaling to emphasize strong conflicts
                    conflict_weight = math.exp(-2.0 * (1.0 - conflict_strength))
                    # If an earlier constraint already linked this pair,
                    # add_edge replaces that edge's weight with this one.
                    G.add_edge(f'var_{j1}', f'var_{j2}', weight=conflict_weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Only the JSON file is read; the .mzn path is passed through to
    build_graph and the .dzn path is accepted but not used here.
    Prints a one-line summary of the built graph.
    """
    if len(sys.argv) != 4:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # keeping the usage error out of stdout.
        sys.exit("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")

    mzn_file = sys.argv[1]
    json_file = sys.argv[3]

    # Load JSON data; read as UTF-8 explicitly instead of relying on the
    # platform's default text encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


if __name__ == "__main__":
    main()