#!/usr/bin/env python3
"""
Graph converter for WCSP (Weighted Constraint Satisfaction Problem).
Created using subagent_prompt.md version: v_02

WCSP is about finding an assignment to variables that minimizes the total cost
from unary and binary cost functions. Each variable has a finite domain, and
cost functions assign non-negative integer costs to variable assignments.
Key challenges: managing variable domains, cost function interactions, and finding
assignments that avoid high-cost (or forbidden) combinations.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _function_costs(costs, cum_tuples, j, num_tuples, width):
    """
    Return the sampled table entries for cost function ``j``.

    ``costs`` is read as a flat list of fixed-width records. The records of
    function ``j`` start at ``cum_tuples[j] * width`` (offset 0 when
    ``cum_tuples`` has no entry for ``j``) and span ``num_tuples`` records.
    One entry per record is returned: the first one.

    NOTE(review): the first entry of each record is treated as its cost.
    Confirm against the data layout that the cost is not stored last in the
    record (e.g. ``value, cost`` / ``x, y, cost``).

    Returns ``[0]`` when the start offset lies beyond the end of ``costs``;
    may return an empty list when the slice selects nothing.
    """
    start_idx = cum_tuples[j] * width if j < len(cum_tuples) else 0
    if start_idx >= len(costs):
        return [0]
    end_idx = start_idx + num_tuples * width
    return costs[start_idx:end_idx:width]


def _count_costs(func_costs, top):
    """Return ``(forbidden, positive)``: entries ``>= top`` and entries ``> 0``."""
    forbidden = sum(1 for cost in func_costs if cost >= top)
    positive = sum(1 for cost in func_costs if cost > 0)
    return forbidden, positive


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the WCSP instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; it is not read).
        json_data: Dict containing parsed DZN data. Keys read here:
            ``num_variables``, ``domains``, ``top``, ``num_constraints1``,
            ``func1x``, ``num_tuples1``, ``costs1``, ``cum_tuples1``,
            ``num_constraints2``, ``func2x``, ``func2y``, ``num_tuples2``,
            ``costs2``, ``cum_tuples2``. Missing keys fall back to 0 / empty
            lists (``top`` falls back to 1).

    Returns:
        An undirected ``networkx.Graph`` whose nodes carry ``type`` and
        ``weight`` attributes and whose edges carry ``weight``.

    Graph structure:
    - Variable nodes ``var_<i>`` (type 0, 1-indexed), weighted so that smaller
      domains get a higher weight.
    - Cost function nodes ``unary_<j>`` / ``binary_<j>`` (type 1), weighted by
      a tightness score derived from forbidden (``>= top``) and positive costs.
    - Variable-function edges for scope membership.
    - Variable-variable "conflict" edges for binary functions in which more
      than 30% of the sampled entries are forbidden. Because of these edges the
      graph is not strictly bipartite.

    Variable indices outside ``1..num_variables`` are ignored when adding
    edges, so that no attribute-less variable node is created implicitly.
    """

    # Extract problem data
    num_variables = json_data.get('num_variables', 0)
    domains = json_data.get('domains', [])
    top = json_data.get('top', 1)  # Maximum cost (forbidden assignments)

    # Unary cost functions
    num_constraints1 = json_data.get('num_constraints1', 0)
    func1x = json_data.get('func1x', [])
    num_tuples1 = json_data.get('num_tuples1', [])
    costs1 = json_data.get('costs1', [])
    cum_tuples1 = json_data.get('cum_tuples1', [])

    # Binary cost functions
    num_constraints2 = json_data.get('num_constraints2', 0)
    func2x = json_data.get('func2x', [])
    func2y = json_data.get('func2y', [])
    num_tuples2 = json_data.get('num_tuples2', [])
    costs2 = json_data.get('costs2', [])
    cum_tuples2 = json_data.get('cum_tuples2', [])

    G = nx.Graph()

    def is_valid_variable(index):
        # add_edge() creates unknown endpoints on the fly; only indices that
        # name an existing var_<i> node may be used as edge endpoints.
        return 1 <= index <= num_variables

    # Add variable nodes (type 0)
    max_domain = max(domains) if domains else 1
    for i in range(num_variables):
        var_id = f'var_{i+1}'  # Variables are 1-indexed in the model
        domain_size = domains[i] if i < len(domains) else 1

        # Inverse normalized log of the domain size: a domain of size 1 gets
        # weight 1.0 and the largest domain gets weight 0.0, so variables with
        # fewer choices (more constrained) weigh more.
        # domain_size > 1 implies max_domain > 1, so the divisor is non-zero.
        if domain_size > 1:
            domain_weight = 1.0 - math.log(domain_size) / math.log(max_domain)
        else:
            domain_weight = 1.0

        G.add_node(var_id, type=0, weight=domain_weight)

    # Add unary cost function nodes (type 1)
    for j in range(num_constraints1):
        func_id = f'unary_{j}'
        var_idx = func1x[j] if j < len(func1x) else 1
        num_tuples = num_tuples1[j] if j < len(num_tuples1) else 1

        func_costs = _function_costs(costs1, cum_tuples1, j, num_tuples, 2)

        # Tightness: forbidden entries count double, positive entries once,
        # normalized by the maximum attainable score (3 per entry).
        if func_costs:
            forbidden_count, high_cost_count = _count_costs(func_costs, top)
            tightness = (forbidden_count * 2 + high_cost_count) / (len(func_costs) * 3)
        else:
            tightness = 0.5

        G.add_node(func_id, type=1, weight=min(tightness, 1.0))

        # Add edge from variable to unary function
        if is_valid_variable(var_idx):
            # Share of the largest domain covered by the listed tuples; falls
            # back to 0.5 when no usable domain size is available.
            if domains and max_domain > 0:
                participation_weight = min(num_tuples / max_domain, 1.0)
            else:
                participation_weight = 0.5
            G.add_edge(f'var_{var_idx}', func_id, weight=participation_weight)

    # Add binary cost function nodes (type 1); conflict edges are collected
    # here and added afterwards so they follow all variable-function edges.
    conflicts = []
    for j in range(num_constraints2):
        func_id = f'binary_{j}'
        var_x = func2x[j] if j < len(func2x) else 1
        var_y = func2y[j] if j < len(func2y) else 1
        num_tuples = num_tuples2[j] if j < len(num_tuples2) else 1

        func_costs = _function_costs(costs2, cum_tuples2, j, num_tuples, 3)

        # Binary functions are scored more aggressively than unary ones; the
        # raw score can reach 1.25 and is clamped to 1.0 below.
        forbidden_count = 0
        if func_costs:
            forbidden_count, high_cost_count = _count_costs(func_costs, top)
            tightness = (forbidden_count * 3 + high_cost_count * 2) / (len(func_costs) * 4)
        else:
            tightness = 0.7  # Default higher for binary

        G.add_node(func_id, type=1, weight=min(tightness, 1.0))

        # Add edges from both scope variables; a function whose two scope
        # entries name the same variable gets a single edge.
        endpoints = [var_x] if var_y == var_x else [var_x, var_y]
        for var in endpoints:
            if is_valid_variable(var):
                scope_weight = min(math.sqrt(num_tuples) / 10.0, 1.0)
                G.add_edge(f'var_{var}', func_id, weight=scope_weight)

        # Record a conflict between the two variables when a significant share
        # of the sampled entries is forbidden.
        if (func_costs and var_x != var_y
                and is_valid_variable(var_x) and is_valid_variable(var_y)):
            conflict_ratio = forbidden_count / len(func_costs)
            if conflict_ratio > 0.3:
                # Saturating curve: grows quickly with the ratio and approaches
                # (but never reaches) 1.0.
                conflict_weight = 1.0 - math.exp(-3.0 * conflict_ratio)
                conflicts.append((var_x, var_y, conflict_weight))

    # Add conflict edges between variables that share highly forbidden binary
    # functions. A later function over the same pair overwrites the weight.
    for var_x, var_y, conflict_weight in conflicts:
        G.add_edge(f'var_{var_x}', f'var_{var_y}', weight=conflict_weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments (mzn file, dzn file, json file). Prints a
    usage line and exits with status 1 otherwise. Only the JSON file is read;
    the graph is built from it and its node/edge counts are printed.
    """
    args = sys.argv
    if len(args) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is part of the CLI contract but is not used here.
    _, mzn_path, _dzn_path, json_path = args

    # Parse the instance data
    with open(json_path, 'r') as handle:
        instance = json.load(handle)

    # build_graph returns the graph for direct feature extraction; this entry
    # point only reports its size.
    G = build_graph(mzn_path, instance)
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, so that
# importing the module (e.g. to call build_graph directly) has no side effects.
if __name__ == "__main__":
    main()