#!/usr/bin/env python3
"""
Graph converter for CELAR Radio Link Frequency Assignment problem.
Created using subagent_prompt.md version: v_02

This problem is about assigning frequencies to communication links while avoiding interference.
Key challenges: Hard equality constraints require exact differences, soft constraints penalize violations with different weights, domain restrictions limit frequency choices.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _safe_ratio(value, maximum, default=0.5):
    """Return ``value / maximum``, or *default* when *maximum* is not positive."""
    return value / maximum if maximum > 0 else default


def _cost_index(category, costs):
    """Map a 1-based weight category to a 0-based index into *costs* (0 if out of range)."""
    return category - 1 if 0 < category <= len(costs) else 0


def _participation_counts(xs, ys):
    """Count, per variable id, the constraints that list it as an endpoint.

    A constraint whose two endpoints are the same variable is counted once.
    The pairing stops at the shorter of the two lists.
    """
    counts = {}
    for x, y in zip(xs, ys):
        counts[x] = counts.get(x, 0) + 1
        if y != x:
            counts[y] = counts.get(y, 0) + 1
    return counts


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the CELAR frequency assignment instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys read: num_variables,
            num_hardconstraints, num_softconstraints, costs, domains,
            num_categories, hardctrx/hardctry/hardctrk and
            softctrx/softctry/softctrk/softctrw. Missing keys fall back to
            0 / empty list defaults (costs defaults to [1, 1, 1, 1]).

    Returns:
        networkx.Graph with
        - ``var_<i>`` nodes (type 0), i in 1..num_variables, weighted by
          0.3 * domain share + 0.7 * tanh-scaled constraint participation
        - ``hard_<j>`` nodes (type 1), weighted by relative required difference
        - ``soft_<j>`` nodes (type 1), weighted by violation cost and tightness
        - weighted edges from each constraint node to its two variables
        - extra ``var``-``var`` conflict edges for pairs (first variable id
          below 20) whose shared high-cost soft constraints add up to a
          strong conflict

    Degenerate instances where a normalising maximum is zero (all hard
    differences 0, all soft thresholds 0 or all costs 0) use a neutral
    ratio of 0.5 instead of dividing by zero.
    """
    # Extract problem data
    num_variables = json_data.get('num_variables', 0)
    num_hardconstraints = json_data.get('num_hardconstraints', 0)
    num_softconstraints = json_data.get('num_softconstraints', 0)

    costs = json_data.get('costs', [1, 1, 1, 1])
    domains = json_data.get('domains', [])
    num_categories = json_data.get('num_categories', 1)

    # Hard constraint data
    hardctrx = json_data.get('hardctrx', [])
    hardctry = json_data.get('hardctry', [])
    hardctrk = json_data.get('hardctrk', [])

    # Soft constraint data
    softctrx = json_data.get('softctrx', [])
    softctry = json_data.get('softctry', [])
    softctrk = json_data.get('softctrk', [])
    softctrw = json_data.get('softctrw', [])

    # Normalising maxima, computed once and shared by node and edge weights
    max_cost = max(costs) if costs else 1
    max_hard_k = max(hardctrk) if hardctrk else 1
    max_soft_k = max(softctrk) if softctrk else 1

    def cost_ratio(category):
        """Relative violation cost of a weight category (0.5 when unknown)."""
        idx = _cost_index(category, costs)
        if idx >= len(costs):
            return 0.5
        return _safe_ratio(costs[idx], max_cost)

    G = nx.Graph()

    # --- Variable nodes (radio links) ------------------------------------
    # Share of variables that use each domain category
    domain_tightness = {}
    for category in range(1, num_categories + 1):
        count = sum(1 for d in domains if d == category)
        domain_tightness[category] = min(count / max(num_variables, 1), 1.0)

    # One pass over the constraint lists instead of rescanning per variable
    hard_counts = _participation_counts(hardctrx, hardctry)
    soft_counts = _participation_counts(softctrx, softctry)
    participation_scale = max(num_variables / 10, 1)

    for i in range(1, num_variables + 1):
        domain_category = domains[i - 1] if i - 1 < len(domains) else 1
        domain_weight = domain_tightness.get(domain_category, 0.5)

        total_participation = hard_counts.get(i, 0) + soft_counts.get(i, 0)
        # tanh saturates so heavily connected variables do not dominate
        participation_weight = math.tanh(total_participation / participation_scale)

        weight = 0.3 * domain_weight + 0.7 * participation_weight
        G.add_node(f'var_{i}', type=0, weight=min(weight, 1.0))

    # --- Hard constraints: node plus edges to both endpoints -------------
    usable_hard = min(num_hardconstraints, len(hardctrx), len(hardctry), len(hardctrk))
    for j in range(usable_hard):
        k_ratio = _safe_ratio(hardctrk[j], max_hard_k)
        node_name = f'hard_{j}'

        # Larger required differences are treated as more constraining
        G.add_node(node_name, type=1, weight=min(0.7 + 0.3 * k_ratio, 1.0))

        edge_weight = min(0.8 + 0.2 * k_ratio, 1.0)
        for var in (hardctrx[j], hardctry[j]):
            # Skip endpoints that are not declared variables
            if f'var_{var}' in G:
                G.add_edge(f'var_{var}', node_name, weight=edge_weight)

    # --- Soft constraints: node plus edges to both endpoints -------------
    usable_soft = min(num_softconstraints, len(softctrx), len(softctry),
                      len(softctrk), len(softctrw))
    for j in range(usable_soft):
        cost_factor = cost_ratio(softctrw[j])
        # Smaller threshold k => tighter constraint
        tightness = 1.0 - _safe_ratio(softctrk[j], max_soft_k)
        node_name = f'soft_{j}'

        node_weight = 0.6 * math.tanh(cost_factor * 3.0) + 0.4 * tightness
        G.add_node(node_name, type=1, weight=min(node_weight, 1.0))

        # sqrt lifts mid-range costs so they still contribute to the edge
        edge_weight = min(0.3 + 0.4 * tightness + 0.3 * math.sqrt(cost_factor), 1.0)
        for var in (softctrx[j], softctry[j]):
            if f'var_{var}' in G:
                G.add_edge(f'var_{var}', node_name, weight=edge_weight)

    # --- Conflict edges between variables sharing high-cost constraints --
    # With no positive cost there is no "high-cost" constraint to speak of
    # (and the normalisation below would divide by zero), so skip entirely.
    if num_variables > 1 and max_cost > 0:
        neighbours = {}
        usable_pairs = min(num_softconstraints, len(softctrx), len(softctry), len(softctrw))
        for j in range(usable_pairs):
            var1 = softctrx[j]
            var2 = softctry[j]
            idx = _cost_index(softctrw[j], costs)
            cost = costs[idx] if idx < len(costs) else 1

            neighbours.setdefault(var1, []).append((var2, cost))
            neighbours.setdefault(var2, []).append((var1, cost))

        conflict_threshold = max_cost * 0.7  # Only consider high-cost constraints
        # Only the first 19 variable ids act as sources, to limit edge count
        for var1 in range(1, min(num_variables + 1, 20)):
            if var1 not in neighbours:
                continue

            shared_costs = {}
            for var2, cost in neighbours[var1]:
                if cost >= conflict_threshold:
                    shared_costs[var2] = shared_costs.get(var2, 0) + cost

            for var2, total_cost in shared_costs.items():
                # var2 > var1 avoids adding the same pair twice
                if var2 <= var1 or total_cost < conflict_threshold * 2:
                    continue
                if f'var_{var1}' in G and f'var_{var2}' in G:
                    conflict_weight = min(total_cost / (max_cost * 3), 1.0)
                    G.add_edge(f'var_{var1}', f'var_{var2}', weight=conflict_weight)

    return G


def main():
    """Command-line entry point.

    Expects exactly three arguments: the .mzn path, the .dzn path and the
    JSON data path. Loads the JSON file, builds the graph from it and prints
    the resulting node and edge counts. Exits with status 1 after printing a
    usage line when the argument count is wrong.
    """
    args = sys.argv[1:]
    if len(args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is accepted for interface compatibility but is not read.
    mzn_file, _dzn_file, json_file = args

    with open(json_file, 'r') as handle:
        instance = json.load(handle)

    graph = build_graph(mzn_file, instance)

    # Report the size of the graph that build_graph returned
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()