#!/usr/bin/env python3
"""
Graph converter for the CCMCP (Cardinality-Constrained Multi-Cycle Problem).
Created using subagent_prompt.md version: v_02

This problem is about finding maximum-weight cycles in a directed graph with length constraints.
It models kidney exchange where vertices are patients and edges represent compatibility with weights.
Key challenges: cycle length limitation (K), negative edge weights (infeasible exchanges), sparse connectivity.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _as_weight_matrix(edge_weight, V):
    """Return edge_weight as a V x V list of rows, padding missing cells with -1."""
    if edge_weight and isinstance(edge_weight[0], (list, tuple)):
        # Nested (row-per-list) layout: index rows/columns directly.
        rows = []
        for i in range(V):
            src = edge_weight[i] if i < len(edge_weight) else ()
            rows.append([src[j] if j < len(src) else -1 for j in range(V)])
        return rows

    # Flat row-major layout: cell (i, j) lives at index i * V + j.
    flat_len = len(edge_weight)
    return [
        [edge_weight[i * V + j] if i * V + j < flat_len else -1 for j in range(V)]
        for i in range(V)
    ]


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the CCMCP problem instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict with the instance data. Keys read:
            'V' (number of vertices, default 0),
            'K' (maximum cycle length, default 2),
            'edge_weight' (V*V weights, either a flat row-major list or a
            list of V rows; missing cells are treated as -1). Only weights
            > 0 are considered usable edges.

    Returns:
        An undirected networkx.Graph whose nodes carry `type` and `weight`
        attributes and whose edges carry a `weight` attribute.

    Strategy: variable nodes linked to constraint nodes, plus direct
    vertex-to-vertex competition edges (so the result is not strictly
    bipartite).
    - Variable nodes (type 0): `vertex_{i}`, one per vertex of the exchange graph
    - Constraint nodes (type 1): `cycle_len_{n}` for n in 2..K and
      `high_value_constraint_{id}` for rows with several high-weight edges
    - Edges represent participation in constraints and competition for the
      same targets

    The main complexity comes from:
    1. Sparse connectivity (many -1 edges indicating incompatibility)
    2. Maximum cycle length constraint K
    3. Objective maximization with heterogeneous edge weights
    """
    V = json_data.get('V', 0)  # Number of vertices
    K = json_data.get('K', 2)  # Maximum cycle length
    weight_matrix = _as_weight_matrix(json_data.get('edge_weight', []), V)

    G = nx.Graph()

    # Weight statistics used for normalization (only positive weights count)
    valid_weights = [w for row in weight_matrix for w in row if w > 0]
    max_weight = max(valid_weights) if valid_weights else 1
    avg_weight = sum(valid_weights) / len(valid_weights) if valid_weights else 0

    # Per-vertex connectivity statistics
    out_degrees = [sum(1 for w in row if w > 0) for row in weight_matrix]
    in_degrees = [sum(1 for row in weight_matrix if row[i] > 0) for i in range(V)]

    max_out_degree = max(out_degrees) if out_degrees else 1
    max_in_degree = max(in_degrees) if in_degrees else 1

    # An instance without any positive edge has both maxima equal to 0;
    # fall back to 1 so the ratio is 0 instead of raising ZeroDivisionError.
    degree_scale = (max_out_degree + max_in_degree) or 1
    connectivity = [
        (out_degrees[i] + in_degrees[i]) / degree_scale for i in range(V)
    ]

    # Variable nodes (type 0): vertices in the exchange graph
    for i, row in enumerate(weight_matrix):
        # Value potential: sum of positive outgoing edge weights
        value_potential = sum(max(0, w) for w in row)
        norm_value = value_potential / max_weight  # max_weight is always > 0

        # sqrt dampens the influence of very high connectivity
        node_weight = 0.3 * math.sqrt(connectivity[i]) + 0.7 * min(norm_value, 1.0)
        node_weight = max(min(node_weight, 1.0), 0.1)  # Clamp to [0.1, 1.0]

        G.add_node(f'vertex_{i}', type=0, weight=node_weight)

    # Constraint nodes (type 1): one per admissible cycle length 2..K
    for cycle_len in range(2, K + 1):
        # Shorter cycles get a higher weight (treated as more restrictive)
        restrictiveness = 1.0 - (cycle_len - 2) / (K - 1) if K > 2 else 1.0
        G.add_node(f'cycle_len_{cycle_len}', type=1, weight=restrictiveness)

    # Constraint nodes for rows that own several high-weight outgoing edges
    if valid_weights:
        high_weight_threshold = max(avg_weight, max_weight * 0.7)
        constraint_id = 0

        for row in weight_matrix:
            high_value_edges = [
                (j, w) for j, w in enumerate(row) if w >= high_weight_threshold
            ]
            if len(high_value_edges) <= 1:
                continue  # Only create a constraint if multiple edges compete

            total_weight = sum(w for _, w in high_value_edges)
            constraint_weight = min(
                total_weight / (max_weight * len(high_value_edges)), 1.0
            )
            constraint_name = f'high_value_constraint_{constraint_id}'
            G.add_node(constraint_name, type=1, weight=constraint_weight)

            # Connect every high-value target vertex to this constraint,
            # weighted by the normalized edge weight
            for target_vertex, w in high_value_edges:
                G.add_edge(f'vertex_{target_vertex}', constraint_name,
                           weight=w / max_weight)

            constraint_id += 1

    # Cycle-length participation edges.
    # The length penalty depends only on the cycle length, so compute it once.
    length_penalties = [
        (cycle_len, math.exp(-0.5 * (cycle_len - 2)))
        for cycle_len in range(2, K + 1)
    ]
    for i in range(V):
        for cycle_len, length_penalty in length_penalties:
            participation_weight = connectivity[i] * length_penalty
            if participation_weight > 0.1:  # Only add meaningful edges
                G.add_edge(f'vertex_{i}', f'cycle_len_{cycle_len}',
                           weight=min(participation_weight, 1.0))

    # Direct competition edges between vertices that share high-value targets.
    # Precompute, per vertex, the targets reachable with an above-average
    # positive weight so each pair only intersects two short lists.
    strong_targets = [
        [k for k, w in enumerate(row) if w > 0 and w >= avg_weight]
        for row in weight_matrix
    ]
    strong_sets = [set(targets) for targets in strong_targets]

    for i in range(V):
        if len(strong_targets[i]) < 2:
            continue  # Cannot share two targets with anyone
        for j in range(i + 1, V):
            # Ascending target order is kept so the accumulation order matches
            # a plain scan over all columns
            common_targets = [k for k in strong_targets[i] if k in strong_sets[j]]
            if len(common_targets) < 2:  # Not significant competition
                continue

            competition_intensity = 0
            for target in common_targets:
                w1 = weight_matrix[i][target] / max_weight
                w2 = weight_matrix[j][target] / max_weight
                competition_intensity += min(w1, w2)  # Overlap in capability

            if competition_intensity > 0.3:
                G.add_edge(f'vertex_{i}', f'vertex_{j}',
                           weight=min(competition_intensity / len(common_targets), 1.0))

    return G


def main():
    """
    Command-line entry point: load a JSON instance and report the graph size.

    Expects exactly three positional arguments
    (<mzn_file> <dzn_file> <json_file>); otherwise prints the usage line and
    exits with status 1. Only the JSON file is opened here; the .mzn path is
    forwarded to build_graph and the .dzn path is not used.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is accepted to keep the three-argument CLI but is not read.
    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Only a summary is printed; the graph itself is not written anywhere here.
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()