#!/usr/bin/env python3
"""
Graph converter for shortest_path problem.
Created using subagent_prompt.md version: v_02

This problem is about finding the shortest path between two nodes in a directed graph.
Key challenges: Graph connectivity, path feasibility, edge weight distribution, 
and the distance between start and end nodes affecting solution complexity.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of a shortest path problem instance.

    Args:
        mzn_file: Path to the .mzn file. Kept for interface compatibility;
            it is not read by this function.
        json_data: Dict containing the parsed DZN data. The keys read are
            'N' (node count), 'M' (edge count), 'Start' and 'End'
            (1-indexed node ids), 'Edge_Start' and 'Edge_End' (1-indexed
            endpoints per edge) and 'L' (edge lengths). Missing keys fall
            back to empty/neutral defaults.

    Returns:
        An undirected networkx.Graph whose nodes all carry 'type' and
        'weight' attributes and whose edges carry a 'weight' attribute.

    Strategy: Create a variable/constraint graph with:
    - Node variables (type 0): 'node_i', weighted by degree centrality and
      index proximity to the start/end node (start and end get 1.0)
    - Edge variables (type 0): 'edge_i', weighted by inverted normalized
      length (shorter = higher weight)
    - Flow constraints (type 1): 'flow_constraint_i', one per node, weighted
      by a connectivity-based tightness estimate
    - Links from each variable to the flow constraints it participates in
    - Extra node-to-node links for every instance edge, plus one start-end
      link when no direct edge exists. Because of these links the result is
      not strictly bipartite.
    """
    # Access data from json_data dict
    N = json_data.get('N', 0)  # number of nodes
    M = json_data.get('M', 0)  # number of edges
    Start = json_data.get('Start', 1) - 1  # Convert to 0-indexed
    End = json_data.get('End', N) - 1      # Convert to 0-indexed
    Edge_Start = json_data.get('Edge_Start', [])
    Edge_End = json_data.get('Edge_End', [])
    L = json_data.get('L', [])  # edge lengths

    # Create graph
    G = nx.Graph()

    # Length statistics used for normalization
    max_length = max(L) if L else 1
    min_length = min(L) if L else 1
    length_span = max(max_length - min_length, 1)
    # Guard the exponential scaling below: if every length is 0 then
    # max_length is 0 and dividing by it would raise ZeroDivisionError.
    length_scale = max_length if max_length != 0 else 1

    # Collect, once, every edge index whose endpoints are present and in range.
    # Entries are (edge index, 0-indexed start node, 0-indexed end node).
    valid_edges = []
    for i in range(min(M, len(Edge_Start), len(Edge_End))):
        start_idx = Edge_Start[i] - 1  # Convert to 0-indexed
        end_idx = Edge_End[i] - 1
        if 0 <= start_idx < N and 0 <= end_idx < N:
            valid_edges.append((i, start_idx, end_idx))

    # Edge variables additionally require a length entry.
    weighted_edges = [
        (i, start_idx, end_idx, L[i])
        for i, start_idx, end_idx in valid_edges
        if i < len(L)
    ]

    # Calculate in-degree and out-degree for each node to assess centrality
    in_degree = [0] * N
    out_degree = [0] * N
    for _, start_idx, end_idx in valid_edges:
        out_degree[start_idx] += 1
        in_degree[end_idx] += 1

    max_degree = max(max(in_degree), max(out_degree)) if N > 0 else 1

    # Add node variables with weights based on centrality and proximity to start/end
    for i in range(N):
        if i == Start or i == End:
            # Special weighting for start and end nodes: maximum importance
            weight = 1.0
        else:
            # Base centrality from connectivity
            centrality = (in_degree[i] + out_degree[i]) / (2 * max_degree) if max_degree > 0 else 0.5

            # Index distance to start/end, normalized by the index range
            distance_to_start = abs(i - Start) / max(N - 1, 1) if N > 1 else 0
            distance_to_end = abs(i - End) / max(N - 1, 1) if N > 1 else 0

            # Combine centrality and proximity with non-linear scaling
            weight = 0.3 + 0.4 * centrality + 0.3 * math.exp(-2.0 * min(distance_to_start, distance_to_end))

        G.add_node(f'node_{i}', type=0, weight=min(weight, 1.0))

    # Add edge variables with weights based on normalized length (shorter = more important)
    for i, _, _, length in weighted_edges:
        normalized_length = (length - min_length) / length_span
        edge_weight = 1.0 - normalized_length  # Invert so shorter edges have higher weight
        G.add_node(f'edge_{i}', type=0, weight=edge_weight)

    # Add flow constraint nodes (one per graph node)
    for i in range(N):
        if i == Start or i == End:
            # Source/sink constraint: net flow of 1, treated as always tight
            constraint_tightness = 1.0
        else:
            # Intermediate nodes: flow conservation (inflow = outflow).
            # More connected nodes are treated as tighter.
            node_connectivity = (in_degree[i] + out_degree[i]) / max(max_degree, 1)
            constraint_tightness = 0.5 + 0.5 * node_connectivity

        # node_connectivity can reach 2, so clamp to keep the weight in [0, 1]
        constraint_tightness = min(constraint_tightness, 1.0)
        G.add_node(f'flow_constraint_{i}', type=1, weight=constraint_tightness)

    # Connect node variables to their own flow constraints
    for i in range(N):
        G.add_edge(f'node_{i}', f'flow_constraint_{i}', weight=1.0)

    # Connect edge variables to the flow constraints of their endpoints.
    # Both endpoints were range-checked above, so the constraint nodes exist.
    for i, start_idx, end_idx, _ in weighted_edges:
        edge_node = f'edge_{i}'
        # Edge contributes to outflow of start node
        G.add_edge(edge_node, f'flow_constraint_{start_idx}', weight=0.8)
        # Edge contributes to inflow of end node
        G.add_edge(edge_node, f'flow_constraint_{end_idx}', weight=0.8)

    # Add edges between nodes that are connected by graph edges (structural relationships).
    # G is undirected and simple, so parallel or reversed instance edges share
    # one link and the last one processed determines its weight.
    for _, start_idx, end_idx, length in weighted_edges:
        # Shorter connections are stronger
        connection_strength = math.exp(-2.0 * length / length_scale)
        G.add_edge(f'node_{start_idx}', f'node_{end_idx}', weight=connection_strength)

    # Link start and end when the instance has no direct edge between them.
    # Both indices must be in range: add_edge would otherwise create a new
    # node that lacks the 'type' and 'weight' attributes.
    if Start != End and 0 <= Start < N and 0 <= End < N:
        start_node = f'node_{Start}'
        end_node = f'node_{End}'
        if not G.has_edge(start_node, end_node):
            # Heuristic difficulty estimate from the index gap between start and end
            path_difficulty = 1.0 - math.exp(-abs(End - Start) / max(N, 1))
            G.add_edge(start_node, end_node, weight=path_difficulty)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: the .mzn file path, the .dzn file path
    and the path of the JSON file holding the parsed instance data. Prints a
    usage message and exits with status 1 on any other argument count;
    otherwise builds the graph and prints its node and edge counts.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # sys.argv[2] (the .dzn path) is required on the command line but is not
    # read here; the instance data comes from the JSON file.
    mzn_file = sys.argv[1]
    json_file = sys.argv[3]

    # Load JSON data; decode explicitly as UTF-8 rather than relying on the
    # platform's default text encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Report the size of the graph that was built
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()