#!/usr/bin/env python3
"""
Graph converter for road_naive problem.
Created using subagent_prompt.md version: v_02

This problem is about optimal road construction under budget constraints.
The goal is to decide which roads to build to minimize total shortest path distances
while staying within a construction budget.

Key challenges: 
- Budget constraint creates trade-offs between cost and benefit
- Need to consider both direct distances and shortest path improvements
- Network connectivity affects overall travel efficiency
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a weighted graph describing one road-construction instance.

    Args:
        mzn_file: Path to the .mzn model file. Accepted for interface
            compatibility only; it is not read here.
        json_data: Dict of instance data with keys 'n' (number of cities),
            'budget', and 'distance' / 'cost' given as row-major flattened
            n*n lists. Missing keys fall back to n=0, budget=1 and empty
            lists.

    Returns:
        A networkx.Graph whose nodes carry 'type' and 'weight' attributes
        and whose edges carry a 'weight' attribute:
        - type 0: 'road_{i}_{j}' for every pair i < j with cost[i][j] > 0
        - type 1: 'budget_constraint', plus 'connect_{i}_{j}' for every i < j
        - type 2: 'city_{i}' for every city

    Raises:
        IndexError: if 'distance' or 'cost' holds fewer than n*n entries.

    Edges:
        - road -> 'budget_constraint' (share of the budget, capped at 1.0)
        - road -> its 'connect_{i}_{j}' node (1.0)
        - road -> each endpoint city (0.8)
        - road -> road, for pairs of expensive roads (each > 30% of budget)
          whose combined cost exceeds the budget
        - city -> city, for pairs with positive distance (exponential decay)

    Because of the road-road and city-city edges the result is not strictly
    bipartite, even though most edges join a road to a constraint or city.

    Degenerate inputs (no cities, no positive distance or cost, a single
    city, non-positive budget) fall back to neutral weights rather than
    raising.
    """
    n = json_data.get('n', 0)
    budget = json_data.get('budget', 1)
    distance_flat = json_data.get('distance', [])
    cost_flat = json_data.get('cost', [])

    # Rebuild the n x n matrices from their row-major flattened form.
    distance = [[distance_flat[i * n + j] for j in range(n)] for i in range(n)]
    cost = [[cost_flat[i * n + j] for j in range(n)] for i in range(n)]

    # Unordered city pairs (upper triangle), in the same order as the
    # equivalent nested loops so node/edge insertion order is stable.
    pairs = [(i, j) for i in range(n) for j in range(i + 1, n)]

    # Global metrics used for normalization. default=0 keeps instances with
    # no positive entries from raising; the "> 0" guards below then select
    # the neutral fallback weights.
    total_cost = sum(cost[i][j] for i, j in pairs if cost[i][j] > 0)
    max_distance = max((d for row in distance for d in row if d > 0), default=0)
    max_cost = max((c for row in cost for c in row if c > 0), default=0)

    G = nx.Graph()

    # Type 2 nodes: cities, weighted by closeness to all other cities.
    for i in range(n):
        # n > 1 guards the (n - 1) divisor for a single-city instance.
        if max_distance > 0 and n > 1:
            total_distance_from_i = sum(distance[i][j] for j in range(n) if i != j)
            centrality = 1.0 - (total_distance_from_i / (max_distance * (n - 1)))
        else:
            centrality = 0.5
        G.add_node(f'city_{i}', type=2, weight=centrality)

    # Type 0 nodes: candidate road segments (the decision variables).
    road_segments = []
    for i, j in pairs:
        road_cost = cost[i][j]
        if road_cost > 0:  # Non-positive cost means no buildable road
            # Normalized distance per unit of normalized cost.
            benefit = distance[i][j] / max_distance if max_distance > 0 else 0.5
            cost_ratio = road_cost / max_cost if max_cost > 0 else 0.5
            efficiency = benefit / cost_ratio if cost_ratio > 0 else 0.5

            # Saturating map so large efficiencies approach 1.0.
            weight = 1.0 - math.exp(-2.0 * efficiency)

            road_id = f'road_{i}_{j}'
            G.add_node(road_id, type=0, weight=min(weight, 1.0))
            road_segments.append((i, j, road_id, road_cost))

    # Type 1 node: the single global budget constraint. Tightness is the
    # ratio of total road cost to budget, clipped to 2.0 and scaled to [0, 1].
    budget_tightness = min(total_cost / budget, 2.0) / 2.0 if budget > 0 else 1.0
    G.add_node('budget_constraint', type=1, weight=budget_tightness)

    # Type 1 nodes: one connectivity node per city pair, weighted by the
    # square root of the pair's normalized direct distance.
    for i, j in pairs:
        connectivity_importance = distance[i][j] / max_distance if max_distance > 0 else 0.5
        G.add_node(f'connect_{i}_{j}', type=1, weight=math.sqrt(connectivity_importance))

    # Edges from each road to the constraints and cities it touches.
    for i, j, road_id, road_cost in road_segments:
        budget_impact = road_cost / budget if budget > 0 else 0.5
        G.add_edge(road_id, 'budget_constraint', weight=min(budget_impact, 1.0))
        G.add_edge(road_id, f'connect_{i}_{j}', weight=1.0)
        G.add_edge(road_id, f'city_{i}', weight=0.8)
        G.add_edge(road_id, f'city_{j}', weight=0.8)

    # Conflict edges: pairs of expensive roads (each above 30% of the budget)
    # that cannot both be built without exceeding the budget.
    expensive_roads = [
        (road_id, road_cost)
        for _, _, road_id, road_cost in road_segments
        if road_cost > budget * 0.3
    ]
    for idx, (road_a, cost_a) in enumerate(expensive_roads):
        for road_b, cost_b in expensive_roads[idx + 1:]:
            if cost_a + cost_b > budget:
                if budget > 0:
                    conflict_strength = min((cost_a + cost_b) / budget - 1.0, 1.0)
                else:
                    # The overshoot ratio grows without bound as the budget
                    # shrinks to zero, so a non-positive budget uses the cap
                    # instead of dividing by zero or going negative.
                    conflict_strength = 1.0
                G.add_edge(road_a, road_b, weight=conflict_strength)

    # City-to-city edges capture the underlying geography: closer cities
    # get stronger (exponentially decaying) weights.
    for i, j in pairs:
        if distance[i][j] > 0:
            proximity = 1.0 - distance[i][j] / max_distance if max_distance > 0 else 0.5
            weight = math.exp(-3.0 * (1.0 - proximity))
            G.add_edge(f'city_{i}', f'city_{j}', weight=weight)

    return G


def main():
    """
    Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Only the JSON file is opened; the .mzn path is passed through to
    build_graph and the .dzn path is accepted but not used. With any other
    argument count a usage message is written to stderr and the process
    exits with status 1. On success a one-line node/edge summary is printed.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>",
              file=sys.stderr)
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()