#!/usr/bin/env python3
"""
Graph converter for road_naive problem.
Created using subagent_prompt.md version: v_02

This problem is about road construction optimization with budget constraints.
Given n locations with distances and construction costs between them, the goal is to
select which roads to build to minimize total shortest path distances while staying within budget.

Key challenges: 
- Budget constraint makes this harder than simple MST
- Trade-off between connectivity and total distance minimization
- Expensive but short roads vs cheap but long roads
- Network topology affects shortest path calculations
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the road construction problem.

    Args:
        mzn_file: Path to .mzn file (for reference only; it is not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'n', 'budget',
            'distance' and 'cost'. The two matrices are consumed as flat
            row-major lists of length n*n.

    Returns:
        An undirected networkx.Graph whose nodes carry 'type' and 'weight'
        attributes and whose edges carry a 'weight' attribute.

    Graph layout:
    - Location nodes (type 0): one 'location_{i}' per location
    - Potential road nodes (type 1): one 'road_{i}_{j}' per pair i < j
    - Budget constraint node (type 1): a single 'budget_constraint' node
    - Edges: location <-> road (both endpoints of the road),
      road <-> budget_constraint, and road <-> road conflict edges between
      expensive roads that cannot both be afforded. Because of the last two
      edge kinds the graph is not strictly bipartite.

    Key insights for difficulty:
    - High cost/distance ratio roads are harder decisions
    - Locations with few cheap connections are bottlenecks
    - Tight budget relative to minimum cost creates difficulty
    """
    n = json_data.get('n', 0)
    budget = json_data.get('budget', 0)
    distance_flat = json_data.get('distance', [])
    cost_flat = json_data.get('cost', [])

    # Convert flat row-major arrays to 2D matrices
    distance = [distance_flat[i * n:(i + 1) * n] for i in range(n)]
    cost = [cost_flat[i * n:(i + 1) * n] for i in range(n)]

    # Every unordered pair of locations (upper triangle), in row-major order.
    # Computed once and reused so that node/edge insertion order stays stable.
    pairs = [(i, j) for i in range(n) for j in range(i + 1, n)]

    G = nx.Graph()

    # Location nodes (type 0) - weight by connectivity potential
    max_connections = n - 1  # Maximum possible connections per location
    for i in range(n):
        # Number of roads from this location that individually fit the budget
        affordable_connections = sum(
            1 for j in range(n) if i != j and cost[i][j] <= budget
        )
        connectivity_ratio = (
            affordable_connections / max_connections if max_connections > 0 else 0
        )

        # More options = easier, so invert: constrained locations weigh more.
        # The 0.7 factor keeps the weight within [0.3, 1.0].
        weight = 1.0 - connectivity_ratio * 0.7
        G.add_node(f'location_{i}', type=0, weight=weight)

    # Normalisation constants over all off-diagonal entries
    if n > 1:
        max_distance = max(
            distance[i][j] for i in range(n) for j in range(n) if i != j
        )
        max_cost = max(cost[i][j] for i in range(n) for j in range(n) if i != j)
    else:
        max_distance = 1
        max_cost = 1

    # Potential road nodes (type 1) - one for each possible road connection
    difficulty_scale = math.exp(2) - 1  # exp(1 + 1) - 1: both ratios at maximum
    for i, j in pairs:
        cost_ratio = cost[i][j] / max_cost if max_cost > 0 else 0.5
        distance_ratio = distance[i][j] / max_distance if max_distance > 0 else 0.5

        # Expensive roads with long distances are the hardest decisions;
        # the exponential makes differences more pronounced.
        difficulty = math.exp(cost_ratio + distance_ratio) - 1
        difficulty = min(difficulty / difficulty_scale, 1.0)  # Normalize to [0,1]

        G.add_node(f'road_{i}_{j}', type=1, weight=difficulty)

    # Global budget constraint node (type 1)
    # Weight by budget tightness - tighter budget = higher weight
    min_spanning_cost = 0
    if n > 1:
        # Heuristic estimate of the cost to connect all locations: a single
        # pass over edges in ascending cost, growing one component from
        # location 0. An edge whose endpoints are both still unconnected is
        # skipped and never revisited, so the result is the cost of *a*
        # spanning tree, not necessarily the minimum one.
        edges_by_cost = sorted((cost[i][j], i, j) for i, j in pairs)

        connected = [False] * n
        connected[0] = True
        for edge_cost, i, j in edges_by_cost:
            if connected[i] != connected[j]:
                min_spanning_cost += edge_cost
                connected[i] = connected[j] = True
                if all(connected):
                    break

    budget_tightness = 1.0 - (budget / max(min_spanning_cost * 2, 1))
    budget_tightness = max(0.0, min(budget_tightness, 1.0))  # Clamp to [0,1]
    G.add_node('budget_constraint', type=1, weight=budget_tightness)

    # Edges: locations connected to their potential roads
    for i, j in pairs:
        road_node = f'road_{i}_{j}'

        # Edge weight represents importance of this road for these locations:
        # the mean of distance efficiency and affordability.
        distance_efficiency = (
            1.0 - (distance[i][j] / max_distance) if max_distance > 0 else 0.5
        )
        affordability = 1.0 - (cost[i][j] / budget) if budget > 0 else 0.5
        edge_weight = (distance_efficiency + affordability) / 2
        edge_weight = max(0.1, min(edge_weight, 1.0))  # Keep in reasonable range

        G.add_edge(f'location_{i}', road_node, weight=edge_weight)
        G.add_edge(f'location_{j}', road_node, weight=edge_weight)

    # Edges: roads connected to budget constraint
    for i, j in pairs:
        # Edge weight represents how much of the budget this road consumes
        budget_consumption = cost[i][j] / budget if budget > 0 else 0.5
        budget_consumption = min(budget_consumption, 1.0)

        G.add_edge(f'road_{i}_{j}', 'budget_constraint', weight=budget_consumption)

    # Conflict edges between expensive roads that together exceed the budget
    expensive_threshold = budget * 0.3  # Roads consuming >30% of budget
    expensive_roads = sorted(
        ((cost[i][j], i, j) for i, j in pairs if cost[i][j] > expensive_threshold),
        reverse=True,  # Most expensive first
    )

    for idx1, (cost1, i1, j1) in enumerate(expensive_roads):
        # Limit conflicts: compare only with the next four roads in the ranking
        for cost2, i2, j2 in expensive_roads[idx1 + 1:idx1 + 5]:
            if cost1 + cost2 > budget:  # Can't afford both
                if budget > 0:
                    conflict_strength = min((cost1 + cost2) / (budget * 2), 1.0)
                else:
                    # With no (or a negative) budget the ratio is undefined;
                    # treat the pair as maximally conflicting instead of
                    # dividing by zero.
                    conflict_strength = 1.0
                G.add_edge(
                    f'road_{i1}_{j1}', f'road_{i2}_{j2}', weight=conflict_strength
                )

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    With any other argument count, prints a usage line and exits with
    status 1. Only the JSON file is opened here; the .mzn path is passed
    through to build_graph and the .dzn path is accepted but not used.
    On success, prints the node and edge counts of the built graph.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_path, _dzn_path, json_path = cli_args

    # Parse the instance data
    with open(json_path, 'r') as handle:
        instance = json.load(handle)

    # build_graph returns the graph for direct feature extraction;
    # here we only report its size.
    graph = build_graph(mzn_path, instance)
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")

if __name__ == "__main__":
    main()