#!/usr/bin/env python3
"""
Graph converter for Capacitated Vehicle Routing Problem (CVRP).
Created using subagent_prompt.md version: v_02

The CVRP is about finding optimal routes for a fleet of vehicles to serve customers
from a central depot while respecting vehicle capacity constraints and minimizing travel cost.

Key challenges: 
- Balancing routes to avoid overloading vehicles
- Minimizing total travel distance
- Determining which customers to group together
- Managing the trade-off between number of vehicles and route efficiency
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of a CVRP instance.

    Args:
        mzn_file: Path to the .mzn file. Kept for interface compatibility;
            it is not read by this function.
        json_data: Dict containing the parsed DZN data. The keys read are
            'N' (number of customers, depot excluded), 'Capacity',
            'Demand' (one entry per customer) and 'Distance' (flattened
            row-major (N+1) x (N+1) matrix whose index 0 is the depot).
            Missing keys default to 0, 1, [] and [] respectively.

    Returns:
        networkx.Graph whose nodes carry 'type' and 'weight' attributes and
        whose edges carry a 'weight' attribute.

    Strategy: customers are variable nodes (type 0); capacity, clustering and
    distance constraints are constraint nodes (type 1). The graph is mostly
    bipartite, but direct customer-customer conflict edges are also added for
    pairs of high-demand customers that cannot share a vehicle.

    - Customer nodes (type 0): weight from demand/capacity ratio plus a
      depot-distance term
    - 'capacity_constraint' node (type 1): connected to every customer
    - 'cluster_<k>' nodes (type 1): groups of customers that are close together
    - 'distance_constraint_<k>' nodes (type 1): the (up to) five most distant
      customer pairs
    """
    # Extract data
    n = json_data.get('N', 0)  # number of customers (depot is separate)
    capacity = json_data.get('Capacity', 1)
    demand = json_data.get('Demand', [])
    distance_flat = json_data.get('Distance', [])

    def demand_of(customer):
        # Customers without a demand entry are treated as having zero demand.
        return demand[customer] if customer < len(demand) else 0

    # Reconstruct the (N+1) x (N+1) distance matrix from the flattened array.
    # Index 0 is the depot, 1..N are customers; missing entries become 0.
    matrix_size = n + 1
    flat_len = len(distance_flat)
    distance_matrix = [
        [
            distance_flat[row * matrix_size + col]
            if row * matrix_size + col < flat_len
            else 0
            for col in range(matrix_size)
        ]
        for row in range(matrix_size)
    ]

    G = nx.Graph()

    # Calculate useful metrics
    total_demand = sum(demand)
    max_distance = max(distance_flat) if distance_flat else 1
    min_vehicles_needed = math.ceil(total_demand / capacity) if capacity > 0 else n

    # Add customer nodes (type 0) - weighted by demand pressure
    for i in range(n):
        # How much of one vehicle's capacity this customer consumes
        demand_pressure = demand_of(i) / capacity if capacity > 0 else 0.5
        # Customers far from the depot get a larger (1 - isolation_factor) term
        depot_distance = distance_matrix[0][i + 1]
        if max_distance > 0:
            isolation_factor = math.exp(-2.0 * depot_distance / max_distance)
        else:
            isolation_factor = 0.5

        node_weight = min(demand_pressure + (1 - isolation_factor) * 0.3, 1.0)
        G.add_node(f'customer_{i}', type=0, weight=node_weight)

    # Add global capacity constraint (type 1).
    # NOTE(review): the ratio uses n * capacity (customers x capacity), not
    # min_vehicles_needed * capacity; kept as-is to preserve existing weights.
    tightness_denominator = total_demand * min_vehicles_needed
    if total_demand > 0 and tightness_denominator > 0:
        capacity_tightness = 1.0 - (n * capacity) / tightness_denominator
    else:
        # Degenerate instance (no demand, or no vehicles could be derived):
        # fall back to a neutral weight instead of dividing by zero.
        capacity_tightness = 0.5
    capacity_tightness = max(0.1, min(capacity_tightness, 1.0))
    G.add_node('capacity_constraint', type=1, weight=capacity_tightness)

    # Connect all customers to capacity constraint with demand-based weights
    for i in range(n):
        edge_weight = demand_of(i) / capacity if capacity > 0 else 0.5
        G.add_edge(f'customer_{i}', 'capacity_constraint',
                   weight=min(edge_weight, 1.0))

    # Add geographic clustering constraints - identify dense customer regions.
    # At most n // 3 cluster nodes are created, scanning centers in index order.
    max_clusters = n // 3
    close_threshold = max_distance * 0.25  # "close" = under 25% of max distance
    clusters_created = 0
    for center in range(n):
        if clusters_created >= max_clusters:
            break

        center_row = distance_matrix[center + 1]  # +1 because depot is index 0
        close_customers = [
            other for other in range(n)
            if other != center and center_row[other + 1] < close_threshold
        ]

        # Only create a cluster constraint if there are enough close customers
        if len(close_customers) < 2:
            continue

        cluster_demand = sum(demand_of(c) for c in close_customers + [center])
        # Cluster demand relative to a single vehicle's capacity
        if capacity > 0:
            cluster_pressure = cluster_demand / capacity
        else:
            cluster_pressure = len(close_customers) / n
        cluster_weight = min(cluster_pressure * 0.7, 1.0)

        cluster_node = f'cluster_{clusters_created}'
        G.add_node(cluster_node, type=1, weight=cluster_weight)

        # Connect cluster customers to this constraint
        G.add_edge(f'customer_{center}', cluster_node, weight=0.8)
        for other in close_customers:
            G.add_edge(f'customer_{other}', cluster_node, weight=0.7)

        clusters_created += 1

    # Collect high-demand customers (more than 60% of one vehicle's capacity)
    high_demand_customers = [
        (i, demand_of(i)) for i in range(n) if demand_of(i) > capacity * 0.6
    ]

    # Add conflict edges between customers whose combined demand exceeds capacity
    for pos, (idx1, demand1) in enumerate(high_demand_customers):
        for idx2, demand2 in high_demand_customers[pos + 1:]:
            if demand1 + demand2 <= capacity:
                continue
            if capacity > 0:
                # Conflict weight based on how much the pair exceeds capacity
                excess_ratio = (demand1 + demand2) / capacity - 1.0
                conflict_weight = min(excess_ratio, 1.0)
            else:
                # No usable capacity: the pair can never share a vehicle, so
                # use the maximum weight rather than dividing by zero.
                conflict_weight = 1.0
            G.add_edge(f'customer_{idx1}', f'customer_{idx2}',
                       weight=conflict_weight)

    # Add distance-based routing constraints for distant customer pairs
    # (pairs farther apart than 70% of the maximum distance).
    far_threshold = max_distance * 0.7
    distant_pairs = [
        (i, j, distance_matrix[i + 1][j + 1])
        for i in range(n)
        for j in range(i + 1, n)
        if distance_matrix[i + 1][j + 1] > far_threshold
    ]

    # Keep only the five most distant pairs; the sort is stable, so ties keep
    # their (i, j) discovery order.
    distant_pairs.sort(key=lambda pair: pair[2], reverse=True)
    for idx, (i, j, dist) in enumerate(distant_pairs[:5]):
        # Weight by normalized distance
        distance_weight = min(dist / max_distance, 1.0)
        constraint_node = f'distance_constraint_{idx}'
        G.add_node(constraint_node, type=1, weight=distance_weight)

        G.add_edge(f'customer_{i}', constraint_node, weight=0.9)
        G.add_edge(f'customer_{j}', constraint_node, weight=0.9)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Prints a usage message and exits with status 1 otherwise. The JSON file
    is loaded and passed to build_graph together with the .mzn path; the
    .dzn path is accepted but not used here. A one-line summary of the
    resulting graph is printed.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_path, _dzn_path, json_path = cli_args

    # Load the instance data that was pre-parsed into JSON
    with open(json_path, 'r') as handle:
        instance = json.load(handle)

    # The graph is returned by build_graph for direct feature extraction
    graph = build_graph(mzn_path, instance)

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()