#!/usr/bin/env python3
"""
Graph converter for gfd-schedule2 problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling items with facilities:
- Items have kinds and must be processed by compatible facilities
- Items have production days (earliest start) and deadlines
- Items of the same kind can be grouped together
- Limited facilities and max items per day constraints
- Objective: minimize facility usage while meeting deadlines

Key challenges: temporal constraints, facility contention, grouping constraints
"""

import json
import math
import sys
from collections import Counter
from pathlib import Path

import networkx as nx


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of a GFD scheduling problem instance.

    Every node carries a ``type`` and a ``weight`` attribute; every edge
    carries a ``weight`` attribute.

    Nodes:
    - ``item_{i}`` (type 0): one per item, weighted by normalized urgency
      (``1 / (window + 1)`` divided by the largest urgency in the instance).
    - ``facility_{f}`` (type 1): one per facility, weight ``1 / F``.
    - ``kind_{k}`` (type 1): one per distinct kind, weighted by group size
      relative to the largest group.
    - ``day_{d}`` (type 1): one per day in ``1..MaxDay``, weighted by the
      number of items whose window covers the day relative to
      ``MaxItemsPerDay`` (capped and normalized to [0, 1]).
    - ``temporal_{i}`` (type 1): one per item whose window is <= 5 days.

    Edges:
    - item -> its kind, item -> heuristic primary/secondary facility,
      item -> each day strictly between its produced day and its deadline,
      item -> its own temporal node, and item -> item "conflict" edges for
      items of different kinds that overlap in time and share the same
      heuristic primary facility.

    Facility compatibility sets are not read from the JSON, so facility
    membership is approximated as ``(kind + index) % F``.

    Args:
        mzn_file: Path to .mzn file (accepted for reference; not read here).
        json_data: Dict containing parsed DZN data. Keys read: ``N``, ``F``,
            ``MaxItemsPerDay``, ``MaxDay``, ``kind``, ``producedDay``,
            ``deadLineDay``. Missing keys fall back to defaults; per-item
            lists shorter than ``N`` are padded (produced day 1, deadline
            ``MaxDay``, kind 1 for the facility heuristic).

    Returns:
        networkx.Graph for the instance; an empty graph when ``N == 0``.
    """

    # Extract data from JSON
    N = json_data.get('N', 0)
    F = json_data.get('F', 1)
    max_items_per_day = json_data.get('MaxItemsPerDay', 1)
    max_day = json_data.get('MaxDay', 50)
    kinds = json_data.get('kind', [])
    produced_days = json_data.get('producedDay', [])
    deadline_days = json_data.get('deadLineDay', [])

    if N == 0:
        return nx.Graph()

    G = nx.Graph()

    # Resolve each item's produced day / deadline once, padding short lists,
    # instead of repeating the bounds checks in every section below.
    prod = [produced_days[i] if i < len(produced_days) else 1
            for i in range(N)]
    dead = [deadline_days[i] if i < len(deadline_days) else max_day
            for i in range(N)]

    # Clamp windows at 0: a deadline before the produced day would otherwise
    # give a zero or negative denominator in the urgency formulas below.
    time_windows = [max(d - p, 0) for p, d in zip(prod, dead)]

    # Urgency: smaller window = more urgent (higher weight)
    urgencies = [1.0 / (window + 1) for window in time_windows]
    max_urgency = max(urgencies, default=1.0)
    norm_urgency = [u / max_urgency for u in urgencies]

    # Add item nodes (type 0) with urgency-based weights
    for i in range(N):
        G.add_node(f'item_{i}', type=0, weight=norm_urgency[i])

    # Add facility constraint nodes (type 1), weighted by facility scarcity
    # (inverse of facility count). With F <= 0 there is nothing to add and
    # the facility heuristic (a modulo by F) cannot be evaluated at all.
    has_facilities = F > 0
    if has_facilities:
        facility_weight = 1.0 / F
        for f in range(F):
            G.add_node(f'facility_{f}', type=1, weight=facility_weight)

    # Add kind constraint nodes (type 1)
    # Weight by kind complexity (larger groups = more complex scheduling)
    unique_kinds = list(set(kinds))
    kind_counts = Counter(kinds)
    max_kind_count = max(kind_counts.values(), default=1)

    for kind in unique_kinds:
        complexity_weight = kind_counts[kind] / max_kind_count
        G.add_node(f'kind_{kind}', type=1, weight=complexity_weight)

    # Add daily capacity constraint nodes (type 1)
    # Weight = demand / capacity, capped at 2 and normalized to [0, 1].
    for day in range(1, max_day + 1):
        demand = sum(1 for p, d in zip(prod, dead) if p < day <= d)

        if max_items_per_day > 0:
            pressure = min(demand / max_items_per_day, 2.0)
            capacity_weight = pressure / 2.0
        else:
            capacity_weight = 0.5

        G.add_node(f'day_{day}', type=1, weight=capacity_weight)

    # Add temporal constraint nodes (type 1) for items with tight windows
    tight_windows = [(i, window) for i, window in enumerate(time_windows)
                     if window <= 5]
    for i, window in tight_windows:
        temporal_weight = 1.0 - (window / 10.0)  # Tighter = higher weight
        G.add_node(f'temporal_{i}', type=1, weight=max(temporal_weight, 0.1))

    # Connect items to kind constraints
    # Weight by kind group size (larger groups = stronger coupling)
    items_with_kind = min(N, len(kinds))
    sqrt_max_kind_count = math.sqrt(max_kind_count)
    for i in range(items_with_kind):
        kind = kinds[i]
        group_size = kind_counts.get(kind, 1)
        kind_coupling = math.sqrt(group_size) / sqrt_max_kind_count
        G.add_edge(f'item_{i}', f'kind_{kind}', weight=kind_coupling)

    # Connect items to facility constraints
    # Note: facility sets not available in JSON, so we use a heuristic:
    # the primary facility is derived from the item's kind and index.
    primary_facility = None
    if has_facilities:
        primary_facility = [
            ((kinds[i] if i < len(kinds) else 1) + i) % F
            for i in range(N)
        ]
        for i, primary in enumerate(primary_facility):
            # Primary facility gets stronger connection
            G.add_edge(f'item_{i}', f'facility_{primary}', weight=0.8)

            # Secondary facility connection for flexibility (if F > 1)
            if F > 1:
                secondary = (primary + 1) % F
                G.add_edge(f'item_{i}', f'facility_{secondary}', weight=0.3)

    # Connect items to daily capacity constraints based on their time windows
    for i in range(N):
        # Days strictly after production and strictly before the deadline,
        # restricted to 1..max_day so that only existing day nodes are
        # referenced (add_edge would otherwise create attribute-less nodes).
        first_day = max(prod[i] + 1, 1)
        end_day = min(dead[i], max_day + 1)
        for day in range(first_day, end_day):
            time_pressure = 1.0 / (day - prod[i] + 1)
            deadline_pressure = 1.0 / (dead[i] - day + 1)
            day_weight = min((time_pressure + deadline_pressure) / 2.0, 1.0)
            G.add_edge(f'item_{i}', f'day_{day}', weight=day_weight)

    # Connect items to temporal constraints
    for i, window in tight_windows:
        temporal_urgency = 1.0 / (window + 1)
        G.add_edge(f'item_{i}', f'temporal_{i}', weight=temporal_urgency)

    # Add conflict edges between items of different kinds whose windows
    # overlap and that share the same heuristic primary facility.
    if has_facilities:
        for i in range(items_with_kind):
            for j in range(i + 1, items_with_kind):
                if kinds[i] == kinds[j]:
                    continue

                overlap_duration = (min(dead[i], dead[j])
                                    - max(prod[i], prod[j]))
                if overlap_duration <= 0:
                    continue

                if primary_facility[i] != primary_facility[j]:
                    continue

                # Conflict strength based on urgency and overlap
                conflict_weight = (norm_urgency[i] + norm_urgency[j]) / 2.0
                conflict_weight *= min(overlap_duration / 10.0, 1.0)

                if conflict_weight > 0.1:  # Only add significant conflicts
                    G.add_edge(f'item_{i}', f'item_{j}',
                               weight=conflict_weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Loads the JSON file, builds the graph with build_graph, and prints the
    node and edge counts. Prints a usage line and exits with status 1 when
    the argument count is wrong.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The dzn path is part of the command-line contract but is not read here.
    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # Load JSON data. Decode explicitly as UTF-8 rather than relying on the
    # platform's locale-dependent default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, so that
# importing the module does not trigger main().
if __name__ == "__main__":
    main()