#!/usr/bin/env python3
"""
Graph converter for gfd-schedule problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling items with different kinds on limited facilities.
Key challenges: facility constraints, time windows, grouping by kind, and capacity limits.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the gfd-schedule problem instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; it is not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'N', 'F',
            'MaxItemsPerDay', 'MaxDay', 'kind', 'producedDay', 'deadLineDay'.

    Returns:
        A networkx.Graph whose nodes carry 'type' and 'weight' attributes and
        whose edges carry a 'weight' attribute. Empty when N is not positive.

    Strategy: Model the instance as a heterogeneous graph.
    - Items are type 0 nodes (variables being scheduled)
    - Constraints are type 1 nodes (time windows, kind groups, daily capacity)
    - Facilities are type 2 nodes (shared resources)
    - Item-item edges mark same-kind items with overlapping time windows
    - Edge weights reflect scheduling pressure and constraint tightness
    """
    # Extract data
    N = json_data.get('N', 0)
    F = json_data.get('F', 0)
    max_items_per_day = json_data.get('MaxItemsPerDay', 1)
    max_day = json_data.get('MaxDay', 1)
    kind = json_data.get('kind', [])
    produced_day = json_data.get('producedDay', [])
    deadline_day = json_data.get('deadLineDay', [])

    # A non-positive N has nothing to schedule (and would break log(N + 1)).
    if N <= 0:
        return nx.Graph()

    # Both values are used as divisors below; fall back to 1 for degenerate
    # (non-positive) inputs instead of raising ZeroDivisionError.
    if max_items_per_day <= 0:
        max_items_per_day = 1
    day_span = max_day if max_day > 0 else 1

    G = nx.Graph()

    # Time window length for every index present in BOTH day lists.
    all_windows = [d - p for p, d in zip(produced_day, deadline_day)]
    # Items 0..n_timed-1 have a known time window.
    n_timed = min(N, len(all_windows))
    # Mean window over the paired entries, computed once rather than per item.
    avg_window = sum(all_windows) / len(all_windows) if all_windows else 0.0

    # Add item nodes (type 0) with urgency-based weights
    for i in range(N):
        if i < n_timed:
            # Time window pressure - smaller window = higher weight
            time_window = all_windows[i]
            urgency = 1.0 - (time_window / day_span) if time_window > 0 else 1.0
            # Exponential scaling: weight is 1.0 at maximal urgency
            weight = math.exp(-2.0 * (1.0 - urgency))
        else:
            # No timing data for this item: neutral weight
            weight = 0.5

        G.add_node(f'item_{i}', type=0, weight=weight)

    # Add facility nodes (type 2) with utilization-based weights.
    # Simple heuristic: all items could potentially use any facility, so every
    # facility gets the same load estimate (computed once).
    if F > 0:
        utilization = min(N / (F * max_items_per_day * day_span), 1.0)
        for f in range(1, F + 1):
            G.add_node(f'facility_{f}', type=2, weight=utilization)

    # Add time window constraint nodes (type 1)
    for i in range(n_timed):
        # Tightness: how much shorter this window is than the average one
        if avg_window > 0:
            tightness = 1.0 - (all_windows[i] / avg_window)
        else:
            tightness = 0.5
        tightness = max(0.1, min(1.0, tightness))

        G.add_node(f'timewindow_{i}', type=1, weight=tightness)
        # Connect item to its time window constraint
        G.add_edge(f'item_{i}', f'timewindow_{i}', weight=tightness)

    # Add kind-based grouping constraints (type 1).
    # One pass collects both the per-kind counts (over the whole list) and the
    # item indices (< N) belonging to each kind, in first-appearance order.
    kind_counts = {}
    kind_members = {}
    for i, k in enumerate(kind):
        kind_counts[k] = kind_counts.get(k, 0) + 1
        if i < N:
            kind_members.setdefault(k, []).append(i)

    log_n = math.log(N + 1)
    for k, count in kind_counts.items():
        # Larger groups create more scheduling pressure
        group_pressure = math.log(count + 1) / log_n
        G.add_node(f'kind_constraint_{k}', type=1, weight=group_pressure)

        # Connect all items of same kind to their grouping constraint
        for i in kind_members.get(k, ()):
            G.add_edge(f'item_{i}', f'kind_constraint_{k}', weight=group_pressure)

    # Add daily capacity constraints (type 1)
    for day in range(1, max_day + 1):
        # Items whose window (produced, deadline] contains this day
        candidates = [i for i in range(n_timed)
                      if produced_day[i] < day <= deadline_day[i]]
        if not candidates:
            continue

        capacity_pressure = min(len(candidates) / max_items_per_day, 1.0)
        if capacity_pressure > 0.1:  # Only add if significant pressure
            G.add_node(f'capacity_day_{day}', type=1, weight=capacity_pressure)

            # Connect items that could be scheduled on this day
            edge_weight = capacity_pressure * (1.0 / max_items_per_day)
            for i in candidates:
                G.add_edge(f'item_{i}', f'capacity_day_{day}', weight=edge_weight)

    # Add conflict edges between items competing for same facilities.
    # Since facility data is not in JSON, use kind as proxy for facility conflicts.
    # Only items with both a kind and a time window can take part.
    n_conflict = min(n_timed, len(kind))
    for i in range(n_conflict):
        for j in range(i + 1, n_conflict):
            # Items of same kind compete for same facilities
            if kind[i] != kind[j]:
                continue

            # Time window overlap creates scheduling conflict
            overlap_days = (min(deadline_day[i], deadline_day[j])
                            - max(produced_day[i], produced_day[j]))
            if overlap_days <= 0:
                continue

            # Normalise by the longer window (at least 1 to avoid dividing by 0)
            total_days = max(all_windows[i], all_windows[j], 1)
            conflict_strength = overlap_days / total_days

            if conflict_strength > 0.3:  # Significant overlap
                G.add_edge(f'item_{i}', f'item_{j}',
                           weight=min(conflict_strength, 1.0))

    # Connect items to facilities (estimated based on problem structure)
    if F > 0:
        for i in range(min(N, len(kind))):
            # Use modulo to distribute kinds across facilities; the preferred
            # facility depends only on the item, so compute it once per item.
            preferred_facility = (kind[i] - 1) % F + 1
            for f in range(1, F + 1):
                connection_strength = 0.8 if f == preferred_facility else 0.2
                G.add_edge(f'item_{i}', f'facility_{f}', weight=connection_strength)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Loads the JSON instance data, builds the graph and prints its node and
    edge counts. Prints a usage line and exits with status 1 when the
    argument count is wrong.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The dzn path is part of the CLI contract but is not read by this script.
    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # Load JSON data; JSON interchange text is UTF-8, so do not depend on the
    # platform's default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()