#!/usr/bin/env python3
"""
Graph converter for Job Shop Scheduling problem.
Created using subagent_prompt.md version: v_01

This problem is about scheduling operations of jobs on machines to minimize makespan.
Key challenges: precedence constraints, machine conflicts, critical path identification,
resource bottlenecks, operation flexibility and machine utilization.
"""

import sys
import json
import networkx as nx
from pathlib import Path
import math


def _parse_jobs(n, m, job_data):
    """
    Decode the flat job array into per-job lists of (machine, duration) pairs.

    The array is read row-major with 2*m entries per job: within a row,
    entry 2*j is the machine of operation j and entry 2*j+1 is its duration.
    Operations whose entries lie past the end of the array are skipped, so a
    truncated array yields shorter jobs instead of raising IndexError.
    """
    jobs = []
    row_width = 2 * m
    for i in range(n):
        operations = []
        for j in range(m):
            idx_machine = i * row_width + 2 * j
            idx_duration = idx_machine + 1
            # idx_duration is the larger index, so checking it covers both.
            if idx_duration < len(job_data):
                operations.append((job_data[idx_machine], job_data[idx_duration]))
        jobs.append(operations)
    return jobs


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the Job Shop Scheduling instance.

    Args:
        mzn_file: Path to .mzn file (for reference; not read here)
        json_data: Dict containing parsed DZN data. Reads the keys 'n'
            (number of jobs), 'm' (number of machines) and 'job' (flat list
            of machine/duration pairs, 2*m entries per job).

    Returns:
        An undirected networkx Graph whose nodes carry 'type' and 'weight'
        attributes and whose edges carry a 'weight' attribute.

    Strategy: Bipartite graph model with explicit constraint nodes
    - Type 0: Operations (op_i_j) - decision variables for start times
    - Type 1: Constraints - precedence, machine capacity, makespan
    - Type 2: Machines - shared resources with capacity = 1
    In addition, the longest operations sharing a machine are linked directly
    by conflict edges.

    What makes instances hard:
    - Long critical paths (high precedence chain length)
    - Machine bottlenecks (machines with many/long operations)
    - Tight machine utilization (total work vs. optimal makespan)
    - Operation duration variance (creates scheduling flexibility issues)

    Operations whose machine index lies outside [0, m) still get an operation
    node (with a neutral 0.5 bottleneck factor) but no machine-related edges.
    """
    n = json_data.get('n', 0)  # number of jobs
    m = json_data.get('m', 0)  # number of machines
    job_data = json_data.get('job', [])

    jobs = _parse_jobs(n, m, job_data)

    G = nx.Graph()

    # Calculate problem characteristics for weight computation.
    # `or 1` keeps the normalisers non-zero when there are no operations or
    # when every duration is 0, so the ratios below cannot divide by zero.
    all_durations = [duration for job in jobs for _, duration in job]
    max_duration = max(all_durations, default=0) or 1
    total_work = sum(all_durations)
    avg_duration = total_work / len(all_durations) if all_durations else 1

    # Machine workload for bottleneck analysis. Operations are grouped per
    # machine in the same pass (in job/operation order) so the conflict-edge
    # step does not have to rescan every job once per machine.
    machine_workload = [0] * m
    machine_op_count = [0] * m
    ops_by_machine = [[] for _ in range(m)]
    for i, job in enumerate(jobs):
        for j, (machine, duration) in enumerate(job):
            if 0 <= machine < m:
                machine_workload[machine] += duration
                machine_op_count[machine] += 1
                ops_by_machine[machine].append((f'op_{i}_{j}', duration))

    max_workload = max(machine_workload, default=0) or 1

    # Type 0: Operation nodes (variables)
    for i, job in enumerate(jobs):
        for j, (machine, duration) in enumerate(job):
            # Weight by operation criticality:
            # - Longer operations are more critical
            # - Operations on bottleneck machines are more critical
            # - Operations in longer job chains are more critical
            duration_weight = duration / max_duration
            # Same bounds check as everywhere else: a negative index must not
            # wrap around and pick up another machine's workload.
            if 0 <= machine < m:
                machine_bottleneck = machine_workload[machine] / max_workload
            else:
                machine_bottleneck = 0.5
            # A job has at least one operation here, which implies m > 0.
            chain_length = len(job) / m  # Job length relative to max possible

            # Combine factors (capped at 1.0)
            criticality = (duration_weight + machine_bottleneck + chain_length) / 3

            G.add_node(f'op_{i}_{j}', type=0, weight=min(criticality, 1.0))

    # Type 1: Constraint nodes

    # 1.1: Precedence constraints (one per job with at least two operations)
    chain_scale = avg_duration * m
    for i, job in enumerate(jobs):
        job_length = len(job)
        if job_length > 1:
            # Weight by chain length and total duration
            total_job_duration = sum(duration for _, duration in job)
            # chain_scale is 0 only when all durations are 0; such a chain
            # contributes no duration-based criticality.
            chain_criticality = total_job_duration / chain_scale if chain_scale else 0.0
            length_factor = job_length / m
            precedence_weight = min((chain_criticality + length_factor) / 2, 1.0)

            G.add_node(f'precedence_job_{i}', type=1, weight=precedence_weight)

    # 1.2: Machine capacity constraints (one per machine that has operations)
    for k in range(m):
        ops_on_machine = machine_op_count[k]
        if ops_on_machine > 0:
            # Weight by machine utilization and number of operations
            utilization = machine_workload[k] / max_workload
            congestion = ops_on_machine / (n * 2)  # Relative to potential max
            capacity_tightness = min((utilization + congestion) / 2, 1.0)

            G.add_node(f'machine_capacity_{k}', type=1, weight=capacity_tightness)

    # 1.3: Makespan constraint (single global constraint)
    # Weight by problem size, saturating at 100 operations
    complexity = (n * m) / 100  # Scale factor
    makespan_weight = min(complexity, 1.0)
    G.add_node('makespan_constraint', type=1, weight=makespan_weight)

    # Type 2: Machine resource nodes
    for k in range(m):
        if machine_op_count[k] > 0:
            # Weight by scarcity: the more loaded a machine is relative to
            # the busiest machine, the scarcer it is.
            scarcity = machine_workload[k] / max_workload

            G.add_node(f'machine_{k}', type=2, weight=scarcity)

    # EDGES: Bipartite connections

    # Operation-Precedence edges
    for i, job in enumerate(jobs):
        if len(job) > 1:
            precedence_node = f'precedence_job_{i}'
            for j in range(len(job)):
                # Weight by position in chain (earlier operations more constrained)
                position_weight = 1.0 - (j / len(job))
                G.add_edge(f'op_{i}_{j}', precedence_node, weight=position_weight)

    # Operation-Machine Capacity edges
    for i, job in enumerate(jobs):
        for j, (machine, duration) in enumerate(job):
            if 0 <= machine < m:
                # Weight by duration relative to total work on this machine
                workload = machine_workload[machine]
                duration_ratio = duration / workload if workload > 0 else 0.5
                G.add_edge(f'op_{i}_{j}', f'machine_capacity_{machine}', weight=duration_ratio)

    # Operation-Machine Resource edges
    for i, job in enumerate(jobs):
        for j, (machine, duration) in enumerate(job):
            if 0 <= machine < m:
                # Weight by operation's impact on machine utilization
                impact = duration / max_duration
                G.add_edge(f'op_{i}_{j}', f'machine_{machine}', weight=impact)

    # Operation-Makespan edges (only for last operations of each job)
    for i, job in enumerate(jobs):
        if job:
            last_op = len(job) - 1
            # Weight by the job's share of the total work
            job_duration = sum(duration for _, duration in job)
            contribution = job_duration / total_work if total_work > 0 else 1.0 / n
            G.add_edge(f'op_{i}_{last_op}', 'makespan_constraint', weight=contribution)

    # Conflict edges between operations on the same machine.
    # Only the longest few operations per machine are linked pairwise, to
    # keep the number of extra edges bounded.
    conflict_cap = 6
    for competing in ops_by_machine:
        if len(competing) > 1:
            # Longest first; the sort is stable, so ties keep job/op order.
            ranked = sorted(competing, key=lambda x: x[1], reverse=True)[:conflict_cap]

            for idx1 in range(len(ranked)):
                op1, dur1 = ranked[idx1]
                for idx2 in range(idx1 + 1, len(ranked)):
                    op2, dur2 = ranked[idx2]

                    # Conflict strength: mean duration of the pair relative
                    # to the longest operation in the instance
                    conflict_strength = (dur1 + dur2) / (2 * max_duration)
                    G.add_edge(op1, op2, weight=min(conflict_strength, 1.0))

    return G


def main():
    """
    Command-line entry point: load the instance JSON and report graph size.

    Expects exactly three arguments (<mzn_file> <dzn_file> <json_file>);
    otherwise prints a usage line and exits with status 1. Only the JSON
    file is opened here; the .mzn path is passed on to build_graph and the
    .dzn path is accepted but not used.
    """
    expected_argc = 4  # script name + three file paths
    if len(sys.argv) != expected_argc:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    _, mzn_file, _dzn_file, json_file = sys.argv

    # Parse the instance data
    with open(json_file, 'r') as handle:
        instance = json.load(handle)

    # Construct the graph and print a one-line summary of its size
    graph = build_graph(mzn_file, instance)
    print(f"Graph built: {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges")

# Run the converter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()