#!/usr/bin/env python3
"""
Graph converter for Evil Job-Shop Scheduling problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling jobs on machines where each job has a sequence of operations.
Key challenges: precedence constraints within jobs, machine capacity conflicts, and scaled processing times.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _safe_ratio(numerator, denominator, default=0.5):
    """Return numerator / denominator, or *default* when denominator is not positive."""
    return numerator / denominator if denominator > 0 else default


def _parse_operations(input_array, n_jobs, n_machines):
    """
    Decode the flat input array into per-job operations and per-machine aggregates.

    The array is read as consecutive (machine_id, duration) pairs, n_machines
    pairs per job.

    Returns:
        (operations, machine_loads, ops_by_machine, max_duration) where
        - operations[job][op] is the (machine_id, duration) pair,
        - machine_loads[m] is the summed duration of operations on machine m,
        - ops_by_machine maps a machine id to its (job, op, duration) triples,
        - max_duration is the largest duration seen (0 if none is positive).

    Only machine ids in [0, n_machines) contribute to the per-machine
    aggregates; anything else is kept in `operations` but otherwise ignored.
    """
    operations = []
    machine_loads = [0] * n_machines
    ops_by_machine = {}
    max_duration = 0

    for job in range(n_jobs):
        base = job * 2 * n_machines
        job_ops = []
        for op in range(n_machines):
            machine_id = input_array[base + 2 * op]
            duration = input_array[base + 2 * op + 1]
            job_ops.append((machine_id, duration))
            max_duration = max(max_duration, duration)
            # Reject negative ids too: they would wrap around when indexing
            # machine_loads and later produce attribute-less graph nodes.
            if 0 <= machine_id < n_machines:
                machine_loads[machine_id] += duration
                ops_by_machine.setdefault(machine_id, []).append((job, op, duration))
        operations.append(job_ops)

    return operations, machine_loads, ops_by_machine, max_duration


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the Evil Job-Shop instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Reads the keys 'n_jobs',
            'n_machines' and 'input' (flat list of machine_id, duration pairs,
            n_machines pairs per job).

    Returns:
        An undirected networkx Graph. An empty graph is returned when the
        input is missing, the sizes are not positive, or the input length
        does not equal n_jobs * 2 * n_machines.

    Node kinds (every node carries `type` and `weight` attributes):
    - Operation nodes (type 0): 'job_{j}_op_{k}', one per operation
    - Precedence constraints (type 1): 'prec_job_{j}_op_{k}_to_{k+1}'
    - Machine capacity constraints (type 1): 'capacity_machine_{m}'
    - Machine resource nodes (type 2): 'machine_{m}'

    Edges (each carries a `weight` attribute):
    - operation <-> precedence constraint it takes part in
    - operation <-> capacity constraint of its machine
    - operation <-> resource node of its machine
    - operation <-> operation conflict edges among the (up to four) longest
      operations of a machine

    Because of the conflict edges the graph is not strictly bipartite.
    Operations whose machine id lies outside [0, n_machines) still get an
    operation node and precedence edges, but no machine-related edges.
    """
    n_jobs = json_data.get('n_jobs', 0)
    n_machines = json_data.get('n_machines', 0)
    input_array = json_data.get('input', [])

    # Fallback for malformed data. The explicit sign check also rules out the
    # case where two negative sizes multiply to a matching positive length.
    if (not input_array or n_jobs <= 0 or n_machines <= 0
            or len(input_array) != n_jobs * 2 * n_machines):
        return nx.Graph()

    G = nx.Graph()

    operations, machine_loads, ops_by_machine, max_duration = _parse_operations(
        input_array, n_jobs, n_machines)
    avg_machine_load = sum(machine_loads) / n_machines

    # Operation nodes (type 0) - decision variables
    for job in range(n_jobs):
        for op in range(n_machines):
            duration = operations[job][op][1]

            # Normalised processing time (longer operations weigh more)
            duration_weight = _safe_ratio(duration, max_duration)
            # Position in the job sequence (later operations weigh more)
            position_weight = (op + 1) / n_machines
            # Exponential emphasis on longer tasks, scaled by position
            weight = 0.3 + 0.7 * math.exp(-2 * (1 - duration_weight)) * position_weight

            G.add_node(f'job_{job}_op_{op}', type=0, weight=min(weight, 1.0))

    # Machine resource nodes (type 2) - shared resources
    for machine in range(n_machines):
        # Load relative to the average machine; busier machines weigh more
        load_ratio = _safe_ratio(machine_loads[machine], avg_machine_load)
        # Saturating exponential keeps the weight bounded below 1.0
        weight = 0.2 + 0.8 * (1 - math.exp(-load_ratio))

        G.add_node(f'machine_{machine}', type=2, weight=min(weight, 1.0))

    # Precedence constraint nodes (type 1) - n_machines-1 per job
    for job in range(n_jobs):
        for op in range(n_machines - 1):
            total_duration = operations[job][op][1] + operations[job][op + 1][1]

            # Normalised combined duration with a bias towards later precedences
            duration_factor = _safe_ratio(total_duration, 2 * max_duration)
            position_factor = (op + 1) / (n_machines - 1)
            weight = 0.4 + 0.6 * duration_factor * position_factor

            G.add_node(f'prec_job_{job}_op_{op}_to_{op+1}', type=1,
                       weight=min(weight, 1.0))

    # Machine capacity constraint nodes (type 1) - one per machine
    for machine in range(n_machines):
        # Operation count comes from the grouping built during parsing
        ops_on_machine = len(ops_by_machine.get(machine, ()))

        # Congestion: operations on the machine relative to the number of jobs
        congestion = ops_on_machine / n_jobs
        weight = 0.5 + 0.5 * min(congestion, 1.0)

        G.add_node(f'capacity_machine_{machine}', type=1, weight=weight)

    # 1. Precedence edges: both operations attach to their shared constraint,
    #    weighted by their own normalised duration.
    for job in range(n_jobs):
        for op in range(n_machines - 1):
            constraint_id = f'prec_job_{job}_op_{op}_to_{op+1}'
            curr_weight = _safe_ratio(operations[job][op][1], max_duration)
            next_weight = _safe_ratio(operations[job][op + 1][1], max_duration)

            G.add_edge(f'job_{job}_op_{op}', constraint_id, weight=curr_weight)
            G.add_edge(f'job_{job}_op_{op+1}', constraint_id, weight=next_weight)

    # 2./3. Machine capacity and machine resource edges
    for job in range(n_jobs):
        for op in range(n_machines):
            machine_id, duration = operations[job][op]
            if not 0 <= machine_id < n_machines:
                continue  # No such machine: nothing to connect to
            op_node = f'job_{job}_op_{op}'

            # Capacity edge weighted by duration relative to the longest operation
            G.add_edge(op_node, f'capacity_machine_{machine_id}',
                       weight=_safe_ratio(duration, max_duration))
            # Resource edge weighted by this operation's share of the machine load
            G.add_edge(op_node, f'machine_{machine_id}',
                       weight=_safe_ratio(duration, machine_loads[machine_id]))

    # Conflict edges between operations competing for the same machine.
    # NOTE(review): machines with exactly two operations get no conflict edge
    # because of the `> 2` threshold; confirm whether `>= 2` was intended.
    for ops in ops_by_machine.values():
        if len(ops) > 2:
            # Only the four longest operations are linked, to limit density.
            # sorted() is stable, so ties keep their (job, op) parse order.
            top = sorted(ops, key=lambda entry: entry[2], reverse=True)[:4]

            for i, (job1, op1, dur1) in enumerate(top):
                for job2, op2, dur2 in top[i + 1:]:
                    # Conflict weight based on combined duration
                    conflict_weight = min(
                        _safe_ratio(dur1 + dur2, 2 * max_duration), 1.0)

                    G.add_edge(f'job_{job1}_op_{op1}', f'job_{job2}_op_{op2}',
                               weight=conflict_weight)

    return G


def main():
    """
    Command-line entry point: load the instance JSON and report the graph size.

    Expects exactly three positional arguments (mzn file, dzn file, json file).
    Only the JSON file is read here; the mzn path is passed through to
    build_graph. Prints a usage message and exits with status 1 when the
    argument count is wrong.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The dzn path is part of the command-line interface but is never read.
    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # Load JSON data; decode as UTF-8 rather than the platform default encoding
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()