#!/usr/bin/env python3
"""
Graph converter for test-scheduling problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling tests on machines with resource constraints.
Key challenges: Machine compatibility, resource conflicts, makespan minimization.

Note: Current JSON conversion doesn't capture possibleMachines and usedResources arrays
from DZN files (set-valued arrays are not supported). This converter works with 
available data but has limited precision.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of a test-scheduling problem instance.

    Args:
        mzn_file: Path to the .mzn file. Accepted for interface
            compatibility only; this function never reads it.
        json_data: Dict containing parsed DZN data. The keys read are
            'nTests', 'duration', 'Machines' and 'Resources'; each one
            is optional.

    Returns:
        An undirected ``networkx.Graph``. Every node carries ``type`` and
        ``weight`` attributes and every edge carries a ``weight``
        attribute. When 'nTests' is 0 or 'duration' is empty, the graph
        holds a single placeholder node named 'dummy'.

    Graph layout:
    - ``test_<t>`` nodes (type 0): one per test, weighted by a saturating
      function of the test's duration relative to the longest duration.
    - ``machine_<m>`` nodes (type 1): one per entry of 'Machines'.
    - ``resource_<r>`` nodes (type 1): one per entry of 'Resources'.
    - ``duration_conflict_<threshold>`` nodes (type 1): one per duration
      bucket that contains at least two tests.
    - Edges test-machine, test-resource and test-bucket, plus direct
      test-test edges between tests of near-equal duration. Because of
      the test-test edges the result is not strictly bipartite.

    Note: Without possibleMachines and usedResources data from JSON,
    machine compatibility and resource usage are estimated from the
    available duration data only.
    """
    n_tests = json_data.get('nTests', 0)
    duration = json_data.get('duration', [])
    machines = json_data.get('Machines', [])
    resources = json_data.get('Resources', [])

    G = nx.Graph()

    if n_tests == 0 or not duration:
        # Nothing to model: return a minimal placeholder graph.
        G.add_node('dummy', type=0, weight=0.5)
        return G

    max_duration = max(duration)
    # Tests whose index has a matching entry in `duration`.
    n_known = min(n_tests, len(duration))

    def duration_of(t):
        # Tests without a duration entry are treated as duration 1.
        return duration[t] if t < len(duration) else 1

    def relative(value):
        # Fraction of the longest duration; 0.5 is the neutral fallback
        # used throughout when the longest duration is not positive.
        return value / max_duration if max_duration > 0 else 0.5

    # Test nodes (type 0). The weight 1 - exp(-3 * d / max) grows quickly
    # for short tests and saturates towards ~0.95 for the longest one.
    for t in range(n_tests):
        test_duration = duration_of(t)
        if max_duration > 0:
            exponent = -3.0 * test_duration / max_duration
        else:
            # Same neutral ratio (0.5) as the other weights; avoids a
            # ZeroDivisionError when every duration is 0.
            exponent = -1.5
        G.add_node(f'test_{t}', type=0, weight=1.0 - math.exp(exponent))

    # Machine constraint nodes (type 1). All machines share one weight:
    # the average work per machine relative to the longest test, capped
    # at 1.0 (uniform load is assumed since compatibility is unknown).
    avg_work_per_machine = sum(duration) / len(machines) if machines else 1
    if max_duration > 0:
        load_factor = min(avg_work_per_machine / max_duration, 1.0)
    else:
        load_factor = 0.5
    for machine in machines:
        G.add_node(f'machine_{machine}', type=1, weight=load_factor)

    # Resource constraint nodes (type 1), all with the same fixed weight.
    for resource in resources:
        G.add_node(f'resource_{resource}', type=1, weight=0.8)

    # Duration-bucket constraint nodes (type 1): a bucket groups every
    # test whose duration reaches the threshold, so buckets are nested.
    for threshold in (100, 300, 500, 700):
        tests_in_bucket = [t for t in range(n_known)
                           if duration[t] >= threshold]
        if len(tests_in_bucket) > 1:
            bucket_weight = min(len(tests_in_bucket) / n_tests, 1.0)
            constraint_node = f'duration_conflict_{threshold}'
            G.add_node(constraint_node, type=1, weight=bucket_weight)

            for t in tests_in_bucket:
                # A non-empty bucket implies max_duration >= threshold > 0,
                # so this division is always safe.
                excess_ratio = (duration[t] - threshold) / max_duration
                G.add_edge(f'test_{t}', constraint_node,
                           weight=0.5 + 0.5 * excess_ratio)

    # Test-machine edges: every test is linked to every machine because
    # possibleMachines is unavailable. The machine term cycles through
    # three values by machine position.
    for t in range(n_tests):
        duration_factor = relative(duration_of(t))
        for i, machine in enumerate(machines):
            machine_factor = 0.3 + 0.4 * (i % 3) / 2
            edge_weight = 0.5 * duration_factor + 0.5 * machine_factor
            G.add_edge(f'test_{t}', f'machine_{machine}', weight=edge_weight)

    # Test-resource edges: usedResources is unavailable, so a test is
    # linked to the first k resources, with k growing by one per 200
    # units of duration and capped at the number of resources.
    for t in range(n_tests):
        test_duration = duration_of(t)
        num_resources_used = min(int(test_duration / 200) + 1, len(resources))
        edge_weight = 0.6 + 0.4 * relative(test_duration)
        for r_idx in range(num_resources_used):
            G.add_edge(f'test_{t}', f'resource_{resources[r_idx]}',
                       weight=edge_weight)

    # Test-test conflict edges between tests that both last longer than
    # 100 and whose durations differ by less than 10%.
    for t1 in range(n_known):
        for t2 in range(t1 + 1, n_known):
            dur1, dur2 = duration[t1], duration[t2]
            relative_gap = abs(dur1 - dur2) / max(dur1, dur2, 1)
            if relative_gap < 0.1 and min(dur1, dur2) > 100:
                G.add_edge(f'test_{t1}', f'test_{t2}',
                           weight=1.0 - relative_gap)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three positional arguments: the .mzn file, the .dzn
    file and the JSON file. With any other argument count it prints a
    usage line and exits with status 1. Otherwise it loads the JSON
    file, builds the graph and prints its node and edge counts.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is part of the calling convention but is never opened.
    mzn_path, _dzn_path, json_path = cli_args

    with open(json_path, 'r') as handle:
        instance = json.load(handle)

    graph = build_graph(mzn_path, instance)

    # Report the size of the graph returned by build_graph.
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()