#!/usr/bin/env python3
"""
Graph converter for largecumulative problem.
Converter created with subagent_prompt.md v_02

This problem is about cumulative resource scheduling with a large number of tasks.
Key challenges: Resource bottlenecks, task overlap conflicts, and makespan optimization.
The cumulative constraint creates complex interactions between tasks competing for limited capacity.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of a largecumulative problem instance.

    Args:
        mzn_file: Path to the .mzn file. Kept for interface compatibility;
            it is not read by this function.
        json_data: Dict containing the parsed DZN data. Keys read:
            'n_tasks' (default 0), 'capa' (default 1), 'd' (per-task
            durations, default []) and 'r' (per-task resource demands,
            default []).

    Returns:
        An undirected networkx Graph. Every node has 'type' and 'weight'
        attributes and every edge has a 'weight' attribute. If the data is
        unusable (no tasks, 'd' or 'r' not of length n_tasks, or a
        non-positive capacity) the graph holds a single 'dummy' node.

    Structure of the graph:
    - Task nodes 'task_<i>' (type 0), weighted by demand/capacity combined
      with a square-root-scaled relative duration.
    - 'global_capacity' node (type 1), connected to every task.
    - Optional category nodes (type 1), created only when at least one task
      qualifies: 'high_resource_constraint', 'medium_resource_constraint',
      'long_duration_constraint' and 'critical_task_constraint'.
    - Task-task conflict edges between resource-heavy tasks whose combined
      demand exceeds the capacity, and between the longest tasks. Because of
      these edges the graph is not strictly bipartite.
    """
    # Access data from json_data dict
    n_tasks = json_data.get('n_tasks', 0)
    capa = json_data.get('capa', 1)
    d = json_data.get('d', [])  # durations
    r = json_data.get('r', [])  # resource demands

    G = nx.Graph()

    # Every weight below divides by the capacity, so a non-positive capacity
    # is handled like any other invalid input instead of raising
    # ZeroDivisionError. The capacity test comes last so the pre-existing
    # checks still short-circuit first.
    if n_tasks == 0 or len(d) != n_tasks or len(r) != n_tasks or capa <= 0:
        # Create minimal graph for invalid data
        G.add_node('dummy', type=0, weight=0.5)
        return G

    # Key metrics for weight computation. When every duration is zero,
    # max(d) is 0; fall back to 1 so relative durations evaluate to 0
    # rather than dividing by zero.
    max_duration = max(d) or 1
    total_demand = sum(r)
    total_duration = sum(d)
    avg_duration = total_duration / n_tasks

    # Task nodes (type 0) - weight by resource pressure and duration impact
    for i, (duration, resource_demand) in enumerate(zip(d, r)):
        resource_intensity = resource_demand / capa
        duration_impact = math.sqrt(duration / max_duration)  # Non-linear scaling

        # Tasks with high resource demand and long duration are most critical
        task_weight = min(resource_intensity * duration_impact + 0.1, 1.0)
        G.add_node(f'task_{i}', type=0, weight=task_weight)

    # Global capacity constraint node (type 1)
    # Weight by overall resource pressure; the horizon estimate is the sum
    # of all durations (trivial upper bound).
    total_capacity = capa * total_duration
    if total_capacity:
        capacity_pressure = min(total_demand * avg_duration / total_capacity, 1.0)
    else:
        # Zero total duration: nothing occupies the resource over time.
        capacity_pressure = 0.0
    G.add_node('global_capacity', type=1, weight=capacity_pressure)

    # Thresholds that sort tasks into the optional category nodes
    high_resource_threshold = capa * 0.4  # Tasks using high resources
    medium_resource_threshold = capa * 0.2  # Tasks using medium resources
    long_duration_threshold = max_duration * 0.5

    # High resource constraint - weight is the mean demand of qualifying tasks
    high_demands = [demand for demand in r if demand >= high_resource_threshold]
    if high_demands:
        avg_high_resource = sum(high_demands) / len(high_demands)
        G.add_node('high_resource_constraint', type=1,
                   weight=min(avg_high_resource / capa, 1.0))

    # Medium resource constraint - for tasks with moderate resource usage
    medium_demands = [
        demand for demand in r
        if medium_resource_threshold <= demand < high_resource_threshold
    ]
    if medium_demands:
        avg_medium_resource = sum(medium_demands) / len(medium_demands)
        G.add_node('medium_resource_constraint', type=1,
                   weight=min(avg_medium_resource / capa, 1.0))

    # Long duration constraint - for tasks that consume significant time
    long_durations = [duration for duration in d if duration >= long_duration_threshold]
    if long_durations:
        avg_long_duration = sum(long_durations) / len(long_durations)
        G.add_node('long_duration_constraint', type=1,
                   weight=min(avg_long_duration / max_duration, 1.0))

    # Critical task constraint - tasks that are both high resource and long
    if any(demand >= high_resource_threshold and duration >= long_duration_threshold
           for duration, demand in zip(d, r)):
        G.add_node('critical_task_constraint', type=1, weight=0.9)

    # Edges from each task to the constraint nodes it participates in
    for i, (duration, resource_demand) in enumerate(zip(d, r)):
        task = f'task_{i}'
        is_high = resource_demand >= high_resource_threshold
        is_medium = medium_resource_threshold <= resource_demand < high_resource_threshold
        is_long = duration >= long_duration_threshold

        # Edge to global capacity constraint
        participation_weight = min(resource_demand / capa + duration / max_duration, 2.0) / 2.0
        G.add_edge(task, 'global_capacity', weight=participation_weight)

        # Edges to specific constraint nodes based on task characteristics
        if 'high_resource_constraint' in G and is_high:
            G.add_edge(task, 'high_resource_constraint',
                       weight=min(resource_demand / capa + 0.2, 1.0))

        if 'medium_resource_constraint' in G and is_medium:
            G.add_edge(task, 'medium_resource_constraint',
                       weight=min(resource_demand / capa + 0.1, 1.0))

        if 'long_duration_constraint' in G and is_long:
            G.add_edge(task, 'long_duration_constraint',
                       weight=min(duration / max_duration + 0.1, 1.0))

        if 'critical_task_constraint' in G and is_high and is_long:
            G.add_edge(task, 'critical_task_constraint', weight=0.95)

    # Task-task conflict edges for resource competition.
    # Candidates use a lower cut-off (30% of capacity) than the
    # 'high_resource_constraint' category (40%).
    conflict_candidates = [i for i in range(n_tasks) if r[i] >= capa * 0.3]

    for pos, task1 in enumerate(conflict_candidates):
        # Limit conflicts: pair each candidate with at most the next five
        for task2 in conflict_candidates[pos + 1:pos + 6]:
            combined_demand = r[task1] + r[task2]
            if combined_demand > capa:
                # These tasks cannot run simultaneously
                conflict_weight = min(combined_demand / (capa * 1.5), 1.0)
                G.add_edge(f'task_{task1}', f'task_{task2}', weight=conflict_weight)

    # Additional conflicts between the longest tasks. The sort is stable, so
    # tasks with equal durations keep their index order.
    by_duration = sorted(enumerate(d), key=lambda item: item[1], reverse=True)

    for pos in range(min(5, len(by_duration))):
        task1_idx, duration1 = by_duration[pos]
        # Pair each of the five longest tasks with at most the next three
        for task2_idx, duration2 in by_duration[pos + 1:pos + 4]:
            # Only tasks more than twice the average duration conflict
            if duration1 > avg_duration * 2 and duration2 > avg_duration * 2:
                time_conflict = min((duration1 + duration2) / (avg_duration * 4), 1.0)
                # Keep an existing resource-conflict weight if one was set
                if not G.has_edge(f'task_{task1_idx}', f'task_{task2_idx}'):
                    G.add_edge(f'task_{task1_idx}', f'task_{task2_idx}', weight=time_conflict)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments after the script name: the .mzn file,
    the .dzn file and the JSON data file. Prints a usage line and exits with
    status 1 otherwise. Only the JSON file is opened; its contents are passed
    to build_graph and a one-line summary of the resulting graph is printed.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is accepted for the CLI contract but not used here.
    mzn_file, dzn_file, json_file = cli_args

    # Load JSON data
    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    # Build the graph and report its size
    graph = build_graph(mzn_file, json_data)
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()