#!/usr/bin/env python3
"""
Graph converter for RCPSP/max (Resource-Constrained Project Scheduling with time lags) problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling tasks with resource constraints and temporal dependencies.
Key challenges: Resource conflicts, complex temporal constraints, makespan minimization.
The difficulty comes from tight resource capacities and intricate precedence relationships.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _demand_row(rr, n_tasks, r):
    """Return the per-task demand on resource ``r`` from the flattened ``rr``.

    ``rr`` is read row-major as ``rr[r * n_tasks + t]``; positions that fall
    past the end of ``rr`` count as a demand of 0.
    """
    base = r * n_tasks
    return [rr[base + t] if base + t < len(rr) else 0 for t in range(n_tasks)]


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the RCPSP/max problem instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'n_res',
            'n_tasks', 'n_dc', 'rcap', 'dur', 'rr' (flattened, indexed as
            rr[res * n_tasks + task]) and 'dcons' (flattened
            [task1, lag, task2] triples with 1-based task numbers).

    Returns:
        An undirected networkx Graph whose nodes carry 'type' and 'weight'
        attributes and whose edges carry a 'weight' attribute. The graph is
        empty when 'n_tasks' or 'n_res' is 0.

    Strategy: Create a graph with task nodes, resource nodes, and constraint nodes.
    - Tasks are type 0 (variables) weighted by resource intensity and duration
    - Resources are type 2 (resource-like) weighted by capacity utilization
    - Difference constraints are type 1 (constraint-like) weighted by lag magnitude
    - Resource capacity constraints are type 1 weighted by utilization ratio
    - Task-resource and task-constraint edges model participation
    - Task-task edges model pairs that cannot run together on some resource
      (so the graph is not strictly bipartite)

    Resources whose capacity is missing from 'rcap' or is not positive are
    left out of the graph entirely.
    """

    # Extract problem data
    n_res = json_data.get('n_res', 0)
    n_tasks = json_data.get('n_tasks', 0)
    n_dc = json_data.get('n_dc', 0)
    rcap = json_data.get('rcap', [])
    dur = json_data.get('dur', [])
    rr = json_data.get('rr', [])  # Flattened 2D array: rr[res*n_tasks + task]
    dcons = json_data.get('dcons', [])  # Flattened [task1, lag, task2] triples

    if n_tasks == 0 or n_res == 0:
        return nx.Graph()

    G = nx.Graph()

    # Per-task demand of every usable resource, computed once and reused by
    # all sections below. Dict order follows the resource index.
    demands = {
        r: _demand_row(rr, n_tasks, r)
        for r in range(n_res)
        if r < len(rcap) and rcap[r] > 0
    }
    total_demand = {r: sum(row) for r, row in demands.items()}

    # Resource intensity of a task: sum of its capacity-normalized demands.
    task_intensities = [
        sum(row[t] / rcap[r] for r, row in demands.items())
        for t in range(n_tasks)
    ]
    # Floor the normalizer at 1.0 so it can never be zero.
    max_resource_intensity = max(max(task_intensities, default=0), 1.0)

    # Add task nodes (type 0) with combined duration and resource intensity weights
    max_duration = max(dur) if dur else 1
    if max_duration <= 0:
        # All durations are zero (e.g. only dummy tasks): avoid dividing by 0.
        max_duration = 1
    for t in range(n_tasks):
        task_dur = dur[t] if t < len(dur) else 1
        resource_intensity = task_intensities[t] / max_resource_intensity
        duration_factor = task_dur / max_duration

        # Longer, more resource-intensive tasks get a higher weight (capped at 1).
        weight = 0.3 * duration_factor + 0.7 * math.sqrt(resource_intensity)
        G.add_node(f'task_{t}', type=0, weight=min(weight, 1.0))

    # Add resource nodes (type 2) weighted by expected utilization
    for r, total in total_demand.items():
        # Demand/capacity ratio capped at 2.0, then normalized to [0, 1].
        utilization = min(total / rcap[r], 2.0) / 2.0
        G.add_node(f'resource_{r}', type=2, weight=math.sqrt(utilization))

    # Difference constraints: only complete triples are considered.
    n_triples = len(dcons) // 3
    max_abs_lag = max([1] + [abs(lag) for lag in dcons[1:3 * n_triples:3]])
    constraints = [
        (dc, dcons[3 * dc], dcons[3 * dc + 1], dcons[3 * dc + 2])
        for dc in range(min(n_dc, n_triples))
    ]

    # Add difference constraint nodes (type 1) with lag-based weights
    for dc, _, lag, _ in constraints:
        lag_weight = abs(lag) / max_abs_lag
        if lag < 0:
            # Negative lags get a 1.5x boost, capped at 1.0.
            lag_weight = min(lag_weight * 1.5, 1.0)
        G.add_node(f'diff_constraint_{dc}', type=1,
                   weight=math.pow(lag_weight, 0.7))

    # Add resource capacity constraint nodes (type 1)
    for r, total in total_demand.items():
        # Demand/capacity ratio capped at 3.0, then normalized to [0, 1].
        tightness = min(total / rcap[r], 3.0) / 3.0
        G.add_node(f'resource_constraint_{r}', type=1,
                   weight=math.sqrt(tightness))

    # Add edges: tasks to the resources (and capacity constraints) they use
    for t in range(n_tasks):
        for r, row in demands.items():
            req = row[t]
            if req > 0:
                # Consumption ratio scaled up by 2 and capped at 1.0.
                weight = min(req / rcap[r] * 2.0, 1.0)
                G.add_edge(f'task_{t}', f'resource_{r}', weight=weight)
                G.add_edge(f'task_{t}', f'resource_constraint_{r}', weight=weight)

    # Add edges: tasks to difference constraints they participate in
    for dc, first, lag, second in constraints:
        task1, task2 = first - 1, second - 1  # Convert to 0-based indexing
        if not (0 <= task1 < n_tasks and 0 <= task2 < n_tasks):
            continue

        if lag == 0:
            constraint_weight = 1.0  # Zero lag gets the maximum edge weight
        else:
            # Weight decays with |lag|; negative lags get a 1.3x boost.
            constraint_weight = math.exp(-abs(lag) / 10.0)
            if lag < 0:
                constraint_weight = min(constraint_weight * 1.3, 1.0)

        G.add_edge(f'task_{task1}', f'diff_constraint_{dc}', weight=constraint_weight)
        G.add_edge(f'task_{task2}', f'diff_constraint_{dc}', weight=constraint_weight)

    # Add conflict edges between tasks that compete heavily for resources
    for r, row in demands.items():
        capacity = rcap[r]

        # Tasks using more than 40% of this resource's capacity
        heavy_users = [(t, req) for t, req in enumerate(row)
                       if req > capacity * 0.4]

        for i, (t1, req1) in enumerate(heavy_users):
            for t2, req2 in heavy_users[i + 1:]:
                # Only a conflict if the two cannot run simultaneously
                if req1 + req2 <= capacity:
                    continue
                weight = min((req1 + req2 - capacity) / capacity, 1.0)

                u, v = f'task_{t1}', f'task_{t2}'
                # Task-task edges are created only here, so an existing edge
                # holds a conflict weight from an earlier resource. Keep the
                # strongest conflict instead of letting the last resource win.
                if G.has_edge(u, v):
                    weight = max(weight, G[u][v]['weight'])
                G.add_edge(u, v, weight=weight)

    return G


def main():
    """Command-line entry point: load the instance JSON and report graph size.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Only the JSON file is read; the .mzn path is forwarded to build_graph
    and the .dzn path is accepted but not used. Prints a usage line and
    exits with status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is part of the calling convention but is not needed here.
    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # Load JSON data; decode as UTF-8 explicitly rather than relying on the
    # platform's default text encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()