#!/usr/bin/env python3
"""
Graph converter for mrcpsp (Multi-mode Resource-constrained Project Scheduling) problem.
Converter created with subagent_prompt.md v_02

This problem is about scheduling tasks with multiple execution modes while respecting 
resource constraints and precedence relationships. Each task can be executed in different 
modes with different durations and resource requirements.

Key challenges: 
- Resource contention between tasks
- Mode selection for each task
- Precedence constraints
- Both renewable and non-renewable resources
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a weighted graph representation of an MRCPSP instance.

    Args:
        mzn_file: Path to .mzn file (kept for interface compatibility; not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'n_res', 'n_tasks',
            'n_opt', 'rcap', 'rtype', 'dur', 'rreq'. 'rreq' is indexed as a flat
            list with position ``res * n_opt + mode``.

    Returns:
        An undirected networkx Graph. It is empty when 'n_tasks', 'n_res' or
        'n_opt' is 0 or missing.

    Graph layout:
    - task_<t> nodes (type 0): weight is the task's duration/resource complexity
      normalised by the largest complexity over all tasks
    - resource_<r> nodes (type 1): weight reflects scarcity relative to total capacity
    - mode_constraint_<t> nodes (type 1): one per task
    - task-resource edges: peak utilisation of the resource by the task
    - task-mode_constraint edges: spread of durations across the task's modes
    - task-task edges: pairs of heavy users of one resource whose combined peak
      usage exceeds its capacity (so the graph is not bipartite)

    Modes are assumed to be distributed evenly and contiguously over tasks
    (n_opt // n_tasks options per task); options beyond that are ignored.
    """
    n_res = json_data.get('n_res', 0)
    n_tasks = json_data.get('n_tasks', 0)
    n_opt = json_data.get('n_opt', 0)
    rcap = json_data.get('rcap', [])
    rtype = json_data.get('rtype', [])
    dur = json_data.get('dur', [])
    rreq = json_data.get('rreq', [])

    if n_tasks == 0 or n_res == 0 or n_opt == 0:
        return nx.Graph()

    G = nx.Graph()

    # Calculate modes per task (assuming equal distribution)
    modes_per_task = n_opt // n_tasks
    # Resources without a capacity entry cannot be weighted, so they are ignored.
    known_resources = range(min(n_res, len(rcap)))
    # Cap on tasks considered per resource for conflict edges (keeps graph sparse).
    max_competitors = 8

    def mode_range(task):
        """Return the option indices assumed to belong to *task*."""
        start = task * modes_per_task
        return range(start, min(start + modes_per_task, n_opt))

    def requirement(res, mode):
        """Return the demand of *mode* on *res*, or 0 when rreq has no entry."""
        idx = res * n_opt + mode
        return rreq[idx] if idx < len(rreq) else 0

    # Task nodes (type 0) - weighted by complexity
    task_complexities = []
    for task in range(n_tasks):
        modes = mode_range(task)
        # The leading 0 keeps the result non-negative and handles empty ranges.
        max_duration = max([0, *(dur[m] for m in modes if m < len(dur))])
        peak_mode_req = max(
            [0, *(sum(requirement(res, m) for res in range(n_res)) for m in modes)]
        )
        task_complexities.append(max_duration * (1 + peak_mode_req))

    max_complexity = max([0, *task_complexities])
    for task, complexity in enumerate(task_complexities):
        weight = complexity / max_complexity if max_complexity > 0 else 0.5
        G.add_node(f'task_{task}', type=0, weight=weight)

    # Resource constraint nodes (type 1) - weighted by scarcity.
    # A node is created for every resource with a known capacity so that the
    # task-resource edges below never create attribute-less nodes implicitly.
    total_capacity = sum(rcap)
    for res in known_resources:
        if total_capacity > 0:
            scarcity = 1.0 - (rcap[res] / total_capacity)
            # rtype == 2 marks a non-renewable resource, treated as more
            # constraining; a missing rtype entry falls back to the neutral 1.0.
            non_renewable = res < len(rtype) and rtype[res] == 2
            type_multiplier = 1.2 if non_renewable else 1.0
            weight = min(scarcity * type_multiplier, 1.0)
        else:
            weight = 0.5
        G.add_node(f'resource_{res}', type=1, weight=weight)

    # Mode selection constraint nodes (type 1) - one per task
    for task in range(n_tasks):
        num_modes = len(mode_range(task))
        weight = min(num_modes / modes_per_task, 1.0) if modes_per_task > 0 else 0.5
        G.add_node(f'mode_constraint_{task}', type=1, weight=weight)

    # Task-to-resource edges (resource usage)
    for task in range(n_tasks):
        modes = mode_range(task)
        for res in known_resources:
            capacity = rcap[res]
            if capacity <= 0:
                continue
            used = [req for req in (requirement(res, m) for m in modes) if req > 0]
            # Only add edge if there's actual resource usage
            if not used:
                continue
            max_requirement = max(used)
            avg_req = sum(used) / len(used)
            # Weight by maximum resource utilization ratio, boosted when the
            # demand differs a lot between modes.
            utilization = min(max_requirement / capacity, 1.0)
            variability = 1.0 + abs(max_requirement - avg_req) / capacity
            weight = min(utilization * variability, 1.0)
            G.add_edge(f'task_{task}', f'resource_{res}', weight=weight)

    # Task-to-mode-constraint edges (mode selection)
    for task in range(n_tasks):
        durations = [dur[m] for m in mode_range(task) if m < len(dur)]
        complexity = 0.5
        if durations:
            max_duration = max(durations)
            if max_duration > 0:
                avg_duration = sum(durations) / len(durations)
                duration_variance = (
                    sum((d - avg_duration) ** 2 for d in durations) / len(durations)
                )
                # Weight by decision complexity (standard deviation and range)
                complexity = min(
                    (duration_variance ** 0.5 + max_duration) / (2 * max_duration),
                    1.0,
                )
        G.add_edge(f'task_{task}', f'mode_constraint_{task}', weight=complexity)

    # Resource contention edges between tasks (conflict modeling)
    for res in known_resources:
        capacity = rcap[res]

        # Find tasks that compete for this resource
        competing_tasks = []
        for task in range(n_tasks):
            max_usage = max([0, *(requirement(res, m) for m in mode_range(task))])
            if max_usage > 0:
                competing_tasks.append((task, max_usage))

        # Add conflict edges between the heaviest users only
        competing_tasks.sort(key=lambda entry: entry[1], reverse=True)
        heaviest = competing_tasks[:max_competitors]
        for i, (task1, usage1) in enumerate(heaviest):
            for task2, usage2 in heaviest[i + 1:]:
                # Only add edge if combined usage exceeds capacity
                if usage1 + usage2 <= capacity:
                    continue
                if capacity > 0:
                    conflict_intensity = min((usage1 + usage2) / (2 * capacity), 1.0)
                else:
                    # No capacity at all: any positive demand is a maximal
                    # conflict, and dividing by the capacity would fail.
                    conflict_intensity = 1.0
                # Apply exponential scaling for stronger conflicts
                weight = 1.0 - math.exp(-3.0 * conflict_intensity)
                G.add_edge(f'task_{task1}', f'task_{task2}', weight=weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments (<mzn_file> <dzn_file> <json_file>); with
    any other count a usage line is printed and the process exits with status 1.
    The JSON file is loaded, handed to build_graph together with the .mzn path,
    and the resulting node and edge counts are printed.
    """
    args = sys.argv
    if len(args) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is accepted for interface symmetry but never opened here.
    _script, mzn_file, dzn_file, json_file = args

    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    G = build_graph(mzn_file, json_data)

    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")

# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()