#!/usr/bin/env python3
"""
Graph converter for RCPSP (Resource-Constrained Project Scheduling) problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling tasks with resource constraints and precedences.
Key challenges: resource conflicts, precedence relationships, makespan optimization.

Note: JSON conversion currently doesn't handle 'suc' (precedence) field from DZN.
This converter focuses on resource-task relationships and resource conflicts.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _reshape_requirements(rr, n_res, n_tasks):
    """Turn the flat, row-major requirement list into an n_res x n_tasks matrix.

    Entries missing from a too-short ``rr`` are treated as zero demand.
    """
    n_values = len(rr)
    return [
        [rr[r * n_tasks + t] if r * n_tasks + t < n_values else 0
         for t in range(n_tasks)]
        for r in range(n_res)
    ]


def _resource_tightness(utilization):
    """Map a resource's total-demand / capacity ratio to a node weight.

    Ratios up to 1.0 scale linearly into [0, 0.5]; anything above 1.0 is
    reported as fully tight (1.0).
    """
    if utilization > 1.0:
        # The former formula min(1.0, 0.5 + 0.5 * exp(2 * (u - 1))) is always
        # exactly 1.0 here, because exp(x) >= 1 for x > 0.  Evaluating it
        # raised OverflowError for large ratios (math.exp overflows once its
        # argument exceeds ~709), so return the saturated value directly.
        return 1.0
    return utilization * 0.5


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the RCPSP instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'n_res',
            'n_tasks', 'rc' (capacities), 'd' (durations) and 'rr'
            (requirements, flattened row-major as n_res x n_tasks).

    Returns:
        An undirected networkx Graph. It is empty when 'n_res' or 'n_tasks'
        is 0 or missing.

    Strategy: task/resource graph plus task-task conflict edges
    - Task nodes 'task_<i>' (type 0): weighted by duration and resource intensity
    - Resource nodes 'resource_<j>' (type 1): weighted by tightness/utilization
    - Task-resource edges for every positive requirement, weighted by the
      consumption ratio requirement / capacity
    - Task-task conflict edges between heavy users of a resource whose total
      demand exceeds 1.2x its capacity, when the pair cannot run in parallel

    Precedence ('suc') information is not used.
    """
    n_res = json_data.get('n_res', 0)
    n_tasks = json_data.get('n_tasks', 0)
    rc = json_data.get('rc', [])  # resource capacities
    d = json_data.get('d', [])    # task durations
    rr = json_data.get('rr', [])  # resource requirements (flattened)

    G = nx.Graph()

    if n_res == 0 or n_tasks == 0:
        return G

    rr_matrix = _reshape_requirements(rr, n_res, n_tasks)

    # Missing entries default to 1 everywhere, so task weights and resource
    # utilizations are computed from the same effective values.
    durations = [d[t] if t < len(d) else 1 for t in range(n_tasks)]
    capacities = [rc[r] if r < len(rc) else 1 for r in range(n_res)]

    # Intensity = duration * total consumption across all resources.
    task_intensities = [
        durations[t] * sum(rr_matrix[r][t] for r in range(n_res))
        for t in range(n_tasks)
    ]
    max_duration = max(d) if d else 1
    max_intensity = max(task_intensities)

    # Add task nodes (type 0) with duration and resource intensity weights
    for t in range(n_tasks):
        # Fall back to a neutral 0.5 when the normaliser is not positive
        # (e.g. every duration is 0) instead of dividing by zero.
        duration_weight = (
            durations[t] / max_duration if max_duration > 0 else 0.5
        )
        intensity_weight = (
            task_intensities[t] / max_intensity if max_intensity > 0 else 0.5
        )

        # Weighted blend emphasizing resource-heavy tasks, capped at 1.0
        weight = min(0.6 * intensity_weight + 0.4 * duration_weight, 1.0)

        G.add_node(f'task_{t+1}', type=0, weight=weight)

    # Utilization = total work (requirement * duration) over capacity.
    # NOTE: this is not normalised by any time horizon, so values far above
    # 1.0 are normal for real instances.
    resource_utils = []
    for r in range(n_res):
        capacity = capacities[r]
        total_demand = sum(
            rr_matrix[r][t] * durations[t] for t in range(n_tasks)
        )
        resource_utils.append(total_demand / capacity if capacity > 0 else 0)

    # Add resource constraint nodes (type 1) weighted by tightness
    for r in range(n_res):
        G.add_node(
            f'resource_{r+1}',
            type=1,
            weight=_resource_tightness(resource_utils[r]),
        )

    # Add edges between tasks and resources they use
    for t in range(n_tasks):
        for r in range(n_res):
            requirement = rr_matrix[r][t]
            if requirement <= 0:
                continue
            capacity = capacities[r]
            consumption_ratio = requirement / capacity if capacity > 0 else 0.5
            # Square root spreads out small ratios; capped at 1.0
            edge_weight = min(1.0, math.sqrt(consumption_ratio * 2))
            G.add_edge(f'task_{t+1}', f'resource_{r+1}', weight=edge_weight)

    # Add conflict edges between tasks that heavily compete for
    # significantly oversubscribed resources.
    for r in range(n_res):
        if resource_utils[r] <= 1.2:
            continue
        # utilization > 1.2 implies capacity > 0, so the divisions are safe.
        capacity = capacities[r]
        heavy_users = [
            (t, rr_matrix[r][t])
            for t in range(n_tasks)
            if rr_matrix[r][t] > capacity * 0.3  # uses >30% of the resource
        ]

        for i, (t1, req1) in enumerate(heavy_users):
            for t2, req2 in heavy_users[i+1:]:
                if req1 + req2 <= capacity:
                    continue  # the pair fits side by side: no conflict
                # Conflict strength is the relative over-demand of the pair
                conflict_weight = min(1.0, (req1 + req2) / capacity - 1.0)
                u, v = f'task_{t1+1}', f'task_{t2+1}'
                # A pair may conflict on several resources; keep the
                # strongest conflict rather than whichever came last.
                if G.has_edge(u, v):
                    conflict_weight = max(conflict_weight, G[u][v]['weight'])
                G.add_edge(u, v, weight=conflict_weight)

    return G


def main():
    """Command-line entry point: load a JSON instance and report graph size.

    Expects exactly three arguments (model file, DZN file, JSON file);
    otherwise prints a usage line and exits with status 1. Only the JSON
    file is opened; the DZN path is accepted but not used.
    """
    argv = sys.argv
    if len(argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = argv[1:]

    # Parse the instance data
    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    # build_graph returns the graph for direct feature extraction
    G = build_graph(mzn_file, json_data)
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

    # Debug summary: number of nodes per 'type' attribute (-1 if absent)
    counts = {}
    for _, attrs in G.nodes(data=True):
        kind = attrs.get('type', -1)
        counts[kind] = counts.get(kind, 0) + 1

    print(f"Node types: {dict(counts)}")


# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()