#!/usr/bin/env python3
"""
Graph converter for RCPSP-WET-diverse problem.
Created using subagent_prompt.md version: v_02

This problem is about Resource-Constrained Project Scheduling with Weighted Earliness/Tardiness
penalties and diverse solutions. Projects have tasks with precedence constraints, resource 
requirements, durations, and deadlines with earliness/tardiness costs. The goal is to find
diverse non-dominated solutions optimizing both earliness and tardiness costs.

Key challenges: Resource contention, precedence constraints, multi-objective optimization,
deadline sensitivity, and solution diversity requirements.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a weighted graph describing one RCPSP-WET-diverse instance.

    Args:
        mzn_file: Path to the .mzn model. It is not read by this function and
            is kept only so the signature stays stable for callers.
        json_data: Dict holding the parsed DZN data. The keys read are
            'n_tasks', 'n_res', 'd' (durations), 'rc' (resource capacities),
            'rr' (indexed as rr[resource][task]), 'suc' (per-task successor
            lists) and 'deadline' (per-task [desired_start, earliness_cost,
            tardiness_cost] rows, or the same values as one flat list).

    Returns:
        networkx.Graph whose nodes carry 'type' and 'weight' attributes and
        whose edges carry a 'weight' attribute.

    Graph layout:
    - task_<i>        (type 0): one per task, weighted by duration, resource
                                intensity and deadline cost pressure.
    - resource_<r>    (type 1): one per resource, weighted by demand/capacity.
    - precedence_<k>  (type 1): one per precedence relation.
    - deadline_<i>    (type 1): one per task with a non-zero deadline cost.

    Every constraint node is linked to the task(s) it involves. In addition,
    task-task "conflict" edges are added between the heaviest users of an
    oversubscribed resource, so the result is not strictly bipartite.

    When 'rr', 'suc' or 'deadline' is missing or empty, a heuristic structure
    derived from the durations is generated instead, so that a graph is
    produced even from incomplete data.

    NOTE(review): successor values are interpreted as 0-based task indices
    and anything outside [0, n_tasks) is ignored. If the upstream data is
    1-based (as MiniZinc arrays usually are) this needs an offset - confirm
    against the JSON exporter.
    """
    # `or` also covers keys that are present but null in the JSON.
    n_tasks = json_data.get('n_tasks', 0) or 0
    n_res = json_data.get('n_res', 0) or 0
    d = json_data.get('d') or []
    rc = json_data.get('rc') or []
    rr = json_data.get('rr') or []
    suc = json_data.get('suc') or []
    deadline = _deadline_rows(json_data.get('deadline'))

    def task_duration(i):
        # Tasks beyond the end of `d` are given a nominal duration of 1.
        return d[i] if i < len(d) else 1

    G = nx.Graph()

    max_duration = max(d) if d else 1
    avg_duration = sum(d) / len(d) if d else 1

    # Largest earliness/tardiness cost over all well-formed deadline rows.
    deadline_costs = []
    for row in deadline:
        if _is_deadline_row(row):
            deadline_costs.extend(row[1:3])
    max_deadline_cost = max(deadline_costs) if deadline_costs else 1

    # ------------------------------------------------------------------
    # Task nodes (type 0)
    # ------------------------------------------------------------------
    max_resource_intensity = sum(rc) if rc else 1
    for i in range(n_tasks):
        # Guarded like every other duration ratio below: when all durations
        # are zero, max_duration is 0 and the ratio is undefined.
        duration_weight = task_duration(i) / max_duration if max_duration > 0 else 0.5

        # Total requirement of this task summed over all resources.
        resource_intensity = 0
        if rr and i < len(rr[0]):
            resource_intensity = sum(
                rr[r][i] if r < len(rr) and i < len(rr[r]) else 0
                for r in range(n_res)
            )
        if max_resource_intensity > 0:
            resource_weight = min(resource_intensity / max_resource_intensity, 1.0)
        else:
            resource_weight = 0.5

        deadline_pressure = 0.5
        if i < len(deadline) and _is_deadline_row(deadline[i]) and max_deadline_cost > 0:
            deadline_pressure = (deadline[i][1] + deadline[i][2]) / (2 * max_deadline_cost)

        # The square root spreads out small combined weights.
        task_weight = math.sqrt(
            0.4 * duration_weight + 0.4 * resource_weight + 0.2 * deadline_pressure
        )
        G.add_node(f'task_{i}', type=0, weight=min(task_weight, 1.0))

    # ------------------------------------------------------------------
    # Resource constraint nodes (type 1)
    # ------------------------------------------------------------------
    for r in range(n_res):
        total_demand = 0
        if r < len(rr):
            total_demand = sum(rr[r][i] if i < len(rr[r]) else 0 for i in range(n_tasks))
        capacity = rc[r] if r < len(rc) else 1

        # Tightness = demand/capacity, capped at 2 and rescaled to [0, 1].
        if capacity > 0:
            tightness = min(total_demand / capacity, 2.0) / 2.0
        else:
            tightness = 1.0

        # Saturating exponential: weight rises quickly with tightness.
        resource_weight = 1.0 - math.exp(-3.0 * tightness)
        G.add_node(f'resource_{r}', type=1, weight=min(resource_weight, 1.0))

    # ------------------------------------------------------------------
    # Precedence constraint nodes (type 1)
    # ------------------------------------------------------------------
    # The pair list is computed once and reused for the edges further down,
    # so node k and its two edges always refer to the same relation.
    precedence_pairs = _precedence_pairs(suc, n_tasks)
    for k, (i, j) in enumerate(precedence_pairs):
        if max_duration > 0:
            prec_weight = (task_duration(i) + task_duration(j)) / (2 * max_duration)
        else:
            prec_weight = 0.5
        G.add_node(f'precedence_{k}', type=1, weight=min(prec_weight, 1.0))

    # ------------------------------------------------------------------
    # Deadline constraint nodes (type 1)
    # ------------------------------------------------------------------
    for i in range(n_tasks):
        if i >= len(deadline) or not _is_deadline_row(deadline[i]):
            continue
        earliness_cost = deadline[i][1]
        tardiness_cost = deadline[i][2]
        if earliness_cost == 0 and tardiness_cost == 0:
            continue  # no penalty either way: nothing to model

        if max_deadline_cost > 0:
            deadline_weight = (earliness_cost + tardiness_cost) / (2 * max_deadline_cost)
        else:
            deadline_weight = 0.5
        if deadline_weight > 0:
            # Logarithmic rescaling that maps 0 -> 0 and 1 -> 1.
            deadline_weight = math.log(1 + deadline_weight * 10) / math.log(11)
        G.add_node(f'deadline_{i}', type=1, weight=min(deadline_weight, 1.0))

    # ------------------------------------------------------------------
    # Task <-> resource edges
    # ------------------------------------------------------------------
    if not rr:
        # No requirement matrix: spread tasks with positive duration over
        # the resources round-robin, weighted by relative duration.
        if n_res > 0:
            for i in range(n_tasks):
                if i < len(d) and d[i] > 0:
                    # d[i] > 0 implies max_duration > 0 here.
                    G.add_edge(f'task_{i}', f'resource_{i % n_res}',
                               weight=min(d[i] / max_duration, 1.0))
    else:
        for r in range(n_res):
            for i in range(n_tasks):
                if r < len(rr) and i < len(rr[r]) and rr[r][i] > 0:
                    capacity = rc[r] if r < len(rc) else 1
                    consumption_ratio = rr[r][i] / capacity if capacity > 0 else 0.5
                    # Scaled by 1.5 so moderate consumers are still visible.
                    G.add_edge(f'task_{i}', f'resource_{r}',
                               weight=min(consumption_ratio * 1.5, 1.0))

    # ------------------------------------------------------------------
    # Task <-> precedence edges
    # ------------------------------------------------------------------
    if not suc:
        # No successor data: chain the tasks with positive duration in index
        # order, using fixed weights.
        active_tasks = [i for i in range(n_tasks) if i < len(d) and d[i] > 0]
        for k in range(len(active_tasks) - 1):
            G.add_node(f'precedence_{k}', type=1, weight=0.7)
            G.add_edge(f'task_{active_tasks[k]}', f'precedence_{k}', weight=0.8)
            G.add_edge(f'task_{active_tasks[k + 1]}', f'precedence_{k}', weight=0.8)
    else:
        for k, (i, j) in enumerate(precedence_pairs):
            G.add_edge(f'task_{i}', f'precedence_{k}', weight=0.8)
            G.add_edge(f'task_{j}', f'precedence_{k}', weight=0.8)

    # ------------------------------------------------------------------
    # Task <-> deadline edges
    # ------------------------------------------------------------------
    if not deadline:
        # No deadline data: give tasks of above-average duration a synthetic
        # deadline node weighted by relative duration.
        for i in range(n_tasks):
            if i < len(d) and d[i] > avg_duration:
                deadline_weight = min(d[i] / max_duration, 1.0) if max_duration > 0 else 0.5
                G.add_node(f'deadline_{i}', type=1, weight=deadline_weight)
                G.add_edge(f'task_{i}', f'deadline_{i}', weight=1.0)
    else:
        for i in range(n_tasks):
            if G.has_node(f'deadline_{i}'):
                G.add_edge(f'task_{i}', f'deadline_{i}', weight=1.0)

    # ------------------------------------------------------------------
    # Task <-> task conflict edges
    # ------------------------------------------------------------------
    if not rr:
        # Without requirements, link each above-average-duration task to the
        # next three such tasks (in index order).
        long_tasks = [i for i in range(n_tasks) if i < len(d) and d[i] > avg_duration]
        for a in range(len(long_tasks)):
            for b in range(a + 1, min(len(long_tasks), a + 4)):
                task1, task2 = long_tasks[a], long_tasks[b]
                if max_duration > 0:
                    conflict_weight = min((d[task1] + d[task2]) / (2 * max_duration), 1.0)
                else:
                    conflict_weight = 0.5
                G.add_edge(f'task_{task1}', f'task_{task2}', weight=conflict_weight)
    else:
        for r in range(min(n_res, len(rc))):
            capacity = rc[r]
            resource_users = [
                (i, rr[r][i])
                for i in range(n_tasks)
                if r < len(rr) and i < len(rr[r]) and rr[r][i] > 0
            ]
            total_usage = sum(usage for _, usage in resource_users)
            # Only resources whose total demand exceeds capacity by more
            # than 20% produce conflict edges.
            if total_usage <= capacity * 1.2:
                continue

            # Stable sort: ties keep task-index order. Only the four largest
            # consumers are linked pairwise.
            resource_users.sort(key=lambda user: user[1], reverse=True)
            top_users = resource_users[:4]
            for a, (task1, usage1) in enumerate(top_users):
                for task2, usage2 in top_users[a + 1:]:
                    conflict_strength = (usage1 + usage2) / (capacity * 2) if capacity > 0 else 0.5
                    # NOTE: if the same pair conflicts on several resources,
                    # add_edge overwrites, so the last resource's weight wins.
                    G.add_edge(f'task_{task1}', f'task_{task2}',
                               weight=min(conflict_strength, 1.0))

    return G


def _is_deadline_row(row):
    """Return True if `row` is a list with at least the three deadline fields."""
    return isinstance(row, list) and len(row) >= 3


def _deadline_rows(deadline):
    """Return the deadline data as a list of per-task rows.

    Nested input (a list whose first element is a list) is returned as is.
    Flat input is cut into consecutive triples, one per task. Missing or
    empty input yields an empty list.
    """
    if not deadline:
        return []
    if isinstance(deadline[0], list):
        return deadline
    return [deadline[k:k + 3] for k in range(0, len(deadline), 3)]


def _precedence_pairs(suc, n_tasks):
    """Return the (predecessor, successor) index pairs encoded in `suc`.

    Only entries that are lists are read. Predecessors and successors
    outside [0, n_tasks) are dropped so that no edge can refer to a task
    node that was never created.
    """
    pairs = []
    for i, successors in enumerate(suc):
        if i >= n_tasks or not isinstance(successors, list):
            continue
        for j in successors:
            if isinstance(j, int) and 0 <= j < n_tasks:
                pairs.append((i, j))
    return pairs


def main():
    """Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments, <mzn_file> <dzn_file> <json_file>.
    With any other argument count it prints a usage message and exits with
    status 1. Only the JSON file is opened here: the .mzn path is passed on
    to build_graph and the .dzn path is accepted but not read.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, so that
# importing the module (e.g. to call build_graph directly) has no side effects.
if __name__ == "__main__":
    main()