#!/usr/bin/env python3
"""
Graph converter for RCPSP-WET (Resource-Constrained Project Scheduling with Weighted Earliness/Tardiness) problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling tasks with resource constraints while minimizing weighted earliness/tardiness costs.
Key challenges: 
- Resource contention between tasks
- Precedence relationships creating critical paths
- Balancing earliness vs tardiness penalties
- Complex interaction between temporal and resource constraints
"""

import json
import math
import re
import sys
from pathlib import Path

import networkx as nx


def parse_dzn_array(dzn_content, array_name):
    """Extract the integer values of a DZN array assignment as a flat list.

    Handles plain 1D literals (``name = [1, 2, 3];``), ``array2d(..., [...])``
    wrappers and ``[| ... | ... |]`` 2D literals, on one line or spread over
    several. 2D data is returned flattened in the order it appears in the file.

    Args:
        dzn_content: Full text of the DZN file.
        array_name: Identifier on the left-hand side of the assignment.

    Returns:
        List of ints in file order, or ``[]`` when the assignment (or its
        bracketed literal) is not found. Tokens that are not plain integers
        are skipped.
    """
    # Drop '%' line comments so they can neither hide values nor contribute a
    # stray ';' that would cut the statement short.
    text = re.sub(r'%[^\n]*', '', dzn_content)

    header = re.search(
        rf'^[ \t]*{re.escape(array_name)}[ \t]*=', text, re.MULTILINE
    )
    if header is None:
        return []

    # The statement ends at the next ';'. Looking for the literal "];" is not
    # enough: array2d(...) statements end with "]);", and scanning past that
    # would mix in values from the following assignments.
    stop = text.find(';', header.end())
    statement = text[header.end():] if stop == -1 else text[header.end():stop]

    # Index ranges of array2d(1..n, 1..m, [...]) sit before the first '[',
    # so slicing between the outermost brackets leaves only the data.
    open_at = statement.find('[')
    close_at = statement.rfind(']')
    if open_at == -1 or close_at <= open_at:
        return []

    values = []
    # ',' separates elements; '|' separates rows in [| ... |] literals.
    for token in re.split(r'[,|]', statement[open_at + 1:close_at]):
        token = token.strip()
        if re.fullmatch(r'-?\d+', token):
            values.append(int(token))
    return values


def parse_dzn_sets(dzn_content, set_name='suc'):
    """Parse an array of integer sets (by default ``suc``) from DZN content.

    Each array element may be a brace literal (``{2, 3}``, ``{}``) or a bare
    range (``2..4``), which is expanded to its members. The array may span
    several lines and may be wrapped in ``array1d(..., [...])``.

    Args:
        dzn_content: Full text of the DZN file.
        set_name: Identifier of the set array to read.

    Returns:
        One list of ints per array element, in file order, or ``[]`` when the
        assignment is not present. Non-digit tokens inside braces are skipped.
    """
    # Drop '%' line comments so a ';' inside one cannot end the statement.
    text = re.sub(r'%[^\n]*', '', dzn_content)

    header = re.search(
        rf'^[ \t]*{re.escape(set_name)}[ \t]*=', text, re.MULTILINE
    )
    if header is None:
        return []

    # Bound the statement by its ';' so sets belonging to later assignments
    # are never picked up (a bare "];" search misses "]);" endings).
    stop = text.find(';', header.end())
    statement = text[header.end():] if stop == -1 else text[header.end():stop]

    # Skip anything before the first '[' (e.g. the index range of array1d),
    # otherwise that range would be mistaken for a set element.
    open_at = statement.find('[')
    body = statement[open_at + 1:] if open_at != -1 else statement

    successors = []
    element = re.compile(r'\{([^}]*)\}|(-?\d+)\s*\.\.\s*(-?\d+)')
    for match in element.finditer(body):
        members = match.group(1)
        if members is not None:
            successors.append(
                [int(x.strip()) for x in members.split(',') if x.strip().isdigit()]
            )
        else:
            # Bare range lo..hi; an empty range (lo > hi) yields an empty set.
            low, high = int(match.group(2)), int(match.group(3))
            successors.append(list(range(low, high + 1)))
    return successors


def _load_dzn_arrays(dzn_file, n_tasks, n_res, t_max):
    """Read ``rr``, ``deadline`` and ``suc`` from *dzn_file*.

    Returns ``(rr, deadline, successors)``: ``rr`` has one row per resource,
    ``deadline`` one ``[desired_start, earliness_cost, tardiness_cost]`` row
    per task, and ``successors`` one list of 1-based task ids per task.
    If the file cannot be read or parsed, a warning is written to stderr and
    uniform placeholder data is returned instead.
    """
    try:
        with open(dzn_file, 'r') as f:
            dzn_content = f.read()
        rr_flat = parse_dzn_array(dzn_content, 'rr')
        deadline_flat = parse_dzn_array(dzn_content, 'deadline')
        successors = parse_dzn_sets(dzn_content)
    except (OSError, ValueError) as exc:
        print(
            f"Warning: could not load DZN data from {dzn_file} ({exc}); "
            "using placeholder resource/deadline data",
            file=sys.stderr,
        )
        rr = [[1 for _ in range(n_tasks)] for _ in range(n_res)]
        deadline = [[t_max // 2, 1, 1] for _ in range(n_tasks)]
        successors = [[] for _ in range(n_tasks)]
        return rr, deadline, successors

    # Rebuild the 2D arrays from the flat value lists (rr is resource-major).
    rr = []
    if rr_flat and n_res > 0 and n_tasks > 0:
        rr = [rr_flat[r * n_tasks:(r + 1) * n_tasks] for r in range(n_res)]

    deadline = []
    if deadline_flat and n_tasks > 0:
        deadline = [deadline_flat[t * 3:(t + 1) * 3] for t in range(n_tasks)]

    return rr, deadline, successors


def _deadline_costs(deadline, task):
    """Return ``(earliness_cost, tardiness_cost)`` for *task*, or None if its row is missing or incomplete."""
    if task < len(deadline) and len(deadline[task]) >= 3:
        return deadline[task][1], deadline[task][2]
    return None


def build_graph(mzn_file, json_data, dzn_file=None):
    """
    Build graph representation of the RCPSP-WET instance.

    Args:
        mzn_file: Path to .mzn file (for reference)
        json_data: Dict containing parsed DZN data; read for n_tasks, n_res,
            t_max, d and rc. The remaining arrays are parsed from the DZN file.
        dzn_file: Path to the instance's .dzn file. When omitted, the path is
            derived from mzn_file by replacing '.mzn' with '.dzn'.

    Returns:
        networkx.Graph whose nodes carry ``type`` and ``weight`` attributes
        and whose edges carry ``weight``.

    Strategy: Create a mostly bipartite graph modeling scheduling decisions
    and constraints
    - Task nodes (type 0): Represent scheduling decisions with precedence/deadline pressure
    - Resource constraint nodes (type 1): Model resource contention
    - Precedence constraint nodes (type 1): Model ordering requirements
    - Deadline pressure nodes (type 1): Model earliness/tardiness costs
    - Task-task conflict edges are added between heavy consumers of an
      oversubscribed resource.
    """

    # Get basic data from JSON
    n_tasks = json_data.get('n_tasks', 0)
    n_res = json_data.get('n_res', 0)
    t_max = json_data.get('t_max', 100)
    d = json_data.get('d', [])
    rc = json_data.get('rc', [])

    if dzn_file is None:
        # Callers that pass only the model path get a sibling .dzn lookup.
        dzn_file = str(mzn_file).replace('.mzn', '.dzn')

    rr, deadline, successors = _load_dzn_arrays(dzn_file, n_tasks, n_res, t_max)

    G = nx.Graph()

    # Clamp to 1 so an instance whose durations are all zero cannot trigger a
    # division by zero in the normalisations below.
    max_duration = max(max(d), 1) if d else 1

    def duration_of(task):
        return d[task] if task < len(d) else 1

    def capacity_of(res):
        return rc[res] if res < len(rc) else 10

    # Combined earliness + tardiness cost per task, only where a complete
    # deadline row is available.
    penalties = {}
    for i in range(n_tasks):
        costs = _deadline_costs(deadline, i)
        if costs is not None:
            penalties[i] = costs[0] + costs[1]

    # Task nodes (type 0) - weighted by scheduling pressure
    for i in range(n_tasks):
        if i in penalties:
            # Saturates at 1.0 once the combined cost reaches 10.
            time_pressure = penalties[i] / max(10, penalties[i])
        else:
            time_pressure = 0.5

        duration_factor = duration_of(i) / max_duration
        pressure = min(1.0, 0.3 * duration_factor + 0.7 * time_pressure)
        G.add_node(f'task_{i}', type=0, weight=pressure)

    # Resource constraint nodes (type 1) - weighted by scarcity/contention
    for r in range(n_res):
        total_demand = 0
        if r < len(rr):
            total_demand = sum(use for use in rr[r][:n_tasks] if use > 0)

        if total_demand > 0:
            scarcity = min(1.0, total_demand / max(capacity_of(r), 1))
        else:
            scarcity = 0.1

        G.add_node(f'resource_{r}', type=1, weight=scarcity)

    # Valid precedence pairs as 0-based (predecessor, successor). Successor
    # rows beyond n_tasks are ignored so no attribute-less task node is
    # created implicitly by add_edge.
    precedences = [
        (i, j - 1)
        for i, row in enumerate(successors[:n_tasks])
        for j in row
        if 0 < j <= n_tasks
    ]

    # Precedence constraint nodes (type 1) - weighted by combined duration
    for pid, (pred, succ) in enumerate(precedences):
        precedence_weight = min(
            1.0, (duration_of(pred) + duration_of(succ)) / (2 * max_duration)
        )
        G.add_node(f'precedence_{pid}', type=1, weight=precedence_weight)

    # Deadline pressure constraint nodes (type 1) for high-penalty tasks
    deadline_weights = {
        i: min(1.0, total / 10.0) for i, total in penalties.items() if total > 2
    }
    for i, penalty_weight in deadline_weights.items():
        G.add_node(f'deadline_pressure_{i}', type=1, weight=penalty_weight)

    # Edges: Task-Resource relationships (bipartite)
    for i in range(n_tasks):
        for r in range(n_res):
            if r < len(rr) and i < len(rr[r]) and rr[r][i] > 0:
                # Saturating weight based on the share of capacity consumed
                consumption_ratio = rr[r][i] / max(capacity_of(r), 1)
                edge_weight = min(1.0, 1.0 - math.exp(-3.0 * consumption_ratio))
                G.add_edge(f'task_{i}', f'resource_{r}', weight=edge_weight)

    # Edges: Task-Precedence relationships (bipartite)
    for pid, (pred, succ) in enumerate(precedences):
        G.add_edge(f'task_{pred}', f'precedence_{pid}', weight=0.8)
        G.add_edge(f'task_{succ}', f'precedence_{pid}', weight=0.8)

    # Edges: Task-Deadline pressure relationships
    for i, deadline_weight in deadline_weights.items():
        G.add_edge(f'task_{i}', f'deadline_pressure_{i}', weight=deadline_weight)

    # Add resource contention edges for heavily contested resources
    for r in range(min(n_res, len(rr))):
        capacity = capacity_of(r)

        # Tasks that consume more than 30% of this resource's capacity
        high_consumers = [
            (i, use) for i, use in enumerate(rr[r][:n_tasks]) if use > capacity * 0.3
        ]
        if len(high_consumers) < 2:
            continue
        if sum(use for _, use in high_consumers) <= capacity * 1.2:
            continue  # Not oversubscribed

        # Limit conflicts: only pairs among the first four heavy consumers
        contenders = high_consumers[:4]
        for pos, (i1, cons1) in enumerate(contenders):
            for i2, cons2 in contenders[pos + 1:]:
                if cons1 + cons2 > capacity:  # Cannot both run at full demand
                    # max(..., 1) guards against a zero-capacity resource
                    conflict_strength = min(
                        1.0, (cons1 + cons2) / (2 * max(capacity, 1))
                    )
                    G.add_edge(f'task_{i1}', f'task_{i2}', weight=conflict_strength)

    return G


def main():
    """Command-line entry point: load the JSON summary, build the graph, print its size."""
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, dzn_file, json_file = sys.argv[1:4]

    # Load JSON data
    with open(json_file, 'r') as f:
        json_data = json.load(f)

    # Build graph from the DZN file actually given on the command line
    G = build_graph(mzn_file, json_data, dzn_file=dzn_file)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()