#!/usr/bin/env python3
"""
Graph converter for the MSPSP (Multi-Skilled Project Scheduling Problem).
Created using subagent_prompt.md version: v_02

This problem involves scheduling tasks with precedence constraints, where workers have different skills
and tasks require specific skill combinations. The objective is to minimize project duration (makespan).

Key challenges: Resource contention, skill matching, precedence constraints, worker allocation conflicts.

Note: The JSON conversion appears incomplete - missing has_skills and suc data structures.
Working with available data (n_skills, n_workers, n_tasks, d, rr).
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _unflatten_requirements(rr, n_skills, n_tasks):
    """Return ``rr`` as an ``n_skills`` x ``n_tasks`` matrix of demands.

    ``rr`` is read skill-major: the demand of ``task`` for ``skill`` is
    ``rr[skill * n_tasks + task]``. When ``rr`` holds fewer than
    ``n_skills * n_tasks`` entries it cannot be decoded, so an all-zero matrix
    is returned; downstream code then sees "no requirements" everywhere.
    """
    if len(rr) < n_skills * n_tasks:
        return [[0] * n_tasks for _ in range(n_skills)]
    return [list(rr[skill * n_tasks:(skill + 1) * n_tasks])
            for skill in range(n_skills)]


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the MSPSP instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here).
        json_data: Dict containing parsed DZN data. The keys read are
            'n_skills', 'n_workers', 'n_tasks', 'd' (task durations) and
            'rr' (skill requirements, flattened skill-major).

    Returns:
        An undirected ``networkx.Graph``. It is empty when ``n_tasks`` is not
        positive or when 'd' or 'rr' is missing/empty.

    Graph layout (nodes are added in this order):
    - ``task_<t>`` (type 0): weight mixes relative duration, the fraction of
      skills the task needs and its total demand relative to the worker count.
    - ``skill_constraint_<s>_<t>`` (type 1): one per (skill, task) pair with a
      positive requirement, linked to its task.
    - ``worker_<w>`` (type 2): all workers share one weight, because per-worker
      skill data (has_skills) is not available in the input.
    - ``skill_resource_<s>`` (type 1): one per skill with positive total
      demand, linked to every task that uses the skill.
    - worker -- skill_resource edges from a synthetic affinity formula.
    - task -- task conflict edges for pairs competing for the same skills
      (so the graph is not bipartite).
    """
    n_skills = json_data.get('n_skills', 0)
    n_workers = json_data.get('n_workers', 0)
    n_tasks = json_data.get('n_tasks', 0)
    d = json_data.get('d', [])  # durations
    rr = json_data.get('rr', [])  # resource requirements (flattened from 2D array)

    G = nx.Graph()

    # A non-positive task count is treated like "no tasks": nothing to model.
    if n_tasks <= 0 or not d or not rr:
        return G

    # requirements[skill][task] -> demand; all zeros if rr is too short.
    requirements = _unflatten_requirements(rr, n_skills, n_tasks)

    # Without has_skills data, assume skills are spread uniformly over workers.
    # Guarded because n_skills may be 0 (then no per-skill loop ever runs).
    workers_per_skill = max(n_workers // n_skills, 1) if n_skills > 0 else 1

    # Total demand of each task over all skills, computed once instead of once
    # per (skill, task) pair.
    task_total_demand = [sum(row[task] for row in requirements)
                         for task in range(n_tasks)]

    # ------------------------------------------------------------------
    # Task nodes (type 0) - weighted by duration and skill complexity
    # ------------------------------------------------------------------
    max_duration = max(d)
    if max_duration <= 0:
        # All durations are zero: avoid dividing by zero below. Every
        # duration_weight then evaluates to 0.
        max_duration = 1

    for task in range(n_tasks):
        duration = d[task] if task < len(d) else 1

        # Positive demands only: their count is the number of distinct skills
        # needed, their sum is the number of skill units needed.
        demands = [row[task] for row in requirements if row[task] > 0]
        skill_complexity = len(demands)
        total_skill_demand = sum(demands)

        duration_weight = duration / max_duration
        complexity_weight = skill_complexity / max(n_skills, 1)
        demand_weight = min(total_skill_demand / max(n_workers, 1), 1.0)

        # sqrt boosts tasks needing only a few skills relative to a linear scale.
        node_weight = (0.4 * duration_weight +
                       0.4 * math.sqrt(complexity_weight) +
                       0.2 * demand_weight)

        G.add_node(f'task_{task}', type=0, weight=min(node_weight, 1.0))

    # ------------------------------------------------------------------
    # Skill constraint nodes (type 1) - one per positive (skill, task) demand
    # ------------------------------------------------------------------
    constraint_tightness = []
    for skill, row in enumerate(requirements):
        for task, demand in enumerate(row):
            if demand > 0:
                # Tightness: demand relative to the estimated number of
                # workers holding this skill, capped at 1.
                tightness = min(demand / workers_per_skill, 1.0)
                constraint_tightness.append(tightness)

                constraint_id = f'skill_constraint_{skill}_{task}'
                G.add_node(constraint_id, type=1, weight=tightness)

                # Edge weight: share of the task's total demand due to this skill.
                skill_contribution = demand / max(task_total_demand[task], 1)
                G.add_edge(f'task_{task}', constraint_id, weight=skill_contribution)

    # ------------------------------------------------------------------
    # Worker nodes (type 2)
    # ------------------------------------------------------------------
    if n_workers > 0:
        avg_tightness = sum(constraint_tightness) / max(len(constraint_tightness), 1)
        # Workers matter more when there are few of them relative to tasks.
        scarcity = min(n_tasks / n_workers, 1.0)
        worker_weight = 0.5 + 0.5 * scarcity * avg_tightness
        for worker in range(n_workers):
            G.add_node(f'worker_{worker}', type=2, weight=worker_weight)

    # ------------------------------------------------------------------
    # Global resource constraint nodes (type 1), one per skill in demand
    # ------------------------------------------------------------------
    for skill, row in enumerate(requirements):
        total_demand = sum(row)
        if total_demand > 0:
            # Pressure: total demand relative to estimated capacity over all tasks.
            resource_pressure = min(total_demand / (workers_per_skill * n_tasks), 1.0)

            resource_node = f'skill_resource_{skill}'
            G.add_node(resource_node, type=1, weight=resource_pressure)

            # Connect to all tasks that use this skill.
            for task, demand in enumerate(row):
                if demand > 0:
                    usage_intensity = demand / max(total_demand, 1)
                    G.add_edge(f'task_{task}', resource_node, weight=usage_intensity)

    # ------------------------------------------------------------------
    # Worker -- skill resource edges based on a synthetic affinity estimate
    # ------------------------------------------------------------------
    # NOTE(review): the affinity tops out at exactly 0.5 when n_skills <= 2, so
    # in that case no worker is ever connected and worker nodes stay isolated.
    for worker in range(n_workers):
        for skill in range(n_skills):
            skill_affinity = 0.3 + 0.4 * ((worker + skill) % n_skills) / n_skills
            if skill_affinity > 0.5:  # Only connect if reasonable affinity
                resource_node = f'skill_resource_{skill}'
                if G.has_node(resource_node):
                    G.add_edge(f'worker_{worker}', resource_node, weight=skill_affinity)

    # ------------------------------------------------------------------
    # Conflict edges between tasks competing heavily for the same skills
    # ------------------------------------------------------------------
    for task1 in range(n_tasks):
        for task2 in range(task1 + 1, n_tasks):
            conflict_score = 0.0
            shared_skills = 0

            for row in requirements:
                demand1 = row[task1]
                demand2 = row[task2]
                if demand1 > 0 and demand2 > 0:
                    shared_skills += 1
                    # Conflict is higher when both tasks need a lot of the same skill.
                    skill_conflict = (demand1 + demand2) / workers_per_skill
                    conflict_score += min(skill_conflict, 1.0)

            # Only add conflict edge if significant competition exists.
            if shared_skills > 0 and conflict_score > 0.5:
                avg_conflict = conflict_score / shared_skills
                # Saturating curve: weight approaches 1 as average conflict grows.
                conflict_weight = 1.0 - math.exp(-3.0 * avg_conflict)
                G.add_edge(f'task_{task1}', f'task_{task2}', weight=conflict_weight)

    return G


def main():
    """Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three positional arguments (mzn file, dzn file, json
    file). On any other argument count a usage line is printed and the
    process exits with status 1. Only the JSON file is opened; the mzn path is
    forwarded to ``build_graph`` and the dzn path is not used.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = cli_args

    # Parse the instance data that was converted from DZN to JSON.
    with open(json_file, 'r') as handle:
        instance = json.load(handle)

    graph = build_graph(mzn_file, instance)

    # Report the size of the graph; the graph itself is not written anywhere.
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the command-line interface only when executed as a script, so that
# importing this module (e.g. to call build_graph directly) has no side effects.
if __name__ == "__main__":
    main()