#!/usr/bin/env python3
"""
Graph converter for skill_allocation_only problem.
Created using subagent_prompt.md version: v_02

This problem is about allocating engineers to jobs based on skill requirements.
Engineers have existing skills and can learn new skills (with cost).
Key challenges: skill matching, geographic constraints, workload balancing, training budget
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _parse_jobs(jobs):
    """Group the flat ``jobs`` array into one dict per job.

    Each job occupies five consecutive values, in the order skill, priority,
    duration, location, overseas. A trailing partial record (fewer than five
    values) is dropped.
    """
    fields = ('skill', 'priority', 'duration', 'location', 'overseas')
    return [
        dict(zip(fields, jobs[start:start + 5]))
        for start in range(0, len(jobs) - 4, 5)
    ]


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the skill allocation problem.

    Args:
        mzn_file: Path to .mzn file (for reference only; it is not read)
        json_data: Dict containing parsed DZN data. Keys read:
            'nNewSkillsPerPerson' (default 1), 'nMaxJobs' (default 10),
            'engineer_skills' (flat, row-major engineer x skill 0/1 flags),
            'engineer_location' (one entry per engineer, 0 = no location),
            'jobs' (flat, five values per job).

    Returns:
        An undirected networkx Graph whose nodes carry ``type`` and ``weight``
        attributes and whose edges carry ``weight``. The graph is empty when
        the instance has no engineers, no jobs or no skills.

    Strategy: Create bipartite graph with explicit constraint nodes
    - Engineers (type 0): weighted by skill versatility and location constraints
    - Jobs (type 0): weighted by skill rarity and geographic isolation
    - Skill constraints (type 1): weighted by skill scarcity
    - Capacity constraints (type 1): weighted by tightness
    - Geographic constraints (type 1): weighted by interstate/overseas ratios
    - Training constraint (type 1): weighted by training budget vs. job count
    """
    G = nx.Graph()

    # Extract problem data
    n_new_skills = json_data.get('nNewSkillsPerPerson', 1)
    n_max_jobs = json_data.get('nMaxJobs', 10)
    engineer_skills = json_data.get('engineer_skills', [])
    engineer_location = json_data.get('engineer_location', [])
    job_list = _parse_jobs(json_data.get('jobs', []))

    n_engineers = len(engineer_location)
    n_jobs = len(job_list)
    # Guard on n_engineers (not on engineer_skills): a skills array without
    # any engineer locations must yield the empty graph, not a
    # ZeroDivisionError.
    n_skills = len(engineer_skills) // n_engineers if n_engineers else 0

    if n_engineers == 0 or n_jobs == 0 or n_skills == 0:
        return G

    # One row of skill flags per engineer. Because n_skills is the floor of
    # len(engineer_skills) / n_engineers, every row has exactly n_skills
    # entries, so no further bounds checks are needed below.
    skill_rows = [
        engineer_skills[eng * n_skills:(eng + 1) * n_skills]
        for eng in range(n_engineers)
    ]

    def in_range(skill_id):
        # Skills are 1-indexed; jobs naming an unknown skill are ignored
        # wherever a skill lookup is needed.
        return 1 <= skill_id <= n_skills

    # Calculate skill demand/supply for rarity weights (index 0 is unused)
    skill_demand = [0] * (n_skills + 1)
    skill_supply = [0] * (n_skills + 1)

    for job in job_list:
        if in_range(job['skill']):
            skill_demand[job['skill']] += 1

    for row in skill_rows:
        for skill, flag in enumerate(row, start=1):
            if flag == 1:
                skill_supply[skill] += 1

    # Add engineer nodes (type 0)
    for eng, row in enumerate(skill_rows):
        skills_count = sum(1 for flag in row if flag == 1)

        # Versatility saturates once an engineer holds a quarter of all skills
        versatility = min(skills_count / max(n_skills // 4, 1), 1.0)

        # Factor in location constraints (no location = more flexible)
        location_flexibility = 0.8 if engineer_location[eng] == 0 else 0.5

        engineer_weight = (versatility + location_flexibility) / 2
        G.add_node(f'engineer_{eng}', type=0, weight=engineer_weight)

    # Add job nodes (type 0)
    for job_idx, job in enumerate(job_list):
        skill_id = job['skill']

        # Weight by skill rarity (high demand, low supply = harder job);
        # 0.5 is the neutral value for jobs naming an unknown skill.
        skill_rarity = 0.5
        if in_range(skill_id):
            supply = max(skill_supply[skill_id], 1)
            skill_rarity = min(skill_demand[skill_id] / supply, 2.0) / 2.0

        # Factor in priority and geographic isolation
        priority_weight = min(job['priority'] / 10.0, 1.0)
        overseas_penalty = 0.3 if job['overseas'] == 1 else 0.0

        job_weight = min((skill_rarity + priority_weight + overseas_penalty) / 2, 1.0)
        G.add_node(f'job_{job_idx}', type=0, weight=job_weight)

    # Add skill matching constraint nodes (type 1) - one per skill
    for skill in range(1, n_skills + 1):
        supply = max(skill_supply[skill], 1)

        # Tightness based on demand/supply ratio, capped at 3x oversubscribed
        tightness = min(skill_demand[skill] / supply, 3.0) / 3.0

        # sqrt lifts small ratios so moderately scarce skills still register
        G.add_node(f'skill_constraint_{skill}', type=1, weight=math.sqrt(tightness))

    # Add workload capacity constraints (type 1) - one per engineer
    for eng, row in enumerate(skill_rows):
        # Jobs this engineer could take with the skills already held
        potential_jobs = sum(
            1 for job in job_list
            if in_range(job['skill']) and row[job['skill'] - 1] == 1
        )

        # Tightness: how overloaded could this engineer be?
        overload_ratio = potential_jobs / max(n_max_jobs, 1)
        capacity_tightness = min(overload_ratio, 2.0) / 2.0

        G.add_node(f'capacity_constraint_{eng}', type=1, weight=capacity_tightness)

    # Add geographic constraint nodes (type 1)
    interstate_jobs = sum(1 for job in job_list if job['overseas'] == 0)
    overseas_jobs = sum(1 for job in job_list if job['overseas'] == 1)

    if interstate_jobs > 0:
        G.add_node('interstate_constraint', type=1, weight=interstate_jobs / n_jobs)

    if overseas_jobs > 0:
        # Saturates at 5 overseas jobs
        G.add_node('overseas_constraint', type=1, weight=min(overseas_jobs / 5.0, 1.0))

    # Add training budget constraint (type 1)
    max_training = n_engineers * n_new_skills
    training_pressure = min(max_training / (n_jobs * 0.3), 1.0)  # Assume 30% need training
    G.add_node('training_constraint', type=1, weight=training_pressure)

    # Add bipartite edges: engineer-skill participation
    for eng, row in enumerate(skill_rows):
        for skill, flag in enumerate(row, start=1):
            if flag == 1:
                G.add_edge(f'engineer_{eng}', f'skill_constraint_{skill}', weight=0.8)

    # Add bipartite edges: job-skill requirements
    for job_idx, job in enumerate(job_list):
        if in_range(job['skill']):
            duration_weight = min(job['duration'] / 14.0, 1.0)  # Normalize by 2 weeks
            G.add_edge(f'job_{job_idx}', f"skill_constraint_{job['skill']}", weight=duration_weight)

    # Add bipartite edges: engineer-capacity constraints
    for eng in range(n_engineers):
        G.add_edge(f'engineer_{eng}', f'capacity_constraint_{eng}', weight=1.0)

    # Add bipartite edges: job-capacity constraints (potential assignments).
    # Every job with a known skill touches every engineer's capacity: strongly
    # if the engineer already has the skill, weakly if training would be needed.
    for eng, row in enumerate(skill_rows):
        for job_idx, job in enumerate(job_list):
            if in_range(job['skill']):
                edge_weight = 0.9 if row[job['skill'] - 1] == 1 else 0.3
                G.add_edge(f'job_{job_idx}', f'capacity_constraint_{eng}', weight=edge_weight)

    # Add geographic constraint edges. The constraint nodes are guaranteed to
    # exist here: each was created above iff at least one job of that kind
    # is present.
    # Simplified interstate check: a region is location // 1000
    # (presumably a postcode prefix - confirm against the data).
    engineer_regions = {loc // 1000 for loc in engineer_location if loc > 0}
    for job_idx, job in enumerate(job_list):
        if job['overseas'] == 1:
            G.add_edge(f'job_{job_idx}', 'overseas_constraint', weight=1.0)
        elif job['overseas'] == 0:
            job_region = job['location'] // 1000
            # Interstate if any located engineer sits in a different region
            if any(region != job_region for region in engineer_regions):
                G.add_edge(f'job_{job_idx}', 'interstate_constraint', weight=0.7)

    # Add training constraint edges (potential training needs).
    # An nx.Graph holds at most one edge per node pair, so each engineer gets
    # a single edge, weighted by the highest-priority job that would need
    # training. Counting one edge per engineer (instead of one per
    # engineer/job pair) keeps the max_training cap from being exhausted by
    # repeated overwrites of the first engineer's edge.
    training_edges_added = 0
    for eng, row in enumerate(skill_rows):
        if training_edges_added >= max_training:  # Limit edges; 0 budget = none
            break
        training_weights = [
            min(job['priority'] / 10.0, 1.0)
            for job in job_list
            if in_range(job['skill']) and row[job['skill'] - 1] == 0
        ]
        if training_weights:
            G.add_edge(f'engineer_{eng}', 'training_constraint', weight=max(training_weights))
            training_edges_added += 1

    return G


def main():
    """Command-line entry point: load an instance and report the graph size.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    With any other argument count, prints a usage line and exits with
    status 1. Only the JSON file is read; the .mzn path is forwarded to
    build_graph and the .dzn path is accepted but not used.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file = sys.argv[1]
    json_file = sys.argv[3]

    # Load JSON data. Decode explicitly as UTF-8 so the result does not
    # depend on the platform's default locale encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()