#!/usr/bin/env python3
"""
Graph converter for bool-model-sickness problem.
Converter created with subagent_prompt.md v_02

This problem is about employee shift scheduling with expertise requirements.
Employees have contract hours and must be assigned to shifts matching their expertise.
Key challenges: temporal overlap conflicts, expertise matching, contract fulfillment
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the shift scheduling problem instance.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'n_shifts',
            'n_employees', 'n_expertises' (default 3), 'contract',
            'start_time', 'stop_time', 'req_expertise'.

    Returns:
        A networkx Graph. An empty graph is returned when any of the required
        fields (everything except 'n_expertises') is missing, zero or empty.

    Strategy: employees and shifts as variable nodes, constraints as explicit nodes
    - Employee nodes (type 0): weighted by contract hours (normalized, sqrt-scaled)
    - Shift nodes (type 0): weighted by duration and expertise rarity
    - Constraint nodes (type 1): temporal overlap, expertise requirements, contract limits
    - Variable-constraint edges represent participation in constraints
    - Direct employee-shift edges are added for high-weight pairs, so the
      resulting graph is not strictly bipartite
    """
    # Extract problem data
    n_shifts = json_data.get('n_shifts', 0)
    n_employees = json_data.get('n_employees', 0)
    n_expertises = json_data.get('n_expertises', 3)

    contract = json_data.get('contract', [])
    start_time = json_data.get('start_time', [])
    stop_time = json_data.get('stop_time', [])
    req_expertise = json_data.get('req_expertise', [])

    if not all([n_shifts, n_employees, contract, start_time, stop_time, req_expertise]):
        return nx.Graph()

    G = nx.Graph()

    # Number of shifts that have both a start and a stop time available.
    n_timed = min(n_shifts, len(start_time), len(stop_time))

    # Employee nodes (type 0) - square-root scaling of normalized contract hours
    max_contract = max(contract)
    emp_weights = []
    for e in range(n_employees):
        if e < len(contract):
            if max_contract > 0:
                # Clamp at 0 so a negative contract value cannot make
                # math.sqrt raise ValueError.
                normalized_hours = max(0.0, contract[e] / max_contract)
            else:
                normalized_hours = 0.5
            weight = 0.1 + 0.9 * math.sqrt(normalized_hours)
        else:
            # No contract entry for this employee: use the minimum weight.
            weight = 0.1
        emp_weights.append(weight)
        G.add_node(f'emp_{e}', type=0, weight=weight)

    # Shift durations (1 when timing data is missing) and per-expertise counts
    shift_durations = [
        stop_time[s] - start_time[s] if s < n_timed else 1
        for s in range(n_shifts)
    ]
    expertise_counts = [0] * (n_expertises + 1)  # index 0 is unused
    for s in range(min(n_shifts, len(req_expertise))):
        expertise = req_expertise[s]
        if 1 <= expertise <= n_expertises:
            expertise_counts[expertise] += 1

    max_duration = max(shift_durations) if shift_durations else 1
    counted_shifts = sum(expertise_counts[1:])
    total_shifts = counted_shifts if counted_shifts > 0 else 1

    # Shift nodes (type 0) - weighted by duration and expertise rarity
    shift_weights = []
    for s in range(n_shifts):
        duration = shift_durations[s]
        expertise = req_expertise[s] if s < len(req_expertise) else 1

        # Duration component with exponential saturation. When no shift has a
        # positive duration there is nothing to normalize against, so the
        # component is 0 instead of dividing by zero.
        if max_duration > 0:
            duration_weight = 1.0 - math.exp(-3.0 * duration / max_duration)
        else:
            duration_weight = 0.0

        # Expertise rarity component
        if 1 <= expertise <= n_expertises and expertise_counts[expertise] > 0:
            rarity_weight = 1.0 - (expertise_counts[expertise] / total_shifts)
        else:
            rarity_weight = 0.5

        # Combine weights with emphasis on rarity, clamped to [0.1, 1.0]
        weight = 0.3 * duration_weight + 0.7 * rarity_weight
        weight = max(0.1, min(1.0, weight))

        shift_weights.append(weight)
        G.add_node(f'shift_{s}', type=0, weight=weight)

    # Temporal overlap constraint nodes (type 1), one per overlapping shift pair
    overlap_constraints = []
    for s1 in range(n_timed):
        for s2 in range(s1 + 1, n_timed):
            if not (start_time[s1] < stop_time[s2] and start_time[s2] < stop_time[s1]):
                continue
            # Weight by the overlap length relative to the two shifts' combined length
            overlap_duration = min(stop_time[s1], stop_time[s2]) - max(start_time[s1], start_time[s2])
            total_duration = (stop_time[s1] - start_time[s1]) + (stop_time[s2] - start_time[s2])
            overlap_ratio = overlap_duration / max(total_duration, 1)

            weight = min(1.0, overlap_ratio * 2.0)  # Emphasize high-overlap conflicts
            constraint_id = f'overlap_{s1}_{s2}'
            G.add_node(constraint_id, type=1, weight=weight)
            overlap_constraints.append((s1, s2, constraint_id, weight))

    # Expertise requirement constraint nodes (type 1)
    # Group shifts by expertise requirement
    expertise_groups = {}
    for s in range(min(n_shifts, len(req_expertise))):
        expertise_groups.setdefault(req_expertise[s], []).append(s)

    for exp, shifts in expertise_groups.items():
        if len(shifts) > 1:  # Only create constraint if multiple shifts need same expertise
            # Weight by expertise rarity and number of shifts competing
            rarity = 1.0 - (len(shifts) / n_shifts) if n_shifts > 0 else 0.5
            scope_factor = min(1.0, len(shifts) / 10.0)  # Normalize scope
            weight = 0.6 * rarity + 0.4 * scope_factor
            G.add_node(f'expertise_{exp}', type=1, weight=weight)

    # Contract fulfillment constraint nodes (type 1)
    # Total available work is the same for every employee, so compute it once.
    # Only shifts with both a start and a stop time are counted, which avoids
    # an IndexError when stop_time is shorter than start_time.
    total_work_hours = sum(stop_time[s] - start_time[s] for s in range(n_timed))
    for e in range(n_employees):
        if e < len(contract) and contract[e] > 0:
            # Weight by contract size relative to available work
            if total_work_hours > 0:
                contract_ratio = contract[e] / total_work_hours
                weight = min(1.0, contract_ratio * 5.0)  # Emphasize high contract demands
            else:
                weight = 0.5

            G.add_node(f'contract_{e}', type=1, weight=weight)

    # Employee-contract constraint edges
    for e in range(n_employees):
        if e < len(contract) and contract[e] > 0:
            G.add_edge(f'emp_{e}', f'contract_{e}', weight=1.0)

    # Shift-expertise constraint edges
    for exp, shifts in expertise_groups.items():
        if len(shifts) > 1:
            constraint_id = f'expertise_{exp}'
            for s in shifts:
                # Weight by shift importance, boosted and capped at 1.0
                edge_weight = min(1.0, shift_weights[s] * 1.5)
                G.add_edge(f'shift_{s}', constraint_id, weight=edge_weight)

    # Employee-overlap constraint edges: every employee is attached to every
    # overlap constraint, scaled by the employee's own weight.
    for _s1, _s2, constraint_id, constraint_weight in overlap_constraints:
        for e in range(n_employees):
            edge_weight = min(1.0, constraint_weight * emp_weights[e])
            G.add_edge(f'emp_{e}', constraint_id, weight=edge_weight * 0.8)

    # Shift participation in overlap constraints
    for s1, s2, constraint_id, constraint_weight in overlap_constraints:
        G.add_edge(f'shift_{s1}', constraint_id, weight=constraint_weight)
        G.add_edge(f'shift_{s2}', constraint_id, weight=constraint_weight)

    # Employee-shift compatibility edges.
    # Per-employee expertise data is not read from json_data, so these edges
    # are based only on the product of employee and shift node weights.
    for e in range(n_employees):
        for s in range(n_shifts):
            compatibility = emp_weights[e] * shift_weights[s]
            if compatibility > 0.5:  # Only add edges for significant relationships
                G.add_edge(f'emp_{e}', f'shift_{s}', weight=compatibility * 0.3)

    return G


def main():
    """Command-line entry point: load the JSON instance and report graph size.

    Expects exactly three arguments (mzn file, dzn file, json file). Prints a
    usage message and exits with status 1 otherwise. Only the JSON file is
    opened here; the mzn path is forwarded to build_graph and the dzn path is
    not used.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = cli_args

    # Parse the instance data
    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    # Build the graph and print a one-line summary of its size
    G = build_graph(mzn_file, json_data)
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()