#!/usr/bin/env python3
"""
Graph converter for work-task-variation problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling workers (resources) to activities across time slots while 
minimizing run costs and frequency costs. Key challenges: managing temporal continuity, 
resource constraints, and the trade-off between run length and frequency.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the work-task-variation problem instance.

    Args:
        mzn_file: Path to .mzn file (kept for interface compatibility; not read here)
        json_data: Dict containing parsed DZN data. Keys read: 'slots', 'Resources',
            'requirements', 'fixed', 'activity_run_cost', 'activity_frequency_cost'.
            The array-valued entries are indexed as flat, row-major lists
            (NOTE(review): this layout is inferred, confirm against the DZN->JSON export).

    Returns:
        networkx.Graph whose nodes carry 'type' and 'weight' attributes and whose
        edges carry a 'weight' attribute.

    Node families, added in this order:
    - resource_{r} (type 0): weight = share of the resource's slots that are not fixed
    - activity_{a}_slot_{s} (type 0): one per positive requirement entry
    - slot_constraint_{s} (type 1): one per slot with positive total demand
    - run_cost_constraint_{a} / freq_cost_constraint_{a} (type 1): one per activity
      whose cost row has a positive maximum
    - resource_contention_{s} (type 2): one per slot whose total demand exceeds
      the number of resources
    """
    # Extract data from JSON
    slots = json_data.get('slots', 0)
    resources = json_data.get('Resources', [])
    # Note: the Activities array is not read; the activity count is inferred
    # from the length of the flattened requirements array instead.
    requirements = json_data.get('requirements', [])
    fixed = json_data.get('fixed', [])
    activity_run_cost = json_data.get('activity_run_cost', [])
    activity_frequency_cost = json_data.get('activity_frequency_cost', [])

    num_resources = len(resources)
    num_activities = len(requirements) // slots if requirements and slots > 0 else 0

    free_marker = 1  # 1 seems to be None/free in the 'fixed' array - TODO confirm

    def demand_of(activity, slot):
        # Always in range: num_activities * slots <= len(requirements) by construction.
        return requirements[activity * slots + slot]

    def cost_rows(flat_costs):
        # Split a flat cost array into complete per-activity rows of slots + 1 entries.
        if not flat_costs or slots <= 0:
            return []
        width = slots + 1
        count = min(num_activities, len(flat_costs) // width)
        return [flat_costs[a * width:(a + 1) * width] for a in range(count)]

    def severity(cost):
        # Logarithmic scaling of a positive cost, capped at 1.0.
        return min(math.log(cost + 1) / 10.0, 1.0)

    # Total demand per slot, computed once and shared by every section below.
    slot_totals = [
        sum(demand_of(activity, slot) for activity in range(num_activities))
        for slot in range(slots)
    ]
    run_rows = cost_rows(activity_run_cost)
    run_peaks = [max(row) for row in run_rows]
    freq_rows = cost_rows(activity_frequency_cost)
    freq_peaks = [max(row) for row in freq_rows]

    G = nx.Graph()

    # Resource nodes (type 0) - workers/machines
    for r_idx in range(num_resources):
        if slots > 0:
            # Weight by availability (resources with more fixed slots are less flexible).
            # A resource without a complete row in 'fixed' counts as fully flexible.
            row_end = (r_idx + 1) * slots
            fixed_row = fixed[r_idx * slots:row_end] if row_end <= len(fixed) else []
            fixed_count = sum(1 for f in fixed_row if f != free_marker)
            flexibility = 1.0 - fixed_count / slots
        else:
            flexibility = 0.5
        G.add_node(f'resource_{r_idx}', type=0, weight=flexibility)

    # Activity-slot nodes (type 0) - represent demand for specific activities in specific slots
    total_demand = sum(requirements) if requirements else 1
    max_slot_demand = max(requirements) if requirements else 1

    for slot in range(slots):
        for activity in range(num_activities):
            demand = demand_of(activity, slot)
            if demand > 0:  # Only create nodes for slots with demand
                # Weight by demand intensity relative to peak (70%) and total (30%)
                demand_weight = (demand / max_slot_demand) * 0.7 + (demand / total_demand) * 0.3
                G.add_node(f'activity_{activity}_slot_{slot}', type=0, weight=demand_weight)

    # Slot constraint nodes (type 1) - ensure requirements are met per slot
    for slot, slot_total in enumerate(slot_totals):
        if slot_total > 0:
            # Weight by how constraining this slot is (high demand = tight constraint)
            tightness = min(slot_total / num_resources, 1.0) if num_resources > 0 else 0.5
            G.add_node(f'slot_constraint_{slot}', type=1, weight=tightness)

    # Run cost constraint nodes (type 1) - weighted by the largest cost in the activity's row
    for activity, peak in enumerate(run_peaks):
        if peak > 0:
            G.add_node(f'run_cost_constraint_{activity}', type=1, weight=severity(peak))

    # Frequency cost constraint nodes (type 1)
    for activity, peak in enumerate(freq_peaks):
        if peak > 0:
            G.add_node(f'freq_cost_constraint_{activity}', type=1, weight=severity(peak))

    # Resource contention nodes (type 2) - represent competition for scarce resources
    for slot, slot_total in enumerate(slot_totals):
        if slot_total > num_resources:  # Oversubscribed slot
            if num_resources > 0:
                contention_level = (slot_total - num_resources) / num_resources
            else:
                # No resources at all: any positive demand is maximally contended.
                # (Dividing by num_resources here used to raise ZeroDivisionError.)
                contention_level = 1.0
            G.add_node(f'resource_contention_{slot}', type=2, weight=min(contention_level, 1.0))

    # Edges: Resource-slot constraint participation
    for r_idx in range(num_resources):
        for slot, slot_total in enumerate(slot_totals):
            if slot_total <= 0:
                continue  # no slot_constraint node for this slot
            fixed_idx = r_idx * slots + slot
            # NOTE(review): resources with no entry in 'fixed' get no edge at all,
            # although their node weight treats them as free - confirm this is intended.
            if fixed_idx < len(fixed):
                is_fixed = fixed[fixed_idx] != free_marker
                G.add_edge(f'resource_{r_idx}', f'slot_constraint_{slot}',
                           weight=0.9 if is_fixed else 0.3)

    # Edges: Activity-slot to slot constraints, weighted by share of the slot's demand
    for slot, slot_total in enumerate(slot_totals):
        if slot_total <= 0:
            continue
        for activity in range(num_activities):
            demand = demand_of(activity, slot)
            if demand > 0:
                G.add_edge(f'activity_{activity}_slot_{slot}', f'slot_constraint_{slot}',
                           weight=demand / slot_total)

    # Edges: Activity-slot to run cost constraints
    for activity, (row, peak) in enumerate(zip(run_rows, run_peaks)):
        if peak <= 0:
            continue  # no run_cost_constraint node for this activity
        for slot in range(slots):
            if demand_of(activity, slot) > 0:
                # The row entry at slot + 1 is used (entry 0 is skipped). Negative
                # entries are clamped to 0 so math.sqrt cannot raise ValueError.
                cost = max(row[slot + 1], 0)
                G.add_edge(f'activity_{activity}_slot_{slot}',
                           f'run_cost_constraint_{activity}',
                           weight=math.sqrt(cost / peak))

    # Edges: Activity-slot to frequency cost constraints
    for activity, (row, peak) in enumerate(zip(freq_rows, freq_peaks)):
        reference_cost = row[1]  # presumably the cost of a single run - TODO confirm
        if peak <= 0 or reference_cost <= 0:
            continue
        weight = severity(reference_cost)
        for slot in range(slots):
            if demand_of(activity, slot) > 0:
                G.add_edge(f'activity_{activity}_slot_{slot}',
                           f'freq_cost_constraint_{activity}', weight=weight)

    # Edges: Resource contention relationships
    for slot, slot_total in enumerate(slot_totals):
        if slot_total <= num_resources:
            continue  # no resource_contention node for this slot
        contention_node = f'resource_contention_{slot}'

        # Connect to every resource with a fixed high weight
        for r_idx in range(num_resources):
            G.add_edge(f'resource_{r_idx}', contention_node, weight=0.8)

        # Connect to every activity with demand in this slot
        for activity in range(num_activities):
            demand = demand_of(activity, slot)
            if demand > 0:
                weight = min(demand / num_resources, 1.0) if num_resources > 0 else 0.5
                G.add_edge(f'activity_{activity}_slot_{slot}', contention_node, weight=weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments (<mzn_file> <dzn_file> <json_file>); otherwise
    prints the usage line and exits with status 1. Loads the JSON file, builds the
    graph from it, and prints the node and edge counts. The dzn path is accepted
    but not read.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = cli_args

    # Parse the instance data
    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    # build_graph returns the graph for direct feature extraction; only a summary is printed here
    G = build_graph(mzn_file, json_data)
    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the converter only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()