#!/usr/bin/env python3
"""
Graph converter for Zephyrus cloud deployment problem.
Created using subagent_prompt.md version: v_02

This problem is about deploying software components on cloud locations while minimizing cost.
Key challenges: Resource capacity constraints, port requirement/provision matching, location costs,
and binding constraints between components through ports.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _row(values, index, width):
    """Return row `index` of a flat, row-major array whose rows have `width` entries."""
    return values[index * width:(index + 1) * width]


def _component_weight(consumption, max_consumption, required_ports, provided_ports):
    """Score one component from its resource consumption and port counts (capped at 1.0)."""
    consumption_weight = consumption / max_consumption if max_consumption > 0 else 0.5

    port_complexity = sum(required_ports)
    for provided in provided_ports:
        if provided == -1:  # -1 marks an infinite provider; count it as a fixed bump
            port_complexity += 10
        elif provided > 0:
            port_complexity += provided

    # tanh squashes the unbounded port count below 1 before it is blended in.
    blended = 0.7 * consumption_weight + 0.3 * math.tanh(port_complexity / 10.0)
    return min(blended, 1.0)


def _provision_factor(total_provision, max_provision):
    """Return a location's log-scaled capacity relative to the largest provision, in [0, 1]."""
    if max_provision <= 0:
        # No positive capacity anywhere: log(1 + max) would be zero (or undefined),
        # so fall back to the neutral value used for other degenerate normalisers.
        return 0.5
    ratio = math.log(1 + max(total_provision, 0)) / math.log(1 + max_provision)
    # The total spans all resources while the normaliser is a single entry, so the
    # ratio can exceed 1 with several resources; cap it like every other weight.
    return min(ratio, 1.0)


def _deployment_feasibility(consumption, provisions):
    """Return the headroom score of a placement, or None if any resource is too small."""
    score = 1.0
    for provision in provisions:
        if consumption > provision:
            return None
        utilization = consumption / provision if provision > 0 else 0
        score *= 1.0 - utilization
    return score


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the Zephyrus cloud deployment problem.

    Args:
        mzn_file: Path to .mzn file (for reference only; it is not read)
        json_data: Dict containing parsed DZN data. The keys read are 'costs',
            'requirement_port_nums', 'provide_port_nums', 'resource_provisions'
            and 'resource_consumptions'; each is treated as a flat list and a
            missing or null entry is treated as empty.

    Returns:
        An undirected networkx Graph whose nodes carry 'type' and 'weight'
        attributes and whose edges carry a 'weight' attribute. If there are no
        costs or no resource consumptions, the graph holds a single 'dummy' node.

    Strategy: components and locations are the primary entities, linked both
    directly (possible deployments) and through shared constraint nodes.
    - Component nodes (type 0): Software components to deploy
    - Location nodes (type 2): Cloud locations/resources
    - Constraint nodes (type 1): Resource capacity, port requirement and
      port provision constraints
    - Weights based on resource pressure, port complexity, and cost efficiency
    """
    costs = json_data.get('costs') or []
    requirement_port_nums = json_data.get('requirement_port_nums') or []
    provide_port_nums = json_data.get('provide_port_nums') or []
    resource_provisions = json_data.get('resource_provisions') or []
    resource_consumptions = json_data.get('resource_consumptions') or []

    # Infer dimensions from data structure.
    # NOTE(review): resource_consumptions is read as one value per component and
    # that value is applied to every resource type - confirm against the model
    # if instances with more than one resource are used.
    n_locations = len(costs)
    n_components = len(resource_consumptions)

    # Handle empty data gracefully
    if n_locations == 0 or n_components == 0:
        G = nx.Graph()
        G.add_node('dummy', type=0, weight=0.5)
        return G

    # Widths of the flat row-major arrays. Floor division guarantees that every
    # row/column index used below is within bounds, so no per-access checks are needed.
    n_resources = len(resource_provisions) // n_locations
    ports_per_comp = len(requirement_port_nums) // n_components
    provide_ports_per_comp = len(provide_port_nums) // n_components

    location_provisions = [_row(resource_provisions, l, n_resources)
                           for l in range(n_locations)]

    G = nx.Graph()

    # Component nodes (type 0) - software components to deploy
    max_consumption = max(resource_consumptions)
    for c, consumption in enumerate(resource_consumptions):
        weight = _component_weight(
            consumption,
            max_consumption,
            _row(requirement_port_nums, c, ports_per_comp),
            _row(provide_port_nums, c, provide_ports_per_comp),
        )
        G.add_node(f'comp_{c}', type=0, weight=weight)

    # Location nodes (type 2) - cloud resources/machines
    min_cost = min(costs)
    max_cost = max(costs)
    # NOTE(review): when all costs are equal the range falls back to 1, which
    # yields a cost factor of 1.0 on location nodes but a cost efficiency of 0.0
    # on deployment edges. Kept as-is to preserve existing weights.
    cost_range = max_cost - min_cost if max_cost > min_cost else 1
    max_provision = max(resource_provisions) if resource_provisions else 1

    for l, location_cost in enumerate(costs):
        # Lower cost + higher provision = better
        cost_factor = 1.0 - (location_cost - min_cost) / cost_range
        if resource_provisions:
            provision_factor = _provision_factor(sum(location_provisions[l]), max_provision)
        else:
            provision_factor = 0.5
        # Geometric mean favours locations that are both cheap and high-capacity
        G.add_node(f'loc_{l}', type=2, weight=math.sqrt(cost_factor * provision_factor))

    # Resource capacity constraint nodes (type 1)
    total_demand = sum(resource_consumptions)
    # consumption > 0 implies max_consumption > 0, so the division is safe.
    consumer_edges = [(f'comp_{c}', consumption / max_consumption)
                      for c, consumption in enumerate(resource_consumptions)
                      if consumption > 0]
    for r in range(n_resources):
        constraint = f'resource_constraint_{r}'
        supply_by_location = [provisions[r] for provisions in location_provisions]
        total_supply = sum(supply_by_location)

        # Resource pressure indicates constraint tightness
        pressure = total_demand / total_supply if total_supply > 0 else 1.0
        G.add_node(constraint, type=1, weight=min(pressure, 1.0))

        # Connect components that consume this resource
        for comp, weight in consumer_edges:
            G.add_edge(comp, constraint, weight=weight)

        # Connect locations that provide this resource
        for l, provision in enumerate(supply_by_location):
            if provision > 0:
                G.add_edge(f'loc_{l}', constraint, weight=provision / max_provision)

    # Port requirement constraints (type 1)
    if ports_per_comp > 0:
        max_requirement = max(requirement_port_nums)
        for p in range(ports_per_comp):
            requirers = [(c, requirement_port_nums[c * ports_per_comp + p])
                         for c in range(n_components)]
            requirers = [(c, req) for c, req in requirers if req > 0]
            if not requirers:
                continue

            # Higher total demand = tighter constraint
            total_requirements = sum(req for _, req in requirers)
            port_weight = min(total_requirements / (max_requirement * n_components), 1.0)
            G.add_node(f'port_req_{p}', type=1, weight=port_weight)

            for c, req in requirers:
                G.add_edge(f'comp_{c}', f'port_req_{p}', weight=req / max_requirement)

    # Port provision constraints (type 1)
    if provide_ports_per_comp > 0:
        for p in range(provide_ports_per_comp):
            providers = [(c, provide_port_nums[c * provide_ports_per_comp + p])
                         for c in range(n_components)]
            # 0 means no provision, -1 means infinite
            providers = [(c, prov) for c, prov in providers if prov != 0]
            if not providers:
                continue

            # A port with any infinite provider is not scarce
            has_infinite = any(prov == -1 for _, prov in providers)
            G.add_node(f'port_prov_{p}', type=1, weight=0.3 if has_infinite else 0.8)

            for c, prov in providers:
                edge_weight = 1.0 if prov == -1 else min(prov / 10.0, 1.0)
                G.add_edge(f'comp_{c}', f'port_prov_{p}', weight=edge_weight)

    # Deployment edges between components and locations: one edge per
    # placement that fits within every resource the location provides.
    for c, consumption in enumerate(resource_consumptions):
        for l, location_cost in enumerate(costs):
            feasibility = _deployment_feasibility(consumption, location_provisions[l])
            if feasibility is None:
                continue
            cost_efficiency = (max_cost - location_cost) / cost_range
            edge_weight = 0.6 * feasibility + 0.4 * cost_efficiency
            G.add_edge(f'comp_{c}', f'loc_{l}', weight=min(edge_weight, 1.0))

    return G


def main():
    """
    Command-line entry point: load the JSON instance and report the graph size.

    Expects exactly three arguments: the .mzn path, the .dzn path and the JSON
    path. Only the JSON file is opened; the .mzn path is passed through to
    build_graph and the .dzn path is accepted but not used. Exits with status 1
    after printing a usage line if the argument count is wrong.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # Open in binary mode so json.load detects the encoding itself (UTF-8,
    # UTF-16 or UTF-32, with or without BOM) instead of relying on the
    # platform's default text encoding.
    with open(json_file, 'rb') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()