#!/usr/bin/env python3
"""
Graph converter for WordPress problem.
Created using subagent_prompt.md version: v_02

This problem is about deploying WordPress application components to virtual machines 
in the cloud while minimizing cost and satisfying resource requirements and deployment constraints.
Key challenges: Component conflicts, resource capacity constraints, deployment dependencies, and cost optimization.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _value_at(values, index, default):
    """Return ``values[index]``, or ``default`` when ``index`` is out of range."""
    return values[index] if 0 <= index < len(values) else default


def _positive_peak(values):
    """Return the largest entry of ``values``, or 1 if that would not be positive.

    The result is used as a normalisation divisor, so it must never be zero.
    An empty ``values`` also yields 1.
    """
    peak = max(values, default=1)
    return peak if peak > 0 else 1


def build_graph(mzn_file, json_data):
    """
    Build a weighted graph representation of the WordPress deployment problem.

    Args:
        mzn_file: Path to the .mzn file. Accepted for interface compatibility;
            it is not read by this function.
        json_data: Dict containing the parsed DZN data. The keys read are
            'WPInstances', 'NoComponents', 'HardwareREQ', 'VMOffers',
            'VMPrice', 'VMSpecs' and 'CompREQ'. 'VMSpecs' and 'CompREQ' are
            indexed as flat lists with a stride of 'HardwareREQ'; offsets
            0, 1 and 2 of each record are used as CPU, memory and storage.

    Returns:
        A networkx.Graph whose nodes carry ``type`` and ``weight`` attributes
        and whose edges carry a ``weight`` attribute:
        - type 0: one node per component (wordpress, mysql, dns_lb, http_lb,
          varnish, then 'component_<k>' for any further components)
        - type 2: one 'vm_type_<id>' node per VM offer found in 'VMSpecs'
        - type 1: constraint nodes (allocation, per-VM capacity, conflicts,
          exclusive load balancer, dependencies, instance counts)
        Components are linked to the constraints they take part in and to the
        VM types they could run on.

    Raises:
        ValueError: If 'HardwareREQ' is smaller than 1.

    Missing entries of 'CompREQ' / 'VMSpecs' are treated as 1 (or as 0 when
    summing total demand), so truncated data does not raise IndexError.
    Edges are only created for components that actually exist in the graph,
    so a 'NoComponents' below 5 never produces attribute-less nodes.
    """
    # Extract data
    wp_instances = json_data.get('WPInstances', 10)
    no_components = json_data.get('NoComponents', 5)
    hardware_req = json_data.get('HardwareREQ', 3)
    vm_offers = json_data.get('VMOffers', 500)
    vm_price = json_data.get('VMPrice', [])
    vm_specs = json_data.get('VMSpecs', [])
    comp_req = json_data.get('CompREQ', [])

    if hardware_req < 1:
        # A non-positive stride cannot address the flat requirement arrays.
        raise ValueError(f"HardwareREQ must be at least 1, got {hardware_req!r}")

    G = nx.Graph()

    # Component constants from MZN file (1-based)
    WORDPRESS = 1
    MYSQL = 2
    DNS_LOADBALANCER = 3
    HTTP_LOADBALANCER = 4
    VARNISH = 5

    component_names = ['wordpress', 'mysql', 'dns_lb', 'http_lb', 'varnish']

    def requirement(comp_idx, offset, default=1):
        """Requirement of one component for one resource, with a safe fallback."""
        return _value_at(comp_req, comp_idx * hardware_req + offset, default)

    comp_indices = range(no_components)

    # Normalisation divisors for the resource-intensity score
    max_cpu = _positive_peak([requirement(i, 0) for i in comp_indices])
    max_memory = _positive_peak([requirement(i, 1) for i in comp_indices])
    max_storage = _positive_peak([requirement(i, 2) for i in comp_indices])

    # ------------------------------------------------------------------
    # Component nodes (type 0), weighted by criticality and resource use
    # ------------------------------------------------------------------
    for i in comp_indices:
        comp_name = component_names[i] if i < len(component_names) else f'component_{i+1}'

        cpu_req = requirement(i, 0)
        memory_req = requirement(i, 1)
        storage_req = requirement(i, 2)

        resource_intensity = (cpu_req/max_cpu + memory_req/max_memory + storage_req/max_storage) / 3

        # Architectural importance of each component kind
        if i == WORDPRESS - 1:
            criticality = 0.9 * math.sqrt(wp_instances / 10.0)  # Scale with instance count
        elif i == MYSQL - 1:
            criticality = 0.8
        elif i == DNS_LOADBALANCER - 1 or i == HTTP_LOADBALANCER - 1:
            criticality = 0.7  # Important but exclusive deployment
        elif i == VARNISH - 1:
            criticality = 0.6
        else:
            criticality = 0.5

        weight = min(0.7 * criticality + 0.3 * resource_intensity, 1.0)
        G.add_node(comp_name, type=0, weight=weight)

    # Named components that really exist as nodes; only these may get edges,
    # because add_edge would otherwise create nodes without type/weight.
    active_components = component_names[:max(no_components, 0)]
    deployed = set(active_components)

    def connect(component, constraint, weight):
        """Add a weighted edge unless the component is absent from this instance."""
        if component in deployed:
            G.add_edge(component, constraint, weight=weight)

    # ------------------------------------------------------------------
    # VM resource nodes (type 2), weighted by capacity versus price
    # ------------------------------------------------------------------
    min_price = min(vm_price) if vm_price else 1
    max_price = max(vm_price) if vm_price else 1
    # Falls back to 1 so it can always be used as a divisor
    price_range = max_price - min_price if max_price > min_price else 1

    # (vm_type_id, cpu, memory, storage) for VM types whose record is complete
    complete_vms = []
    spec_limit = min(len(vm_specs), vm_offers * hardware_req)
    for base in range(0, spec_limit, hardware_req):
        vm_type_id = base // hardware_req

        cpu = vm_specs[base]
        memory = _value_at(vm_specs, base + 1, 1)
        storage = _value_at(vm_specs, base + 2, 1)
        price = _value_at(vm_price, vm_type_id, min_price)

        capacity_score = math.log(cpu + 1) + math.log(memory + 1) + math.log(storage + 1)
        cost_factor = (price - min_price) / price_range

        # High capacity at low cost gives a high weight
        scarcity = math.exp(-2.0 * cost_factor) * (capacity_score / 20.0)
        G.add_node(f'vm_type_{vm_type_id}', type=2, weight=min(scarcity, 1.0))

        if base + 2 < len(vm_specs):
            complete_vms.append((vm_type_id, cpu, memory, storage))

    # ------------------------------------------------------------------
    # Constraint nodes (type 1)
    # ------------------------------------------------------------------
    # 1. Basic allocation constraint
    G.add_node('basic_allocation', type=1, weight=0.9)

    # 2. Capacity constraint per VM type; total demand does not depend on the
    #    VM, so it is computed once (missing entries count as 0 here).
    total_cpu_demand = sum(requirement(i, 0, 0) for i in comp_indices)
    total_memory_demand = sum(requirement(i, 1, 0) for i in comp_indices)
    total_storage_demand = sum(requirement(i, 2, 0) for i in comp_indices)

    for vm_type_id, vm_cpu, vm_memory, vm_storage in complete_vms:
        max_ratio = max(
            total_cpu_demand / max(vm_cpu, 1),
            total_memory_demand / max(vm_memory, 1),
            total_storage_demand / max(vm_storage, 1),
        )
        tightness = min(max_ratio / 2.0, 1.0)
        G.add_node(f'capacity_{vm_type_id}', type=1, weight=tightness)

    # 3. Conflict constraints: (constraint node, member components, node weight)
    conflict_sets = [
        ('varnish_conflicts', ['dns_lb', 'http_lb', 'mysql'], 0.85),
        ('dns_lb_conflicts', ['wordpress', 'mysql', 'varnish'], 0.85),
        ('http_lb_conflicts', ['wordpress', 'mysql', 'varnish'], 0.85),
    ]
    for constraint_name, _members, node_weight in conflict_sets:
        G.add_node(constraint_name, type=1, weight=node_weight)

    # 4.-6. Exclusive deployment, dependency and instance count constraints
    fixed_constraints = [
        ('exclusive_lb_deployment', 0.8),
        ('wordpress_lb_dependency', 0.75),
        ('wordpress_mysql_dependency', 0.8),
        ('wordpress_instances', 0.7),
        ('varnish_instances', 0.6),
        ('mysql_instances', 0.7),
        ('dns_lb_limit', 0.9),  # Upper bound constraint
    ]
    for constraint_name, node_weight in fixed_constraints:
        G.add_node(constraint_name, type=1, weight=node_weight)

    # ------------------------------------------------------------------
    # Edges: components to constraints
    # ------------------------------------------------------------------
    # Basic allocation: the two load balancers are excluded
    for comp in component_names:
        if comp not in ('dns_lb', 'http_lb'):
            connect(comp, 'basic_allocation', 1.0)

    # Capacity: weight is the component's largest share of the VM's resources
    for comp_idx, comp in enumerate(active_components):
        cpu_req = requirement(comp_idx, 0)
        memory_req = requirement(comp_idx, 1)
        storage_req = requirement(comp_idx, 2)

        for vm_type_id, vm_cpu, vm_memory, vm_storage in complete_vms:
            consumption = max(
                cpu_req / max(vm_cpu, 1),
                memory_req / max(vm_memory, 1),
                storage_req / max(vm_storage, 1),
            )
            edge_weight = min(consumption * 2, 1.0)
            G.add_edge(comp, f'capacity_{vm_type_id}', weight=edge_weight)

    # Conflicts
    for constraint_name, members, _node_weight in conflict_sets:
        for comp in members:
            connect(comp, constraint_name, 0.9)

    # Exclusive deployment, dependencies and instance counts
    fixed_edges = [
        ('dns_lb', 'exclusive_lb_deployment', 1.0),
        ('http_lb', 'exclusive_lb_deployment', 1.0),
        ('wordpress', 'wordpress_lb_dependency', 0.8),
        ('dns_lb', 'wordpress_lb_dependency', 0.6),
        ('http_lb', 'wordpress_lb_dependency', 0.6),
        ('wordpress', 'wordpress_mysql_dependency', 0.9),
        ('mysql', 'wordpress_mysql_dependency', 0.9),
        ('wordpress', 'wordpress_instances', 1.0),
        ('varnish', 'varnish_instances', 1.0),
        ('mysql', 'mysql_instances', 1.0),
        ('dns_lb', 'dns_lb_limit', 1.0),
    ]
    for comp, constraint_name, edge_weight in fixed_edges:
        connect(comp, constraint_name, edge_weight)

    # ------------------------------------------------------------------
    # Edges: components to VM types (fit and cost efficiency)
    # ------------------------------------------------------------------
    for comp_idx, comp in enumerate(active_components):
        cpu_req = requirement(comp_idx, 0)
        memory_req = requirement(comp_idx, 1)

        for vm_type_id, vm_cpu, vm_memory, _vm_storage in complete_vms:
            if vm_type_id >= len(vm_price):
                continue  # No price known for this VM type

            # How many times over the VM covers the need, capped at 5x
            cpu_fit = min(vm_cpu / max(cpu_req, 1), 5.0)
            memory_fit = min(vm_memory / max(memory_req, 1), 5.0)

            # Lower price gives a higher weight
            cost_efficiency = (max_price - vm_price[vm_type_id]) / price_range

            suitability = math.exp(-1.0 / max(cpu_fit * memory_fit, 0.1))
            edge_weight = min(0.7 * suitability + 0.3 * cost_efficiency, 1.0)

            G.add_edge(comp, f'vm_type_{vm_type_id}', weight=edge_weight)

    return G


def main():
    """
    Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Only the JSON file is opened; the .mzn path is forwarded to build_graph
    and the .dzn path is accepted but not used. With any other argument
    count, a usage line is printed and the process exits with status 1.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:4]

    # JSON text is UTF-8; do not rely on the platform default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()