#!/usr/bin/env python3
"""
Graph converter for SPOT5 satellite scheduling problem.
Created using subagent_prompt.md version: v_02

This problem is about scheduling photographs from the SPOT5 earth observation satellite.
Key challenges: conflicting observation times, limited instrument capacity, weighted importance
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a graph representation of a SPOT5 satellite scheduling instance.

    Nodes:
    - ``photo_{i}`` (type 0): one per variable, weighted by its cost min-max
      normalized to [0, 1]; 0.5 when ``costs`` has no entry for the variable.
    - ``binary_constraint_{j}`` / ``ternary_constraint_{j}`` (type 1):
      weighted by tightness ``1 - allowed_tuples / domain_size ** arity``,
      floored at 0.1. A constraint with no tuple count has tightness 0.

    Edges:
    - photo -- binary constraint: mean of the constraint tightness and the
      importance of both photos in its scope (added only when both scope
      indices are valid).
    - photo -- ternary constraint: mean of the constraint tightness and that
      photo's importance.
    - photo -- photo "conflict" edges between the (at most 10) photos that
      take part in more than 1.5x the average number of binary constraints
      and whose average binary tightness exceeds 0.5. Because of these edges
      the result is not strictly bipartite.

    All edge weights are capped at 1.0.

    Args:
        mzn_file: Path to the .mzn file (kept for reference; not read here).
        json_data: Dict of parsed DZN data. Keys read (all optional):
            num_variables, costs, min_domain, max_domain,
            num_constraints2, scopes2x, scopes2y, num_tuples2,
            num_constraints3, scopes3x, scopes3y, scopes3z, num_tuples3.
            Scope entries are treated as 1-based variable indices.

    Returns:
        The populated undirected ``networkx.Graph``.
    """
    # Thresholds for the photo-photo conflict edges.
    involvement_factor = 1.5   # "many constraints" = above 1.5x the average
    tightness_threshold = 0.5  # "tight" = average tightness above this
    max_conflict_photos = 10   # cap to keep the conflict clique small

    # Extract basic problem data
    num_variables = json_data.get('num_variables', 0)
    costs = json_data.get('costs', [])
    min_domain = json_data.get('min_domain', 0)
    max_domain = json_data.get('max_domain', 0)

    # Binary constraint data
    num_constraints2 = json_data.get('num_constraints2', 0)
    scopes2x = json_data.get('scopes2x', [])
    scopes2y = json_data.get('scopes2y', [])
    num_tuples2 = json_data.get('num_tuples2', [])

    # Ternary constraint data
    num_constraints3 = json_data.get('num_constraints3', 0)
    scopes3x = json_data.get('scopes3x', [])
    scopes3y = json_data.get('scopes3y', [])
    scopes3z = json_data.get('scopes3z', [])
    num_tuples3 = json_data.get('num_tuples3', [])

    # Floor the span at 1 so an empty or inverted domain cannot produce a
    # zero (division by zero) or negative tuple-space size.
    domain_span = max(max_domain - min_domain + 1, 1)

    # Min-max normalization of costs; a degenerate range falls back to 1 so
    # that identical costs all map to 0 instead of dividing by zero.
    max_cost = max(costs) if costs else 1
    min_cost = min(costs) if costs else 0
    cost_range = max_cost - min_cost if max_cost > min_cost else 1

    # Importance of every photo, computed once and reused for node weights,
    # constraint edges and conflict edges so all three stay consistent.
    importance = [
        (costs[i] - min_cost) / cost_range if i < len(costs) else 0.5
        for i in range(num_variables)
    ]

    def is_photo(idx):
        """Return True when idx is a valid 0-based photo index."""
        return 0 <= idx < num_variables

    def tightness_of(allowed_tuples, arity):
        """Fraction of the arity-dimensional tuple space that is forbidden."""
        return 1.0 - min(allowed_tuples / (domain_span ** arity), 1.0)

    G = nx.Graph()

    # Photograph variable nodes (type 0)
    for i in range(num_variables):
        G.add_node(f'photo_{i}', type=0, weight=importance[i])

    # Per-photo statistics over binary constraints, used for conflict edges.
    photo_constraint_counts = {}
    photo_tightness_sums = {}

    # Binary constraint nodes (type 1) and their edges
    for j in range(num_constraints2):
        has_tuple_count = j < len(num_tuples2)
        # A missing tuple count is treated as "everything allowed".
        tightness = tightness_of(num_tuples2[j], 2) if has_tuple_count else 0.0

        constraint_node = f'binary_constraint_{j}'
        G.add_node(constraint_node, type=1, weight=max(tightness, 0.1))

        if j >= len(scopes2x) or j >= len(scopes2y):
            continue

        var1_idx = scopes2x[j] - 1  # Convert to 0-based indexing
        var2_idx = scopes2y[j] - 1

        if is_photo(var1_idx) and is_photo(var2_idx):
            # Use the importance of the two photos actually in scope.
            edge_weight = (
                tightness + importance[var1_idx] + importance[var2_idx]
            ) / 3
            edge_weight = min(edge_weight, 1.0)
            G.add_edge(f'photo_{var1_idx}', constraint_node, weight=edge_weight)
            G.add_edge(f'photo_{var2_idx}', constraint_node, weight=edge_weight)

        # Only constraints with a known tuple count feed the statistics;
        # each in-range endpoint is counted independently of the other.
        if has_tuple_count:
            for var_idx in (var1_idx, var2_idx):
                if is_photo(var_idx):
                    photo_constraint_counts[var_idx] = (
                        photo_constraint_counts.get(var_idx, 0) + 1
                    )
                    photo_tightness_sums[var_idx] = (
                        photo_tightness_sums.get(var_idx, 0) + tightness
                    )

    # Ternary constraint nodes (type 1) and their edges
    for j in range(num_constraints3):
        if j < len(num_tuples3):
            tightness = tightness_of(num_tuples3[j], 3)
        else:
            tightness = 0.0

        constraint_node = f'ternary_constraint_{j}'
        G.add_node(constraint_node, type=1, weight=max(tightness, 0.1))

        if j >= len(scopes3x) or j >= len(scopes3y) or j >= len(scopes3z):
            continue

        for var_idx in (scopes3x[j] - 1, scopes3y[j] - 1, scopes3z[j] - 1):
            if is_photo(var_idx):
                edge_weight = (tightness + importance[var_idx]) / 2
                G.add_edge(f'photo_{var_idx}', constraint_node,
                           weight=min(edge_weight, 1.0))

    # Select photos that appear in many, and on average tight, binary constraints.
    if photo_constraint_counts:
        avg_constraints = (
            sum(photo_constraint_counts.values()) / len(photo_constraint_counts)
        )
    else:
        avg_constraints = 0

    highly_constrained = []
    for var_idx, count in photo_constraint_counts.items():
        if count > avg_constraints * involvement_factor:
            avg_tightness = photo_tightness_sums.get(var_idx, 0) / count
            if avg_tightness > tightness_threshold:
                highly_constrained.append((var_idx, avg_tightness))

    # Tightest first; the sort is stable, so ties keep first-seen order.
    highly_constrained.sort(key=lambda entry: entry[1], reverse=True)
    top_photos = highly_constrained[:max_conflict_photos]

    # Pairwise conflict edges among the selected photos.
    for pos, (var1_idx, tightness1) in enumerate(top_photos):
        for var2_idx, tightness2 in top_photos[pos + 1:]:
            conflict_weight = (
                tightness1 + tightness2
                + importance[var1_idx] + importance[var2_idx]
            ) / 4
            G.add_edge(f'photo_{var1_idx}', f'photo_{var2_idx}',
                       weight=min(conflict_weight, 1.0))

    return G


def main():
    """
    Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Prints a usage line and exits with status 1 otherwise. Only the JSON
    file is read; the .mzn path is forwarded to build_graph and the .dzn
    path is accepted but not used here.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Build graph
    G = build_graph(mzn_file, json_data)

    # Graph is returned by build_graph for direct feature extraction
    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()