#!/usr/bin/env python3
"""
Graph converter for Maximum DAG problem.
Created using subagent_prompt.md version: v_02

This problem is about finding the largest subset of edges from a directed graph 
that forms a DAG (Directed Acyclic Graph). 
Key challenges: Avoiding cycles while maximizing edge count, handling complex 
dependency structures, managing variable-depth graph hierarchies.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def build_graph(mzn_file, json_data):
    """
    Build a weighted graph representation of a Maximum DAG problem instance.

    Args:
        mzn_file: Path to .mzn file (kept for reference; not read here).
        json_data: Dict containing parsed DZN data. The keys read are 'nbV'
            (vertex count), 'nbE' (edge count) and the parallel lists 'tails'
            and 'heads' holding 1-indexed endpoints (edge i runs from
            tails[i] to heads[i]).

    Returns:
        An undirected networkx.Graph whose nodes carry 'type' and 'weight'
        attributes and whose edges carry a 'weight' attribute:
        - Type 0 nodes 'vertex_<v>': vertices of the original directed graph.
        - Type 1 nodes 'edge_<i>': edge choices (the decision variables).
        - Type 1 nodes 'distance_constraint_<v>': acyclicity constraints,
          one per vertex that has at least one incoming edge.
        - Vertex/edge links for participation, vertex/constraint links, and
          edge/edge conflict links for 2-cycles and 3-cycles. Because of the
          conflict links the result is not strictly bipartite.
        When nbV or nbE is missing, zero or negative, the graph holds a
        single 'dummy' node.

    Edges whose endpoints fall outside 1..nbV still get an 'edge_<i>' node
    but take part in neither participation links nor conflict links.
    """
    nb_vertices = json_data.get('nbV', 0)
    nb_edges = json_data.get('nbE', 0)
    tails = json_data.get('tails', [])
    heads = json_data.get('heads', [])

    G = nx.Graph()

    if nb_vertices <= 0 or nb_edges <= 0:
        # Empty (or nonsensical negative-sized) instance: placeholder node only
        G.add_node('dummy', type=0, weight=0.5)
        return G

    def in_range(v):
        # Vertices are 1-indexed; slot 0 of the degree tables is unused
        return 1 <= v <= nb_vertices

    # Degrees are tallied over every (tail, head) pair supplied; zip() stops
    # at the shorter list, so mismatched lengths cannot raise IndexError.
    in_degree = [0] * (nb_vertices + 1)
    out_degree = [0] * (nb_vertices + 1)
    for tail, head in zip(tails, heads):
        if in_range(tail):
            out_degree[tail] += 1
        if in_range(head):
            in_degree[head] += 1

    # Stays 0 when no supplied endpoint is in range; guarded below
    max_degree = max(max(in_degree), max(out_degree))

    def total_degree(v):
        # Out-of-range endpoints contribute no degree
        return in_degree[v] + out_degree[v] if in_range(v) else 0

    # Only the first nb_edges pairs become edge-choice nodes
    edges = list(zip(tails, heads))[:nb_edges]

    # Type 0 nodes: vertices, weighted by normalised total degree with
    # exponential scaling so central vertices stand out
    for v in range(1, nb_vertices + 1):
        centrality = total_degree(v) / (2 * max_degree) if max_degree > 0 else 0.5
        weight = min(1.0, 0.3 + 0.7 * math.exp(2.0 * centrality) / math.exp(2.0))
        G.add_node(f'vertex_{v}', type=0, weight=weight)

    # Type 1 nodes: edge choices. Higher cycle risk (self-loop, or endpoints
    # of high degree) maps to a lower weight through logarithmic scaling.
    for i, (tail, head) in enumerate(edges):
        if tail == head:
            cycle_risk = 1.0
        elif max_degree > 0:
            avg_degree = (total_degree(tail) + total_degree(head)) / 2
            cycle_risk = min(1.0, avg_degree / (2 * max_degree))
        else:
            cycle_risk = 0.5
        weight = 0.2 + 0.8 * (1.0 - math.log(1 + cycle_risk) / math.log(2))
        G.add_node(f'edge_{i}', type=1, weight=weight)

    # Incoming-edge counts over the whole heads list, computed once instead
    # of rescanning heads for every vertex
    incoming = [0] * (nb_vertices + 1)
    for head in heads:
        if in_range(head):
            incoming[head] += 1

    # Type 1 nodes: distance constraints for vertices with incoming edges.
    # Tightness is the incoming count relative to the average edges per vertex.
    edges_per_vertex = max(1, nb_edges // nb_vertices)
    tightness_by_vertex = {}
    for v in range(1, nb_vertices + 1):
        if incoming[v] > 0:
            tightness = min(1.0, incoming[v] / edges_per_vertex)
            tightness_by_vertex[v] = tightness
            G.add_node(f'distance_constraint_{v}', type=1,
                       weight=0.4 + 0.6 * math.sqrt(tightness))

    # Participation links: each in-range edge is tied to its tail and head
    is_valid = [in_range(tail) and in_range(head) for tail, head in edges]
    for i, (tail, head) in enumerate(edges):
        if not is_valid[i]:
            continue

        # Tail is the source: weight by its share of the maximum out-degree
        tail_share = out_degree[tail] / max_degree if max_degree > 0 else 0.5
        G.add_edge(f'vertex_{tail}', f'edge_{i}', weight=0.3 + 0.7 * tail_share)

        # Head is the target: weight by its share of the maximum in-degree.
        # For a self-loop this overwrites the link written just above.
        head_share = in_degree[head] / max_degree if max_degree > 0 else 0.5
        G.add_edge(f'vertex_{head}', f'edge_{i}', weight=0.3 + 0.7 * head_share)

    # Link vertices to their distance constraints
    for v, tightness in tightness_by_vertex.items():
        G.add_edge(f'vertex_{v}', f'distance_constraint_{v}',
                   weight=0.5 + 0.5 * tightness)

    # Index in-range edges by (tail, head) so the edge that would close a
    # triangle is found by lookup rather than by scanning every edge
    indices_by_pair = {}
    for k, pair in enumerate(edges):
        if is_valid[k]:
            indices_by_pair.setdefault(pair, []).append(k)

    def closes_triangle(a, b):
        # True when edge a feeds edge b (head of a == tail of b) and some
        # third edge runs from the head of b back to the tail of a
        tail_a, head_a = edges[a]
        tail_b, head_b = edges[b]
        if head_a != tail_b:
            return False
        candidates = indices_by_pair.get((head_b, tail_a), ())
        return any(k != a and k != b for k in candidates)

    triangle_risk = 1.0

    # Conflict links between edge choices that lie on a common short cycle.
    # Out-of-range edges are skipped: their endpoints cannot index the degree
    # tables, and max_degree >= 1 is guaranteed once any edge is in range.
    for i in range(len(edges)):
        if not is_valid[i]:
            continue
        tail_i, head_i = edges[i]
        for j in range(i + 1, len(edges)):
            if not is_valid[j]:
                continue
            tail_j, head_j = edges[j]

            if tail_i == head_j and head_i == tail_j:
                # Direct conflict: the two edges form a 2-cycle. Total degree
                # can reach 2 * max_degree, so clamp to keep the weight <= 1.
                shared_degree = min(total_degree(tail_i), total_degree(tail_j))
                conflict_weight = min(1.0, 0.8 + 0.2 * shared_degree / max_degree)
                G.add_edge(f'edge_{i}', f'edge_{j}', weight=conflict_weight)
            elif closes_triangle(i, j) or closes_triangle(j, i):
                # Both orientations are checked so the result does not depend
                # on the order in which the triangle's edges are listed
                conflict_weight = 0.6 + 0.3 * triangle_risk
                G.add_edge(f'edge_{i}', f'edge_{j}', weight=conflict_weight)

    return G


def main():
    """
    Command-line entry point.

    Expects exactly three arguments: the .mzn file, the .dzn file and the
    JSON file. Only the JSON file is opened; its parsed content is handed to
    build_graph together with the .mzn path, and a one-line summary of the
    resulting graph is printed. With any other argument count a usage
    message is printed and the process exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is accepted for interface symmetry but not used here
    mzn_file, dzn_file, json_file = cli_args

    # Parse the instance data
    with open(json_file, 'r') as handle:
        json_data = json.load(handle)

    # build_graph returns the graph; only its size is reported here
    G = build_graph(mzn_file, json_data)
    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the converter only when this file is executed as a script, so that
# importing the module (e.g. to call build_graph directly) has no side effects.
if __name__ == "__main__":
    main()