#!/usr/bin/env python3
"""
Graph converter for efm_cstr problem.
# Converter created with subagent_prompt.md v_02

This problem is about finding Elementary Flux Modes (EFMs) in metabolic networks.
Key challenges: Large stoichiometric matrices, reversibility constraints, steady-state balance
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _reshape_flat(flat, rows, cols):
    """Reshape a row-major flat sequence into a ``rows`` x ``cols`` list of lists.

    Positions whose flat index lies beyond the end of ``flat`` are filled
    with 0, so a short array still yields a full-sized matrix.
    """
    matrix = []
    for r in range(rows):
        base = r * cols
        matrix.append(
            [flat[base + c] if base + c < len(flat) else 0 for c in range(cols)]
        )
    return matrix


def _nonzero_entries(matrix):
    """Return ``(row, col, abs(value))`` for every non-zero cell, in row-major order."""
    return [
        (r, c, abs(value))
        for r, row in enumerate(matrix)
        for c, value in enumerate(row)
        if abs(value) > 0
    ]


def build_graph(mzn_file, json_data):
    """
    Build the graph representation of an EFM constraint problem instance.

    Args:
        mzn_file: Path to the .mzn file. Accepted for interface
            compatibility; it is not read here.
        json_data: Dict with the instance data. Keys read:
            'n' (number of reactions), 'm' (number of metabolites),
            'k' (number of reversibility rows), 'S' (flat row-major
            m x n stoichiometric matrix) and 'Revs' (flat row-major
            k x n reversibility matrix). Missing keys default to 0 / [].

    Returns:
        An undirected ``networkx.Graph`` whose nodes carry ``type`` and
        ``weight`` attributes and whose edges carry ``weight``.

    Nodes (added in this order):
    - ``metabolite_{i}`` (type 0): log-scaled connectivity, 0.1 if the
      metabolite appears in no reaction.
    - ``reaction_{j}`` (type 2): scope relative to the largest scope, plus
      0.3 if any reversibility row has a non-zero entry for it (capped at 1).
    - ``stoich_constraint_{i}`` (type 1): connectivity relative to the
      largest connectivity.
    - ``rev_constraint_{r}`` (type 1): fraction of reactions the row touches.

    Edges (added in this order):
    - metabolite -- its own stoichiometric constraint (weight 1.0)
    - reaction -- stoichiometric constraint, for each non-zero S entry,
      weighted by |coefficient| / max |coefficient|
    - reaction -- reversibility constraint, for each non-zero Revs entry,
      weighted the same way against the largest |Revs| entry
    - metabolite -- reaction, only where |coefficient| exceeds half of the
      largest |coefficient|, weighted by exp(-2 * (1 - relative coefficient))
    """
    n = json_data.get('n', 0)  # number of reactions
    m = json_data.get('m', 0)  # number of metabolites
    k = json_data.get('k', 0)  # number of reversibility rows

    S_flat = json_data.get('S', [])
    Revs_flat = json_data.get('Revs', [])

    G = nx.Graph()

    S = _reshape_flat(S_flat, m, n)
    Revs = _reshape_flat(Revs_flat, k, n)

    # Scan each matrix once; every statistic and edge below derives from
    # these sparse entry lists instead of re-walking the dense matrices.
    stoich_entries = _nonzero_entries(S)
    rev_entries = _nonzero_entries(Revs)

    # Per-metabolite connectivity and per-reaction scope (non-zero counts).
    metabolite_connectivity = [0] * m
    reaction_scope = [0] * n
    for i, j, _ in stoich_entries:
        metabolite_connectivity[i] += 1
        reaction_scope[j] += 1

    # Per-row reach of each reversibility constraint, and the set of
    # reactions touched by at least one of them.
    affected_reactions = [0] * k
    reversible_reactions = set()
    for r, j, _ in rev_entries:
        affected_reactions[r] += 1
        reversible_reactions.add(j)

    max_connectivity = max(metabolite_connectivity) if metabolite_connectivity else 1
    max_scope = max(reaction_scope) if reaction_scope else 1

    # Metabolite nodes (type 0)
    for i, connectivity in enumerate(metabolite_connectivity):
        if connectivity > 0:
            # Log scaling compresses wide connectivity ranges.
            weight = min(1.0, math.log(connectivity + 1) / math.log(max_connectivity + 1))
        else:
            weight = 0.1  # Isolated metabolites
        G.add_node(f'metabolite_{i}', type=0, weight=weight)

    # Reaction nodes (type 2)
    for j, scope in enumerate(reaction_scope):
        # max_scope is 0 when no reaction touches any metabolite.
        scope_weight = scope / max_scope if max_scope > 0 else 0.5
        reversible_bonus = 0.3 if j in reversible_reactions else 0.0
        weight = min(1.0, scope_weight + reversible_bonus)
        G.add_node(f'reaction_{j}', type=2, weight=weight)

    # Stoichiometric (mass balance) constraint nodes (type 1)
    for i, connectivity in enumerate(metabolite_connectivity):
        weight = min(1.0, connectivity / max_connectivity) if max_connectivity > 0 else 0.5
        G.add_node(f'stoich_constraint_{i}', type=1, weight=weight)

    # Reversibility constraint nodes (type 1)
    for r, affected in enumerate(affected_reactions):
        weight = min(1.0, affected / n) if n > 0 else 0.5
        G.add_node(f'rev_constraint_{r}', type=1, weight=weight)

    # Edges: each metabolite to its own mass-balance constraint
    for i in range(m):
        G.add_edge(f'metabolite_{i}', f'stoich_constraint_{i}', weight=1.0)

    # Edges: reaction -- stoichiometric constraint, by relative coefficient.
    # Fall back to 1 so an all-zero matrix never divides by zero.
    max_coeff = max((mag for _, _, mag in stoich_entries), default=0) or 1
    for i, j, mag in stoich_entries:
        G.add_edge(f'reaction_{j}', f'stoich_constraint_{i}', weight=mag / max_coeff)

    # Edges: reaction -- reversibility constraint, by relative coefficient
    max_rev_coeff = max((mag for _, _, mag in rev_entries), default=0) or 1
    for r, j, mag in rev_entries:
        G.add_edge(f'reaction_{j}', f'rev_constraint_{r}', weight=mag / max_rev_coeff)

    # Edges: direct metabolite -- reaction links for strong couplings only
    threshold = max_coeff * 0.5
    for i, j, mag in stoich_entries:
        if mag > threshold:
            coupling_strength = mag / max_coeff
            # Exponential decay emphasizes the strongest couplings.
            weight = math.exp(-2.0 * (1.0 - coupling_strength))
            G.add_edge(f'metabolite_{i}', f'reaction_{j}', weight=weight)

    return G


def main():
    """Command-line entry point: load the JSON instance and report graph size.

    Expects exactly three positional arguments (model file, DZN file and
    JSON file). Only the JSON file is opened; the model path is handed to
    build_graph and the DZN path is not used. With any other argument
    count the usage line is printed and the process exits with status 1.
    """
    argv = sys.argv
    if len(argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    mzn_file, _dzn_file, json_file = argv[1:]

    # Parse the instance data
    with open(json_file, 'r') as handle:
        json_data = json.loads(handle.read())

    graph = build_graph(mzn_file, json_data)

    # Report the size of the graph that build_graph returned
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()