#!/usr/bin/env python3
"""
Graph converter for handball problem.
Converter created with subagent_prompt.md v_02

This problem is about scheduling a 14-team handball league with complex constraints.
Key challenges: managing division assignments, venue unavailabilities, derby constraints, 
complementary schedules, and break minimization across multiple tournament rounds.
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


def _team_unavailability_rows(nohome, num_teams, periods_per_team):
    """Slice the flattened nohome matrix into one row per team.

    Returns a dict mapping each 1-based team number to the list of that
    team's per-period flags, or to None when nohome is empty or too short
    to contain the team's complete row.
    """
    rows = {}
    for team in range(1, num_teams + 1):
        start_idx = (team - 1) * periods_per_team
        end_idx = start_idx + periods_per_team
        if nohome and end_idx <= len(nohome):
            rows[team] = nohome[start_idx:end_idx]
        else:
            rows[team] = None
    return rows


def build_graph(mzn_file, json_data):
    """
    Build graph representation of the handball scheduling problem.

    Args:
        mzn_file: Path to .mzn file (for reference only; not read here)
        json_data: Dict containing parsed DZN data. The keys read are
            'derby_period', 'nohome' (flattened team-major 14x33 matrix of
            0/1 flags), 'group1' and 'group2'. Missing or null keys are
            treated as empty lists.

    Returns:
        An undirected networkx.Graph whose nodes carry 'type' and 'weight'
        attributes and whose edges carry a 'weight' attribute.

    Node types:
    - type 0: teams ('team_<n>'), weighted by their share of unavailable periods
    - type 1: constraints (divisions, derbies, per-team unavailability,
      balance, AVR, break minimization, complementary schedules)
    - type 2: periods ('period_<n>')

    All weights are heuristic constants chosen by this converter.
    """
    # `or []` also covers keys that are present in the JSON but set to null.
    derby_period = json_data.get('derby_period') or []
    nohome = json_data.get('nohome') or []  # Flattened 14x33 matrix
    group1 = json_data.get('group1') or []
    group2 = json_data.get('group2') or []

    # Constants from the problem
    num_teams = 14
    num_periods = 33  # Full season periods
    periods_per_team = 33
    teams = range(1, num_teams + 1)

    # Slice nohome once; every section below reuses these rows.
    unavailability = _team_unavailability_rows(nohome, num_teams, periods_per_team)
    # Sum of flags per team (only for teams that have a complete row).
    unavailable_totals = {
        team: sum(row) for team, row in unavailability.items() if row is not None
    }

    G = nx.Graph()

    # Team nodes (type 0) - weight grows with the share of unavailable periods
    for team in teams:
        unavailability_ratio = unavailable_totals.get(team, 0) / periods_per_team
        team_weight = min(0.3 + unavailability_ratio * 0.7, 1.0)
        G.add_node(f'team_{team}', type=0, weight=team_weight)

    # Period nodes (type 2) - resource nodes representing time slots
    for period in range(1, num_periods + 1):
        if period <= 7 or period >= 28:
            # First/last rounds are treated as more critical
            period_weight = 0.8
        elif period in (14, 20, 26):
            # Periods the converter singles out as mid-season breaks
            period_weight = 0.9
        else:
            period_weight = 0.5
        G.add_node(f'period_{period}', type=2, weight=period_weight)

    # Division constraint nodes (type 1) - only when both groups are given
    if group1 and group2:
        # Weight rises with the deviation from the expected 7 teams per division
        north_tightness = abs(len(group1) - 7) / 7.0
        G.add_node('north_division', type=1, weight=0.7 + north_tightness * 0.3)

        south_tightness = abs(len(group2) - 7) / 7.0
        G.add_node('south_division', type=1, weight=0.7 + south_tightness * 0.3)

        # Connect teams to their division; out-of-range team ids are skipped
        for team in group1:
            if 1 <= team <= num_teams:
                G.add_edge(f'team_{team}', 'north_division', weight=0.8)

        for team in group2:
            if 1 <= team <= num_teams:
                G.add_edge(f'team_{team}', 'south_division', weight=0.8)

    # Derby constraint nodes (type 1), each tied to the period it names.
    # Entries outside 1..num_periods produce no node at all.
    for i, period in enumerate(derby_period):
        if 1 <= period <= num_periods:
            G.add_node(f'derby_{i+1}', type=1, weight=0.9)
            G.add_edge(f'derby_{i+1}', f'period_{period}', weight=0.9)

    # Venue unavailability constraint nodes (type 1) - one per team that has
    # at least one period flagged exactly 1
    for team in teams:
        row = unavailability[team]
        if row is None:
            continue
        flagged = sum(1 for val in row if val == 1)
        if not flagged:
            continue

        constraint_name = f'unavail_team_{team}'
        # More restricted periods = higher weight
        constraint_weight = 0.4 + (flagged / periods_per_team) * 0.6
        G.add_node(constraint_name, type=1, weight=constraint_weight)
        G.add_edge(f'team_{team}', constraint_name, weight=constraint_weight)

    # Team-period edges: 0.5 for an available period, 0.95 (conflict) for an
    # unavailable one. Teams without a row are connected to every period at 0.5.
    for team in teams:
        row = unavailability[team]
        if row is None:
            for period in range(1, num_periods + 1):
                G.add_edge(f'team_{team}', f'period_{period}', weight=0.5)
        else:
            for period, flag in enumerate(row, start=1):
                edge_weight = 0.5 if flag == 0 else 0.95
                G.add_edge(f'team_{team}', f'period_{period}', weight=edge_weight)

    # Global structural constraints, each connected to every team.

    # Balance constraint (home/away balance within 1)
    G.add_node('balance_constraint', type=1, weight=0.8)
    for team in teams:
        G.add_edge(f'team_{team}', 'balance_constraint', weight=0.6)

    # AVR constraint (alternate venue rule - consecutive meetings at different venues)
    G.add_node('avr_constraint', type=1, weight=0.9)
    for team in teams:
        G.add_edge(f'team_{team}', 'avr_constraint', weight=0.7)

    # Break minimization constraint (minimize home-home or away-away pairs)
    G.add_node('break_minimization', type=1, weight=0.85)
    for team in teams:
        if team in unavailable_totals:
            # Heuristic: more unavailabilities -> breaks assumed more likely
            break_likelihood = unavailable_totals[team] / periods_per_team
            edge_weight = 0.5 + break_likelihood * 0.4
        else:
            edge_weight = 0.6
        G.add_edge(f'team_{team}', 'break_minimization', weight=edge_weight)

    # Complementary schedule constraint (3 pairs per division)
    G.add_node('complementary_schedules', type=1, weight=0.75)
    for team in teams:
        G.add_edge(f'team_{team}', 'complementary_schedules', weight=0.55)

    return G


def main():
    """Command-line entry point: load the instance JSON and build its graph.

    Expects exactly three positional arguments
    (<mzn_file> <dzn_file> <json_file>). With any other argument count a
    usage line is printed and the process exits with status 1. Only the
    JSON file is opened here; the .mzn path is passed on to build_graph and
    the .dzn path is not used.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    model_path, _data_path, instance_path = cli_args

    # Parse the instance data
    with open(instance_path, 'r') as handle:
        instance = json.load(handle)

    graph = build_graph(model_path, instance)

    # Report the size of the resulting graph
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Graph built: {node_count} nodes, {edge_count} edges")


if __name__ == "__main__":
    main()