#!/usr/bin/env python3
"""
Graph converter for Group problem.
Converter created with subagent_prompt.md v_02

This problem is about group splitting for activities where people want to do 
activities (Cinema then Restaurant) in subgroups that match their preferences better.
The aim is to find the best activities and group combinations to recommend.

Key challenges: 
- Matching user preferences with activity ratings
- Balancing group sizes with minimum constraints
- Managing temporal constraints between sequential activities
- Optimizing both user satisfaction and public activity ratings
"""

import sys
import json
import math
import networkx as nx
from pathlib import Path


_MAX_ACTIVITIES_PER_PHASE = 20  # cap on activity nodes per phase (keeps the graph small)
_DIVERSITY_SAMPLE_SIZE = 10     # preferences sampled per phase for the user node weight
_TOP_PREFERENCE_EDGES = 10      # strongest preferences per phase that become edges


def _clamp(value, low, high):
    """Return *value* limited to the closed interval [low, high]."""
    return max(low, min(high, value))


def _flatten(values):
    """Return *values* as one flat list, expanding nested lists/tuples in order.

    A ``None`` (JSON null) or empty input yields an empty list; an already
    flat list is returned as an equal copy.
    """
    flat = []
    for item in values or []:
        if isinstance(item, (list, tuple)):
            flat.extend(_flatten(item))
        else:
            flat.append(item)
    return flat


def _row_slice(prefs, user, row_len, count):
    """Return up to *count* leading entries of *user*'s row in a flattened matrix.

    The row starts at ``user * row_len``. The result is shorter than *count*
    (possibly empty) when the row is shorter than *count* or the data ends
    early; it never reads from another user's row.
    """
    start = user * row_len
    return prefs[start:start + min(count, row_len)]


def _user_weight(prefs1, prefs2, user, row_len1, row_len2):
    """Return a user node weight derived from the spread of the user's ratings."""
    spread = 0.0
    # Both preference arrays must be present, otherwise the spread stays 0.
    if prefs1 and prefs2:
        sample = (_row_slice(prefs1, user, row_len1, _DIVERSITY_SAMPLE_SIZE)
                  + _row_slice(prefs2, user, row_len2, _DIVERSITY_SAMPLE_SIZE))
        if sample:
            mean = sum(sample) / len(sample)
            variance = sum((p - mean) ** 2 for p in sample) / len(sample)
            spread = math.sqrt(variance)
    # Larger standard deviation -> larger weight, capped at 0.9.
    return min(0.9, 0.3 + spread / 3.0)


def _activity_weight(prefs, activity, usersn, row_len):
    """Return an activity node weight from the mean rating across all users."""
    ratings = [
        prefs[u * row_len + activity]
        for u in range(usersn)
        if u * row_len + activity < len(prefs)
    ]
    if not ratings:
        return 0.5
    # Maps a mean of -2 to 0 and +2 to 1 before clamping.
    # Assumes ratings lie in [-2, 2] -- TODO confirm against the model.
    return _clamp((sum(ratings) / len(ratings) + 2) / 4.0, 0.1, 0.9)


def _conflict_weight(prefs1, prefs2, user, row_len1, row_len2):
    """Return the user -> preference-balance edge weight.

    Compares the user's first rating in each phase; falls back to 0.5 when
    either rating is unavailable.
    """
    first1 = _row_slice(prefs1, user, row_len1, 1)
    first2 = _row_slice(prefs2, user, row_len2, 1)
    if not (first1 and first2):
        return 0.5
    return min(0.9, 0.3 + abs(first1[0] - first2[0]) / 4.0)


def _distance_weight(distances):
    """Return the distance node weight: mean / max of the positive distances."""
    positive = [d for d in distances if d > 0]
    if not positive:
        return 0.3
    return min(0.9, (sum(positive) / len(positive)) / max(positive))


def _add_preference_edges(G, user, prefix, prefs, row_len, shown):
    """Connect *user* to the activities (of one phase) they rate most strongly.

    Activities are ranked by absolute rating; only the top
    ``_TOP_PREFERENCE_EDGES`` are considered and zero ratings are skipped.
    """
    ranked = sorted(
        enumerate(_row_slice(prefs, user, row_len, shown)),
        key=lambda pair: abs(pair[1]),
        reverse=True,
    )
    for rank, (activity, pref) in enumerate(ranked[:_TOP_PREFERENCE_EDGES]):
        if pref == 0:
            continue
        # Rating strength damped exponentially by the activity's rank.
        weight = _clamp(abs(pref) / 2.0 * math.exp(-rank / 3.0), 0.1, 0.9)
        G.add_edge(f'user_{user}', f'{prefix}_{activity}', weight=weight)


def build_graph(mzn_file, json_data: dict) -> "nx.Graph":
    """
    Build a graph representation of a group problem instance.

    Args:
        mzn_file: Path to the .mzn file. Not read; kept for interface
            compatibility.
        json_data: Dict containing the parsed DZN data. Keys read: 'usersn',
            'min_group_size', 'max_wait', 'eta', 'oid1', 'oid2',
            'preferences1', 'preferences2', 'distances'. The preference
            arrays are indexed row-major as user * n_activities + activity;
            nested (2-D) arrays are flattened before use.

    Returns:
        An undirected networkx graph. Every node has 'type' and 'weight'
        attributes and every edge has a 'weight' attribute. If 'usersn' is 0
        or either 'oid' array is empty, the graph holds a single 'dummy' node.

    Nodes:
    - user_<u> (type 0): weight grows with the spread of the user's ratings
    - activity1_<a> / activity2_<a> (type 0): at most 20 per phase, weighted
      by mean rating
    - group_size_constraint, temporal_constraint,
      preference_balance_constraint (type 1)
    - distance_constraint (type 1): only when 'distances' is non-empty

    Edges:
    - every user and activity node is linked to the constraint nodes
    - each user is linked to the activities they rate most strongly
    - the constraint nodes are linked to each other

    Because of the last two edge groups the graph is NOT bipartite between
    type 0 and type 1 nodes.
    """
    # Scalar parameters
    usersn = json_data.get('usersn', 0)
    min_group_size = json_data.get('min_group_size', 1)
    max_wait = json_data.get('max_wait', 6)
    eta = json_data.get('eta', 5)

    # Array parameters; flattening makes nested and flat encodings equivalent
    oid1 = json_data.get('oid1', [])
    oid2 = json_data.get('oid2', [])
    preferences1 = _flatten(json_data.get('preferences1', []))
    preferences2 = _flatten(json_data.get('preferences2', []))
    distances = _flatten(json_data.get('distances', []))

    # The number of activities per phase is taken from the oid arrays
    n_activities1 = len(oid1) if oid1 else 0
    n_activities2 = len(oid2) if oid2 else 0

    G = nx.Graph()

    # Degenerate instance: return a placeholder graph
    if usersn == 0 or n_activities1 == 0 or n_activities2 == 0:
        G.add_node('dummy', type=0, weight=0.5)
        return G

    # (node prefix, preferences, row length, number of activity nodes)
    phases = (
        ('activity1', preferences1, n_activities1,
         min(n_activities1, _MAX_ACTIVITIES_PER_PHASE)),
        ('activity2', preferences2, n_activities2,
         min(n_activities2, _MAX_ACTIVITIES_PER_PHASE)),
    )
    # Weight of the activity -> temporal_constraint edge, per phase
    temporal_edge_weight = {'activity1': 0.6, 'activity2': 0.7}

    # --- Nodes: users ---
    for u in range(usersn):
        weight = _user_weight(preferences1, preferences2, u,
                              n_activities1, n_activities2)
        G.add_node(f'user_{u}', type=0, weight=weight)

    # --- Nodes: activities of both phases ---
    for prefix, prefs, row_len, shown in phases:
        for a in range(shown):
            G.add_node(f'{prefix}_{a}', type=0,
                       weight=_activity_weight(prefs, a, usersn, row_len))

    # --- Nodes: constraints ---
    G.add_node('group_size_constraint', type=1,
               weight=min(0.9, 0.5 + (min_group_size - 1) / 10.0))

    # The weight grows with max_wait.
    # NOTE(review): the previous comment said "tighter timing = harder", which
    # would imply the opposite direction -- confirm the intended formula.
    G.add_node('temporal_constraint', type=1,
               weight=min(0.9, 0.4 + max_wait / 20.0))

    # The weight grows with the distance of eta from 5
    G.add_node('preference_balance_constraint', type=1,
               weight=_clamp(abs(eta - 5) / 10.0, 0.2, 0.9))

    has_distance = bool(distances)
    if has_distance:
        G.add_node('distance_constraint', type=1,
                   weight=_distance_weight(distances))

    # --- Edges: users -> constraints ---
    participation_weight = max(0.1, 1.0 / usersn)  # same for every user
    for u in range(usersn):
        user = f'user_{u}'
        G.add_edge(user, 'group_size_constraint', weight=participation_weight)
        G.add_edge(user, 'temporal_constraint', weight=0.8)
        G.add_edge(user, 'preference_balance_constraint',
                   weight=_conflict_weight(preferences1, preferences2, u,
                                           n_activities1, n_activities2))
        if has_distance:
            G.add_edge(user, 'distance_constraint', weight=0.6)

    # --- Edges: activities -> constraints ---
    for prefix, _prefs, _row_len, shown in phases:
        for a in range(shown):
            activity = f'{prefix}_{a}'
            G.add_edge(activity, 'temporal_constraint',
                       weight=temporal_edge_weight[prefix])
            G.add_edge(activity, 'preference_balance_constraint', weight=0.5)
            if has_distance:
                G.add_edge(activity, 'distance_constraint', weight=0.4)

    # --- Edges: users -> their most strongly rated activities ---
    for u in range(usersn):
        for prefix, prefs, row_len, shown in phases:
            _add_preference_edges(G, u, prefix, prefs, row_len, shown)

    # --- Edges: constraint <-> constraint ---
    G.add_edge('group_size_constraint', 'temporal_constraint', weight=0.7)
    G.add_edge('temporal_constraint', 'preference_balance_constraint', weight=0.6)
    if has_distance:
        G.add_edge('group_size_constraint', 'distance_constraint', weight=0.5)
        G.add_edge('preference_balance_constraint', 'distance_constraint', weight=0.4)

    return G


def main():
    """Command-line entry point: load the JSON instance and build its graph.

    Expects exactly three arguments: <mzn_file> <dzn_file> <json_file>.
    Prints a usage message and exits with status 1 otherwise. On success,
    prints the node and edge counts of the resulting graph.
    """
    if len(sys.argv) != 4:
        print("Usage: python converter.py <mzn_file> <dzn_file> <json_file>")
        sys.exit(1)

    # The .dzn path is required on the command line but is not read here;
    # only the JSON file is loaded.
    mzn_file, _dzn_file, json_file = sys.argv[1:]

    # Decode explicitly as UTF-8 rather than the platform's locale encoding
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    G = build_graph(mzn_file, json_data)

    print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
