#!/usr/bin/env python3
"""
Reproduce Protocol (JSON) experiments from the paper.

Experiments:
- Table 3: Protocol constraint enforcement (NO vs JO)
- Table 4: Composition invariance (H1 vs H3 heterogeneity)
"""

import sys
import argparse
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from jo.protocol_operator import create_operator, ProtocolOperator


def pr(*args, **kwargs):
    """Print like the built-in ``print`` and flush the target stream at once.

    Accepts exactly the positional and keyword arguments of ``print``.
    The flush is applied to the stream that was actually written to: the
    ``file=`` argument when given, otherwise ``sys.stdout``. Output is always
    flushed, even if the caller passes ``flush=False``.
    """
    # Let print() do the flushing so it targets the same stream it wrote to,
    # instead of unconditionally flushing sys.stdout.
    kwargs["flush"] = True
    print(*args, **kwargs)


# Paper configuration
# Task count per seed; reported in the Table 3 progress message.
N_TASKS = 30
# Number of default seeds: the experiments fall back to seeds 0 .. N_SEEDS - 1
# when the caller does not supply any.
N_SEEDS = 5  # Total N = 30 * 5 = 150
# Not referenced by the code visible in this script.
# NOTE(review): presumably a per-task step budget for the task runner — confirm.
MAX_STEPS = 10


def run_table3_protocol(seeds=None):
    """Table 3: Protocol constraint enforcement.

    Walks the "NO" and "JO_dynamic" conditions; for every seed a fresh
    operator is built with ``create_operator`` and a progress line is logged.
    Task execution itself is still a placeholder (see the sketch at the end
    of the inner loop).

    Args:
        seeds: Iterable of seed values. When omitted or empty, the seeds
            ``0 .. N_SEEDS - 1`` are used.
    """
    rule = "=" * 70
    pr(rule)
    pr("TABLE 3: Protocol (JSON) Constraint Enforcement")
    pr(rule)

    if not seeds:
        seeds = list(range(N_SEEDS))

    # (condition label, method name understood by create_operator)
    condition_methods = (
        ("NO", "NO"),
        ("JO_dynamic", "JO_protocol"),
    )

    for label, method_name in condition_methods:
        pr(f"\n--- Condition: {label} ---")

        for seed in seeds:
            # A new operator per seed, exactly as many as (conditions x seeds).
            op = create_operator(method=method_name)
            op_kind = type(op).__name__

            pr(f"  Seed {seed}: Running {N_TASKS} tasks with {op_kind}...")

            # Placeholder: the real experiment would run browsergym tasks here.
            # results = run_protocol_tasks(op, n_tasks=N_TASKS, seed=seed)
            # pr(f"  Seed {seed}: Success={results['success_rate']:.1%}, VR={results['violation_rate']:.1%}")


def run_table4_heterogeneous(seeds=None, *, n_tasks=20):
    """Table 4: Composition invariance (model heterogeneity).

    Iterates over two role-to-model compositions (H1 homogeneous, H3
    heterogeneous) and, within each, over the "NO" and "JO_dynamic"
    conditions. For every seed an operator is built with ``create_operator``
    and a progress line is logged; running the tasks is still a placeholder.

    Args:
        seeds: Iterable of seed values. When omitted or empty, the seeds
            ``0 .. N_SEEDS - 1`` are used.
        n_tasks: Task count reported per seed (and intended for the task
            runner once it is wired in). Defaults to 20, the value that was
            previously hard-coded in the progress message.
    """
    rule = "=" * 70
    pr(rule)
    pr("TABLE 4: Protocol Heterogeneous Composition (H1 vs H3)")
    pr(rule)

    seeds = seeds or list(range(N_SEEDS))

    # H1: Homogeneous (GPT-4o-mini only)
    h1_models = {
        "reader": "gpt-4o-mini",
        "extractor": "gpt-4o-mini",
        "writer": "gpt-4o-mini",
    }

    # H3: Heterogeneous (GPT + Claude + Llama)
    h3_models = {
        "reader": "gpt-4o-mini",
        "extractor": "claude-3-5-haiku-20241022",
        "writer": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    }

    compositions = [
        ("H1_homogeneous", h1_models),
        ("H3_heterogeneous", h3_models),
    ]

    for composition_name, models in compositions:
        for condition in ["NO", "JO_dynamic"]:
            pr(f"\n--- {composition_name}, {condition} ---")
            method = "JO_protocol" if condition == "JO_dynamic" else "NO"

            for seed in seeds:
                # A fresh operator is created for every seed.
                operator = create_operator(method=method)

                # The message lists the role names (the dict keys).
                pr(f"  Seed {seed}: Running {n_tasks} tasks with models={list(models)}...")

                # Placeholder: the real experiment would run heterogeneous
                # multi-agent tasks here.
                # results = run_heterogeneous_tasks(operator, models, n_tasks=n_tasks, seed=seed)
                # pr(f"  Seed {seed}: VR={results['violation_rate']:.1%}")


def parse_seeds(spec):
    """Convert a ``--seeds`` value such as ``"0,1,2"`` into a list of ints.

    Args:
        spec: Comma-separated integers; whitespace around a number is allowed.

    Returns:
        The list of seeds, or ``None`` for an empty string so that the
        experiments fall back to their default seeds.

    Raises:
        argparse.ArgumentTypeError: If any comma-separated item is not an
            integer (including an empty item such as a trailing comma).
            argparse turns this into a usage error instead of a traceback.
    """
    if not spec:
        return None
    try:
        return [int(token) for token in spec.split(",")]
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected comma-separated integers, got {spec!r}"
        ) from None


def main(argv=None):
    """Parse command-line options and run the selected experiment(s).

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Run Protocol experiments")
    parser.add_argument("--experiment", type=str, default="all",
                        choices=["all", "table3", "table4"],
                        help="Which experiment to run")
    parser.add_argument("--seeds", type=parse_seeds, default=None,
                        help="Comma-separated seeds")
    args = parser.parse_args(argv)

    # "all" selects both tables, so the two checks are independent.
    if args.experiment in ("all", "table3"):
        run_table3_protocol(args.seeds)
    if args.experiment in ("all", "table4"):
        run_table4_heterogeneous(args.seeds)


if __name__ == "__main__":
    main()
