#!/usr/bin/env python3
"""
Launcher script to automate running all four experiments (exp1–exp4) over multiple train sizes and seeds.
Executes up to `max_concurrent` (currently 32) concurrent processes using a threading.Semaphore to limit concurrency.
Test set size is fixed inside main.py (to 100000 examples as configured).
"""
import itertools
import os
import subprocess
import sys
import threading
from concurrent.futures import ThreadPoolExecutor

# ---------------------------------------------------------------------------
# Sweep configuration: one main.py run is launched for every combination of
# (experiment, train size, seed) drawn from the three collections below.
# ---------------------------------------------------------------------------
experiments = [
    'exp1',
    'exp2',
    'exp3',
    'exp4',
]
train_sizes = [
    300,
    1000,
    3000,
    10000,
    30000,
    100000,
    300000,
]
seeds = range(0, 500)
output_dir = 'results'
max_concurrent = 32  # upper bound on simultaneously running child processes

# Shared concurrency state: the semaphore hands out `max_concurrent` slots and
# `threads` collects the started worker threads so they can be joined later.
threads = list()
sem = threading.Semaphore(value=max_concurrent)

# Create the results directory up front; an existing directory is not an error.
os.makedirs(output_dir, exist_ok=True)

# Worker function
def run_experiment(exp, size, seed):
    """Run ``main.py`` once for a single (experiment, train size, seed) combination.

    The call blocks until a slot of the module-level semaphore ``sem`` is free,
    so the number of child processes running at once is capped by that
    semaphore. The slot is released when the child exits, whether or not it
    succeeded.

    Args:
        exp: Experiment name, forwarded as ``--exp``.
        size: Training-set size, forwarded as ``--train_size``.
        seed: Random seed, forwarded as ``--seed``.

    Returns:
        None. A non-zero exit status of the child is reported on stdout
        instead of being raised, so one failed run does not stop the sweep.
    """
    cmd = [
        # Launch the child with the interpreter that is running this launcher;
        # a bare 'python' on PATH may be a different version/environment or
        # may not exist at all. Fall back to 'python' only if the interpreter
        # path is unavailable (sys.executable can be empty or None).
        sys.executable or 'python', 'main.py',
        f'--exp={exp}',
        f'--train_size={size}',
        f'--seed={seed}',
        f'--output_dir={output_dir}'
    ]
    # The context manager acquires a slot and guarantees its release.
    with sem:
        print(f"Running: {' '.join(cmd)}")
        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error: Experiment {exp}, train_size={size}, seed={seed} failed with exit code {e.returncode}")

# Launch every (experiment, train_size, seed) combination on a bounded pool.
# A pool of `max_concurrent` workers replaces one-thread-per-job: the sweep is
# len(experiments) * len(train_sizes) * len(seeds) jobs, and starting that many
# OS threads up front (almost all of them just parked on the semaphore) wastes
# memory and can run into operating-system thread limits.
with ThreadPoolExecutor(max_workers=max_concurrent) as pool:
    futures = {
        pool.submit(run_experiment, exp, size, seed): (exp, size, seed)
        for exp, size, seed in itertools.product(experiments, train_sizes, seeds)
    }

    # Wait for all to finish. run_experiment reports failed child processes
    # itself; any other exception it raises is captured by the future, so
    # surface it here instead of letting it disappear silently.
    for future, (exp, size, seed) in futures.items():
        error = future.exception()  # blocks until this job is done
        if error is not None:
            print(f"Error: Experiment {exp}, train_size={size}, seed={seed} raised {error!r}")

print("All experiments completed.")

# After the sweep, run main.py once more with --exp=stats on the same output
# directory.
# NOTE(review): --seed and --train_size are passed as fixed values here;
# presumably main.py requires them even in stats mode — confirm.
cmd = [
    # Use the interpreter running this launcher rather than whatever 'python'
    # resolves to on PATH; fall back to 'python' only if the interpreter path
    # is unavailable (sys.executable can be empty or None).
    sys.executable or 'python', 'main.py',
    '--exp=stats',
    f'--output_dir={output_dir}',
    '--seed=0',
    '--train_size=300000',
]
print(f"Running: {' '.join(cmd)}")
# check=True: a non-zero exit raises CalledProcessError and aborts the script
# before the completion message is printed.
subprocess.run(cmd, check=True)
print("Statistics computation completed.")