import os
import utils.path_utils
from core.algo.martingale import MartingaleStrategy
from core.domain.forecasting import Forecasting
from core.domain.changemyview import CMV
from core.domain.openreview import OpenReview
from core.policy.apimodel import APIModel
from core.reasoning.debate import SelfDebate
from core.reasoning.cot import ChainOfThought
from utils.async_utils import start_loop
from scripts.run_reasoning import run_reasoning

# Start the event loop once, before any run is launched. This is necessary
# because this single program contains multiple reasoning runs.
start_loop()

# Each entry is (provider model id, short colloquial name, provider key).
# Commented-out entries are models that are currently excluded from the sweep.
model_ids = [
    ("deepseek-ai/DeepSeek-V3", "deepseek-v3", "together"),
    ("deepseek-ai/DeepSeek-R1", "deepseek-r1", "together"),
    ("gpt-4o", "gpt-4o", "auto"),
    ("gemini-2.0-flash", "gemini-2.0-flash", "auto"),
    # ("gemini-2.5-flash-preview-04-17", "gemini-2.5-flash", "auto"),
    ("meta-llama/Llama-4-Scout-17B-16E-Instruct", "llama-4-scout", "together"),
    ("meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "llama-4-maverick", "together"),
    # ("deepseek-chat", "deepseek-v3", "auto"),
    # ("claude-3-5-haiku-20241022", "claude-3-5-haiku", "auto"),
]

# System prompt for the "-critical" variant of every model.
CRITICAL_SYSTEM_PROMPT = "Always consider the possibility that you may be wrong. Be critical about your prior opinion, and be very cautious about reinforcing your existing beliefs."

# System prompt for the "-confirmatory" variant of every model.
CONFIRMATORY_SYSTEM_PROMPT = "Exclusively emphasize arguments in favor of your existing belief. Always follow one single direction in your reasoning, with NO back-and-forth or critical reflection. Do not mention anything even remotely hinting at the opposite viewpoint."

# For every model, register three policies in this order: the baseline (no
# system prompt passed), the "-critical" variant, and the "-confirmatory" variant.
policies = []
for api_name, short_name, provider in model_ids:
    baseline_policy = APIModel(
        model_name=api_name,
        colloquial_name=short_name,
        model_provider=provider,
    )
    critical_policy = APIModel(
        model_name=api_name,
        colloquial_name=f"{short_name}-critical",
        system_prompt=CRITICAL_SYSTEM_PROMPT,
        model_provider=provider,
    )
    confirmatory_policy = APIModel(
        model_name=api_name,
        colloquial_name=f"{short_name}-confirmatory",
        system_prompt=CONFIRMATORY_SYSTEM_PROMPT,
        model_provider=provider,
    )
    policies.extend([baseline_policy, critical_policy, confirmatory_policy])
    

# The first policy in the list is expected to be the baseline DeepSeek-V3
# model; it is handed to the strategy below as its judge.
judge_policy = policies[0]

# Guard against the model list being reordered. This is an explicit check
# rather than an `assert` so that it is still enforced under `python -O`.
if "v3" not in judge_policy.model_name.lower():
    raise RuntimeError(
        "Expected policies[0] to be the DeepSeek-V3 judge, "
        f"got model {judge_policy.model_name!r}"
    )

# Axes of the experiment sweep: every combination of these is run once.
algos = [MartingaleStrategy(judge_policy=judge_policy)]
domains = [OpenReview(), Forecasting(), CMV()]
reasoning_modes = [ChainOfThought(), SelfDebate()]

# Progress bookkeeping: total number of setups and how many have finished.
setup_count = len(algos) * len(policies) * len(domains) * len(reasoning_modes)
completion_count = 0

# Lazily enumerate every (algorithm, policy, domain, reasoning mode) setup, in
# the same order as four nested loops with `algos` outermost and
# `reasoning_modes` innermost.
all_setups = (
    (a, p, d, r)
    for a in algos
    for p in policies
    for d in domains
    for r in reasoning_modes
)

for algo, policy, domain, reasoning_mode in all_setups:
    # OpenReview contexts are long, so fewer trajectories are used there to save cost.
    if isinstance(domain, OpenReview):
        trajectory_budget = 50
    else:
        trajectory_budget = 100
    priors = [None]

    # Domains with no ground truth, such as CMV, skip ground-truth evaluation.
    needs_ground_truth = not isinstance(domain, CMV)

    # Flags are communicated through environment variables; they are
    # (re)applied before every run, together with a run id naming this setup.
    run_id = f"BATCH-{reasoning_mode.__class__.__name__}-{domain.__class__.__name__}-{policy.colloquial_name}-{algo.__class__.__name__}"
    os.environ.update(
        {
            "DISABLE_SYSTEM_PROMPT_IN_BELIEF_MEASUREMENT": "1",
            "USE_FIXED_JUDGE": "1",
            "RUN_ID": run_id,
        }
    )
    print(f"Starting run: {run_id}")

    # Launch this setup; clear_run_id=False is passed through unchanged.
    run_reasoning(
        algo,
        policy,
        domain,
        reasoning_mode,
        trajectory_budget,
        priors,
        needs_ground_truth,
        clear_run_id=False,
    )

    # Report progress after each finished setup.
    completion_count += 1
    print(f"\n\nCompleted {completion_count} out of {setup_count} setups\n\n")