from pydantic import BaseModel

from nightjar import nj_llm_factory


class Response(BaseModel):
    """Structured reply schema requested from the LLM for the average task."""

    # The average of three numbers is generally fractional (e.g. 1, 2, 4),
    # and `main` is annotated as returning a float, so the field must accept
    # non-integer values. Whole-number replies such as 2 still validate and
    # still compare equal to the integer 2.
    res: float


def main(p, q, r, nj_llm) -> float:
    """Ask the LLM for the average of ``p``, ``q`` and ``r``.

    The three values are interpolated verbatim into a prompt that wraps each
    one in its own ``<p>``/``<q>``/``<r>`` tag. The prompt is passed to
    ``nj_llm`` with ``Response`` as the requested ``output_format``.

    Args:
        p: First value; formatted into the prompt with ``str()`` semantics.
        q: Second value, formatted the same way.
        r: Third value, formatted the same way.
        nj_llm: Callable taking the prompt and an ``output_format`` keyword
            and returning an object with a ``res`` attribute.

    Returns:
        The ``res`` attribute of the object returned by ``nj_llm``.
    """
    prompt = f"Compute the average of <p>, <q>, and <r>.\n<p>{p}</p>\n<q>{q}</q>\n<r>{r}</r>"
    reply = nj_llm(prompt, output_format=Response)
    return reply.res


#### Tests ####
import logging
from typing import Any, Dict, List, Tuple

# Configure logging at INFO level as an import-time side effect, so INFO and
# higher records are emitted when the test harness below runs.
logging.basicConfig(level=logging.INFO)


def run(
    model_name: str,
) -> Tuple[Dict[str, Tuple[Any, Any]], Dict[str, Any], Dict[str, bool], Dict[str, str]]:
    """Run ``main`` on a fixed set of input triples and collect the outcomes.

    An LLM callable is obtained from ``nj_llm_factory`` (capped with
    ``max_calls=100``) and reused for every case. Each case is recorded under
    the key ``"test_<index>"``.

    Args:
        model_name: Model identifier forwarded to ``nj_llm_factory``.

    Returns:
        A 4-tuple ``(outputs, errors, hard_results, usage)``:

        * ``outputs`` - value returned by ``main`` per case, or ``None`` if
          the call raised.
        * ``errors`` - the exception caught for the case, or ``None``.
        * ``hard_results`` - ``True`` when the output compared equal to ``2``.
        * ``usage`` - the second object returned by ``nj_llm_factory``,
          passed through untouched.

    NOTE(review): the return annotation declares the first element as
    ``Dict[str, Tuple[Any, Any]]``, but the stored values are whatever
    ``main`` returns (or ``None``) - confirm and adjust the annotation.
    """
    nj_llm, usage = nj_llm_factory(model_name, max_calls=100)

    # Leftover note from the original: "openai/gpt-4.1" (presumably an
    # alternative model name - confirm).
    cases = [
        (1.0, 2.0, 3.0),
        ("one", "two", "three"),
        ("uno", "二", "три"),
    ]
    outputs = {}
    errors = {}
    hard_results = {}

    for idx, case in enumerate(cases):
        key = f"test_{idx}"
        # Seed pessimistic defaults so every case has an entry in all three
        # dicts even when the call below fails.
        outputs[key], errors[key], hard_results[key] = None, None, False

        try:
            answer = main(*case, nj_llm=nj_llm)
        except Exception as exc:
            errors[key] = exc
            continue
        outputs[key] = answer

        # The comparison is guarded separately because ``answer`` may be an
        # arbitrary object whose ``__eq__`` could itself raise.
        try:
            hard_results[key] = answer == 2
        except Exception as exc:
            errors[key] = exc

    return outputs, errors, hard_results, usage


if __name__ == "__main__":
    # Script entry point: run the harness against one model and dump each
    # collected object on its own line, in the order outputs, pass/fail
    # flags, errors, usage.
    results, errors, hard_results, usage = run("anthropic/claude-sonnet-4-20250514")
    for report in (results, hard_results, errors, usage):
        print(report)
