import pandas as pd
from typing import Dict, Any, Optional, Callable, List
from radar.data import datamodel
from radar.baselines import evaluate

# System message for the direct-prompt baseline (used as the "system" content
# in run_direct_prompt). It tells the model to handle five kinds of data
# artifacts (missing data, bad values, outliers, inconsistent formatting,
# inconsistent logic), to reason without writing or executing code, and to end
# with a line starting with the exact prefix "The answer is:".
# The trailing backslashes inside the literal are line continuations, so the
# answer-format paragraph reaches the model as one long line.
# NOTE(review): the "The answer is:" prefix is presumably what
# evaluate.extract_value_from_answer looks for -- confirm before rewording.
# This text is sent to the model verbatim, so any wording change alters the
# prompt the baseline runs with.
PREAMBLE = """
  You are an expert-level data scientist. Your job is to answer a data analysis question in rigorous manner given a data table.
  In your analysis:
  * Carefully address
    1) missing data: empty or null entries simulating incomplete information
    2) bad values: clearly erroneous or placeholder entries (e.g., `-1`, `9999`, `TEST`, `#REF!` etc.)
    3) outliers: implausible extreme values that distort analysis (e.g., 220 breathing rate per minute)
    4) inconsistent formatting: variations in representing the same value (e.g., `22 lbs`, `22 pounds`, `weight = 22`)
    5) inconsistent logic: cross-field contradictions violating common-sense logic (e.g., end time before start time)
  * Attempt to safely recover or correct flawed data when reasonable based on the existing data. If data is irrecoverable or suspect, discard the row.
  * Do NOT write or execute any code. Focus purely on logical reasoning and analytical judgment.
  You must conclude with your most reasonable answer.

  When you provide the final answer, please use the prefix "The answer is:" \
  without any modification, and provide the answer directly, with no formatting, no bolding, and \
  no markup. For instance: "The answer is: 42" or "The answer is: yes". If the question asks \
  for a list of values, then the answer should be a comma-separated list of values, \
  without any formatting, no bolding, and no markup. For instance: "The answer is: 42, 43, 44" or "The answer is: yes, no".
"""

# User-message template for the direct-prompt baseline. The `{table}` and
# `{question}` placeholders are filled with str.format(**...) from the mapping
# returned by task.get_prompt_info() in run_direct_prompt, so that mapping
# must provide at least those two keys (a missing key raises KeyError; extra
# keys are ignored by str.format).
TASK_PROMPT = """
  Data:
  {table}
  Based on the given table, answer the following question:
  {question}
"""


def run_direct_prompt(
    task: datamodel.TaskInstance,
    llm_call: Callable[[str | List[Dict[str, str]]], str],
) -> Dict[str, Any]:
    """
    Run the direct-prompt baseline on a single task.

    A two-message chat prompt is assembled (PREAMBLE as the system message,
    TASK_PROMPT filled from ``task.get_prompt_info()`` as the user message),
    handed to ``llm_call``, and the reply is parsed and scored against
    ``task.answer`` using the helpers in ``evaluate``.

    Args:
        task: The task instance to answer; must provide ``get_prompt_info()``
            and an ``answer`` attribute.
        llm_call: Callable invoked once with the list of role/content message
            dicts built here; its return value is used as the model response.
    Returns:
        A dictionary with the keys ``prompt`` (the message list),
        ``llm_response`` (raw reply), ``llm_answer`` (value extracted from
        the reply), ``ground_truth`` (``task.answer``), ``is_correct``
        (result of ``evaluate.evaluate_answer``), and ``task``.
    """
    template_fields = task.get_prompt_info()

    system_message = {"role": "system", "content": PREAMBLE}
    user_message = {
        "role": "user",
        "content": TASK_PROMPT.format(**template_fields),
    }
    messages = [system_message, user_message]

    raw_reply = llm_call(messages)
    parsed_answer = evaluate.extract_value_from_answer(raw_reply)

    # Filled key by key so the insertion order of the result stays
    # prompt -> llm_response -> llm_answer -> ground_truth -> is_correct -> task.
    outcome: Dict[str, Any] = {
        "prompt": messages,
        "llm_response": raw_reply,
        "llm_answer": parsed_answer,
    }
    outcome["ground_truth"] = task.answer
    outcome["is_correct"] = evaluate.evaluate_answer(parsed_answer, task.answer)
    outcome["task"] = task
    return outcome
