from typing import Tuple
import re
from vllm import LLM, SamplingParams
from src.function_evaluation import python_eval
from src.utils import RawInput, img2base64, IOExamples
import os
import json

def code_prompt(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Ask the model for a self-contained Python program and execute it.

    The prompt is built from the optional image and the text of ``sample``
    plus an instruction to emit code that stores its result in a variable
    named ``answer``. The program is taken from the model output and handed
    to ``python_eval``.

    Args:
        sample: Input whose ``image_input`` (may be ``None``) and
            ``text_input`` attributes are placed in the prompt.
        model: vLLM model queried through ``model.chat``.
        temperature: Sampling temperature.

    Returns:
        ``("FINAL ANSWER: <result>", logs)`` on success, or ``(None, logs)``
        when no program could be extracted or evaluating it raised.
        ``logs["output"]`` always holds the raw model output; ``logs["error"]``
        is set when the evaluation raised.
    """
    logs = {}
    prompt_content = []
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    prompt_content.append(
        {
            "type": "text",
            "text": f"{sample.text_input}",
        }
    )
    prompt_content.append(
        {
            "type": "text",
            "text": "\nAnalyze the provided input and output self-contained Python code at the end enclosed in a markdown code block such that executing the code stores the answer in the variable 'answer'. Do not use a main() function.",
        }
    )

    prompt = [{"role": "user", "content": prompt_content}]
    sampling_params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9)
    output = (
        model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
    )
    logs["output"] = output

    # Lowest-priority source: a LaTeX boxed expression. The raw string spells
    # the same marker as before without relying on an invalid escape sequence.
    ans_str = None
    if r"\[ \boxed{" in output:
        boxed = re.findall(r"\[ \\boxed{(.*)}", output, re.DOTALL)
        if boxed:
            ans_str = boxed[-1]

    # Fenced code wins over the boxed expression; a python-tagged fence wins
    # over an untagged one. The last complete block is used in either case.
    if "```python" in output:
        blocks = re.findall(r"```python(.*?)```", output, re.DOTALL)
        ans_str = blocks[-1] if blocks else None
    elif "```" in output:
        blocks = re.findall(r"```(.*?)```", output, re.DOTALL)
        ans_str = blocks[-1] if blocks else None

    if ans_str is None:
        # Nothing executable was produced (e.g. a truncated, unclosed fence).
        return None, logs

    try:
        code_output, stdout, err = python_eval(ans_str)
    except Exception as exc:
        # Best-effort: a failing evaluation yields "no answer", but the reason
        # is kept in the logs instead of being silently dropped.
        logs["error"] = repr(exc)
        return None, logs

    return "FINAL ANSWER: " + str(code_output), logs


def code_interpreter_prompt(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Ask the model to write and run code itself and report a final answer.

    Args:
        sample: Input whose ``image_input`` (may be ``None``) and
            ``text_input`` attributes are placed in the prompt.
        model: vLLM model queried through ``model.chat``.
        temperature: Sampling temperature.

    Returns:
        ``("FINAL ANSWER: <text>", logs)`` where ``<text>`` is everything that
        follows the first ``FINAL ANSWER:`` marker in the model output, or
        ``(None, logs)`` when the marker is absent. ``logs["output"]`` holds
        the raw output (the string ``"None"`` if the model call failed, in
        which case ``logs["error"]`` describes the failure).
    """
    logs = {}
    prompt_content = []
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    prompt_content.append(
        {
            "type": "text",
            "text": f"{sample.text_input}",
        }
    )
    prompt_content.append(
        {
            "type": "text",
            "text": "\nAnalyze the provided input and generate and run Python code to solve the problem. Output the final answer after 'FINAL ANSWER:'.",
        }
    )

    prompt = [{"role": "user", "content": prompt_content}]
    sampling_params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9)
    try:
        output = (
            model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text
        )
    except Exception as exc:
        # Best-effort: a failed generation is treated as "no answer", but the
        # cause is recorded so it can be diagnosed from the logs.
        logs["error"] = repr(exc)
        output = "None"
    logs["output"] = output

    # With DOTALL the greedy group runs to the end of the output, so the
    # answer is everything after the first marker.
    matches = re.findall(r"FINAL ANSWER:(.*)", output, re.DOTALL) if isinstance(output, str) else []
    if not matches:
        return None, logs
    return "FINAL ANSWER: " + matches[-1], logs


def our_method(
    sample: str,
    model: LLM,
    reasoning_plan=None,
) -> Tuple[str, dict]:
    """Solve ``sample`` by drafting a plan, executing it and revising it.

    When ``reasoning_plan`` is ``None`` a plan is generated from the problem,
    executed, and then revised and re-executed twice; the final plan is
    appended to ``logs/reasoning_plans/log.txt``. When a plan is supplied it
    is executed once and neither revised nor written to disk.

    Plan execution is interactive: whenever the model emits a python code
    block, the block is run through ``python_eval`` and the result is fed back
    to the model as the next user turn.

    Args:
        sample: Problem text.
        model: vLLM model queried through ``model.chat``.
        reasoning_plan: Optional pre-computed plan to reuse.

    Returns:
        ``(output, logs)`` where ``output`` is the transcript of the last plan
        execution and ``logs`` has the keys ``"reasoning_structure"`` and
        ``"output"``.
    """

    def query_llm(prompt, stop=None):
        """Query the model with a plain string or a ready-made message list."""
        sampling_params = SamplingParams(temperature=0.6, max_tokens=10000, top_p=0.9, stop=[stop] if stop else None)
        if not isinstance(prompt, list):
            prompt = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
        return (
            model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text
        )

    def generate_task_instances(problem_instance):
        """
        Generate task instances for the given problem instance.

        Currently not called by the pipeline below.
        """
        prompt = f"Given the following problem instance, generate two additional problems in exactly the same format, each after \"Problem:\":\n{problem_instance}"
        task_instances = query_llm(prompt)
        return task_instances

    def execute_reasoning_structure(reasoning_structure, task_instance):
        """
        Execute the reasoning structure to solve a specific task instance.

        Returns the accumulated transcript (model steps plus execution results).
        """
        prompt = f"Use the following reasoning structure to solve the provided problem: {reasoning_structure}\n\nSolve this task, providing your final answer at the end after 'FINAL ANSWER:': {task_instance}.\nTo call Python code, output a Python code block without any main function. All code is in the same context, so do not redefine variables in previous steps. Also, if code throws errors, try to fix them. Your goal is to solve the problem rather than write perfect code, so once you have a working solution, you can stop."
        prompt = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
        solution = ""
        # repr() yields a valid Python literal for any problem text; wrapping
        # the text in triple quotes breaks (or silently alters it) when the
        # text itself contains triple quotes or backslashes.
        code_context = f"problem_instance = {task_instance!r}\n"
        while "<|eot_id|>" not in solution and len(solution) < 40000:
            # Stop right after a closing fence so the code can be executed
            # before the model continues.
            step = query_llm(prompt, stop="```\n")
            if "```python" in step:
                # Re-append the fence (vLLM omits the stop string from the
                # returned text by default), which also guarantees that the
                # search below finds a complete block.
                step += "```\n"
                code = re.search(r"```python(.*?)```", step, re.DOTALL).group(1)
                code_output, stdout, err = python_eval(code, code_context)
                # Later blocks are evaluated with all earlier code as context.
                code_context += code
                feedback = f"The result of executing the above code is:\n{code_output if err is None else err}"
                prompt.append({"role": "assistant", "content": [{"type": "text", "text": step}]})
                prompt.append({"role": "user", "content": [{"type": "text", "text": feedback}]})
                step += feedback + "\n"
            else:
                # No code was requested: regenerate without a stop string so
                # the final message is not cut at a non-python fence.
                step = query_llm(prompt, stop=None)
                prompt.append({"role": "assistant", "content": [{"type": "text", "text": step}]})
                solution += step
                break
            solution += step
        return solution

    plan_prompt = '''Create a plan for solving the task represented by the provided problem without actually solving the particular problem. The plan is like the recipe for solving the problem. The plan should be instance independent, so don't refer to specific details of the problem, but you can provide examples from the problems or make up your own examples.
Start each step with "Step X:" where X is the step number. Steps can contain Python code snippets which automate parts of the solving task.
For instance, if you need to find the 700th prime number, you can provide a code snippet rather than providing the instruction to manually count.

Some problem solving strategies you can use for forming the plan are:
- Breaking down the problem into smaller, more manageable parts
- Identifying key assumptions and constraints
- Considering alternative perspectives or viewpoints
- Devising an experiment to help solve the problem
- Reducing the problem to a simpler form
- Using SAT solvers or other automated reasoning tools (you can use the sympy and z3 libraries in Python, but be sure to provide the necessary imports)
- Processing the problem in some way to make it easier to understand.
- Printing intermediate data processing steps to help understand the problem better using code.
- Constructing a data structure (such as a graph) and then printing it to help understand the problem better.
One way to form the plan is to consider which of the above strategies are helpful and then adapt them to the specific task.
Problems:'''

    revise_prompt = '''Revise the following reasoning structure based on the resulting solution for the given problem instance. 
The reasoning structure should be like the recipe for solving the problem. The reasoning structure should be instance independent, so don't refer to specific details of the problem, but you can provide examples from the problems or make up your own examples.
Start each step with "Step X:" where X is the step number. Steps can contain Python code snippets which automate parts of the solving task.

To help improve the reasoning structure, think about each of the following:
- Is the final answer found in the solution correct and does it match the expected output?
- Replace any hardcoded values/examples given in the reasoning structure with the simplest cases or examples.
- If code is used in the solution, be extremely careful to ensure that the conversion of any information from text into the code is correct. Add a simple example conversion (from problem text to code) to the reasoning structure to guide the problem solver.
- If no solution was found, was it because the problem was misunderstood, something was missed, or an inadequate strategy was used?
- Are there any steps that could be automated with code? Try to provide code snippets for steps which involve repetitive or computational tasks to avoid manual work and reduce the chances of errors.
- Could something be printed or visualized to aid in problem solving?
- Does the solution contain any errors that can be avoided by adding notes in the reasoning structure?
- Is this solution appropriate, or should the reasoning structure be revised to provide a better solution?
- If code is used in the reasoning structure, was it actually useful in solving the problem? If not, try a different approach.

After considering the above, output the new and improved reasoning structure at the end after "Revised Reasoning Structure:" Be sure to include the complete reasoning structure rather than just the changes made.
Problem Instance:
{problem_instance}

Reasoning Structure:
{reasoning_structure}

Solution:
{solution}'''

    print(sample)

    # Only a plan generated here is refined and persisted; a caller-supplied
    # plan is used as given.
    optimize_plan = reasoning_plan is None
    if reasoning_plan is None:
        reasoning_plan = query_llm(plan_prompt + "\n" + sample)
        print("Reasoning plan:", reasoning_plan)

    output = execute_reasoning_structure(reasoning_plan, sample)
    print("Plan execution:", output)

    if optimize_plan:
        for _ in range(2):
            revision = query_llm(revise_prompt.format(reasoning_structure=reasoning_plan, problem_instance=sample, solution=output))
            # Keep only the text after the marker; if the model omitted the
            # marker, fall back to the whole response.
            marker = re.search(r"Revised Reasoning Structure:\n(.*?)$", revision, re.DOTALL)
            reasoning_plan = marker.group(1) if marker else revision
            print("Revised reasoning plan:", reasoning_plan)

            output = execute_reasoning_structure(reasoning_plan, sample)
            print("Plan execution:", output)

        # exist_ok avoids the check-then-create race between parallel runs.
        os.makedirs("logs/reasoning_plans/", exist_ok=True)
        with open("logs/reasoning_plans/log.txt", "a", encoding="utf-8") as f:
            f.write(f"Problem: {sample}\nReasoning Plan: {reasoning_plan}\n\n")

    logs = {"reasoning_structure": reasoning_plan, "output": output}

    return output, logs


def zs_cot(
    sample: RawInput,
    model: LLM,
    num_gen: int = 1,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Zero-shot chain-of-thought prompting.

    Queries the model ``num_gen`` times with the same step-by-step prompt.

    Args:
        sample: Input whose ``image_input`` (may be ``None``) and
            ``text_input`` attributes are placed in the prompt.
        model: vLLM model queried through ``model.chat``.
        num_gen: Number of independent generations.
        temperature: Sampling temperature.

    Returns:
        ``(first_generation, logs)``. ``logs["output"]`` lists every
        generation; ``logs["answers"]`` lists the text after ``FINAL ANSWER:``
        for those generations that contain the marker (others are skipped).
    """
    logs = {}

    # Assemble the user message: optional image, the question, the instruction.
    content = []
    if sample.image_input is not None:
        encoded = img2base64(sample.image_input)
        content.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
            }
        )
    content.append({"type": "text", "text": f"Question: {sample.text_input}"})
    content.append(
        {
            "type": "text",
            "text": "Answer the problem by thinking step-by-step and output the final answer at the end after 'FINAL ANSWER:'.",
        }
    )
    messages = [{"role": "user", "content": content}]

    params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9)
    generations = []
    for _ in range(num_gen):
        response = model.chat(messages, sampling_params=params, use_tqdm=False)
        generations.append(response[0].outputs[0].text)
    logs["output"] = generations

    # Collect the answer of every generation that states one.
    extracted = []
    for text in generations:
        try:
            found = re.findall(r"FINAL ANSWER:(.*)", text, re.DOTALL)
        except Exception:
            continue
        if found:
            extracted.append(found[-1])
    logs["answers"] = extracted

    return generations[0], logs

def zs_cots(
    samples: str,
    model: LLM,
) -> Tuple[str, dict]:
    """Batched zero-shot chain-of-thought prompting.

    Despite the ``str`` annotation, ``samples`` is iterated and one
    single-turn conversation is built per element; all conversations are sent
    to the model in a single ``model.chat`` call.

    Returns:
        ``(outputs, logs)`` where ``outputs`` is the list of generated texts
        (one per element of ``samples``) and ``logs["output"]`` is that same
        list.
    """
    conversations = [
        [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Answer the following problem by thinking step-by-step and output the final answer at the end after 'FINAL ANSWER:'.\n{question}",
                    }
                ],
            }
        ]
        for question in samples
    ]

    params = SamplingParams(temperature=0.6, max_tokens=50000, top_p=0.9)
    results = model.chat(conversations, sampling_params=params, use_tqdm=True)
    texts = [result.outputs[0].text for result in results]

    logs = {"output": texts}
    return texts, logs


def self_discover(
    sample: str,
    model: LLM,
    reasoning_structure=None,
) -> Tuple[str, dict]:
    """Solve ``sample`` with a select / adapt / implement / execute pipeline.

    When ``reasoning_structure`` is ``None`` it is derived from the problem in
    three model calls (select relevant reasoning modules, adapt them to the
    task, turn them into an actionable structure). The structure is then used
    to solve the problem in a final model call.

    Args:
        sample: Problem text.
        model: vLLM model queried through ``model.chat``.
        reasoning_structure: Optional pre-computed structure to reuse.

    Returns:
        ``(solution, logs)`` with ``logs["reasoning_structure"]`` holding the
        structure that was executed.
    """

    def query_llm(prompt):
        """Send a single-turn text prompt and strip the end-of-turn token."""
        sampling_params = SamplingParams(temperature=0.6, max_tokens=50000, top_p=0.9)
        prompt = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
        return (
            model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text
        ).replace("<|eot_id|>", "")

    def select_reasoning_modules(task_description, reasoning_modules):
        """
        Step 1: SELECT relevant reasoning modules for the task.
        """
        prompt = (
            f"Given the task: {task_description}, which of the following reasoning modules are relevant? Do not elaborate on why.\n\n"
            + "\n".join(reasoning_modules)
        )
        selected_modules = query_llm(prompt)
        return selected_modules

    def adapt_reasoning_modules(selected_modules, task_example):
        """
        Step 2: ADAPT the selected reasoning modules to be more specific to the task.
        """
        prompt = f"Without working out the full solution, adapt the following reasoning modules to be specific to our task:\n{selected_modules}\n\nOur task:\n{task_example}"
        adapted_modules = query_llm(prompt)
        return adapted_modules

    def implement_reasoning_structure(adapted_modules, task_description):
        """
        Step 3: IMPLEMENT the adapted reasoning modules into an actionable reasoning structure.
        """
        prompt = f"Without working out the full solution, create an actionable reasoning structure for the task using these adapted reasoning modules:\n{adapted_modules}\n\nTask Description:\n{task_description}"
        reasoning_structure = query_llm(prompt)
        return reasoning_structure

    # STAGE 2

    def execute_reasoning_structure(reasoning_structure, task_instance):
        """
        Execute the reasoning structure to solve a specific task instance.
        """
        prompt = f"Using the following reasoning structure: {reasoning_structure}\n\nSolve this task, providing your final answer after 'FINAL ANSWER:': {task_instance}"
        solution = query_llm(prompt)
        return solution

    # One module per list entry. Every entry needs its own trailing comma:
    # adjacent string literals are concatenated implicitly, which would fuse
    # several modules into a single line of the selection prompt.
    reasoning_modules = [
        "1. How could I devise an experiment to help solve that problem?",
        "2. Make a list of ideas for solving this problem, and apply them one by one to the problem to see if any progress can be made.",
        # "3. How could I measure progress on this problem?",
        "4. How can I simplify the problem so that it is easier to solve?",
        "5. What are the key assumptions underlying this problem?",
        "6. What are the potential risks and drawbacks of each solution?",
        "7. What are the alternative perspectives or viewpoints on this problem?",
        "8. What are the long-term implications of this problem and its solutions?",
        "9. How can I break down this problem into smaller, more manageable parts?",
        "10. Critical Thinking: This style involves analyzing the problem from different perspectives, questioning assumptions, and evaluating the evidence or information available. It focuses on logical reasoning, evidence-based decision-making, and identifying potential biases or flaws in thinking.",
        "11. Try creative thinking, generate innovative and out-of-the-box ideas to solve the problem. Explore unconventional solutions, thinking beyond traditional boundaries, and encouraging imagination and originality.",
        # "12. Seek input and collaboration from others to solve the problem. Emphasize teamwork, open communication, and leveraging the diverse perspectives and expertise of a group to come up with effective solutions.",
        "13. Use systems thinking: Consider the problem as part of a larger system and understanding the interconnectedness of various elements. Focuses on identifying the underlying causes, feedback loops, and interdependencies that influence the problem, and developing holistic solutions that address the system as a whole.",
        "14. Use Risk Analysis: Evaluate potential risks, uncertainties, and tradeoffs associated with different solutions or approaches to a problem. Emphasize assessing the potential consequences and likelihood of success or failure, and making informed decisions based on a balanced analysis of risks and benefits.",
        # "15. Use Reflective Thinking: Step back from the problem, take the time for introspection and self-reflection. Examine personal biases, assumptions, and mental models that may influence problem-solving, and being open to learning from past experiences to improve future approaches.",
        "16. What is the core issue or problem that needs to be addressed?",
        "17. What are the underlying causes or factors contributing to the problem?",
        "18. Are there any potential solutions or strategies that have been tried before? If yes, what were the outcomes and lessons learned?",
        "19. What are the potential obstacles or challenges that might arise in solving this problem?",
        "20. Are there any relevant data or information that can provide insights into the problem? If yes, what data sources are available, and how can they be analyzed?",
        "21. Are there any stakeholders or individuals who are directly affected by the problem? What are their perspectives and needs?",
        "22. What resources (financial, human, technological, etc.) are needed to tackle the problem effectively?",
        "23. How can progress or success in solving the problem be measured or evaluated?",
        "24. What indicators or metrics can be used?",
        "25. Is the problem a technical or practical one that requires a specific expertise or skill set? Or is it more of a conceptual or theoretical problem?",
        "26. Does the problem involve a physical constraint, such as limited resources, infrastructure, or space?",
        "27. Is the problem related to human behavior, such as a social, cultural, or psychological issue?",
        "28. Does the problem involve decision-making or planning, where choices need to be made under uncertainty or with competing objectives?",
        "29. Is the problem an analytical one that requires data analysis, modeling, or optimization techniques?",
        "30. Is the problem a design challenge that requires creative solutions and innovation?",
        "31. Does the problem require addressing systemic or structural issues rather than just individual instances?",
        "32. Is the problem time-sensitive or urgent, requiring immediate attention and action?",
        "33. What kinds of solution typically are produced for this kind of problem specification?",
        "34. Given the problem specification and the current best solution, have a guess about other possible solutions.",
        "35. Let's imagine the current best solution is totally wrong, what other ways are there to think about the problem specification?",
        "36. What is the best way to modify this current best solution, given what you know about these kinds of problem specification?",
        "37. Ignoring the current best solution, create an entirely new solution to the problem.",
        # "38. Let’s think step by step."
        "39. Let's make a step by step plan and implement it with good notation and explanation.",
    ]

    logs = {}
    if reasoning_structure is None:
        selected_modules = select_reasoning_modules(sample, reasoning_modules)

        adapted_modules = adapt_reasoning_modules(selected_modules, sample)

        reasoning_structure = implement_reasoning_structure(adapted_modules, sample)

    result = execute_reasoning_structure(reasoning_structure, sample)
    logs["reasoning_structure"] = reasoning_structure

    return result, logs


def autogen_prompt(
    sample: str,
    model: LLM,
    num_gen: int = 1,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Query the model once with the AutoGen-style assistant instructions.

    The instructions and the task are sent together as a single user message.

    Args:
        sample: Task text appended after the instructions.
        model: vLLM model queried through ``model.chat``.
        num_gen: Accepted for signature parity with the other strategies;
            not used, exactly one generation is produced.
        temperature: Sampling temperature.

    Returns:
        ``(output, logs)`` with ``logs["output"]`` holding the generated text.
    """
    prompt = """You are a helpful AI assistant. Solve tasks using your coding and language skills. In the
following cases, suggest python code (in a python coding block) or shell script (in a sh coding
block) for the user to execute. 1. When you need to collect info, use the code to output the
info you need, for example, browse or search the web, download/read a file, print the content
of a webpage or a file, get the current date/time, check the operating system. After sufficient
info is printed and the task is ready to be solved based on your language skill, you can solve
the task by yourself. 2. When you need to perform some task with code, use the code to
perform the task and output the result. Finish the task smartly. Solve the task step by step if
you need to. If a plan is not provided, explain your plan first. Be clear which step uses code,
and which step uses your language skill. When using code, you must indicate the script type
in the code block. The user cannot provide any other feedback or perform any other action
beyond executing the code you suggest. The user can't modify your code. So do not suggest
incomplete code which requires users to modify. Don't use a code block if it's not intended
to be executed by the user. If you want the user to save the code in a file before executing it,
put # filename: filename inside the code block as the first line. Don't include multiple code
blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print'
function for the output when relevant. Check the execution result returned by the user. If the
result indicates there is an error, fix the error and output the code again. Suggest the full code
instead of partial code or code changes. If the error can't be fixed or if the task is not solved
even after the code is executed successfully, analyze the problem, revisit your assumption,
collect additional info you need, and think of a different approach to try. When you find an
answer, verify the answer carefully. Include verifiable evidence in your response if possible.
Output the final answer after 'FINAL ANSWER:'. Reply ”TERMINATE” in the end when everything is done."""

    prompt_content = [
        {
            "type": "text",
            "text": f"{prompt}\n{sample}",
        }
    ]
    # The chat API takes a list of role-tagged messages. Passing the bare
    # instruction string would also leave the task itself out of the request.
    messages = [{"role": "user", "content": prompt_content}]

    sampling_params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=1.0)
    print(prompt)
    output = (
        model.chat(messages, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
    )
    logs = {}
    logs["output"] = output
    return output, logs

def zs_tot(
    sample: str,
    model: LLM,
) -> Tuple[str, dict]:
    """Two-step tree-of-thought prompting with model-based voting.

    Step 0 samples five strategies (generation stops at ``Answer:``), step 1
    samples five answers continuing the winning strategy. After each step the
    model votes for the most promising candidate and only that one is kept.

    Args:
        sample: Question text.
        model: vLLM model queried through ``model.chat``.

    Returns:
        ``(best_candidate_of_last_step, logs)``. For each step ``logs`` holds
        the candidates (``step_<i>``), the raw vote text (``step_<i>_vote``)
        and the vote counts (``votes_<i>``).
    """
    sample_prompt = """Answer the following question: {input}
Make a strategy then write. Your output should be of the following format:

Strategy:
Your strategy about how to answer the question.

Answer:
Your answer to the question after 'FINAL ANSWER:'."""
    vote_prompt = """Given an instruction and several choices, decide which choice is most promising. Analyze each choice in detail, then conclude in the last line "The best choice is {s}", where s the integer id of the choice."""
    stops = ["Answer:\n", None]

    logs = {}

    def get_samples(sample, out, n, stop):
        """Sample ``n`` continuations of the prompt followed by ``out``."""
        prompt = sample_prompt.format(input=sample) + out
        prompt_content = [{"type": "text", "text": prompt}]
        sampling_params = SamplingParams(temperature=0.6, max_tokens=8000, top_p=0.9, stop=[stop] if stop else None)
        output = [
            model.chat([{"role": "user", "content": prompt_content}], sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text for _ in range(n)
        ]
        return output

    def get_votes(samples, n):
        """Let the model vote ``n`` times; return per-candidate counts and raw votes."""
        prompt = vote_prompt
        for i, candidate in enumerate(samples):
            prompt += f"\nChoice {i+1}:\n{candidate}\n"
        prompt_content = [{"type": "text", "text": prompt}]
        sampling_params = SamplingParams(temperature=0.6, max_tokens=50000, top_p=0.9)
        output = [
            model.chat([{"role": "user", "content": prompt_content}], sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text for _ in range(n)
        ]

        # Parse each ballot on its own so one malformed reply does not discard
        # the others, and drop ids outside 1..len(samples): an id that is too
        # large would raise IndexError and "0" would wrap to the last candidate.
        votes = []
        for ballot in output:
            found = re.search(r"best choice is (\d+)", ballot)
            if found is None:
                continue
            choice = int(found.group(1)) - 1
            if 0 <= choice < len(samples):
                votes.append(choice)
        if not votes and samples:
            # No usable ballot: fall back to the first candidate.
            votes = [0]

        id2votes = [0] * len(samples)
        for vote in votes:
            id2votes[vote] += 1
        return id2votes, output

    outputs = ['']
    for step in range(2):
        new_outs = [get_samples(sample, out, 5, stops[step]) for out in outputs]
        new_outs = [out for outs in new_outs for out in outs]
        logs[f"step_{step}"] = new_outs
        votes, raw_vote = get_votes(new_outs, 1)
        logs[f"step_{step}_vote"] = raw_vote

        # Keep the single most-voted candidate; the stable sort means ties go
        # to the earliest candidate.
        ids = list(range(len(votes)))
        selected_ids = sorted(ids, key=lambda x: votes[x], reverse=True)[:1]
        logs[f"votes_{step}"] = votes
        outputs = [new_outs[i] for i in selected_ids]

    return outputs[0], logs


def gen_task_prog(
    sample: str,
    model: LLM,
    task_program: str = None,
    training_examples: IOExamples = None,
) -> Tuple[str, dict]:
    """Solve ``sample`` with a task-level ``solve(symbols)`` program.

    If ``task_program`` is ``None`` the program is first generated by the
    model from ``training_examples``. The model is then asked to extract the
    relevant information of ``sample`` as JSON, and the program is run on
    that JSON through ``python_eval``.

    Args:
        sample: Question to answer. Although annotated ``str``, the object is
            accessed through ``.image_input`` and ``.text_input``.
        model: vLLM model queried through ``model.chat``.
        task_program: Source of a program defining ``solve``; generated when
            ``None``.
        training_examples: Example inputs/outputs (``.inputs`` and
            ``.outputs`` are zipped); required when ``task_program`` is
            ``None``.

    Returns:
        ``("FINAL ANSWER: <result>", logs)``. ``logs["program"]`` holds the
        program; ``logs["symbols"]`` holds the parsed JSON when extraction
        succeeded, otherwise ``logs["symbols_error"]`` explains the failure.

    Raises:
        AssertionError: ``task_program`` and ``training_examples`` are both
            ``None``.
        ValueError: The model produced no python code block for the program.
    """
    logs = {}
    prompt_content = []

    # generating the task-general program from examples
    if task_program is None:
        assert training_examples is not None, "Task program is None, training examples should be provided"
        prompt_content.append({"type": "text", "text": "Consider the following questions and answers:"})
        for example_input, example_output in zip(training_examples.inputs, training_examples.outputs):
            if example_input.image_input is not None:
                img_str = img2base64(example_input.image_input)
                prompt_content.append(
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{img_str}"
                        },
                    },
                )
            prompt_content.append(
                {
                    "type": "text",
                    "text": f"Question: {example_input.text_input}\nAnswer: {str(example_output)}\n\n",
                }
            )
        prompt_content.append({"type": "text", "text": "First extract relevant information from the input into JSON form and then output a Python function called `solve` which takes a single input `symbols` representing the output JSON and solves any problem of this task. Do not hardcode the answer into the JSON or the program since the program should determine the answer using relevant information extracted from the input. Output the function in a Python code block."})
        prompt = [{"role": "user", "content": prompt_content}]
        sampling_params = SamplingParams(temperature=0.0, max_tokens=50000, top_p=0.9)
        output = (
            model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text
        )

        # the program is the last python code block of the response
        code_blocks = re.findall(r"```python(.*?)```", output, re.DOTALL)
        if not code_blocks:
            raise ValueError("No code block found in the output")
        task_program = code_blocks[-1]

        # the extraction prompt below starts from a clean message
        prompt_content = []

    logs["program"] = task_program
    # add the question
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    prompt_content.append(
        {
            "type": "text",
            "text": f"Question: {sample.text_input}",
        }
    )
    prompt_content.append(
        {
            "type": "text",
            "text": f"Extract all relevant information from the input as JSON which could be fed into the following program to solve the problem:\n```python\n{task_program}\n```\nOutput just the JSON in a markdown code block.",
        }
    )

    prompt = [{"role": "user", "content": prompt_content}]
    sampling_params = SamplingParams(temperature=0.0, max_tokens=50000, top_p=0.9)
    output = (
        model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
    )

    # parse the last JSON code block of the response into a Python object
    symbols = None
    json_blocks = re.findall(r"```json(.*?)```", output, re.DOTALL)
    if not json_blocks:
        logs["symbols_error"] = "No JSON code block found in the output"
    else:
        try:
            symbols = json.loads(json_blocks[-1])
        except ValueError as exc:  # json.JSONDecodeError is a ValueError
            logs["symbols_error"] = str(exc)
        else:
            logs["symbols"] = symbols

    print("JSON:", symbols)
    print("Code:", task_program)

    # repr() always yields a valid Python literal, also for a top-level JSON
    # string and for the None used when extraction failed, so the generated
    # program stays syntactically valid in every case.
    program = "symbols = " + repr(symbols) + "\n" + task_program + "\nanswer = solve(symbols)"
    # NOTE(review): python_eval is assumed to return (value of `answer`,
    # captured stdout, error) - confirm against src.function_evaluation.
    output, stdout, err = python_eval(program)

    print("Output:", output)

    return "FINAL ANSWER: " + str(output), logs


def gen_sym_prog(
    sample: RawInput,
    model: LLM,
    num_gen: int = 1,
    temperature: float = 0.5,
) -> Tuple[str, dict]:
    """Sample symbols-plus-program answers and execute each generated program.

    The model is asked, in a single user turn, to emit a JSON block with the
    extracted information and a Python block defining ``solve(symbols)``.
    The request is issued ``num_gen`` times; every response is parsed and
    executed through ``python_eval``.

    Args:
        sample: Input whose ``text_input`` is the question and whose optional
            ``image_input`` is attached as a base64 JPEG data URL.
        model: Chat model used for generation.
        num_gen: Number of independent generations to request.
        temperature: Sampling temperature for every generation.

    Returns:
        ``("FINAL ANSWER: <value>", logs)`` where ``<value>`` is the result of
        the *last* generation only (``None`` when ``num_gen`` is not
        positive). ``logs["answers"]`` has one entry per generation, while
        ``logs["symbols"]`` and ``logs["programs"]`` only receive an entry
        when the corresponding block could be extracted, so the three lists
        may differ in length.
    """
    logs = {}
    prompt_content = []

    # add the question (image first, when present)
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    prompt_content.append(
        {
            "type": "text",
            "text": f"Question: {sample.text_input}",
        }
    )
    prompt_content.append(
        {
            "type": "text",
            "text": "Answer the question by first extracting all relevant information from the input as JSON and performing any 'reasoning' by using a Python program to solve the problem using the information from the JSON as input. The JSON and program should be enclosed in a markdown code block. The code should be one function called `solve` (which can have helper functions) and accept a single argument called `symbols` as input which will be the JSON object output in the preceding code block. The code must not just directly return the answer and any comments used must directly refer to code lines.",
        }
    )

    prompt = [{"role": "user", "content": prompt_content}]
    sampling_params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9, n=1)
    # One chat call per requested generation; each call yields one completion.
    outputs = [
        model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
        for _ in range(num_gen)
    ]

    logs["programs"] = []
    logs["symbols"] = []
    logs["answers"] = []

    # Bound up front so that an empty `outputs` (num_gen <= 0) produces
    # "FINAL ANSWER: None" instead of an UnboundLocalError at the return.
    output = None
    for out in outputs:
        # take the last ```json block of the response and parse it
        try:
            json_str = re.findall(r"```json(.*?)```", out, re.DOTALL)[-1]
            json_str = json.loads(json_str)
            logs["symbols"].append(json_str)
        except Exception:
            # missing block or invalid JSON: the evaluated program then starts
            # with an incomplete `symbols = ` assignment
            json_str = ""

        # take the last ```python block of the response
        try:
            code_str = re.findall(r"```python(.*?)```", out, re.DOTALL)[-1]
            logs["programs"].append(code_str)
        except Exception:
            code_str = ""

        print("JSON:", json_str)
        print("Code:", code_str)

        # str() of the parsed JSON is its Python repr, which is embedded as a literal
        output, stdout, err = python_eval("symbols = " + str(json_str) + "\n" + code_str + "\nanswer = solve(symbols)")

        print("Output:", output)
        logs["answers"].append(output)

    return "FINAL ANSWER: " + str(output), logs


def gen_sym(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Ask the model to extract JSON symbols and then reason over them in text.

    No program is executed: the model is instructed to finish its response
    with ``FINAL ANSWER:``, and the raw response is returned unchanged.

    Args:
        sample: Input whose ``text_input`` is the question and whose optional
            ``image_input`` is attached as a base64 JPEG data URL.
        model: Chat model used for generation.
        temperature: Sampling temperature for the generation.

    Returns:
        ``(output, logs)`` where ``output`` is the raw model response.
        ``logs["symbols"]`` holds the text ``"symbols = "`` followed by the
        unparsed content of the last JSON block, and ``logs["reasoning"]``
        holds everything after the first JSON block; each key is only set
        when the corresponding extraction succeeds.
    """
    logs = {}
    prompt_content = []

    # add the question (image first, when present)
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    prompt_content.append(
        {
            "type": "text",
            "text": f"Question: {sample.text_input}",
        }
    )
    prompt_content.append(
        {
            "type": "text",
            "text": "Answer the question by first extracting relevant information from the input as JSON and then reasoning about the information extracted in the JSON to reach the final answer. Whenever information from the JSON is mentioned, please explicitly mention it by referencing the specific information `symbols[...]` where `symbols` represents the extracted JSON data. The JSON should be enclosed in a markdown code block. The final answer should be output at the end after 'FINAL ANSWER:'.",
        }
    )

    prompt = [{"role": "user", "content": prompt_content}]
    # Honour the caller's temperature; it used to be hard-coded to 0.0, which
    # silently ignored the parameter (the default is still 0.0).
    sampling_params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9)
    output = (
        model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
    )

    # take the last ```json block; it is kept as text, not parsed
    try:
        json_str = re.findall(r"```json(.*?)```", output, re.DOTALL)[-1]
        json_str = "symbols = " + json_str
        logs["symbols"] = json_str
    except Exception:
        json_str = ""

    print("JSON:", json_str)

    # The reasoning is whatever follows the closing fence of the FIRST
    # ```json block. This coincides with the block used above whenever the
    # response contains a single JSON block.
    try:
        json_end = output.index("```", output.index("```json") + len("```json")) + len("```")
        reasoning = output[json_end:]
        logs["reasoning"] = reasoning
    except Exception:
        reasoning = ""

    print("Reasoning:", reasoning)

    return output, logs


def ablate_gen_reason_prog(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Program-only variant: the model writes ``solve()`` with no JSON symbols.

    The response is searched for a ```python block (falling back to any
    fenced block); the last such block is executed with ``answer = solve()``
    appended.

    Args:
        sample: Input whose ``text_input`` is the question and whose optional
            ``image_input`` is attached as a base64 JPEG data URL.
        model: Chat model used for generation.
        temperature: Sampling temperature for the generation.

    Returns:
        ``("FINAL ANSWER: <value>", logs)``; ``logs`` always contains
        ``"reasoning"`` (text before the first matching fence) and
        ``"program"`` (the stripped code), both empty strings when no fenced
        block is found.
    """
    instructions = """You will be given a question and you must answer it by writing a Python program to calculate the final answer.

You MUST always plan extensively before outputting any code.

# Workflow

## Problem Solving Steps
1. Determine a reasonable approach to solving the problem using code.
2. Write a Python program to calculate the final answer. Use comments to explain the structure of the code.

The Python function must be in a separate markdown code block and be called `solve`.
The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.
Be careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.
Be sure to always return a Python code block."""

    # User turn: optional image followed by the question text.
    user_parts = []
    if sample.image_input is not None:
        encoded_image = img2base64(sample.image_input)
        user_parts.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
            }
        )
    user_parts.append({"type": "text", "text": f"Question: {sample.text_input}"})

    messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": user_parts},
    ]
    params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9)
    completion = model.chat(messages, sampling_params=params, use_tqdm=False)[0]
    response = completion.outputs[0].text

    # Try the python-tagged fence first, then any fence. The code is the last
    # matching block; the reasoning is the text before the first such fence.
    reasoning, code_str = "", ""
    fence_patterns = (
        (r"```python(.*?)```", "```python"),
        (r"```(.*?)```", "```"),
    )
    try:
        for pattern, fence in fence_patterns:
            blocks = re.findall(pattern, response, re.DOTALL)
            if blocks:
                code_str = blocks[-1].strip()
                reasoning = response.partition(fence)[0].strip()
                break
    except Exception as e:
        print("Parsing error:", e)

    logs = {"reasoning": reasoning, "program": code_str}

    print("Reasoning:", reasoning)
    print("Code:", code_str)

    # Execute the generated program; `solve` takes no arguments in this variant.
    result, _stdout, _err = python_eval(code_str + "\nanswer = solve()")

    # Guard against results whose string conversion itself raises.
    try:
        answer_text = str(result)
    except Exception as e:
        print("Error converting output to string:", e)
        answer_text = "None"

    return "FINAL ANSWER: " + answer_text, logs

def gen_sym_reason_prog(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Single-pass solver: extract JSON symbols, then run ``solve(symbols)``.

    The model is asked for a JSON block of extracted information and a
    Python block defining ``solve(symbols)``. The last JSON block is parsed,
    the last Python block is taken as the program, and the program is
    executed through ``python_eval`` with the symbols bound afterwards.

    Args:
        sample: Input whose ``text_input`` is the question and whose optional
            ``image_input`` is attached as a base64 JPEG data URL.
        model: Chat model used for generation.
        temperature: Sampling temperature for the generation.

    Returns:
        ``("FINAL ANSWER: <value>", logs)``. ``logs`` gains ``"symbols"``,
        ``"reasoning"`` and ``"program"`` only for the pieces that could be
        extracted from the response.
    """
    logs = {}

    # NOTE(review): the last sentence of this prompt reads "always a JSON
    # code block" (verb missing). It is left as-is because editing the prompt
    # text would change what the model sees.
    instructions = """You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.

You MUST always plan extensively before outputting any symbols or code.

# Workflow

## Problem Solving Steps
1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.
2. Using the information extracted, determine a reasonable approach to solving the problem using code.
3. Write a Python program to calculate the final answer. Use comments to explain the structure of the code.
The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted.
The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.
Be careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.
Be sure to always a JSON code block and a Python code block."""

    # User turn: optional image followed by the question text.
    user_parts = []
    if sample.image_input is not None:
        encoded_image = img2base64(sample.image_input)
        user_parts.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
            }
        )
    user_parts.append({"type": "text", "text": f"Question: {sample.text_input}"})

    messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": user_parts},
    ]
    params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9)
    completion = model.chat(messages, sampling_params=params, use_tqdm=False)[0]
    response = completion.outputs[0].text

    # Symbols: parse the last ```json block; fall back to "" on any failure.
    try:
        symbols = json.loads(re.findall(r"```json(.*?)```", response, re.DOTALL)[-1])
    except Exception:
        symbols = ""
    else:
        logs["symbols"] = symbols

    print("JSON:", symbols)

    # Reasoning: text between the end of the first ```json block and the
    # next ```python fence.
    try:
        json_open = response.index("```json")
        json_end = response.index("```", json_open + len("```json")) + len("```")
        code_start = response.index("```python", json_end)
    except Exception:
        reasoning = ""
    else:
        reasoning = response[json_end:code_start]
        logs["reasoning"] = reasoning

    print("Reasoning:", reasoning)

    # Program: the last ```python block.
    try:
        code_str = re.findall(r"```python(.*?)```", response, re.DOTALL)[-1]
    except Exception:
        code_str = ""
    else:
        logs["program"] = code_str

    print("Code:", code_str)

    # The program comes first, then the symbols literal, then the call.
    program = "\n".join(
        [code_str, "symbols = " + str(symbols), "answer = solve(symbols)"]
    )
    result, _stdout, _err = python_eval(program)

    # Guard against results whose string conversion itself raises.
    try:
        answer_text = str(result)
    except Exception as e:
        print("Error converting output to string:", e)
        answer_text = "None"

    return "FINAL ANSWER: " + answer_text, logs


def gen_sym_reason_translate(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Solver that reasons in text first and then translates it into code.

    The model is told to emit a JSON block, step-by-step reasoning that
    references ``symbols[...]``, and finally a Python block defining
    ``solve(symbols)``. The last JSON block is parsed, the last Python block
    is taken as the program, and the program is executed through
    ``python_eval`` with the symbols bound first.

    Args:
        sample: Input whose ``text_input`` is the question and whose optional
            ``image_input`` is attached as a base64 JPEG data URL.
        model: Chat model used for generation.
        temperature: Sampling temperature for the generation.

    Returns:
        ``("FINAL ANSWER: <value>", logs)``. ``logs`` gains ``"symbols"``,
        ``"reasoning"`` and ``"program"`` only for the pieces that could be
        extracted from the response.
    """
    logs = {}

    instructions = """You will be given a question and you must answer it by extracting relevant symbols in JSON format, reasoning through the problem step by step, and then writing a Python program to calculate the final answer.

Follow the workflow below exactly.

# Workflow

## Problem Solving Steps
1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.
2. Using the information extracted, solve the problem step by step after the JSON code block. Whenever you use information from the JSON, explicitly mention it by referencing the specific information `symbols[...]` where `symbols` represents the extracted JSON data (assume the output JSON will be loaded into the variable called `symbols`).
3. Finally, translate each step of the reasoning from the above step into code to write a Python program to calculate the final answer.
The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted.
The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.
Be sure to always output a JSON code block and a Python code block."""

    # User turn: optional image followed by the question text.
    user_parts = []
    if sample.image_input is not None:
        encoded_image = img2base64(sample.image_input)
        user_parts.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
            }
        )
    user_parts.append({"type": "text", "text": f"Question: {sample.text_input}"})

    messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": user_parts},
    ]
    params = SamplingParams(temperature=temperature, max_tokens=50000, top_p=0.9)
    completion = model.chat(messages, sampling_params=params, use_tqdm=False)[0]
    response = completion.outputs[0].text

    # Symbols: parse the last ```json block; fall back to "" on any failure.
    try:
        symbols = json.loads(re.findall(r"```json(.*?)```", response, re.DOTALL)[-1])
    except Exception:
        symbols = ""
    else:
        logs["symbols"] = symbols

    print("JSON:", symbols)

    # Reasoning: text between the end of the first ```json block and the
    # next ```python fence.
    try:
        json_open = response.index("```json")
        json_end = response.index("```", json_open + len("```json")) + len("```")
        code_start = response.index("```python", json_end)
    except Exception:
        reasoning = ""
    else:
        reasoning = response[json_end:code_start]
        logs["reasoning"] = reasoning

    print("Reasoning:", reasoning)

    # Program: the last ```python block.
    try:
        code_str = re.findall(r"```python(.*?)```", response, re.DOTALL)[-1]
    except Exception:
        code_str = ""
    else:
        logs["program"] = code_str

    print("Code:", code_str)

    # Here the symbols literal comes first, then the program, then the call.
    program = "\n".join(
        ["symbols = " + str(symbols), code_str, "answer = solve(symbols)"]
    )
    result, _stdout, _err = python_eval(program)

    return "FINAL ANSWER: " + str(result), logs


def gen_sym_reason_prog_checks(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Symbols + program solver refined by a critique/revision loop.

    The model first produces a JSON block of symbols and a Python block
    defining ``solve(symbols)``, which is executed. Then, for at most eight
    rounds, a separate checker conversation reviews the symbols, program and
    execution result; its summary is appended to the main conversation and
    the model either revises the symbols/program or replies ``FINISHED``.
    After the loop the most recent symbols and program are executed once
    more and that result is returned.

    Args:
        sample: Input whose ``text_input`` is the question and whose optional
            ``image_input`` is attached as a base64 JPEG data URL.
        model: Chat model used for the solver and for the checker.
        temperature: Currently unused; both solver and checker sample with
            temperature 0.0 and top_p 1.0. NOTE(review): confirm that greedy
            decoding is intentional for this method.

    Returns:
        ``("FINAL ANSWER: <value>", logs)``. ``logs`` contains the lists
        ``"all_symbols"``, ``"all_reasoning"``, ``"all_programs"``,
        ``"all_evaluator"`` and ``"all_outputs"``, plus ``"symbols"``,
        ``"reasoning"`` and ``"program"`` holding the latest successfully
        extracted value of each.
    """
    max_revisions = 8  # upper bound on critique/revision rounds
    logs = {}

    system_prompt = """You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.

You MUST always plan extensively before outputting any symbols or code.

You MUST iterate and keep going until the problem is solved.

# Workflow

## Problem Solving Steps
1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.
2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.
3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.
The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == "__main__"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.
The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.
Be careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.
Be sure to always output a JSON code block and a Python code block."""

    checker_system_prompt = (
        "You will be given a question and a code solution and you must judge the quality of the code for solving the problem.\n"
        # The prompt carries a whitespace-only line here; it is spelled out
        # explicitly so tools that strip trailing whitespace cannot alter it.
        + " " * 27
        + """
Look for any of the following issues in the code:
- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).
- The code should not return None unless "None" is the correct answer.
- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.
- There should not be any example usage of the code.
- If there is a simpler way to solve the problem, please describe it.
- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.
- If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.
- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.

After analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."""
    )
    checker_system_message = {"role": "system", "content": checker_system_prompt}

    # Encode the image once; it is reused for the solver turn and for every
    # checker round instead of being re-encoded on each iteration.
    has_image = sample.image_input is not None
    img_str = img2base64(sample.image_input) if has_image else None

    def _question_content() -> list:
        """Return fresh message parts: the optional image, then the question."""
        parts = []
        if has_image:
            parts.append(
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{img_str}"
                    },
                },
            )
        parts.append(
            {
                "type": "text",
                "text": f"Question: {sample.text_input}",
            }
        )
        return parts

    def _chat(messages, params):
        """Return the text of the first completion for ``messages``."""
        return (
            model.chat(messages, sampling_params=params, use_tqdm=False)[0]
            .outputs[0]
            .text
        )

    def _evaluate(program, symbols):
        """Run ``program`` followed by ``answer = solve(symbols)``.

        A result whose ``str()`` raises is replaced by the string "None" so
        that later prompt formatting and the final return cannot fail on it.
        """
        result, stdout, err = python_eval(program + "\nsymbols = " + str(symbols) + "\nanswer = solve(symbols)")
        try:
            str(result)
        except Exception as e:
            print("Error converting output to string:", e)
            result = "None"
        return result, stdout, err

    logs["all_symbols"] = []
    logs["all_reasoning"] = []
    logs["all_programs"] = []
    logs["all_evaluator"] = []
    logs["all_outputs"] = []

    prompt = [{"role": "system", "content": system_prompt}, {"role": "user", "content": _question_content()}]
    sampling_params = SamplingParams(temperature=0.0, max_tokens=50000, top_p=1.0)
    checker_params = SamplingParams(temperature=0.0, max_tokens=50000, top_p=1.0)

    # --- first attempt -----------------------------------------------------
    response = _chat(prompt, sampling_params)
    if not response:
        # keep the assistant turn non-empty in the running conversation
        response = "None"
    prompt.append({"role": "assistant", "content": [{"type": "text", "text": response}]})
    logs["all_outputs"].append(response)

    # symbols: parse the last ```json block
    try:
        json_str = re.findall(r"```json(.*?)```", response, re.DOTALL)[-1]
        json_str = json.loads(json_str)
        logs["symbols"] = json_str
        logs["all_symbols"].append(json_str)
    except Exception:
        json_str = ""

    print("JSON:", json_str)

    # reasoning: text between the end of the first ```json block and the
    # next ```python fence
    try:
        json_end = response.index("```", response.index("```json") + len("```json")) + len("```")
        code_start = response.index("```python", json_end)
        reasoning = response[json_end:code_start]
        logs["reasoning"] = reasoning
        logs["all_reasoning"].append(reasoning)
    except Exception:
        reasoning = ""

    print("Reasoning:", reasoning)

    # program: the last ```python block
    try:
        code_str = re.findall(r"```python(.*?)```", response, re.DOTALL)[-1]
        logs["program"] = code_str
        logs["all_programs"].append(code_str)
    except Exception:
        code_str = ""

    print("Code:", code_str)

    result, stdout, err = _evaluate(code_str, json_str)
    print("Output:", result, stdout, err)

    # --- critique / revision rounds ----------------------------------------
    for _ in range(max_revisions):
        # The checker sees only the question, the current symbols/program and
        # the latest execution result, not the solver conversation.
        checker_prompt = [
            checker_system_message,
            {
                "role": "user",
                "content": _question_content() + [
                    {
                        "type": "text",
                        "text": f"""The following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.
```json
{json_str}
```

```python
{code_str}
```

Code execution result:
```
Return value: {result}
Standard output: {stdout}
Exceptions: {err}
```

Output a concrete and concise summary of only the issues that are present, do not include any code examples.
""",
                    }
                ],
            },
        ]
        checker_output = _chat(checker_prompt, checker_params)
        print("Summary of issues:", checker_output)
        logs["all_evaluator"].append(checker_output)

        # Feed the execution result and the critique back to the solver.
        prompt.append({"role": "user", "content": [{"type": "text", "text": f"""Please fix the issues with the code and symbols or output "FINISHED".
The following is the result of evaluating the above code with the extracted symbols.
```
Return value: {result}
Standard output: {stdout}
Exceptions: {err}
```

The following is the summary of issues found with the code or the extracted symbols by another model:
```
{checker_output}
```

If there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.
If stuck, try outputting different code to solve the problem in a different way.
You may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.
If everything is correct, output the word "FINISHED" and nothing else.
"""
        }]})
        response = _chat(prompt, sampling_params)
        if not response:
            response = "None"
        prompt.append({"role": "assistant", "content": [{"type": "text", "text": response}]})
        logs["all_outputs"].append(response)

        # Any occurrence of the marker ends the loop; blocks in such a
        # response are not extracted.
        if "FINISHED" in response:
            break

        if "```json" in response:
            try:
                json_str = re.findall(r"```json(.*?)```", response, re.DOTALL)[-1]
                json_str = json.loads(json_str)
                logs["symbols"] = json_str
                logs["all_symbols"].append(json_str)
            except Exception:
                # unparsable revision: keep the previous symbols
                pass

        try:
            code_str = re.findall(r"```python(.*?)```", response, re.DOTALL)[-1]
            logs["program"] = code_str
            logs["all_programs"].append(code_str)
        except Exception:
            # no new program in this round: keep the previous one
            pass

        print("New output:", response)
        result, stdout, err = _evaluate(code_str, json_str)

    # `result` always holds an evaluation result here (never the model's
    # "FINISHED" text), so this debug line reports the program's output.
    print("New Output:", result, stdout, err)

    # Final run of the most recent symbols and program; its result is returned.
    result, stdout, err = _evaluate(code_str, json_str)
    return "FINAL ANSWER: " + str(result), logs


def gen_sym_reason_prog_checks2(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Sample ten symbols+program solutions, let the model rate each, return the best.

    For every sampled completion the last ```json block is parsed as the
    symbols, the last ```python block is taken as the program, and the program
    is executed through ``python_eval`` with ``symbols`` bound to the parsed
    value. A second, greedy model call then rates the candidate from the
    question, symbols, code and execution result. The first candidate with the
    highest rating wins.

    Args:
        sample: Input carrying ``text_input`` and an optional ``image_input``.
        model: Chat model used both for generating and for rating candidates.
        temperature: Unused. Generation samples at a fixed 0.5 and the checker
            at 0.0; the parameter is kept so the signature matches the other
            prompting functions in this module.

    Returns:
        ``("FINAL ANSWER: <answer>", logs)`` where ``logs["symbols"]`` and
        ``logs["program"]`` hold the winning candidate's symbols and code.
    """
    logs = {}

    system_prompt = """You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.

You MUST always plan extensively before outputting any symbols or code.

You MUST iterate and keep going until the problem is solved.

# Workflow

## Problem Solving Steps
1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.
2. Using the information extracted, determine a reasonable approach to solving the problem using code.
3. Write a Python program to calculate the final answer. Use comments to explain the structure of the code.
The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted.
The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.
Be careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.
Be sure to always a JSON code block and a Python code block."""

    checker_system_prompt = """You will be give a question and a code solution and you must judge the quality of the code for solving the problem.

Look for any of the following issues in the code and output a summary of the issues, sorted by impact on answer correctness:
- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).
- The code execution result should not be None unless "None" is really the correct answer.
- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.
- If there are any bugs in the code which impact the correctness of the answer, please describe them.
- If there are any issues with the extracted symbols impacting correctness, please describe them.
- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the code or symbols.

Output a concrete and concise summary of only the issues that are present, do not include any code examples. Then output a final rating after "RATING:" of the code on a scale of 1-10, where 1 means the code is entirely wrong and 10 means the code has none of the above issues."""

    # The question (optional image + text) is identical for the generation
    # prompt and for every checker prompt, so it is built (and the image is
    # encoded) exactly once.
    question_content = []
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        question_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    question_content.append(
        {
            "type": "text",
            "text": f"Question: {sample.text_input}",
        }
    )

    prompt = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question_content},
    ]
    generation_params = SamplingParams(temperature=0.5, max_tokens=50000, top_p=1.0, n=1)
    checker_params = SamplingParams(temperature=0.0, max_tokens=50000, top_p=1.0)

    # Ten independent samples of the same prompt.
    outputs = [
        model.chat(prompt, sampling_params=generation_params, use_tqdm=False)[0]
        .outputs[0]
        .text
        for _ in range(10)
    ]

    # Rate each candidate.
    output_ratings = []
    code_outputs = []
    answer_outputs = []
    symbols_outputs = []
    for candidate in outputs:
        if not candidate:
            candidate = "None"

        # Symbols: the LAST ```json block in the completion. Best effort: any
        # failure (no block, invalid JSON) falls back to the empty string.
        try:
            json_str = json.loads(
                re.findall(r"```json(.*?)```", candidate, re.DOTALL)[-1]
            )
        except Exception:
            json_str = ""
        print("JSON:", json_str)
        symbols_outputs.append(json_str)

        # Free-text reasoning between the end of the first ```json block and
        # the next ```python fence; only printed for debugging.
        try:
            json_end = candidate.index("```", candidate.index("```json") + len("```json")) + len("```")
            code_start = candidate.index("```python", json_end)
            reasoning = candidate[json_end:code_start]
        except ValueError:
            reasoning = ""
        print("Reasoning:", reasoning)

        # Program: the LAST ```python block in the completion.
        python_blocks = re.findall(r"```python(.*?)```", candidate, re.DOTALL)
        code_str = python_blocks[-1] if python_blocks else ""
        print("Code:", code_str)
        code_outputs.append(code_str)

        # repr() (not str()) so that the "" fallback and JSON string values
        # become valid Python literals; str("") would yield the line
        # "symbols = ", a SyntaxError unrelated to the generated code.
        # For dict/list/number/bool/None values repr() and str() are identical.
        result, stdout, err = python_eval(
            "symbols = " + repr(json_str) + "\n" + code_str + "\nanswer = solve(symbols)"
        )
        answer_outputs.append(result)
        print("Output:", result, stdout, err)

        checker_prompt = [
            {"role": "system", "content": checker_system_prompt},
            {"role": "user", "content": question_content + [
                {"type": "text",
                "text": f"""The following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.
```json
{json_str}
```

```python
{code_str}
```

Code execution result:
```
Return value: {result}
Standard output: {stdout}
Exceptions: {err}
```

Output a concrete and concise summary of only the issues that are present, do not include any code examples. Then output a final rating after "RATING:" of the code on a scale of 1-10, where 1 means the code is entirely wrong and 10 means the code has none of the above issues."""
                    }
                ]
                },
        ]
        checker_output = (
            model.chat(checker_prompt, sampling_params=checker_params, use_tqdm=False)[0]
            .outputs[0]
            .text
        )
        print("Checker Output:", checker_output)

        # Rating: 1 when the checker emitted no "RATING:" marker at all,
        # 0 when the marker is present but no integer follows it.
        rating = 1
        if "RATING:" in checker_output:
            rating_match = re.search(r"RATING:\s*(\d+)", checker_output)
            rating = int(rating_match.group(1)) if rating_match else 0
        print("Answer:", answer_outputs[-1], "Rating:", rating)
        output_ratings.append(rating)

    # Select the best candidate; list.index() keeps the first one on ties.
    best_output_index = output_ratings.index(max(output_ratings))
    logs["symbols"] = symbols_outputs[best_output_index]
    logs["program"] = code_outputs[best_output_index]

    return "FINAL ANSWER: " + str(answer_outputs[best_output_index]), logs


def gen_sym_reason_prog_iter(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    """Generate symbols and a ``solve`` program, then iteratively revise the program.

    The model is first asked for a ```json block of extracted symbols and a
    ```python block defining ``solve(symbols)``. The program is run through
    ``python_eval``; its return value, exceptions and stdout are fed back to
    the model in the same conversation, which is asked to revise the code.
    This repeats for at most ten rounds and stops as soon as a round yields
    the same program as the previous one. The symbols are extracted once and
    are not revised.

    Args:
        sample: Input carrying ``text_input`` and an optional ``image_input``.
        model: Chat model used for the whole conversation.
        temperature: Unused. Sampling runs at a fixed 0.3; the parameter is
            kept so the signature matches the other prompting functions in
            this module.

    Returns:
        ``("FINAL ANSWER: <result>", logs)`` where ``<result>`` is the value
        from the last evaluation. ``logs`` may contain ``"symbols"``,
        ``"reasoning"`` and ``"program"`` (the last extracted program).
    """
    logs = {}
    prompt_content = []

    # Question: optional image followed by the question text.
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    prompt_content.append(
        {
            "type": "text",
            "text": f"Question: {sample.text_input}",
        }
    )
    prompt_content.append(
        {
            "type": "text",
            "text": "Answer the question by first extracting relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing. Using the information extracted, think step by step about how to solve the problem and then write a Python program to perform the necessary reasoning to calculate the final answer. The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning. Be sure to always a JSON code block and a Python code block.",
        }
    )

    prompt = [{"role": "user", "content": prompt_content}]
    sampling_params = SamplingParams(temperature=0.3, max_tokens=50000, top_p=0.9)
    reply = (
        model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
    )
    prompt.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})

    # Symbols: the LAST ```json block of the reply. Best effort: any failure
    # (no block, invalid JSON) falls back to the empty string.
    try:
        json_str = json.loads(re.findall(r"```json(.*?)```", reply, re.DOTALL)[-1])
        logs["symbols"] = json_str
    except Exception:
        json_str = ""

    print("JSON:", json_str)

    # Free-text reasoning between the end of the first ```json block and the
    # next ```python fence.
    try:
        json_end = reply.index("```", reply.index("```json") + len("```json")) + len("```")
        code_start = reply.index("```python", json_end)
        reasoning = reply[json_end:code_start]
        logs["reasoning"] = reasoning
    except ValueError:
        reasoning = ""

    print("Reasoning:", reasoning)

    # Program: the LAST ```python block of the reply.
    try:
        code_str = re.findall(r"```python(.*?)```", reply, re.DOTALL)[-1]
        logs["program"] = code_str
    except IndexError:
        code_str = ""

    print("Code:", code_str)

    # repr() (not str()) so that the "" fallback and JSON string values become
    # valid Python literals; str("") would yield the line "symbols = ", a
    # SyntaxError unrelated to the generated code. For dict/list/number/bool/
    # None values repr() and str() are identical.
    symbols_assignment = "symbols = " + repr(json_str) + "\n"

    output, stdout, err = python_eval(symbols_assignment + code_str + "\nanswer = solve(symbols)")
    print("Output:", output, stdout, err)

    previous_code = code_str
    for _ in range(10):
        # Feed the latest execution result back and ask for revised code.
        prompt.append(
            {"role": "user", "content": [
                {"type": "text",
                "text": f"""The result of evaluating the code with the extracted symbols is:
```
Return value: {output}
Exceptions: {err if err else ""}
Standard output: {stdout if stdout else ""}
```

Please revise the code to fix any issues or output the code unchanged in the markdown code block. Before outputting the code, please explain the issue and the plan for fixing it.
Some things to keep in mind:
- The code should not hardcode the answer into the function, if it does, please rewrite the code to use the `symbols` argument for computing the final return value.
- If the code is input independent, please rewrite it to be input dependent.
- The code should use the `symbols` argument for computing the final return value.
- The code should not use libraries other than sympy, math, or z3.
- The code should not return None unless "None" is the correct answer.
- Any example usage of the code can be removed.
- If the code does not provide useful information when the result is None, modify it to throw an error with useful information or use prints.
- If stuck, do not give up, try a different approach to solving the problem.
- If the code can be improved or simplified, please do so.
"""
                    }
                ]
                }
            )
        reply = (
            model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text
        )
        prompt.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})

        try:
            code_str = re.findall(r"```python(.*?)```", reply, re.DOTALL)[-1]
            logs["program"] = code_str
        except IndexError:
            # No code block in the reply: treat it as "code unchanged" rather
            # than evaluating an empty program, which would discard the last
            # working answer and leave the returned value out of sync with
            # logs["program"].
            code_str = previous_code

        print("New output:", reply)
        output, stdout, err = python_eval(symbols_assignment + code_str + "\nanswer = solve(symbols)")
        print("New Output:", output, stdout, err)

        # Converged: the model returned the same program as last round.
        if code_str == previous_code:
            break
        previous_code = code_str

    return "FINAL ANSWER: " + str(output), logs


def gen_sym_reason_prog_test_iter(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
) -> Tuple[str, dict]:
    # Adding the input to the prompt
    logs = {}
    prompt_content = []

    # add the question
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    prompt_content.append(
        {
            "type": "text",
            "text": f"Question: {sample.text_input}",
        }
    )
    prompt_content.append(
        {
            "type": "text",
            # "text": f"Answer the question by first extracting relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing. Using the information extracted, think step by step about how to solve the problem and then write a Python program to perform the necessary reasoning to calculate the final answer. The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning. Be sure to always a JSON code block and a Python code block.",
#             "text": f"""Answer the question by first extracting relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing and place the JSON in a markdown code block. After the JSON code block, think through the solution step by step. Finally, write a Python program which calculates the final answer using the information extracted. The Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. The Python function must use its argument `symbols` for computing the final return value, and **do not hardcode the answer into the function**.
# For problems requiring complex math, you may use the sympy library in Python, but be sure to provide the necessary imports.
# In addition to sympy, you can use the math library and the z3 library. Do not use any other libraries.
# For problems where it's not clear how to use code, you can use a decision tree or other structured approach to clearly document the decisions/steps of reasoning in code.
# """,
            "text": f"""Answer the question by first extracting relevant information from the input as JSON and then reasoning about the information extracted in the JSON to reach the final answer. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing. The JSON must be enclosed in a markdown code block. Finally, use the output reasoning to write a Python function in a separate markdown code block which solves the problem based on the previous reasoning. The function must be called `solve` and accept a single input called `symbols` representing the JSON information extracted. The Python code should not just return the hardcoded answer or perform all reasoning in comments. Instead, leverage the code itself to perform the reasoning. Be sure to always a JSON code block and a Python code block. The format of the output should be:
```json
... place the JSON here ...
```

Step by step reasoning to compute the final answer here.

```python
... place the Python code here which performs the reasoning to calculate the final answer...
```"""
#             "text": f"""Answer the question by first extracting relevant information from the input as JSON.
# Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing and place the JSON in a markdown code block.
# After the JSON code block, perform any necessary planning/exploration for how to solve the problem using a Python program.
# Finally, write a Python program which calculates the final answer using the information extracted.
# The Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted.
# The Python function must use its argument `symbols` for computing the final return value, and **do not hardcode the answer into the function**.
# Feel free to use comments to explain the reasoning process, but use the code itself to *perform* the reasoning.
# For problems requiring complex math, you may use the sympy library in Python, but be sure to provide the necessary imports.
# In addition to sympy, you can use the math library and the z3 library. Do not use any other libraries.
# For problems where it's not clear how to use code, you can use a decision tree or other structured approach to clearly document the decisions/steps of reasoning in code.
# An example of the expected output format is:
# ```json
# ... place the JSON here ...
# ```

# Plan for how to solve the problem here.

# ```python
# ... place the Python code here which performs the reasoning to calculate the final answer...
# ```
# """,
#             "text": """Analyze the provided input and output (1) a JSON code block with the relevant information extracted from the input and (2) a Python function which takes the JSON as input and computes the final answer.
# Using the output JSON, think step by step about how to solve the problem and how to write a Python program to perform the necessary reasoning.
# Then write a Python program to perform the necessary reasoning to calculate the final answer.
# The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block.
# The function must be called `solve` and accept a single input called `symbols` representing the JSON information extracted.
# DO NOT EVER write a function which ignores the input or simply returns a hardcoded answer.
# Instead, the code should perform important steps of the reasoning using the input symbols. Be sure to ALWAYS output a JSON code block and a Python code block.
# The value returned by the Python code must match what is expected by the question. Do not return JSON. Sometimes this may require some additional processing of the output.
# For problems requiring complex math, you may use the sympy library in Python, but be sure to provide the necessary imports.
# In addition to sympy, you can use the math library and the z3 library. Do not use any other libraries.
# For problems where it's not clear how to use code, you can use a decision tree or other structured approach to clearly document the decisions/steps of reasoning in code."""
        }
    )

    prompt = [{"role": "user", "content": prompt_content}]
    sampling_params = SamplingParams(temperature=0.3, max_tokens=50000, top_p=0.9)
    output = (
        model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
    )
    prompt.append({"role": "assistant", "content": [{"type": "text", "text": output}]})

    # extract the json from the first JSON code block
    try:
        json_str = re.findall(r"```json(.*?)```", output, re.DOTALL)[-1]
        json_str = json.loads(json_str)
        logs["symbols"] = json_str
    except Exception:
        json_str = ""

    print("JSON:", json_str)

    # extract reasoning after the json block
    try:
        # find location of the final ``` of the json block
        json_end = output.index("```", output.index("```json") + len("```json")) + len("```")
        code_start = output.index("```python", json_end)
        reasoning = output[json_end:code_start]
        logs["reasoning"] = reasoning
    except Exception:
        reasoning = ""

    print("Reasoning:", reasoning)

    # extract the code from the second code block
    try:
        code_str = re.findall(r"```python(.*?)```", output, re.DOTALL)[-1]
        logs["program"] = code_str
    except Exception:
        code_str = ""

    print("Code:", code_str)

    output, stdout, err = python_eval("symbols = " + str(json_str) + "\n" + code_str + "\nanswer = solve(symbols)")
    print("Output:", output, stdout, err)


    # prompt = []
    previous_output = code_str
    test_output = ""
    test_stdout = ""
    test_err = ""
    for i in range(10):
        # add the code output to the prompt and get revised code
#         if i == 0:
#             prompt.append(
#                 {"role": "user", "content": [
#                     {"type": "text",
#                     "text": f"""Question: {sample.text_input}
# The following are extracted symbols from the above input and a program which takes the JSON information as an argument called `symbols` and computes the answer. The result of evaluating the code is then shown below. If there is a problem with the code, please first explain the issue and the plan for fixing it and then output a fixed version of the code in a markdown code block. Otherwise, output the code unchanged in the markdown code block.
# Some things to keep in mind:
# - The code should not hardcode the answer into the function.
# - The code should use the `symbols` argument for computing the final return value.
# - The code should not use libraries other than sympy, math, or z3.
# - The code should not return None unless "None" is the correct answer.
# - Do not include any example usage of the code.
# - If the code does not provide useful information when the result is None, modify it to throw an error with useful information or use prints.

# ```json
# {json_str}
# ```

# ```python
# {code_str}
# ```

# The result of evaluating the code on the above input is:
# ```
# Return value: {output}
# Exceptions: {err if err else ""}
# Standard output: {stdout if stdout else ""}
# ```
# """
#                     }
#                 ]
#                 }
#             )
#         else:
        prompt.append(
            {"role": "user", "content": [
                {"type": "text",
                "text": f"""The result of evaluating the code with the extracted symbols is:
```
Return value: {output}
Exceptions: {err if err else ""}
Standard output: {stdout if stdout else ""}
```

Output of test cases if provided:
```
{test_stdout}
```

If positive that the code is correct, output the code unchanged in a markdown code block.
Otherwise, please revise the code to fix any issues. Before outputting the code, please explain the issue and the plan for fixing it.
In addition to outputting the revised code, also output some test cases to verify the code is correct. The test cases should be complete Python code (without a main function) which calls `solve` with various inputs and it should be enclosed in a separate markdown code block at the end of the output. The printed output from executing the code in this block will be given to you.
The format of the tests should be:
```python
symbols = ... # the JSON information extracted from the input
print("Test case 1:", solve(symbols))
...
```

Some things to keep in mind:
- The code should not hardcode the answer into the function, if it does, please rewrite the code to use the `symbols` argument for computing the final return value.
- If the code is input independent, please rewrite it to be input dependent.
- The code should use the `symbols` argument for computing the final return value.
- The code should not use libraries other than sympy, math, or z3.
- The code should not return None unless "None" is the correct answer.
- Any example usage of the code can be removed.
- If the code does not provide useful information when the result is None, modify it to throw an error with useful information or use prints.
- If stuck, do not give up, try a different approach to solving the problem.
- If the code can be improved or simplified, please do so.
- Try writing test cases which test the code with various inputs where the answer is known.
"""
                    }
                ]
                }
            )
        output = (
            model.chat(prompt, sampling_params=sampling_params, use_tqdm=False)[0]
            .outputs[0]
            .text
        )
        prompt.append({"role": "assistant", "content": [{"type": "text", "text": output}]})
        # extract the code from the output
        try:
            code_str = re.findall(r"```python(.*?)```", output, re.DOTALL)[0]
            logs["program"] = code_str
        except Exception:
            code_str = ""

        # extract the test cases from the output
        try:
            test_cases = re.findall(r"```python(.*?)```", output, re.DOTALL)[1]
            logs["test_cases"] = test_cases
        except Exception:
            test_cases = ""

        print("New output:", output)
        output, stdout, err = python_eval("symbols = " + str(json_str) + "\n" + code_str + "\nanswer = solve(symbols)")
        print("New Output:", output, stdout, err)

        test_output, test_stdout, test_err = python_eval(code_str + "\n" + test_cases)
        print("Test Output:", test_output, test_stdout, test_err)

        if code_str == previous_output:
            break
        previous_output = code_str


    return "FINAL ANSWER: " + str(output), logs


# Fenced-block patterns used to pull code out of model replies. The
# digit-suffixed alternative comes first so that a "```python3" tag is consumed
# whole instead of leaking the "3" into the captured code.
_ABLATE_CHECKS_PY_FENCE = re.compile(
    r"```(?:python\d*|py)\s*(.*?)```", re.DOTALL | re.IGNORECASE
)
_ABLATE_CHECKS_ANY_FENCE = re.compile(r"```(.*?)```", re.DOTALL)


def _ablate_checks_extract_code(text, allow_generic=True):
    """Return ``(code, reasoning)`` parsed from a model reply, or ``None``.

    ``code`` is the stripped body of the last Python-tagged fenced block and
    ``reasoning`` is the stripped text preceding the first such block. When no
    Python-tagged block exists and ``allow_generic`` is true, untagged fenced
    blocks are used instead. ``None`` means no usable block was found.
    """
    blocks = list(_ABLATE_CHECKS_PY_FENCE.finditer(text))
    if not blocks and allow_generic:
        blocks = list(_ABLATE_CHECKS_ANY_FENCE.finditer(text))
    if not blocks:
        return None
    code = blocks[-1].group(1).strip()
    # Slice at the position of the fence that actually matched, so "```py" and
    # differently-cased tags are handled the same way as "```python".
    reasoning = text[: blocks[0].start()].strip()
    return code, reasoning


def _ablate_checks_run_solve(code_str):
    """Evaluate ``code_str`` followed by ``answer = solve()`` via ``python_eval``.

    Returns the ``(result, stdout, err)`` triple from ``python_eval``. If the
    result cannot be converted with ``str()`` it is replaced by ``"None"`` so it
    can always be interpolated into prompts and the final answer.
    """
    result, stdout, err = python_eval(code_str + "\nanswer = solve()")
    try:
        str(result)
    except Exception as e:
        print("Error converting output to string:", e)
        result = "None"
    return result, stdout, err


def _ablate_checks_chat(model, messages, sampling_params):
    """Run one chat turn and return the text of the first completion."""
    return (
        model.chat(messages, sampling_params=sampling_params, use_tqdm=False)[0]
        .outputs[0]
        .text
    )


def ablate_gen_reason_prog_checks(
    sample: RawInput,
    model: LLM,
    temperature: float = 0.0,
    max_rounds: int = 30,
) -> Tuple[str, dict]:
    """Answer a question with a generated program refined by a checker loop.

    The model is first asked to plan and write a ``solve`` function. The
    program is executed, and then for up to ``max_rounds`` rounds:

    1. a separate "checker" conversation (question + program + execution
       result) produces a summary of issues;
    2. that summary and the execution result are appended to the solver
       conversation, which either replies with revised code or with
       ``FINISHED``;
    3. the (possibly revised) program is executed again.

    The loop stops early as soon as a solver reply contains ``FINISHED``.

    Args:
        sample: Input whose ``text_input`` is the question and whose optional
            ``image_input`` is attached as a base64 JPEG data URL.
        model: Chat model used for both the solver and the checker turns.
        temperature: Not used by this function; both conversations sample with
            temperature 0.0 and top_p 1.0. The parameter is kept so existing
            callers that pass it keep working.
        max_rounds: Maximum number of checker/revision rounds.

    Returns:
        A tuple ``("FINAL ANSWER: <result>", logs)`` where ``<result>`` is the
        value returned by the most recent execution of the current program and
        ``logs`` holds ``all_reasoning``, ``all_programs``, ``all_evaluator``,
        ``all_outputs``, ``reasoning`` and ``program``.
    """
    logs = {
        "all_reasoning": [],
        "all_programs": [],
        "all_evaluator": [],
        "all_outputs": [],
    }

    system_prompt = """You will be given a question and you must answer it by writing a Python program to calculate the final answer.

You MUST always plan extensively before outputting any code.

# Workflow

## Problem Solving Steps
1. Determine a reasonable approach to solving the problem using code.
2. Write a Python program to calculate the final answer. Use comments to explain the structure of the code.

The Python function must be in a separate markdown code block and be called `solve`.
The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.
Be careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.
Be sure to always output a Python code block."""

    checker_system_prompt = """You will be given a question and a code solution and you must judge the quality of the code for solving the problem.
                           
Look for any of the following issues in the code:
- The code should not return None unless "None" is the correct answer.
- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.
- There should not be any example usage of the code.
- If there is a simpler way to solve the problem, please describe it.
- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.
- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.

After analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."""

    # The question (optional image + text) is identical for the solver and for
    # every checker round, so it is built, and the image encoded, only once.
    question_content = []
    if sample.image_input is not None:
        img_str = img2base64(sample.image_input)
        question_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_str}"
                },
            },
        )
    question_content.append(
        {
            "type": "text",
            "text": f"Question: {sample.text_input}",
        }
    )

    prompt = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question_content},
    ]
    sampling_params = SamplingParams(temperature=0.0, max_tokens=50000, top_p=1.0)
    checker_params = SamplingParams(temperature=0.0, max_tokens=50000, top_p=1.0)

    reply = _ablate_checks_chat(model, prompt, sampling_params)
    if not reply:
        # Keep the conversation well-formed even when the model returns nothing.
        reply = "None"
    prompt.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})
    logs["all_outputs"].append(reply)

    # Extract reasoning and code from the first reply; untagged fences are
    # accepted here as a fallback.
    extracted = _ablate_checks_extract_code(reply, allow_generic=True)
    code_str, reasoning = extracted if extracted is not None else ("", "")

    logs["reasoning"] = reasoning
    logs["all_reasoning"].append(reasoning)

    print("Reasoning:", reasoning)

    logs["program"] = code_str
    logs["all_programs"].append(code_str)

    print("Code:", code_str)

    # The execution result is kept in its own variables (never overwritten by
    # model text), so the value returned at the end is exactly the one that
    # the checker and solver last saw.
    result, stdout, err = _ablate_checks_run_solve(code_str)
    print("Output:", result, stdout, err)

    for _ in range(max_rounds):
        checker_prompt = [
            {"role": "system", "content": checker_system_prompt},
            {"role": "user", "content": question_content + [
                {"type": "text",
                "text": f"""The following is the Python program which computes the answer.

```python
{code_str}
```

Code execution result:
```
Return value: {result}
Standard output: {stdout}
Exceptions: {err}
```

Output a concrete and concise summary of only the issues that are present, do not include any code examples.
"""
                    }
                ]
                },
        ]
        checker_output = _ablate_checks_chat(model, checker_prompt, checker_params)
        print("Summary of issues:", checker_output)
        logs["all_evaluator"].append(checker_output)

        prompt.append({"role": "user", "content": [{"type": "text", "text": f"""Please fix the issues with the code or output "FINISHED".
The following is the result of evaluating the above code.
```
Return value: {result}
Standard output: {stdout}
Exceptions: {err}
```

The following is the summary of issues found with the code by another model:
```
{checker_output}
```

If there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.
If stuck, try outputting different code to solve the problem in a different way.
If everything is correct, output the word "FINISHED" and nothing else.
"""
        }]})
        reply = _ablate_checks_chat(model, prompt, sampling_params)
        if not reply:
            reply = "None"
        prompt.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})
        logs["all_outputs"].append(reply)

        if "FINISHED" in reply:
            break

        # Only Python-tagged fences count as revised code here; if the reply
        # has none, the previous program is deliberately kept and re-run.
        extracted = _ablate_checks_extract_code(reply, allow_generic=False)
        if extracted is not None:
            code_str = extracted[0]
            logs["program"] = code_str
            logs["all_programs"].append(code_str)

        print("New output:", reply)
        result, stdout, err = _ablate_checks_run_solve(code_str)
        print("New Output:", result, stdout, err)

    return "FINAL ANSWER: " + str(result), logs