"""
Reasoning: this script is used to reason about the input question, text data, and image data.
"""

from PIL import Image
import llm as llm
import re



def reasoning(question: str, data_text: str = "", data_images: list = None, model: str = 'gpt-4o', qa_history: list = None) -> tuple:
    """
    Answer a question with an LLM, using text context and optional prior QA pairs.

    A single text prompt is assembled from the context, the question, any QA
    history, and an instruction to put the final answer in ``\\boxed{}``. The
    prompt is sent through ``llm.get_answer`` and the reply is post-processed.

    Args:
        question (str): The question to reason about. Must be non-empty.
        data_text (str): The relevant text data, placed in the [Context] section.
        data_images (list): Accepted for interface compatibility only; images
            are currently NOT included in the prompt or forwarded to the model.
        model (str): Model name passed through to ``llm.get_answer``.
        qa_history (list): Optional list of dicts, each with a "question" and
            an "answer" key, appended to the prompt as reference material.

    Returns:
        tuple: ``(answer, boxed_answer)`` where ``answer`` is the raw model
        output and ``boxed_answer`` is the boxed content extracted from the
        output after ``<think>...</think>`` sections have been removed.

    Raises:
        ValueError: If ``question`` is empty.
        KeyError: If a ``qa_history`` entry lacks "question" or "answer".
    """
    if not question:
        raise ValueError("A question must be provided.")

    # Collect the prompt as pieces and join once at the end.
    prompt_parts = [
        f"Based on the following [Context]:\n---\n{data_text}\n---\n",
        # Trailing newline keeps the question separate from what follows.
        f"Answer the following question:\n{question}\n",
    ]

    # Previously the history text was built but never added to the prompt,
    # so the model never saw it. An empty history adds nothing.
    if qa_history:
        prompt_parts.append(
            "Additionally, it is known that regarding this question, you can "
            "refer to the following previous question answering: \n"
        )
        for i, qa in enumerate(qa_history):
            q_his = qa["question"]
            a_his = qa["answer"]
            prompt_parts.append(f"Question{i}: \"{q_his}\", Answer{i}: \"{a_his}\"\n")

    # The formatting instruction goes last so it is the final thing the model reads.
    prompt_parts.append("Put your final answer within \\boxed{}.")
    prompt = "".join(prompt_parts)

    answer = llm.get_answer(prompt, model=model)
    final_answer = parse_answer_r1(answer)
    return answer, extract_box_content(final_answer)

def extract_box_content(text: str) -> str:
    """
    Extract the content of the first ``\\box{...}`` or ``\\boxed{...}`` in the text.

    Braces are matched by depth, so nested LaTeX such as
    ``\\boxed{\\frac{1}{2}}`` is returned whole, and the content may span
    several lines. A ``\\box{`` occurrence anywhere in the text takes priority
    over ``\\boxed{``.

    Args:
        text (str): The text to extract the content from.
    Returns:
        str: The content within the box, or ``text`` unchanged when no
        properly closed box is found.
    """
    # "\box{" is tried before "\boxed{"; neither marker is a prefix of the other.
    for marker in ("\\box{", "\\boxed{"):
        search_from = 0
        while True:
            start = text.find(marker, search_from)
            if start == -1:
                break

            content_start = start + len(marker)
            depth = 1  # the marker's own opening brace
            pos = content_start
            while pos < len(text):
                ch = text[pos]
                if ch == "\\":
                    # Skip the escaped character so "\{" and "\}" are not counted.
                    pos += 2
                    continue
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        return text[content_start:pos]
                pos += 1

            # This occurrence never closed; try the next one, if any.
            search_from = content_start

    return text

    

def parse_answer_r1(answer):
    """
    Strip every ``<think>...</think>`` section from a model reply.

    Args:
        answer (str): The raw reply, possibly containing think sections.
    Returns:
        str: The reply without its think sections, with surrounding
        whitespace trimmed.
    """
    # DOTALL lets a think section span multiple lines.
    think_section = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
    without_think = think_section.sub("", answer)
    return without_think.strip()

def parse_answer(answer):
    """
    Trim echoed prompt text and control markers from a model reply.

    Three clean-up passes run in order, each followed by a whitespace strip:
    everything from "Based on the following [Context]:" onwards is dropped,
    the ``**final_answer**`` / ``**stop**`` markers are removed, and
    everything from the first "---" onwards is dropped.

    Args:
        answer (str): The answer to parse.
    Returns:
        str: The cleaned answer.
    """
    cleanup_patterns = (
        # Echoed context header and everything after it
        r"Based on the following \[Context\]:[\s\S]*",
        # **final_answer** and **stop** markers
        r"\*\*final_answer\*\*|\*\*stop\*\*",
        # "---" separator and everything after it
        r"---[\s\S]*",
    )

    result = answer
    for pattern in cleanup_patterns:
        result = re.sub(pattern, "", result).strip()
    return result


# Example usage (optional, for standalone testing)
if __name__ == "__main__":
    # Manual smoke test: running this file directly sends one example prompt
    # to the model via reasoning() and prints the reply.

    # Example inputs
    # NOTE: this first question/context pair (Adobe income statement) is
    # overwritten by the second pair below before reasoning() is called, so
    # it is never sent to the model.
    question = "What is Adobe's year-over-year change in unadjusted operating income from FY2015 to FY2016 (in units of percents and round to one decimal place)? Give a solution to the question by using the income statement."
    context = "Table of Contents\n62\nADOBE SYSTEMS INCORPORATED\nCONSOLIDATED STATEMENTS OF INCOME\n(In thousands, except per share data)\n \nYears Ended\n \nDecember 2,\n2016\nNovember 27,\n2015\nNovember 28,\n2014\nRevenue:\n \nSubscription\n$\n4,584,833\n$\n3,223,904\n$\n2,076,584\nProduct\n800,498\n1,125,146\n1,627,803\nServices and support\n469,099\n446,461\n442,678\nTotal revenue\n5,854,430\n4,795,511\n4,147,065\n \nCost of revenue:\nSubscription\n461,860\n409,194\n335,432\nProduct\n68,917\n90,035\n97,099\nServices and support\n289,131\n245,088\n189,549\nTotal cost of revenue\n819,908\n744,317\n622,080\n \nGross profit\n5,034,522\n4,051,194\n3,524,985\n \nOperating expenses:\nResearch and development\n975,987\n862,730\n844,353\nSales and marketing\n1,910,197\n1,683,242\n1,652,308\nGeneral and administrative\n577,710\n531,919\n543,332\nRestructuring and other charges\n(1,508)\n1,559\n19,883\nAmortization of purchased intangibles\n78,534\n68,649\n52,424\nTotal operating expenses\n3,540,920\n3,148,099\n3,112,300\n \nOperating income\n1,493,602\n903,095\n412,685\n \nNon-operating income (expense):\nInterest and other income (expense), net\n13,548\n33,909\n7,267\nInterest expense\n(70,442)\n(64,184)\n(59,732)\nInvestment gains (losses), net\n(1,570)\n961\n1,156\nTotal non-operating income (expense), net\n(58,464)\n(29,314)\n(51,309)\nIncome before income taxes\n1,435,138\n873,781\n361,376\nProvision for income taxes\n266,356\n244,230\n92,981\nNet income\n$\n1,168,782\n$\n629,551\n$\n268,395\nBasic net income per share\n$\n2.35\n$\n1.26\n$\n0.54\nShares used to compute basic net income per share\n498,345\n498,764\n497,867\nDiluted net income per share\n$\n2.32\n$\n1.24\n$\n0.53\nShares used to compute diluted net income per share\n504,299\n507,164\n508,480\n See accompanying Notes to Consolidated Financial Statements."
    
    # Second example (Emerson cash-flow table in a delimiter-tagged layout).
    # This is the pair actually passed to reasoning() below.
    question = "what was the change as a percent of sales in operating cash flow between 2016 and 2018?"
    context = """
    ("table"<|>Emerson Annual Report Cash Flow Data<|>Cash flow metrics for 2016, 2017, and 2018 from Emerson's annual report (values in millions of dollars))##
    ("header"<|>2016<|>2017<|>2018")##
    ("row"<|>Operating cash flow: $2,499<|>Operating cash flow: $2,690<|>Operating cash flow: $2,892")##
    ("row"<|>Capital expenditures: $447<|>Capital expenditures: $476<|>Capital expenditures: $617")##
    ("row"<|>Free cash flow: $2,052<|>Free cash flow: $2,214<|>Free cash flow: $2,275")##
    ("row"<|>Operating working capital: $755<|>Operating working capital: $1,007<|>Operating working capital: $985")##
    <|COMPLETE|>
    """
    # reasoning() returns a pair: the raw model output and the boxed answer
    # extracted from it.
    result, extracted_result = reasoning(question, context, model="deepseek-r1")

    print(result)
    print(extracted_result)  
