import pandas as pd
import numpy as np

# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/rl/LUFFY/data/openr1.parquet"
# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/polaris-data-53K.parquet"
# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/valid.parquet"
# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/valid.all.parquet"
# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/deepmath-5k.parquet"
# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/l1/deepscaler.parquet"
# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/dapo-math-17k-real.parquet"
# data = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/countdown/train.parquet"
# data = ["/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/dapo-math-17k_qwen3_polaris.parquet"]
# data = ["/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/valid.all.parquet"]
# Input parquet file(s). May be a single path string or a list of paths;
# see normalize_paths().
data = ["/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/valid_ood_qwen3.parquet"]

# Destination of the rewritten dataset.
OUTPUT_PATH = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/valid_ood_llama.parquet"

# Instruction suffix to remove from the final message of every prompt.
BOXED_SUFFIX = " Let's think step by step and output the final answer within \\boxed{}."


def normalize_paths(paths):
    """Return *paths* as a list of path strings.

    A bare string is wrapped in a one-element list; iterating it directly
    would yield single characters instead of a path. Any other iterable is
    copied into a list.
    """
    if isinstance(paths, str):
        return [paths]
    return list(paths)


def strip_instruction_suffix(text, suffix=BOXED_SUFFIX):
    """Return the part of *text* that precedes the first *suffix*.

    If *suffix* does not occur, *text* is returned unchanged.
    """
    return text.partition(suffix)[0]


def rewrite_prompt(prompt, suffix=BOXED_SUFFIX):
    """Build a single-message user prompt from the last message of *prompt*.

    Args:
        prompt: Indexable sequence of message mappings; only the last
            element's ``"content"`` value is read.
        suffix: Instruction text to cut off (together with anything after it).

    Returns:
        A one-element list ``[{"content": ..., "role": "user"}]``. Earlier
        messages of *prompt* are not carried over.
    """
    last_content = prompt[-1]["content"]
    return [{"content": strip_instruction_suffix(last_content, suffix), "role": "user"}]


def main():
    """Rewrite the ``prompt`` column of every input file and save the result."""
    rows = []
    for path in normalize_paths(data):
        df = pd.read_parquet(path)
        # Rows are rebuilt one at a time (rather than editing the column in
        # place) so the resulting frame is assembled exactly as before,
        # including each row's original index label.
        for position in range(len(df)):
            row = df.iloc[position].copy()
            print(position, row["prompt"])
            row["prompt"] = rewrite_prompt(row["prompt"])
            rows.append(row)

    final_data = pd.DataFrame(rows)
    print(len(final_data))
    final_data.to_parquet(OUTPUT_PATH)
    # Preview the first rewritten prompt; skipped when there are no rows so
    # an empty input does not end in an IndexError.
    if len(final_data) > 0:
        print(final_data.iloc[0]['prompt'])


if __name__ == "__main__":
    main()
