from datasets import load_dataset
import pandas as pd

# Text written into the first message of every prompt, for both the training
# and the validation split. Kept in a single constant so the two splits cannot
# drift apart.
# NOTE(review): presumably message 0 is the system message -- confirm against
# the dataset schema.
GUIDANCE_PROMPT = """Your task is to provide a systematic reasoning approach for the given problem. Structure your response using the format: \"<guidance>\n {your guidance} </guidance>\n\". Focus on outlining the key analytical steps and overall methodology needed to solve the problem systematically."""

# --- Training split -------------------------------------------------------
dataset = load_dataset("Elliott/Openr1-Math-46k-8192", split="train")

# Show one untouched example so the record layout is visible in the log.
print(dataset[0])

train_rows = []
for example in dataset:
    # Overwrite the content of the first message with the guidance text.
    example["prompt"][0]["content"] = GUIDANCE_PROMPT
    train_rows.append(example)

train_df = pd.DataFrame(train_rows)
train_df.to_parquet("../data/openr1_stage1_new.parquet")

# --- Validation split -----------------------------------------------------
val_df = pd.read_parquet("../data/valid.parquet")

# Build the replacement column in one pass and assign it as a whole.
# Writing a list-like value into a single cell through ``.iloc[i, j] = value``
# is fragile: pandas may treat the value as several values to align or
# broadcast rather than as one cell object.
new_prompts = []
for position, prompt in enumerate(val_df["prompt"]):
    print(f"Processing row: {position}")

    # Copy every message dict so the objects held by the loaded frame are not
    # mutated behind its back; a shallow ``.copy()`` of the container would
    # still share the dicts.
    messages = [dict(message) for message in prompt]
    messages[0]["content"] = GUIDANCE_PROMPT
    new_prompts.append(messages)

# dtype=object keeps each prompt as a single cell value; reusing the frame's
# index keeps the rows aligned whatever labels the parquet file carried.
val_df["prompt"] = pd.Series(new_prompts, index=val_df.index, dtype=object)

val_df.to_parquet("../data/valid_new.parquet")