# from datasets import load_dataset
# import pandas as pd
# import random

# dataset = load_dataset("Elliott/Openr1-Math-46k-8192", split="train")

# print(dataset[0])
# # breakpoint()
# ret_dict = []
# for item in dataset:
#     random_number = random.randint(100, 8192)
#     # breakpoint()
#     # instruction = "Let's think step by step and output the final answer within \\boxed{}."
#     instruction = f" Think for {random_number} tokens."
#     item["prompt"][1]['content'] += instruction
#     item["reward_model"]["num_tokens"] =  random_number
#     item["prompt"] = [item["prompt"][-1]]
#     print( item["prompt"][0]['content'])
#     ret_dict.append(item)

# train_df = pd.DataFrame(ret_dict)
# train_df.to_parquet("openr1_l1_style.parquet")

# val_df = pd.read_parquet("../data/valid.parquet")
# # Iterate over every row
# # Use positional indexing rather than label indexing
# for i in range(len(val_df)):
#     row = val_df.iloc[i]
#     print(f"Processing row: {i}")
    
#     current_prompt = row['prompt'].copy()
#     current_prompt[0]['content'] = """Your task is to provide a systematic reasoning approach for the given problem. Structure your response using the format: \"<guidance>\n {your guidance} </guidance>\n\". Focus on outlining the key analytical steps and overall methodology needed to solve the problem systematically."""
#     val_df.iloc[i, val_df.columns.get_loc('prompt')] = current_prompt
        
    
#     # Assign the whole modified structure back
    
# val_df.to_parquet("../data/valid_new.parquet")
# # breakpoint()
import random
import pandas as pd
import pandas as pd
import random

# Source dataset and destination file for the token-budget variant.
INPUT_PATH = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/rl/LUFFY/data/openr1.parquet"
OUTPUT_PATH = "openr1_our_style.parquet"

# Inclusive bounds of the randomly drawn "Think for N tokens." budget.
MIN_THINK_TOKENS = 100
MAX_THINK_TOKENS = 8192


def _with_token_budget(prompt, num_tokens):
    """Return a new message list whose first message requests *num_tokens*.

    *prompt* may be any sequence of message dicts. Each message dict is
    copied so the caller's data is never mutated. An empty prompt is
    returned as an empty list.
    """
    # NOTE: no isinstance(list) check on purpose. List columns read back from
    # parquet typically arrive as numpy arrays, and a list-only guard would
    # silently skip them.
    messages = [dict(message) for message in prompt]
    if messages:
        messages[0]['content'] += f" Think for {num_tokens} tokens."
    return messages


def add_token_budgets(df: pd.DataFrame, rng=None) -> pd.DataFrame:
    """Return a copy of *df* with a random token budget attached to each row.

    For every row, one integer is drawn from
    [MIN_THINK_TOKENS, MAX_THINK_TOKENS]. " Think for N tokens." is appended
    to the content of the first prompt message, and ``reward_model`` is
    replaced by ``{'style': 'rule', 'ground_truth': <original>,
    'num_tokens': N}``.

    Args:
        df: Frame with a ``prompt`` column (sequence of message dicts per row)
            and a ``reward_model`` column (mapping with a ``ground_truth``
            key per row). Any index is accepted; rows are walked by position.
        rng: Optional object exposing ``randint(a, b)``, e.g.
            ``random.Random(seed)`` for reproducible output. Defaults to the
            global ``random`` module.

    Returns:
        A new DataFrame with the same index and columns; *df* is not modified.

    Raises:
        KeyError: If a column is missing or a ``reward_model`` entry has no
            ``ground_truth`` key.
    """
    rng = random if rng is None else rng

    prompts = []
    reward_models = []
    rows = zip(df['prompt'], df['reward_model'])
    for position, (prompt, reward_model) in enumerate(rows):
        print(f"Processing row: {position}")
        num_tokens = rng.randint(MIN_THINK_TOKENS, MAX_THINK_TOKENS)
        prompts.append(_with_token_budget(prompt, num_tokens))
        reward_models.append({
            'style': 'rule',
            'ground_truth': reward_model['ground_truth'],
            'num_tokens': num_tokens,
        })

    result = df.copy()
    # Build whole object columns at once. Writing sequences into single cells
    # with .at/.loc is fragile and depends on the index labels.
    result['prompt'] = pd.Series(prompts, index=result.index, dtype=object)
    result['reward_model'] = pd.Series(
        reward_models, index=result.index, dtype=object
    )
    return result


def main():
    """Read the source parquet, attach token budgets, and write the result."""
    val_df = add_token_budgets(pd.read_parquet(INPUT_PATH))
    # Show the first rewritten prompt as a quick sanity check.
    if not val_df.empty:
        print(val_df.iloc[0]['prompt'])
    val_df.to_parquet(OUTPUT_PATH)


if __name__ == "__main__":
    main()
# breakpoint()