import pandas as pd
import numpy as np
# One-off data-prep script: load a parquet dataset, subtract a fixed amount
# from every row's reward_model['num_tokens'], and write the result to a new
# parquet file.
# NOTE(review): judging by the file names this derives the "add600" variant
# from the "add1k" one (1000 - 400 = 600) -- confirm the intended semantics.
INPUT_PATH = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/l1/dapo-math-17k_qwen3-add1k.parquet"
OUTPUT_PATH = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/l1/dapo-math-17k_qwen3-add600.parquet"
# An earlier, now-disabled variant of this script wrote to
# ".../dapo-math-17k_qwen3-add8k5.parquet" instead.

# Amount subtracted from every reward_model['num_tokens'] value.
TOKEN_DELTA = 400


def _with_shifted_num_tokens(reward_model, delta=TOKEN_DELTA):
    """Return a shallow copy of *reward_model* with 'num_tokens' reduced by *delta*.

    A new dict is built instead of mutating the input, so a dict object that
    happens to be shared between rows is never decremented more than once.
    All other keys (and their order) are carried over unchanged.
    """
    return {**reward_model, 'num_tokens': reward_model['num_tokens'] - delta}


data = pd.read_parquet(INPUT_PATH)

# Optional sanity check (disabled): average num_tokens before the shift.
# print(f"Average length: {np.mean([rm['num_tokens'] for rm in data['reward_model']])}")

# Rebuild the column element-wise. Unlike pairing `data.at[i, ...]` (label
# lookup) with `data.iloc[i]` (positional lookup), this does not depend on the
# index being exactly 0..n-1, which a parquet file is free to violate when it
# stores a non-default pandas index.
data['reward_model'] = data['reward_model'].map(_with_shifted_num_tokens)

data.to_parquet(OUTPUT_PATH)