from verl.utils.reward_score import gsm8k, math, multiply, countdown
import pandas as pd
from tqdm import tqdm

# Load the train.parquet file into a DataFrame for a quick manual inspection.
df_pandas = pd.read_parquet('/home/ubuntu/TinyZero_OpenR1-Math-220k/train.parquet')

# Spot-check the row at position 3: print its stored ground truth as-is, then
# the same value after it has gone through strip_string.
# NOTE: `math` here is verl.utils.reward_score.math, not the stdlib module.
_sample_truth = df_pandas.iloc[3]['reward_model']['ground_truth']
print(_sample_truth)
print(math.strip_string(_sample_truth))
# for i in tqdm(range(len(df_pandas)), total=len(df_pandas), desc="Processing rows", unit="row"):
