import json 
import pandas as pd
import argparse
import pdb

DEFAULT_SAMPLE_SIZE = 500


def sample_rows(frame, n, seed=None):
    """Return up to *n* randomly chosen rows of *frame*.

    The request is clamped to ``len(frame)`` because ``DataFrame.sample``
    raises ``ValueError`` when asked for more rows than exist (sampling is
    without replacement).

    Args:
        frame: Source DataFrame.
        n: Maximum number of rows to draw.
        seed: Optional ``random_state`` forwarded to ``DataFrame.sample``;
            ``None`` keeps the draw non-deterministic.

    Returns:
        A DataFrame with ``min(n, len(frame))`` rows that keeps the original
        index labels.
    """
    return frame.sample(n=min(n, len(frame)), random_state=seed)


def build_records(frame):
    """Convert rows of *frame* into ``{"problem", "id", "answer"}`` dicts.

    For each row, ``problem`` is read from ``prompt[0]["content"]``,
    ``answer`` from ``reward_model["ground_truth"]`` and ``id`` is the row's
    index label.

    Raises:
        KeyError: If the ``prompt`` or ``reward_model`` column, or one of the
            nested keys, is missing.
    """
    return [
        {
            "problem": prompt[0]["content"],
            "id": label,
            "answer": reward["ground_truth"],
        }
        for label, prompt, reward in zip(
            frame.index, frame["prompt"], frame["reward_model"]
        )
    ]


def write_jsonl(records, path):
    """Write *records* to *path*, one JSON object per line.

    The file is always encoded as UTF-8: ``ensure_ascii=False`` emits raw
    non-ASCII characters, which the platform's default encoding may be unable
    to represent.
    """
    with open(path, "w", encoding="utf-8") as out:
        out.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in records
        )


def main(argv=None):
    """Sample rows from a parquet file and save them as JSON Lines.

    Args:
        argv: Optional argument list; ``None`` makes argparse read
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--parquet-file", type=str, required=True)
    parser.add_argument("--save-file", type=str, required=True)
    parser.add_argument(
        "--num-samples",
        type=int,
        default=DEFAULT_SAMPLE_SIZE,
        help="maximum number of rows to sample (default: %(default)s)",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="random seed for reproducible sampling (default: unseeded)",
    )
    args = parser.parse_args(argv)

    dataframe = pd.read_parquet(args.parquet_file)
    df_sample = sample_rows(dataframe, args.num_samples, seed=args.seed)
    write_jsonl(build_records(df_sample), args.save_file)


if __name__ == "__main__":
    main()

