import os
import json
import random
from concurrent.futures import ProcessPoolExecutor

from tqdm import tqdm
from vllm import SamplingParams, LLM
from transformers import AutoTokenizer
import numpy as np


# NOTE(review): nothing in this section draws random numbers; the seed call is
# kept so the module-level side effect stays unchanged.
random.seed(42)

# Inference results to summarize.
TRAJECTORIES_PATH = "qwen3_32b/cache/livemathbench_summarized_trajectories/summarized_trajectories_x4_infer.json"
# Grouping used when regrouping the flat list of responses.
NUM_TRAJECTORIES = 16
NUM_INFERENCES = 4


def summarize_metrics(
    data: dict,
    num_trajectories: int = NUM_TRAJECTORIES,
    num_inferences: int = NUM_INFERENCES,
) -> dict:
    """Aggregate correctness and token statistics over all responses.

    Every entry of *data* must hold a ``"responses"`` list whose items carry
    ``"correct"`` and ``"num_token"``. Responses are flattened in iteration
    order and regrouped as ``(-1, num_trajectories, num_inferences)``.
    NOTE(review): this presumes consecutive responses belong to the same
    trajectory and consecutive trajectories to the same problem -- confirm
    against the code that writes the file.

    Args:
        data: Mapping of entry key to a dict with a ``"responses"`` list.
        num_trajectories: Size of the middle axis (the "k" of Avg@k / Pass@k).
        num_inferences: Size of the last axis.

    Returns:
        A dict with ``"avg_accuracy"`` (mean of ``correct``),
        ``"pass_rate"`` (share of groups with at least one correct response
        along the trajectory axis) and ``"avg_num_token"`` (mean of
        ``num_token``).

    Raises:
        ValueError: If a group size is not positive, there are no responses,
            entries hold differing numbers of responses, or the total number
            of responses is not a multiple of
            ``num_trajectories * num_inferences``.
    """
    if num_trajectories < 1 or num_inferences < 1:
        raise ValueError("num_trajectories and num_inferences must be positive")

    response_rows = [entry["responses"] for entry in data.values()]
    if len({len(row) for row in response_rows}) > 1:
        raise ValueError("entries hold differing numbers of responses")

    correct_flags = []
    token_counts = []
    for row in response_rows:
        for response in row:
            correct_flags.append(response["correct"])
            token_counts.append(response["num_token"])

    group_size = num_trajectories * num_inferences
    if not correct_flags:
        raise ValueError("no responses found in data")
    if len(correct_flags) % group_size:
        raise ValueError(
            f"expected a multiple of {group_size} responses, got {len(correct_flags)}"
        )

    correct = np.array(correct_flags).reshape(-1, num_trajectories, num_inferences)
    tokens = np.array(token_counts).reshape(-1, num_inferences)

    # Reduce one axis at a time rather than with a single global mean, to keep
    # the floating-point summation order of the earlier inline version.
    return {
        "avg_accuracy": correct.mean(0).mean(0).mean(0),
        "pass_rate": correct.any(axis=1).mean(0).mean(0),
        "avg_num_token": tokens.mean(0).mean(0),
    }


if __name__ == "__main__":
    with open(TRAJECTORIES_PATH, "r", encoding="utf-8") as rfile:
        data = json.load(rfile)

    metrics = summarize_metrics(data)
    print(f"Avg Avg@{NUM_TRAJECTORIES}: {metrics['avg_accuracy']}")
    print(f"Avg Pass@{NUM_TRAJECTORIES}: {metrics['pass_rate']}")
    print(f"Avg num_token: {metrics['avg_num_token']}")