import json
from tqdm import tqdm

processed_file = './data/result/gsm8k_reasoning_processed_1.json'
raw_file = './data/result/gsm8k_reasoning_1.json'


def load_json(path):
    """Return the parsed content of the UTF-8 JSON file at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def count_first_flag(items, flag_key, seen_indices):
    """Count entries whose first ``flag_key`` value is exactly ``True``.

    Args:
        items: Iterable of dict-like entries; each is read with ``.get``.
        flag_key: Key under which an entry stores its list of flags
            (e.g. ``"verified_corrects"`` or ``"voting_corrects"``).
        seen_indices: Set of ``index_number`` values that were already
            counted. It is updated in place so that a later call skips
            the entries counted here.

    Returns:
        A ``(correct, total)`` tuple: ``total`` is the number of entries
        with a new, non-``None`` ``index_number``; ``correct`` is how many
        of those have ``True`` as the first element of their flag list.
    """
    correct = 0
    total = 0
    for item in items:
        idx = item.get("index_number")
        # Entries without an index, or whose index was already counted
        # (in this call or an earlier one sharing the set), are ignored.
        if idx is None or idx in seen_indices:
            continue
        seen_indices.add(idx)
        flags = item.get(flag_key, [])
        # Identity check on purpose: only a real boolean True counts,
        # truthy values such as 1 or "true" do not.
        if flags and flags[0] is True:
            correct += 1
        total += 1
    return correct, total


def main():
    """Print the combined accuracy over the processed and raw result files.

    Entries of the processed file are counted first using their
    ``verified_corrects`` flag; entries of the raw file are then counted
    using ``voting_corrects``, skipping every ``index_number`` already
    counted from the processed file.
    """
    # One shared set, so that an index is counted at most once across both files.
    seen_indices = set()

    # Load processed file
    processed_data = load_json(processed_file)
    print(f"Loading processed file with {len(processed_data)} entries:")
    verified_corrects_count, verified_total = count_first_flag(
        tqdm(processed_data, desc="Processed", unit="item"),
        "verified_corrects",
        seen_indices,
    )

    # Load raw file
    raw_data = load_json(raw_file)
    print(f"\nLoading raw file with {len(raw_data)} entries:")
    voting_corrects_count, voting_total = count_first_flag(
        tqdm(raw_data, desc="Raw", unit="item"),
        "voting_corrects",
        seen_indices,
    )

    # Combine totals; report 0 when nothing was evaluated to avoid dividing by zero.
    total_corrects = verified_corrects_count + voting_corrects_count
    total_evaluated = verified_total + voting_total
    overall_accuracy = total_corrects / total_evaluated if total_evaluated > 0 else 0

    print(f"\nThrough PRISM framework, the accuracy of GSM8k dataset is: {overall_accuracy:.2%}")


if __name__ == "__main__":
    main()

