import os
import json
import argparse
import re

# Matches the boolean verdict inside a model response, e.g. `"answer": true`.
# Compiled once here instead of being re-specified for every result item.
ANSWER_PATTERN = re.compile(r'"answer"\s*:\s*(true|false)')

# Closing tag of the reasoning section; only text after it is searched.
THINK_END = '</think>'

parser = argparse.ArgumentParser(
    description='Score per-task results.json files and save the accuracies as JSON.'
)
parser.add_argument(
    '--input_folder',
    type=str,
    default='',
    help='Folder holding one <task>/results.json per task; '
         'evaluation.json is written into this folder',
)

# Tasks to evaluate. Only the keys are used: each key names a sub-folder of
# --input_folder that must contain a results.json file.
task = {}


def extract_answer(output):
    """Return the boolean verdict contained in a model output.

    Args:
        output: The raw ``output`` field of a result item. May be a string,
            a list (only its first element is considered), or ``None``.

    Returns:
        ``True`` or ``False`` when an ``"answer": true|false`` pair is found,
        otherwise ``None`` (missing output, empty list, non-string output, or
        no verdict in the text).
    """
    if isinstance(output, list):
        # An empty list carries no answer; indexing it would raise IndexError.
        output = output[0] if output else None
    if not isinstance(output, str):
        return None
    if THINK_END in output:
        # Ignore anything inside the reasoning section: search everything
        # that follows the first closing tag.
        output = output.partition(THINK_END)[2]
    match = ANSWER_PATTERN.search(output)
    if match is None:
        return None
    return match.group(1) == "true"


def score_task(key, results):
    """Count correct verdicts for one task and build its summary record.

    Args:
        key: Task name stored in the record.
        results: Items loaded from the task's results.json. Each item is
            looked up by ``'output'`` and, when a verdict was extracted, by
            ``'answer'`` (assumed to be a JSON boolean -- TODO confirm).

    Returns:
        A dict with ``task``, ``num_correct``, ``total`` and ``accuracy``.
        Items without a parseable verdict count towards ``total`` but not
        towards ``num_correct``; ``accuracy`` is 0 for an empty result list.

    Raises:
        KeyError: If an item with a parseable verdict has no ``'answer'``.
    """
    num_correct = 0
    for item in results:
        predicted = extract_answer(item.get('output'))
        if predicted is not None and predicted == item['answer']:
            num_correct += 1
    total = len(results)
    return {
        "task": key,
        "num_correct": num_correct,
        "total": total,
        "accuracy": num_correct / total if total > 0 else 0,
    }


def mean_accuracy(final):
    """Return the unweighted mean of the per-task accuracies (0 if none)."""
    if not final:
        # No tasks were evaluated; avoid dividing by zero.
        return 0
    return sum(x['accuracy'] for x in final) / len(final)


def main(argv=None):
    """Score every configured task and write ``evaluation.json``.

    Args:
        argv: Optional argument list; ``None`` makes argparse read
            ``sys.argv``.

    Raises:
        OSError: If a results.json cannot be read or the summary cannot be
            written.
    """
    args = parser.parse_args(argv)

    final = []
    for key in task:
        print(key)
        results_path = os.path.join(args.input_folder, key, "results.json")
        with open(results_path, 'r', encoding='utf-8') as f:
            results = json.load(f)
        final.append(score_task(key, results))

    print(mean_accuracy(final))

    # os.path.join keeps an empty --input_folder relative to the current
    # directory instead of pointing at the filesystem root.
    output_json_path = os.path.join(args.input_folder, "evaluation.json")
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(final, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()