# dir: results/math

import argparse
import json
import os
import re

def load_results(jsonl_file: str) -> list:
    """Load every record from a JSON Lines file.

    Args:
        jsonl_file: Path to a file holding one JSON document per line.

    Returns:
        The decoded records, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    results = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_file, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (typically a trailing newline at end of file) are
            # not records; feeding them to json.loads would raise.
            if not line.strip():
                continue
            results.append(json.loads(line))
    return results

def _as_compact_text(value):
    """Return *value* as a string with every space character removed.

    ``None`` becomes the empty string; any other non-string value is passed
    through ``str`` first so the space removal cannot fail.
    """
    if value is None:
        return ""
    return str(value).replace(" ", "")


def evaluate_results(result_file):
    """Score a JSONL result file by substring-matching the expected answers.

    For each record the expected answer is taken from
    ``record['correct_answer'][7:-1]`` and normalized (spaces, ``,\\!``,
    ``\\$`` removed; ``dfrac`` rewritten to ``frac``). The record counts as
    correct when that string occurs in the attempt's output or in its code,
    both compared with spaces removed.

    The attempt is read from ``record['attempt_answer']['output']`` and
    ``['code']`` when both keys exist (counted as a "code response").
    Otherwise the text comes from ``record['text']`` if present, else from
    ``record['attempt_answer']['text']``; a record with none of these is
    treated as an empty answer.

    Prints each mismatch plus a summary; returns nothing.

    Args:
        result_file: Path to the JSONL file of result records.

    Raises:
        KeyError: If a record has no ``correct_answer`` field.
    """
    results = load_results(result_file)
    correct = 0
    incorrect = 0
    use_code_response = 0
    for result in results:
        attempt = result.get("attempt_answer")
        if isinstance(attempt, dict) and "output" in attempt and "code" in attempt:
            raw_out = attempt["output"]
            raw_code = attempt["code"]
            use_code_response += 1
        else:
            # Text-only response. Records are dicts, so membership (not
            # hasattr) is the right test for a top-level "text" field.
            if "text" in result:
                raw_out = result["text"]
            elif isinstance(attempt, dict):
                raw_out = attempt.get("text")
            else:
                raw_out = None
            raw_code = ""

        # Drop the first 7 and the last character of the stored answer.
        # NOTE(review): presumably this strips a "\boxed{" ... "}" wrapper —
        # confirm against the dataset.
        correct_answer = result['correct_answer'][7:-1]
        correct_answer = correct_answer.replace(" ", "")
        # Remove LaTeX thin-space separators (",\!") and escaped dollar signs.
        correct_answer = correct_answer.replace(",\\!", "").replace("\\$", "")
        correct_answer = correct_answer.replace("dfrac", "frac")

        answer_out = _as_compact_text(raw_out)
        answer_code = _as_compact_text(raw_code)

        # NOTE(review): an empty expected answer is a substring of anything
        # and is therefore always counted as correct.
        if correct_answer in answer_out or correct_answer in answer_code:
            correct += 1
        else:
            incorrect += 1
            print("====================================")
            print(f"Correct answer: {correct_answer}")
            print(f"Answer out: {answer_out}")
            print(f"Answer code: {answer_code}")

    total = correct + incorrect
    # An empty result file must not crash the summary with a division by zero.
    accuracy = correct / total * 100 if total else 0.0
    print(f"Correct: {correct}")
    print(f"Incorrect: {incorrect}")
    print(f"Accuracy: {accuracy:.2f}% ({correct}/{total})")
    print(f"Use code response: {use_code_response}")
    
if __name__ == "__main__":
    # Script entry point: read the required --result_jsonl_file option and
    # evaluate the file it names.
    cli = argparse.ArgumentParser()
    cli.add_argument("--result_jsonl_file", type=str, required=True)
    evaluate_results(cli.parse_args().result_jsonl_file)
    