import json
import re
from collections import Counter

import numpy as np

def parse_bullets(sentence):
    """Split *sentence* into lines and strip each line's bullet prefix.

    Everything before the first alphabetic character of a line (numbering,
    dashes, asterisks, whitespace, ...) is discarded. Lines that contain no
    alphabetic character at all are skipped.

    Args:
        sentence: Text whose lines are separated by ``"\\n"``.

    Returns:
        A list with the trimmed text of every line that contains at least
        one alphabetic character, in their original order.
    """
    bullets = []

    for line in sentence.split("\n"):
        # Position of the first letter, or None when the line has no letters.
        start = next(
            (pos for pos, char in enumerate(line) if char.isalpha()),
            None,
        )
        if start is None:
            continue

        bullets.append(line[start:])

    return bullets


def parse_yes_no(string):
    """Interpret *string* as a yes/no answer.

    The check is a case-insensitive substring test, so "yes" is looked for
    first and wins when both words occur; any text containing the letters
    "no" (e.g. "know") counts as a negative answer.

    Returns:
        True if "yes" occurs, False if only "no" occurs, otherwise None.
    """
    lowered = string.lower()

    for keyword, verdict in (("yes", True), ("no", False)):
        if keyword in lowered:
            return verdict

    return None


def solve_problems(input_str):
    """Return the last standalone "true"/"false" word found in *input_str*.

    Matching is case-insensitive and restricted to whole words; the result
    is lower-cased. Returns None when no such word is present.
    """
    found = re.findall(r'\b(true|false)\b', input_str, re.IGNORECASE)

    # Scan from the end so the final statement in the text wins.
    last_valid = next(
        (token for token in reversed(found) if is_valid_boolean(token)),
        None,
    )
    return None if last_valid is None else last_valid.lower()


def parse_answer(input_str):
    """Return the last ``\\boxed{...}`` payload that is a boolean literal.

    Every ``\\boxed{...}`` group in *input_str* is collected and examined
    from last to first; the first payload accepted by ``is_valid_boolean``
    is returned lower-cased. Returns None when there is no such payload.
    """
    boxed = re.findall(r"\\boxed\{([^}]+)\}", input_str)

    # Pop from the end so later boxes take precedence over earlier ones.
    while boxed:
        candidate = boxed.pop()
        if is_valid_boolean(candidate):
            return candidate.lower()

    return None


def is_valid_boolean(value):
    """Tell whether ``str(value)`` spells "true" or "false", ignoring case."""
    normalized = str(value).lower()
    return normalized == 'true' or normalized == 'false'

def _extract_boolean_answer(solution):
    """Extract a "true"/"false" answer from one solution text, or None.

    A boolean inside ``\\boxed{...}`` is preferred; otherwise the last
    standalone true/false word in the text is used.
    """
    answer = parse_answer(solution)
    if answer is None:
        answer = solve_problems(solution)
    return answer


def compute_accuracy(gt, pred_solution):
    """Score a prediction against the ground truth.

    Args:
        gt: Ground-truth text; its last standalone "true"/"false" word is
            taken as the reference answer.
        pred_solution: Either a single solution text, or a list of solution
            texts. For a list, an answer is extracted from each entry and
            the most frequent one (which may be None when most entries are
            unparseable) is used as the prediction.

    Returns:
        None if no reference answer can be extracted from *gt*; otherwise
        1 when the prediction matches the reference and 0 when it does not
        or when no prediction could be extracted.
    """
    answers = solve_problems(gt)
    if answers is None:
        return None

    if isinstance(pred_solution, list):
        # Iterate over the argument itself rather than relying on a
        # same-named global being defined by the calling script.
        pred_answers = [
            _extract_boolean_answer(solution) for solution in pred_solution
        ]
        # An empty list carries no prediction at all.
        pred_answer = most_frequent(pred_answers) if pred_answers else None
    else:
        pred_answer = _extract_boolean_answer(pred_solution)

    if pred_answer is None:
        return 0

    return 1 if answers == pred_answer else 0



def most_frequent(List):
    """Return the element that occurs most often in *List*.

    Ties are resolved in favour of the element that appears first.

    Args:
        List: A list of items comparable with ``==``. Items may be None.

    Returns:
        The most frequent item, or None when *List* is empty.
    """
    if not List:
        return None

    try:
        counts = Counter(List)
    except TypeError:
        # Unhashable items cannot be counted in a dict; fall back to the
        # quadratic scan, which only needs equality comparison.
        return max(List, key=List.count)

    # Counter keeps first-seen order and max() returns the first maximum,
    # so the earliest item wins a tie.
    return max(counts, key=counts.get)

if __name__ == "__main__":
    # Load the results file. Each key is a question and each value unpacks
    # into a (responses, ground-truth) pair. The context manager closes the
    # file handle, and the explicit encoding avoids locale-dependent decoding.
    with open("./result/xxx.json", "r", encoding="utf-8") as results_file:
        response_dict = json.load(results_file)

    questions = list(response_dict.keys())
    print(len(questions))

    accuracies = []

    for question in questions:
        responses, gt = response_dict[question]
        print(gt)

        # The prediction text is the 'content' of each response's last entry.
        pred_solutions = [response[-1]['content'] for response in responses]

        accurate = compute_accuracy(gt, pred_solutions)

        if accurate is not None:
            accuracies.append(float(accurate))
        else:
            # No reference answer could be extracted from gt: stop in the
            # debugger so the offending entry can be inspected.
            import pdb
            pdb.set_trace()
            print(gt)

        # Running mean and standard error over the questions scored so far.
        print("accuracies:", np.mean(accuracies), np.std(accuracies) / (len(accuracies) ** 0.5))

