import warnings
warnings.filterwarnings('ignore')
import os
from pprint import pprint
import math
from copy import deepcopy

from Choices import data_name_choices
import utils


# ---------------------------------------------------------------------------
# Experiment configuration: which result file to score.
# ---------------------------------------------------------------------------
model_name = "text-davinci-003"
prompt_strategy = 'I3C-Select'
num_demonstrations = 8

# Index into the dataset list imported from the Choices module.
data_name_idx = 6
data_name = data_name_choices[data_name_idx]

# Tolerance handed to math.isclose as both rel_tol and abs_tol when a
# prediction is compared with its gold answer.
threshold = 1e-5

# NOTE: str.capitalize() upper-cases the first character and lower-cases the
# rest, so every component of the file name is normalised that way
# (e.g. 'I3C-Select' becomes 'I3c-select').
result_file_name = f'{data_name.capitalize()}-{prompt_strategy.capitalize()}-{model_name.capitalize()}-demonstrations({num_demonstrations}).txt'
data_path = os.path.join('../result/', result_file_name)


def _parse_answer(value):
    """Turn a raw answer into a number where possible.

    Non-string values (numbers, or None when the key is missing) are returned
    unchanged.  Strings have thousands separators removed and are then
    evaluated.  A string that cannot be evaluated yields None so that one
    malformed record is judged 'wrong' instead of aborting the whole run.
    """
    if not isinstance(value, str):
        return value
    try:
        # NOTE(security): eval() executes arbitrary expressions.  It is kept
        # so that answers written as expressions still parse, but it must
        # only ever be run on result files from a trusted source.
        return eval(value.replace(',', ''))
    except (SyntaxError, NameError, TypeError, ValueError, ZeroDivisionError):
        return None


def _is_correct(gold_value, pred_value, tol):
    """Return True when the prediction matches the gold answer.

    A prediction counts as a match when it is within ``tol`` (used as both
    relative and absolute tolerance) of the gold value, or of the gold value
    divided by 100.  Missing or non-numeric values never match.
    """
    try:
        return (
            math.isclose(gold_value, pred_value, rel_tol=tol, abs_tol=tol)
            or math.isclose(gold_value / 100, pred_value, rel_tol=tol, abs_tol=tol)
        )
    except TypeError:
        # None, tuples, strings, ... cannot be compared numerically.
        return False


data = utils.load_txt_data(data_path)

# Parse both columns the same way (comma stripping included) so that a
# prediction such as "1,234" is read as a number, exactly like a gold answer.
pred = [_parse_answer(sub_data.get('numerical_answer')) for sub_data in data]
gold = [_parse_answer(sub_data.get('gold_answer')) for sub_data in data]

count = 0            # number of records judged 'right'
label = []           # per-record verdict, parallel to data
wrong_index = []     # positions of the records judged 'wrong'
for i, (sub_data, gold_value, pred_value) in enumerate(zip(data, gold, pred)):
    judge = 'right' if _is_correct(gold_value, pred_value, threshold) else 'wrong'
    label.append(judge)
    sub_data['judge'] = judge
    if judge == 'right':
        count += 1
    else:
        wrong_index.append(i)

# Guard against an empty result file, which would otherwise divide by zero.
accuracy = count / len(gold) * 100 if gold else 0.0
print(f'Accuracy of {data_name.capitalize()}-{prompt_strategy.capitalize()}-{model_name.capitalize()}: {accuracy:.2f}%')

