import os
import json

from evaluator.evaluator import evaluate_answer
from evaluator.Accuracy import Accuracy
from utils import Result_Extractor

from evaluator.datasets.bigbenchhard_dataset import BigBenchHardDataset
from prompt.bigbenchhard_prompt_set import BigBenchHardPromptSet
from src.utils import get_gpt_response
from config.setting import EXECUTOR_PROMPT_FORMAT_FOR_BASELINE

# Baseline evaluation script: send every task of the BigBenchHard "navigate"
# dataset to the GPT executor prompt, score each extracted answer, and save
# the running accuracy after each task as a JSON list.
prompt_set = BigBenchHardPromptSet()
constraints = prompt_set.get_constraint()
accuracy = Accuracy()
# One entry per processed task: the value of accuracy.get_accuracy() right
# after that task's result has been folded in (i.e. a running accuracy curve).
accuracy_list = []

dataset_path = './big_datasets/bigbenchhard/bbh_morphagent_navigate.json'
with open(dataset_path, 'r', encoding='utf-8') as f:
    data_navigate = json.load(f)

for task in data_navigate:
    # Each task is expected to provide 'question' and 'correct_answer' keys.
    query_prompt = EXECUTOR_PROMPT_FORMAT_FOR_BASELINE.format(
        major_problem=task['question'],
        constraints=constraints,
    )

    response = get_gpt_response(
        query_prompt=query_prompt,
        system_prompt="You are a helpful assistant.",
    )
    # Reduce the raw model output to the final answer before scoring.
    response = Result_Extractor.extract_answer(response)
    print("response:", response)

    result = evaluate_answer(task['correct_answer'], response)
    accuracy.update(result)
    accuracy_list.append(accuracy.get_accuracy())

# NOTE(review): the original statement was `save_path =` with no value, which
# is a SyntaxError and prevented the whole script from running. The location
# below is a placeholder default -- confirm the intended output path.
save_path = './results/bbh_navigate_baseline_accuracy.json'
# Create the output directory up front so the write cannot fail on a fresh
# checkout after all the (slow, paid) model calls have already been made.
save_dir = os.path.dirname(save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
with open(save_path, 'w', encoding='utf-8') as f:
    json.dump(accuracy_list, f)