"""
Accuracy evaluation
"""
import numpy as np  
import openai
import json
import argparse
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from tenacity import retry, stop_after_attempt, wait_chain, wait_fixed
from src.prompt_template import ACC_EVAL_PROMPT, ACC_EVAL_PROMPT_AI
from src.constant import TYPE, QUESTION_TYPE
from tqdm import tqdm

# Command-line options that select which answer file gets graded.
# Every option is a plain string with a default.
parser = argparse.ArgumentParser()
for _flag, _default in (
    ("--model_id", "llama"),
    ("--type", "normal"),
    ("--question_type", "four_president_given_death_current"),
    ("--experiment_type", "exp"),
):
    parser.add_argument(_flag, type=str, default=_default)

args = parser.parse_args()

# Reject unsupported option values up front. The checks run in this order and
# the first failing one raises ValueError with its message.
for _value, _allowed, _message in (
    (args.type, TYPE, "Invalid type"),
    (args.question_type, QUESTION_TYPE, "Invalid question type"),
    (args.model_id, ("llama", "gemma", "claude", "gpt"), "Invalid model id"),
    (args.experiment_type, ("exp", "full"), "Invalid experiment type"),
):
    if _value not in _allowed:
        raise ValueError(_message)

# Module-level OpenAI client settings. These are placeholders: fill in real
# values before running.
openai.organization = "YOUR_ORG_ID"
openai.api_version = "VERSION"
openai.api_key = "YOUR_API_KEY"


# Maximum number of attempts handed to tenacity's stop_after_attempt by the
# @retry decorators in this file.
STOP_AFTER_ATTEMPT = 10

@retry(
    stop=stop_after_attempt(STOP_AFTER_ATTEMPT),
    # Pause 3 s, 3 s, then 5 s between attempts; tenacity keeps using the last
    # strategy of a wait_chain once the chain is exhausted.
    wait=wait_chain(wait_fixed(3), wait_fixed(3), wait_fixed(5)),
)
def chat_gpt_call_vanilla(content, type):
    """Send ``content`` to the chat model and return the stripped reply text.

    Any exception raised inside the call (including a reply without text)
    triggers another attempt, up to ``STOP_AFTER_ATTEMPT`` attempts in total.

    Args:
        content: Text sent as the single user message, after a fixed system
            instruction.
        type: Accepted for the callers' sake; this function does not use it.

    Returns:
        The message content of the first returned choice, with leading and
        trailing whitespace removed.
    """
    conversation = [
        {"role": "system", "content": "Please follow the instructions carefully."},
        {"role": "user", "content": content},
    ]
    # Tiny positive value passed for both temperature and top_p.
    near_zero = 1e-22

    completion = openai.chat.completions.create(
        # Deliberately the small model, to keep API costs down.
        model='gpt-4o-mini-2024-07-18',
        messages=conversation,
        temperature=near_zero,
        top_p=near_zero,
        # 800 is a placeholder budget; adjust when longer replies are needed.
        max_tokens=800,
        frequency_penalty=0,
        presence_penalty=0,
        # Only has an effect when the prompt asks the model to finish its
        # reply with "# END".
        stop=["# END"],
    )

    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

def _write_results(output_file, results):
    """Overwrite ``output_file`` with ``results`` serialised as a JSON array."""
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4)


def evaluate_answers(input_file, output_file, input_type):
    """Grade every question in ``input_file`` and save the verdicts as JSON.

    ``input_file`` must contain a JSON list of objects. From each object the
    keys ``"character"``, ``"type"`` and ``"questions"`` are read (a missing
    ``"questions"`` key counts as an empty list). Each question is substituted
    into ``ACC_EVAL_PROMPT`` and sent to ``chat_gpt_call_vanilla``; the replies
    are stored, in question order, under the key ``"<input_type> evaluation"``.

    ``output_file`` is rewritten in full after each character is finished, so
    it is a valid JSON array at every checkpoint and never accumulates content
    from a previous run. This function is intentionally not wrapped in a retry
    of its own: individual API calls already retry inside
    ``chat_gpt_call_vanilla``, and retrying the whole job would re-send every
    request.

    Args:
        input_file: Path of the JSON file with the answers to grade.
        output_file: Path of the JSON file to (over)write with the results.
        input_type: Label used to build the result key; it is also forwarded
            as the second argument of ``chat_gpt_call_vanilla``.

    Returns:
        None. The results are only written to ``output_file``.

    Raises:
        Whatever ``chat_gpt_call_vanilla`` raises once its own retries are
        exhausted; results of the characters completed before that point
        remain in ``output_file``.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    evaluation_key = f"{input_type} evaluation"
    results = []

    for character_data in data:
        character = character_data.get("character")
        questions = character_data.get("questions", [])

        evaluations = [
            chat_gpt_call_vanilla(
                ACC_EVAL_PROMPT.format(question=question), input_type
            )
            for question in tqdm(questions, desc=f"Evaluating {character}")
        ]

        results.append({
            "character": character,
            "type": character_data.get("type"),
            "questions": questions,
            evaluation_key: evaluations,
        })
        # Checkpoint: a later failure keeps every finished character on disk.
        _write_results(output_file, results)

    if not results:
        # Nothing was graded; still leave a well-formed (empty) array behind.
        _write_results(output_file, results)
     
# Run configuration taken from the command-line arguments.
cfg = dict(type=args.type, question_type=args.question_type)

if __name__ == "__main__":
    # Named run_type rather than type so the builtin is not rebound at module scope.
    run_type, question_type = cfg["type"], cfg["question_type"]
    input_type = "accuracy"

    # Answers are read from answers/<experiment>/<model>/<type>/ ...
    answers_path = (
        f"answers/{args.experiment_type}/{args.model_id}/{run_type}"
        f"/formatted_output_{run_type}_{question_type}.json"
    )

    # ... and the evaluations are written under
    # evaluation/<experiment>/<model>/<type>/<question_type>/.
    dir_path = (
        f"evaluation/{args.experiment_type}/{args.model_id}"
        f"/{run_type}/{question_type}"
    )
    os.makedirs(dir_path, exist_ok=True)
    evaluation_path = f"{dir_path}/evaluated_accuracy_{run_type}_{question_type}.json"

    evaluate_answers(answers_path, evaluation_path, input_type)
