import pandas as pd
import openai, os, json
import tiktoken
# Root directory; dataset inputs and result files both live under ./outputs.
PATH = "."
PATH_DATA = f"{PATH}/outputs"
OUTPUT_PATH = f"{PATH}/outputs"
# Maps an API mode to {short model alias: OpenAI model id}.
MODE_DICT = {"COMPLETION": {"davinci":"text-davinci-003"}, "CHAT_MODE":{"turbo":"gpt-3.5-turbo", "gpt4": "gpt-4"}}
# Active API mode and model alias; together they select MODE_DICT[MODE][MODEL].
MODE = "CHAT_MODE"
MODEL = "gpt4"
# Tag embedded in the names of the result files written by this script.
PROMTD_MODE = "ablation"

# SECURITY: the API key is read from the environment instead of being
# hard-coded. Secret keys must never be committed to source control; any key
# that previously appeared in this file should be treated as compromised and
# revoked. Export OPENAI_API_KEY before running the script.
openai.api_key = os.environ.get("OPENAI_API_KEY")

## Stored full model responses for the anachronisms task: a critique of the
## candidate prompt followed by "###Better Prompt Type###" and
## "###Better Prompt###" sections.
anachronisms_examples_only_full_response = "The candidate prompt is vague and does not provide clear guidelines about what defines the expected output. The model is expected to infer the rules based on the provided examples, which leads to ambiguity. The corrected prompt needs to identify the core transformation that aligns with all the examples and then provide clear instructions about that transformation.\n###Better Prompt Type###\n[PATTERN IDENTIFICATION][ANALYSIS]\n###Better Prompt###\nGiven a sentence, determine if it contains an anachronism (a thing belonging or appropriate to a period other than that in which it exists, particularly a thing that is conspicuously old-fashioned). An example of an anachronism would be a historical figure using modern technology. If an anachronism exists, output \"Yes\", otherwise, output \"No\". Print the output in the format \"The answer is \\answer{}\"."
anachronisms_instruction_and_examples_only_full_response = "The candidate prompt lacks complete specificity about how the task should be carried out and also lacks a structure for the model to give the output. There is no indication of how the answer should be provided beyond yes or no. Considering these shortcomings, a better prompt is reformulated targeting all these issues with an emphasis on giving the output in a specific format. The candidate prompt also does not provide a common output format like \"the answer is \\answer{}\".\n###Better Prompt Type###\n[CRITICAL READING][ANALYSIS]\n###Better Prompt###\nYour task is to determine if the given statement contains an anachronism. An anachronism is an error of chronology or timeline in a literary piece. Anything that is out of time and out of place is an anachronism. For example, if in a book there's a story set in the ancient Roman era, but the characters are using mobile phones, the mobile phone is an anachronism in the story. Please print the output in the format \"The answer is \\answer{}\""

## Instruction-only strings for the anachronisms task (the text that follows
## "###Better Prompt###" in the stored responses).
anachronisms_examples_only_instruction = "Given a sentence, determine if it contains an anachronism (a thing belonging or appropriate to a period other than that in which it exists, particularly a thing that is conspicuously old-fashioned). An example of an anachronism would be a historical figure using modern technology. If an anachronism exists, output \"Yes\", otherwise, output \"No\". Print the output in the format \"The answer is \\answer{}\"."
anachronisms_instruction_and_examples_only_instruction = "Your task is to determine if the given statement contains an anachronism. An anachronism is an error of chronology or timeline in a literary piece. Anything that is out of time and out of place is an anachronism. For example, if in a book there's a story set in the ancient Roman era, but the characters are using mobile phones, the mobile phone is an anachronism in the story. Please print the output in the format \"The answer is \\answer{}\""

## Math word-problem response. It used to be assigned to
## anachronisms_examples_only_full_response, which silently overwrote the
## anachronisms response defined earlier in this section; it now has its own
## name, matching gsm8k_examples_only_instruction below.
gsm8k_examples_only_full_response = "The candidate prompt is vague and ambiguous because it does not specify the exact objective of the task. We have only been given a set of examples but no instructions or system to guide our understanding. Given these circumstances, there could be multiple interpretations that the model could infer from the input-output pairs. The better prompt should provide a clear instruction for the task. Specifying the objective function will eliminate any ambiguity and prevent the model from guessing. Moreover, the model will provide a complete description of the task and clarify all possible edge cases. Providing a common output format for the answer which will provide a consistent and easy extraction of the final answer will also be beneficial.  \n###Better Prompt Type###\n[MATHEMATICAL REASONING]\n###Better Prompt###\nThe task is to solve a mathematical word problem and provide the answer with some justification showing your calculations. Write down all steps to solve the problem as well. After solving, print the output in the format \"The answer is \\answer{}\"."
# Currently an empty string; the instruction text is kept in the trailing comment.
gsm8k_examples_only_instruction = ""#"The task is to solve a mathematical word problem and provide the answer with some justification showing your calculations. Write down all steps to solve the problem as well. After solving, print the output in the format \"The answer is \\answer{}\"."


#####<<<<-----------------OPTIMIZED INSTRUCTIONS------------------>>>>#####
# Per-task instruction strings. None of them is referenced by the code visible
# in this file; they are plain module-level constants.
# Step-by-step strategy for word problems (no output-format clause).
maths_optimized_instruction = "Consider solving a word problem step by step. Begin by reading the problem thoroughly to ensure a complete understanding. Identify all the variables and the relationships between them. Next, break the problem into smaller manageable parts and develop a strategy to solve each part. Lastly, review the solutions of all parts and check whether the final solution makes sense in the context of the original problem."
# Code-generation instruction: write a Python function from the given comments.
humaneval_optimized_instruction = "You need to create a Python function based on the information provided in the enclosed comments. Ensure that the function adheres to the function signature and description provided. Include any necessary module imports, maintain the intent of the code, and consider possible edge cases to result in robust code execution. Please follow the specific instructions provided in the comments to develop the complete Python code."
# Entailment instruction ("Entailment" / "Uncertain"). The identifier's spelling
# ("entailement") is left as-is because renaming would change the module's names.
analytical_entailement_optimized_instruction = "Your task is to analyze two sentences and determine if one logically entails the other. If the first sentence implies or logically leads to the truth of the second, state \"Entailment\". If it is unclear or ambiguous, state \"Uncertain\". Print the output in the format \"The answer is \\answer{}\""
# Known/unknown instruction: answer "(a) ..." or "(b) unknown".
known_unknown_optimized_instruction = "You will be provided with a series of questions that test for 'hallucinations,' where the correct response is either a specific answer or unknown. Your job is to look at the question and determine if the answer can be found from the question or if it is undeterminable. If the answer cannot be concluded from the question alone, choose \"(b) unknown.\" If the answer can be determined from the question alone, provide the answer in the form \"(a) Your Answer.\" Print the output in the format \"The answer is \\answer{}\""
# Date-inference instruction; asks for MM/DD/YYYY output.
date_understanding_optimized_instruction = "Given the context of the text, infer the date it is referring to. Consider any key phrases, words or events that could hint towards a specific date. If necessary, conduct a brief background check of any prominent figures or events that mention in the text. Try to narrow down the exact date in MM/DD/YYYY format. Print the output in the format \"The answer is \\answer{}\"."
# Anachronism-detection instruction; asks for Yes/No plus the identified anachronism.
anachronisms_optimized_instruction = "Review the given statement and identify whether it includes an anachronism - a chronological inconsistency in which a person, event, object, or language is mistakenly placed in a time period where it does not belong. Clearly state \"Yes\" if the statement contains an anachronism and \"No\" if it does not. Explain your reasoning briefly, providing details that led you to your conclusion. Print the output in the format \"The answer is \\answer{}\" and if yes also provide \"Identified anachronism is - \\response{}\" otherwise print \"No anachronism found\"."
#####<<<<-----------------OPTIMIZED INSTRUCTIONS------------------>>>>#####



# Lookup table: prompt-mode name -> instruction text.
# NOTE(review): "examples_only" points at the GSM8K instruction while the other
# entry points at an anachronisms instruction - confirm this mix is intended.
PROMTD_MODE_DICTIONARY = {
    "examples_only": gsm8k_examples_only_instruction,
    "instruction_and_examples_only": anachronisms_instruction_and_examples_only_instruction,
}


import backoff, requests, sys
@backoff.on_exception(backoff.expo, Exception, max_tries=50, max_time = 61)
def get_output_chat_mode(prompt):
    """Send ``prompt`` as a single user message to the chat endpoint.

    The model id is taken from ``MODE_DICT[MODE][MODEL]``. Both the prompt and
    the reply are echoed to stdout. Any exception raised inside the function is
    retried by the ``backoff`` decorator (``backoff.expo``, ``max_tries=50``,
    ``max_time=61``).

    Returns:
        The ``content`` of the first choice's message.
    """
    print(prompt)
    request_kwargs = {
        "model": MODE_DICT[MODE][MODEL],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 1.0,
        "top_p": 1,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    completion = openai.ChatCompletion.create(**request_kwargs)
    reply = completion["choices"][0]["message"]["content"]
    print(reply)
    return reply


@backoff.on_exception(backoff.expo, Exception, max_tries=50, max_time = 61)
def get_output_completion_mode(prompt):
    """Send ``prompt`` to the text-completion endpoint and return the reply.

    The model id is taken from ``MODE_DICT[MODE][MODEL]`` and the reply is
    capped at 400 tokens. Both the prompt and the stripped reply are echoed to
    stdout. Any exception raised inside the function is retried by the
    ``backoff`` decorator (``backoff.expo``, ``max_tries=50``, ``max_time=61``).

    Returns:
        The first choice's ``text`` with surrounding whitespace removed.
    """
    print(prompt)
    request_kwargs = {
        "model": MODE_DICT[MODE][MODEL],
        "prompt": prompt,
        "temperature": 1.0,
        "top_p": 1,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
        "max_tokens": 400,
    }
    completion = openai.Completion.create(**request_kwargs)
    print(completion["choices"][0]["text"].strip())
    return completion["choices"][0]["text"].strip()


import random
def read_dataset(dataset_name, dataset_file, read_subset = False, subset_size = 250, data_root = None):
    """Load a dataset stored as JSON Lines or as a single JSON document.

    The file is first parsed line by line (one JSON value per line). If any
    line is not valid JSON on its own, the whole file is parsed as one JSON
    document instead. When the parsed result is a dict with an ``"examples"``
    key, the value under that key is returned.

    Args:
        dataset_name: Sub-directory of the data root that holds the file.
        dataset_file: File name inside that sub-directory.
        read_subset: Accepted for backward compatibility; currently ignored.
        subset_size: Accepted for backward compatibility; currently ignored.
        data_root: Directory that contains ``dataset_name``. Defaults to the
            module-level ``PATH_DATA`` when ``None``.

    Returns:
        The parsed data (a list for JSON Lines input; otherwise whatever the
        JSON document, or its ``"examples"`` entry, contains).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is valid in neither format.
    """
    root = PATH_DATA if data_root is None else data_root
    file_path = os.path.join(root, dataset_name, dataset_file)
    try:
        # Context managers guarantee the handle is closed even when parsing fails.
        with open(file_path, encoding = "utf-8") as handle:
            all_data = [json.loads(line.strip()) for line in handle]
    except json.JSONDecodeError:
        # Not JSON Lines (e.g. a pretty-printed document): parse the file as a whole.
        with open(file_path, encoding = "utf-8") as handle:
            all_data = json.load(handle)
    if isinstance(all_data, dict) and "examples" in all_data:
        all_data = all_data["examples"]
    return all_data



def find_errors_in_dataset(dataset):
    """Return the locators of records that still lack a usable ``promptd_output``.

    A record counts as an error when its ``"promptd_output"`` entry is missing,
    ``None``, or a string that is empty after stripping whitespace.

    Args:
        dataset: Either a dict mapping keys to record dicts, or a sequence of
            record dicts. Dict and list subclasses are accepted.

    Returns:
        A list of locators usable as ``dataset[locator]``: the keys for a dict
        dataset (whatever their type), positional indices otherwise.

    Note:
        A non-``None`` ``promptd_output`` is assumed to be a string; other
        types raise ``AttributeError`` on ``.strip()``.
    """
    is_mapping = isinstance(dataset, dict)
    # Always pair each record with the locator that indexes it in ``dataset``,
    # so callers can write back with ``dataset[locator] = ...``.
    entries = dataset.items() if is_mapping else enumerate(dataset)
    error_indices = []
    for locator, record in entries:
        final_response = record.get("promptd_output")
        if final_response is None:
            d = locator if is_mapping else record
            print(f"d --- {d}   type (d) {type(d)}")
            error_indices.append(locator)
        elif final_response.strip() == "":
            print("Here---", final_response)
            error_indices.append(locator)
    print('-'*100)
    print(f"Error in {len(error_indices)} indices")
    print('-'*100)
    return error_indices
        

import time
MAX_ATTEMPT = 10
def collect_results(dataset, dataset_name, prompt_ins = "", max_attempts = 1):
    """Query the chat model for every record lacking an output and save the dataset.

    Each pass asks ``find_errors_in_dataset`` which records still need an
    output, sends each record's ``"rewritten_prompt"`` to
    ``get_output_chat_mode``, and stores the raw reply under ``"full_output"``
    and the text after the last ``"###Better Prompt###"`` marker under
    ``"promptd_output"``. The dataset is updated in place. After every pass a
    per-attempt snapshot is written to ``OUTPUT_PATH``; the final dataset is
    always written once at the end, even when nothing had to be processed.

    Args:
        dataset: Dict or list of record dicts; modified in place.
        dataset_name: Prefix of the output file names.
        prompt_ins: Accepted for backward compatibility; currently unused.
        max_attempts: Maximum number of passes. The default of 1 matches the
            previous effective behaviour, where the retry loop always exited
            after its first pass. Pass a larger value (e.g. ``MAX_ATTEMPT``)
            to retry records whose output is still empty; a 60 second pause
            precedes each retry pass.

    Raises:
        KeyError: If a record to process has no ``"rewritten_prompt"``.
    """
    model_id = MODE_DICT[MODE][MODEL]
    for attempt in range(max_attempts):
        error_indices = find_errors_in_dataset(dataset)
        if not error_indices:
            break
        if attempt > 0:
            # Pause only before a retry pass that actually has work to do.
            time.sleep(60*1)
        for didx in tqdm.tqdm(error_indices, desc = f"Attempt {attempt+1}", total = len(error_indices)):
            value = dataset[didx]
            # Only the stored prompt is sent. The former unused "test_instance"
            # string was dropped: it required an "input"/"question" field and
            # raised KeyError for records without one.
            full_output = get_output_chat_mode(value["rewritten_prompt"])
            output = full_output.split("###Better Prompt###")[-1].strip()
            print(">>>", output, "<<<")
            value["full_output"] = full_output
            value["promptd_output"] = output
            dataset[didx] = value
        # Snapshot after each pass so partial progress survives a later failure.
        with open(OUTPUT_PATH + "/" + dataset_name + f"_{model_id}_attempt_{str(attempt+1)}_{PROMTD_MODE}_promptd.json", "w") as f:
            json.dump(dataset, f, indent = 4)
    with open(OUTPUT_PATH + "/" + dataset_name + f"_{model_id}_{PROMTD_MODE}_promptd.json", "w") as f:
        json.dump(dataset, f, indent = 4)

        



# prompt = [lines for lines in open(
#         f'{PATH_DATA}/new_output_prompt.txt', encoding="utf-8")]
# promptD = ''.join(prompt)
import tqdm
if __name__ == "__main__":
    # Script entry point: load the sentiment ablation file from the
    # "sentiments" dataset directory and fill in any missing model outputs.
    dataset_name = "sentiments"
    prompt_dataset = read_dataset(dataset_name, "sentiment_gpt-4_ablation.json")
    collect_results(prompt_dataset, dataset_name, prompt_ins = "")