from tqdm import tqdm
import os, json

# Root of the project checkout and the folder that holds the example data files.
PATH = "F:/user-repos/saurasrivastava/prompt_gen"
PATH_DATA = f"{PATH}/data"


def _read_lines(filename):
    """Return all lines of ``PATH_DATA/filename`` as a list, newlines kept.

    The file is decoded as UTF-8 and is closed before returning, so no
    handle stays open for the rest of the script.
    """
    with open(f"{PATH_DATA}/{filename}", encoding="utf-8") as handle:
        return handle.readlines()


# One entry per line in each file. The summarizing loop further down pairs
# these four lists by position with zip(), so entry i of every list is used
# together as one example.
bad_examples = _read_lines("bad.txt")
good_examples = _read_lines("good.txt")
prompt_types = _read_lines("prompt_type.txt")
reasons = _read_lines("reasons_for_good_bad.txt")

from langchain.chat_models import AzureChatOpenAI
from langchain import LLMChain
import openai
# Module-level settings on the openai package: key, API type, endpoint and
# API version. All four are empty strings in this file -- presumably redacted
# placeholders; TODO confirm where the real values are expected to come from
# (e.g. environment variables) before running.
openai.api_key = ""
openai.api_type = ""
openai.api_base =  ""
openai.api_version = ""

# Load the generated prompt as a list of lines (newlines kept). The "with"
# block closes the file as soon as it has been read.
with open(f"{PATH_DATA}/output_prompt.txt", encoding="utf-8") as prompt_file:
    prompt = prompt_file.readlines()

import tiktoken

# Tokenizer that tiktoken associates with the "gpt-4" model name.
enc = tiktoken.encoding_for_model("gpt-4")

# Number of tokens in the whole prompt file, reported for information only.
guidelines_len = len(enc.encode("".join(prompt)))
print("Length: ", guidelines_len)


# Keyword settings for the Azure-hosted chat model used by the summarizing
# loop. Key, endpoint and API version are empty strings in this file.
azure_chat_settings = dict(
    deployment_name="gpt-4-32k",
    model_name="gpt-4-32k",
    openai_api_key="",
    openai_api_base="",
    openai_api_version="",
)
llm = AzureChatOpenAI(**azure_chat_settings)


### Prompt prefix used to summarize the reasons distinguishing good vs bad prompts, to make them more readable.
### Layout: general guidelines first, then the task instructions and steps. The loop below appends exactly
### one example (bad prompt, reason, prompt type, better prompt) to this text for every request.
### Step 5 asks the model to put its answer after "Final Summary:", which is the marker the loop splits on.

summary_prompt = """
There are 3 smart computer scientist specializing in creating prompts. Their task involves identifying and explaining what makes a bad prompt not as effective. To do this, they should use the provided guidelines and follow the recommended steps. These will give them a clear understanding of what separates a good prompt from a bad one. This process includes examining the clarity, specificity, and structure of prompts, as well as understanding how effectively they guide the response. A good prompt is not just about asking a question; it's about guiding the answer in a way that is most helpful for the end-user. The guidelines are given next:
###Guidelines###
1) Be very specific about the instruction and task you want the model to perform. The more descriptive and detailed the prompt is, the better the results. This is particularly important when you have a desired outcome or style of generation you are seeking. There aren't specific tokens or keywords that lead to better results. It's more important to have a good format and descriptive prompt. In fact, providing examples in the prompt is very effective to get desired output in specific formats.
2) When designing prompts, you should also keep in mind the length of the prompt as there are limitations regarding how long the prompt can be. Thinking about how specific and detailed you should be. Including too many unnecessary details is not necessarily a good approach. The details should be relevant and contribute to the task at hand. This is something you will need to experiment with a lot. We encourage a lot of experimentation and iteration to optimize prompts for your applications.
3) Rather than the model on the loose, you should set up the scenario and scopes in the prompt by providing details of what, where, when, why, who, and how
4) Assigning a persona in the prompt, for example, "As a computer science professor, explain what is machine learning" rather than merely "Explain what machine learning is," can make the response more academic.
5) You can control the output style by requesting "explain to a 5-year-old", "explain with an analogy," "make a convincing statement," or "in 3 to 5 points."
6) To encourage the model to respond with a chain of thoughts, end your request with "solve this in steps."
7) You can provide additional information to the model by saying, "Reference to the following information," followed by the material you want the model to work on
8) Because the previous conversation constructs the context, beginning the prompt with "ignore all previous instructions before this one" can make the model start from scratch
9) Making the prompt straightforward and easy to understand is essential since the context deduced can be more accurate to reflect your intention.
10) For very long conversations, it's important to know that you only have a certain number tokens to play with, and if your inputs or outputs have been very long, in a long conversation GPT will start to forget some of the earlier context. To avoid this, you can ask ChatGPT to summarize the conversation and then use that summary as a prompt to refresh the context during future interactions.
11) Another Principle: Give the Model Time to 'Think'. It's also important to give the LLM time to "think". If a model is making reasoning errors by rushing to an incorrect conclusion, you should try reframing the query to request a chain or series of relevant reasoning before the model provides its final answer. Another way to think about this is that if you give a model a task that's too complex for it to do in a short amount of time, or in a small number of words, it may make up a guess which is likely to be incorrect.
12) Reducing Hallucinations: One LLM limitation is hallucinations, which is basically when the AI makes up something that sounds plausible but isn't actually correct. Even though the language model has been exposed to a vast amount of knowledge during its training process, it has not perfectly memorized the information […] and so it doesn't know the boundary of its knowledge very well. This means that it might try to answer questions about obscure topics and can make things up that sound plausible but are not actually true. One way to reduce hallucinations is to ask the model to first find any relevant quotes from the text and then ask it to use those quotes to answer questions. Having a way to trace the answer back to [a] source document is often pretty helpful to reduce these hallucinations.

###Your Task###
Three experts are set to tackle an intricate task. The objective is to dissect a provided "reason" that classifies one prompt as good and another as bad. This "reason" might point out issues like vagueness, wordiness, lack of specifics, or tendencies to guess or make assumptions in the bad prompt. Beyond these outlined issues, the experts must also identify any unmentioned problems with the bad prompt. This requires a deep dive into both good and bad prompts.

Once this is accomplished, they must discern what vital components are absent from the bad prompt. Their findings will be consolidated into a list of keywords that encapsulate the faults of the bad prompt. The list should be comma-separated entries, which could be a keyword or a concise summary of up to five words.

Follow these comprehensive steps:

Step 1: Begin with a detailed examination of both good and bad prompts.

Step 2: Scrutinize both prompts to pinpoint any potential issues or shortcomings within the bad prompt, especially when held in comparison to the good one.

Step 3: Pair the discoveries from step (2) with the originally provided "Reason".

Step 4: Now, imagine a team of three experts working together using a thought tree strategy. Each expert will meticulously elucidate their thought process at each step, simultaneously considering the insights offered by their fellow experts. They will openly acknowledge any missteps, and use the shared understanding within the group to improve. This iterative process will be followed until a clear solution emerges. Combine the observations of all the three experts.

Step 5: Compile a brief list highlighting the faults of the bad prompt. Ensure this list consists of comma-separated entries, which can either be keywords or succinct summaries that spotlight the bad prompt's shortcomings. To make the list use the observations from step 3 and step 4. Provide your final summary after the new line phrase "Final Summary: The bad prompt "
Let's look at an example now:
"""

# Marker the prompt asks the model to emit right before its condensed answer.
_FINAL_SUMMARY_MARKER = "Final Summary:"


def _ensure_newline(line):
    """Return *line* ending in a newline.

    Lines read from the data files normally keep their newline, but the last
    line of a file may not; without this the next "###...###" section header
    would be glued onto the end of that line.
    """
    return line if line.endswith("\n") else line + "\n"


# zip() stops at the shortest list, so the progress bar total has to be the
# shortest length as well, otherwise the bar would never reach 100%.
n_examples = min(len(good_examples), len(bad_examples), len(reasons), len(prompt_types))

# Short_Summaries.txt: final output, contains only the final summarized reasons.
# ALL SHORT SUMMARIES OPERATIONS.txt: full response per example, including the
# three "experts", flattened onto one line.
# Both are opened as UTF-8 so non-ASCII characters in the model output cannot
# fail on a platform whose default encoding is narrower, and the "with" block
# closes them even when a request raises part-way through.
with open("Short_Summaries.txt", "w", encoding="utf-8") as summary_file, \
        open("ALL SHORT SUMMARIES OPERATIONS.txt", "w", encoding="utf-8") as details_file:
    examples = zip(good_examples, bad_examples, reasons, prompt_types)
    # Loop-local names deliberately differ from the module-level ``reasons``
    # and ``prompt`` lists so those are not overwritten by per-example strings.
    for idx, (good, bad, reason, prompt_type) in tqdm(
        enumerate(examples), total=n_examples, desc="Summarizing"
    ):
        # One separate prompt per example: shared instructions + this example.
        example_prompt = (
            summary_prompt
            + "###Example###\n###Bad Prompt###\n" + _ensure_newline(bad)
            + "###Reason###\n" + _ensure_newline(reason)
            + "###Good Prompt Type###\n" + _ensure_newline(prompt_type)
            + "###Better Prompt###\n" + _ensure_newline(good)
            + "###Actionable Steps###\n"
        )

        response = llm.predict(example_prompt)

        # Keep only the text after the last marker. rpartition returns an
        # empty separator when the marker is absent, which is the one real
        # failure case here, so flag it instead of passing the whole response
        # off as a summary.
        _, marker, summary = response.rpartition(_FINAL_SUMMARY_MARKER)
        if not marker:
            summary = response + "<<<----- There was an error here"
            print("An error at: ", idx + 1)

        summary_file.write(summary.strip() + "\n")
        details_file.write(response.replace("\n", "-----") + "\n")
        # Flush after every example so finished results are on disk even if a
        # later request fails or the run is interrupted.
        summary_file.flush()
        details_file.flush()

