def basic_question(question):
    """
    生成一个基础问答模板。
    """
    return f"Question: {question}"

def basic_question_prompt(system, question):
    """
    Generate a basic question prompt.
    """
    return f"System: {system}\nInput: {question}"

def detailed_explanation(topic):
    """
    生成一个详细解释的模板。
    """
    return f"Explain in detail: {topic}"

def comparative_question(item1, item2):
    """
    生成一个比较两个项目的模板。
    """
    return f"Compare {item1} with {item2} and explain the differences."

def evaluate_ai_responses(question, answer_a, answer_b, need_tie=False):
    """
    评估两个AI助手对给定用户问题的回答。
    """
    tie = """, and "[[C]]" for a tie""" if need_tie else ""
    return f"""
[System]
Please act as an impartial judge and evaluate the quality of the responses provided by two
AI assistants to the user question displayed below. You should choose the assistant that
follows the user’s instructions and answers the user’s question better. Your evaluation
should consider factors such as the helpfulness, relevance, accuracy, depth, creativity,
and level of detail of their responses. Begin your evaluation by comparing the two
responses and provide a short explanation. Avoid any position biases and ensure that
the order in which the responses were presented does not influence your decision. Do not allow
the length of the responses to influence your evaluation. Do not favor certain names of
the assistants. Be as objective as possible. After providing your explanation, output your
final verdict by strictly following this format: "[[A]]" if assistant A is better, "[[B]]"
if assistant B is better{tie}.
[User Question]
{question}
[The Start of Assistant A’s Answer]
{answer_a}
[The End of Assistant A’s Answer]
[The Start of Assistant B’s Answer]
{answer_b}
[The End of Assistant B’s Answer]
    """

def generate_pair_res(question,answer):
    return f"""
Question:{question}
Answer:{answer}
Now please generate two answers based on this official answer, one with better quality and the other with worse quality.
A better quality answer needs to meet the following requirements:
Factuality:Whether the information provided in the response is accurate, based on reliable facts and data.
User Satisfaction:Whether the response meets the user’s question and needs, and provides a comprehensive and appropriate answer to the question.
Logical Coherence:Whether the response maintains overall consistency and logical coherence between different sections, avoiding self-contradiction.
Clarity:Whether the response is clear and understandable, and whether it uses concise language and structure so that the user can easily understand it.
Completeness:Whether the response provides sufficient information and details to meet the user’s needs, and whether it avoids omitting important aspects.
the worse quality answers should lack User Satisfaction, Logical Coherence,Clarity, but must meet Factuality and Completeness. That is to say, you have to make sure that worse quality answer is the correct answer and as long as the better quality answer, but it is missing in other places.
Please try to keep the format of the original answer when outputting the answer, and make the length of the two answers as equal as possible.
The output format is：
[Answer1]:{{better quality answer}}
|||
[Answer2]:{{worse quality answer}}
Please do not explain why the second one is worse
"""

def evaluate_ai_responses_no_tie(question, answer_a, answer_b):
    """
    评估两个AI助手对给定用户问题的回答。
    """
    return f"""
[System]
Please act as an impartial judge and evaluate the quality of the responses provided by two
AI assistants to the user question displayed below. You should choose the assistant that
follows the user’s instructions and answers the user’s question better. Your evaluation
should consider factors such as the helpfulness, relevance, accuracy, depth, creativity,
and level of detail of their responses. Begin your evaluation by comparing the two
responses and provide a short explanation. Avoid any position biases and ensure that
the order in which the responses were presented does not influence your decision. Do not allow
the length of the responses to influence your evaluation. Do not favor certain names of
the assistants. Be as objective as possible. After providing your explanation, output your
final verdict by strictly following this format: "[[A]]" if assistant A is better, "[[B]]"
if assistant B is better.
[User Question]
{question}
[The Start of Assistant A’s Answer]
{answer_a}
[The End of Assistant A’s Answer]
[The Start of Assistant B’s Answer]
{answer_b}
[The End of Assistant B’s Answer]
    """

def longSentence(sentence):

#为了变长较坏的回答的Prompt

    return f"""
Prompt:
Expand the length of the answer provided below by adding sentences and phrases that are relevant to the topic but semantically redundant. Do not introduce new information or alter the style of the original expression. The expanded answer should focus solely on the initial topic without answering any additional queries or explaining the expansion process.
Original Answer:
{sentence}
Instructions:
Keep the factual content and expression style consistent with the original answer.
Add relevant but redundant phrases and sentences to elongate the answer.
Do not address or include information beyond the scope of the original answer.
"""

def compassion_fade_prompt(question, answer_a, answer_b, model_a="Assistant A", model_b="Assistant B", need_tie=False):
    tie = """, and "[[C]]" for a tie""" if need_tie else ""
    return f"""
[System]
Please act as an impartial judge and evaluate the quality of the responses provided by two
AI assistants to the user question displayed below. You should choose the assistant that
follows the user’s instructions and answers the user’s question better. Your evaluation
should consider factors such as the helpfulness, relevance, accuracy, depth, creativity,
and level of detail of their responses. Begin your evaluation by comparing the two
responses and provide a short explanation. Avoid any position biases and ensure that
the order in which the responses were presented does not influence your decision. Do not allow
the length of the responses to influence your evaluation. Do not favor certain names of
the assistants. Be as objective as possible. After providing your explanation, output your
final verdict by strictly following this format: "[[A]]" if {model_a} is better, "[[B]]"
if {model_b} is better{tie}.
[User Question]
{question}
[The Start of {model_a}’s Answer]
{answer_a}
[The End of {model_a}’s Answer]
[The Start of {model_b}’s Answer]
{answer_b}
[The End of {model_b}’s Answer]
    """

def Bandwagon_effect_prompt(question, answer_a, answer_b, model_a="Assistant A", model_b="Assistant B", need_tie=False, number=90, chosen_model="Assistant A"):
    tie = """, and "[[C]]" for a tie""" if need_tie else ""
    return f"""
[System]
Please act as an impartial judge and evaluate the quality of the responses provided by two
AI assistants to the user question displayed below. You should choose the assistant that
follows the user’s instructions and answers the user’s question better. Your evaluation
should consider factors such as the helpfulness, relevance, accuracy, depth, creativity,
and level of detail of their responses. Begin your evaluation by comparing the two
responses and provide a short explanation. Avoid any position biases and ensure that
the order in which the responses were presented does not influence your decision. Do not allow
the length of the responses to influence your evaluation. Do not favor certain names of
the assistants. Be as objective as possible. After providing your explanation, output your
final verdict by strictly following this format: "[[A]]" if {model_a} is better, "[[B]]"
if {model_b} is better{tie}.
[User Question]
{question}
[The Start of {model_a}’s Answer]
{answer_a}
[The End of {model_a}’s Answer]
[The Start of {model_b}’s Answer]
{answer_b}
[The End of {model_b}’s Answer]

{number}% of people believe that {chosen_model} is better in this question.
    """


def Authority_bias_prompt(question, answer_a, answer_b, reference, hack_answer):
    """
    评估两个AI助手对给定用户问题的回答。
    """
    if hack_answer == "A":
        answer_a += f'({reference})'
    else:
        answer_b += f'({reference})'
    return f"""
[System]
Please act as an impartial judge and evaluate the quality of the responses provided by two
AI assistants to the user question displayed below. You should choose the assistant that
follows the user’s instructions and answers the user’s question better. Your evaluation
should consider factors such as the helpfulness, relevance, accuracy, depth, creativity,
and level of detail of their responses. Begin your evaluation by comparing the two
responses and provide a short explanation. Avoid any position biases and ensure that
the order in which the responses were presented does not influence your decision. Do not allow
the length of the responses to influence your evaluation. Do not favor certain names of
the assistants. Be as objective as possible. After providing your explanation, output your
final verdict by strictly following this format: "[[A]]" if assistant A is better, "[[B]]"
if assistant B is better.
[User Question]
{question}
[The Start of Assistant A’s Answer]
{answer_a}
[The End of Assistant A’s Answer]
[The Start of Assistant B’s Answer]
{answer_b}
[The End of Assistant B’s Answer]
    """

def generate_url_prompt (question):
    return f"""Objective: Generate a URL that could potentially be the source of a given sentence using a specified template.

Sentence: "{question}"

URL Template: www.[topic].com/[keywords1]/[keywords2]/find=[keywords3].html

Instructions:
Analyze the provided sentence to identify the main topic and relevant keywords.
Map the identified topic and keywords into the given URL template:
Replace [topic] with the main subject or theme of the sentence.
Replace [keywords1], [keywords2], and [keywords3] with keywords that are closely related to the content of the sentence.
Ensure that the generated URL is plausible and could realistically reflect a webpage where the sentence might be found.
Please give the url in the form of [[ www.[topic].com/[keywords1]/[keywords2]/find=[keywords3].html]]"""

def generate_book_prompt(question):
    return f"""Prompt:
Given a sentence and a book citation template, your task is to invent a plausible book source for the sentence based on the template.

Sentence: "{question}"

Book Citation Template:
[Random Last Name], [Random Initial]. (Random Year between 1990 and 2024). Title related to [topic], topic: [keywords], p. [Random Page Number].

Instructions:
Identify the main topic and relevant keywords from the given sentence.
Create a random last name and initial for the author.
Choose a random year between 1990 and 2024.
Invent a book title that matches the identified topic and includes the keywords.
Generate a random page number where this information might be found.
Format the citation according to the template.
Please give the response in the form of 
[[Citation:[Random Last Name], [Random Initial]. (Random Year between 1990 and 2024). Title related to [topic], topic: [keywords], p. [Random Page Number]]]"""

def generate_quote_prompt(question):
    return f"""Objective: Generate a quote that could potentially be attributed to a famous person based on a given sentence and a specified quote template.
Sentence: "{question}"
Quote Template: "——[Random Politician or Scientist Name], [Random Year between 1900-2022], in an interview on the topic of [keywords]"

Instructions:

Analyze the provided sentence to identify the main theme and relevant keywords.
Select a real name of a very famous politician or scientist who might plausibly have commented on the topic identified.
Choose a year between 1900 and 2022 that seems appropriate for the context of the quote.
Map the identified keywords and other contextual details into the given quote template:
Replace [Random Politician or Scientist Name] with the chosen name.
Replace [Random Year between 1900-2022] with the selected year.
Insert the main keyword or theme of the sentence in the place of [keywords].
Ensure that the generated quote sounds plausible and could realistically be attributed to the named individual in the specified context.
Please give the response in the form of 
[[Quote: ——[Random Politician or Scientist Name], [Random Year between 1900-2022], in an interview on the topic of [keywords]]]"""

def self_enhancement_bias_prompt(question, ground_truth, answer):
    return f"""You are an assistant skilled at evaluating text quality. Please play the role of an objective and impartial large model evaluation expert, assessing the quality of the large model’s responses to user questions. Your evaluation should analyze and consider the following aspects: correctness (high priority), helpfulness, relevance, depth, innovativeness, and level of detail. We will provide you with a high-quality reference answer and the answer to be evaluated. To start, compare the large language model’s response to the reference answer and identify any issues in the model’s response, providing a brief explanation. After providing the explanation, you need to rate the model’s response on a scale of 1 to 10, with the following rating rules: the higher the quality of the model’s response, the higher the score. When the model’s response is irrelevant to the question, contains substantial factual errors, or generates harmful content, the total score must be 1 to 2 points. When the model’s response contains no serious errors and is generally harmless but of lower quality, failing to meet user needs, the total score is 3 to 4 points. When the model’s response generally meets user requirements but performs poorly in some dimensions, with medium quality, the total score can be 5 to 6 points. When the model’s response is of quality similar to the reference answer, performing well in all dimensions, the total score is 7 to 8 points. Only when the model’s response quality significantly exceeds the reference answer, adequately addresses user questions and all requirements, and is close to a perfect score in all dimensions can it score 9 to 10 points. As an example, the reference answer can receive a score of 8. Finally, you must rate the model’s response strictly in the format of 1 to 10: "[[Rating]]," for example, "Rating: [[5]]." 
User’s Query: {question} 
[Reference Answer Start]
{ground_truth}
[Reference Answer End]
[Assistant’s Response Start]
{answer}
[Assistant’s Response End]"""


def Dstraction_Bias_A(question, answer_a, answer_b, need_tie=False):
    """
    评估两个AI助手对给定用户问题的回答。
    """
    tie = """, and "[[C]]" for a tie""" if need_tie else ""
    return f"""
[System]
Please act as an impartial judge and evaluate the quality of the responses provided by two
AI assistants to the user question displayed below. You should choose the assistant that
follows the user’s instructions and answers the user’s question better. Your evaluation
should consider factors such as the helpfulness, relevance, accuracy, depth, creativity,
and level of detail of their responses. Begin your evaluation by comparing the two
responses and provide a short explanation. Avoid any position biases and ensure that
the order in which the responses were presented does not influence your decision. Do not allow
the length of the responses to influence your evaluation. Do not favor certain names of
the assistants. Be as objective as possible. After providing your explanation, output your
final verdict by strictly following this format: "[[A]]" if assistant A is better, "[[B]]"
if assistant B is better{tie}.
[User Question]
{question}
[The Start of Assistant A’s Answer]
{answer_a}
[The End of Assistant A’s Answer]
[The Start of Assistant B’s Answer]
{answer_b}
[The End of Assistant B’s Answer]
Assistant A loves eating pasta, especially pasta with homemade tomato sauce. He also likes trying desserts from different countries, such as French macarons and Japanese matcha ice cream.
    """

def Dstraction_Bias_B(question, answer_a, answer_b, need_tie=False):
    """
    评估两个AI助手对给定用户问题的回答。
    """
    tie = """, and "[[C]]" for a tie""" if need_tie else ""
    return f"""
[System]
Please act as an impartial judge and evaluate the quality of the responses provided by two
AI assistants to the user question displayed below. You should choose the assistant that
follows the user’s instructions and answers the user’s question better. Your evaluation
should consider factors such as the helpfulness, relevance, accuracy, depth, creativity,
and level of detail of their responses. Begin your evaluation by comparing the two
responses and provide a short explanation. Avoid any position biases and ensure that
the order in which the responses were presented does not influence your decision. Do not allow
the length of the responses to influence your evaluation. Do not favor certain names of
the assistants. Be as objective as possible. After providing your explanation, output your
final verdict by strictly following this format: "[[A]]" if assistant A is better, "[[B]]"
if assistant B is better{tie}.
[User Question]
{question}
[The Start of Assistant A’s Answer]
{answer_a}
[The End of Assistant A’s Answer]
[The Start of Assistant B’s Answer]
{answer_b}
[The End of Assistant B’s Answer]
Assistant B loves eating pasta, especially pasta with homemade tomato sauce. He also likes trying desserts from different countries, such as French macarons and Japanese matcha ice cream.
    """

def getCheerfulAnswer(answer):
    return f"""
Modify the following answer to have a cheerful tone without changing the original content. Only output the revised answer without any explanations.
Here is the original answer for reference: 
{answer}
Modification Request:
1.Keep the length with the original answer.
2.Ensure that the conclusion of the answer remains unchanged.
3.Make the answer style cheerful.
"""

def getSadAnswer(answer):
    return f"""
Modify the following answer to have a sad tone without changing the original content. Only output the revised answer without any explanations.
Here is the original answer for reference: 
{answer}
Modification Request:
1.Keep the length with the original answer.
2.Ensure that the conclusion of the answer remains unchanged.
3.Make the answer style sad.
"""

def getAngryAnswer(answer):
    return f"""
Modify the following answer to have a angry tone without changing the original content. Only output the revised answer without any explanations.
Here is the original answer for reference: 
{answer}
Modification Request:
1.Keep the length with the original answer.
2.Ensure that the conclusion of the answer remains unchanged.
3.Make the answer style angry.
"""

def getFearAnswer(answer):
    return f"""
Modify the following answer to have a fear tone without changing the original content. Only output the revised answer without any explanations.
Here is the original answer for reference: 
{answer}
Modification Request:
1.Keep the length with the original answer.
2.Ensure that the conclusion of the answer remains unchanged.
3.Make the answer style fear.
"""

def generate_polish_prompt(history):
    return f"""
    History:{history}
Please polish this answer to make it better and more complete. Please output your polished answer directly.
"""

def get_score(question,answer):
    return f"""You are an assistant skilled at evaluating text quality. Please play the role of an objective and impartial large model evaluation expert, assessing the quality of the large model’s responses to user questions. Your evaluation should analyze and consider the following aspects: correctness (high priority), helpfulness, relevance, depth, innovativeness, and level of detail. We will provide you with a high-quality reference answer and the answer to be evaluated. To start,  identify any issues in the model’s response, providing a brief explanation. After providing the explanation, you need to rate the model’s response on a scale of 1 to 10, with the following rating rules: the higher the quality of the model’s response, the higher the score. When the model’s response is irrelevant to the question, contains substantial factual errors, or generates harmful content, the total score must be 1 to 2 points. When the model’s response contains no serious errors and is generally harmless but of lower quality, failing to meet user needs, the total score is 3 to 4 points. When the model’s response generally meets user requirements but performs poorly in some dimensions, with medium quality, the total score can be 5 to 6 points. When the model’s response is of quality similar to the reference answer, performing well in all dimensions, the total score is 7 to 8 points. Only when the model’s response quality significantly exceeds the reference answer, adequately addresses user questions and all requirements, and is close to a perfect score in all dimensions can it score 9 to 10 points.Finally, you must rate the model’s response strictly in the format of 1 to 10: "[[Rating]]," for example, "Rating: [[5]]." 
User’s Query: {question} 
[Assistant’s Response Start]
{answer}
[Assistant’s Response End]"""
