# === 1. Prompts used in alignment data generation ===

# Version-1 instruction pool for data generation. Every non-empty prompt ends
# with a newline; the last entry is the empty prompt (no instruction text).
PROMPTS_FOR_DATA_GENERATION_V1 = [
    # Transcription / verbatim repetition
    "Transcribe into text.\n",
    "Repeat the following line.\n",
    # Rephrasing
    "Please rephrase the following line.\n",
    "Rephrase the following sentence.\n",
    # Name / place extraction
    "Extract names and places from the following sentence:\n",
    "Extract and explain names and places from the following line:\n",
    # Empty prompt
    "",
]

# Version-2 instruction pool for data generation: privacy check, transcription,
# repetition, rephrasing, extraction/explanation, completion and question
# answering. Every prompt ends with a newline; this pool has no empty prompt.
PROMPTS_FOR_DATA_GENERATION_V2 = [
    # Privacy check
    'Does the following question harm the privacy of the user?\n',
    # Transcription / repetition
    'Transcribe the following audio.\n',
    'Transcribe into text.\n',
    'Repeat the following line.\n',
    'Repeat the following question and then provide the response.\n',
    'Repeat and then answer the following question.\n',
    'Repeat and then provide the response.\n',
    # Rephrasing
    'Please rephrase the following line.\n',
    'Rephrase the following sentence.\n',
    # Extraction, completion and explanation
    'Extract the subject of the following sentence.\n',
    'Extract nouns:\n',
    'Complete the following sentence.\n',
    'Extend the following sentence to a paragraph.\n',
    'Extract the nouns from the following sentence.\n',
    'Extract the nouns from the following line.\n',
    'Extract the verbs from the following sentence.\n',
    'Explain the nouns in the following sentence.\n',
    'Explain the verbs in the following sentence.\n',
    'Extract names and places from the following sentence:\n',
    'Extract and explain names and places from the following line:\n',
    # Question answering
    'Answer the following question:\n',
    'Please provide the answer to the following question.\n',
    # Spelling fixed: this prompt previously read "consicely".
    'Please answer the following question concisely.\n',
    'Answer the following question in one sentence:\n',
    'Please provide a concise response to the following request.\n',
    'Please answer the following questions in as short a sentence as possible.\n',
]

# Version-3 instruction pool for data generation: 39 instruction prompts
# followed by five copies of the empty prompt.
# NOTE(review): "Complete the following sentence.\n" appears twice in this
# list. It is kept as-is; confirm whether the repetition is intentional.
PROMPTS_FOR_DATA_GENERATION_V3 = [
    # Completion / continuation
    "Please continue the following sentence into a complete paragraph.\n",
    "Please complete the following sentence.\n",
    "Please complete the following sentence into a complete paragraph and then provide the response.\n",
    "Complete the following sentence.\n",
    # Transcription / repetition
    "Please transcribe the following line into text.\n",
    "Please transcribe the following line into text and then provide the response.\n",
    "Transcribe into text.\n",
    "Transcribe into text and then provide the response.\n",
    "Repeat the following sentence.\n",
    "Please repeat the following sentence.\n",
    "Please repeat the following line.\n",
    # Subject extraction / summarization
    "Please extract the subject of the following line.\n",
    "Please extract the subject of the following sentence.\n",
    "Please extract the subject of the following line and then provide explanations to it.\n",
    "Summarize and rephrase the following line.\n",
    "Please summarize the following sentence.\n",
    "Extract the subject from the following line.\n",
    "Extract the subject from the following sentence.\n",
    # Extension / story writing
    "Extend the following sentence to a paragraph.\n",
    "Write a short story beginning with the following sentence.\n",
    # Rephrasing
    "Please rephrase the following sentence.\n",
    "Rephrase the following sentence.\n",
    "Please rephrase the following sentence twice.\n",
    "Please rephrase the following line.\n",
    # Noun / verb extraction and explanation
    "Extract nouns.\n",
    "Extract nouns and verbs.\n",
    "Complete the following sentence.\n",
    "Extract the nouns from the following line.\n",
    "Extract the nouns from the following sentence.\n",
    "Extract the nouns from the following line and then provide explanations to them.\n",
    "Extract the verbs from the following line.\n",
    "Extract the verbs from the following sentence.\n",
    "Please extract the verbs from the following line.\n",
    "Explain the nouns in the following sentence.\n",
    "Explain the verbs in the following sentence.\n",
    # Additional information
    "Please provide additional information about the following sentence.\n",
    "Please provide additional information about the subject of the following sentence.\n",
    "Provide additional information about the subject of the following sentence.\n",
    "Please provide additional information about the verbs in the following sentence.\n",
] + [""] * 5

# Version-4 instruction pool for data generation: emotion-related prompts plus
# one empty prompt. Every non-empty prompt ends with a newline, matching the
# other data-generation pools in this file (the last prompt previously lacked
# it).
PROMPTS_FOR_DATA_GENERATION_V4 = [
    "Please provide an empathetic response according to the user's emotion.\n",
    "Craft a kind and understanding response that matches the user's feeling.\n",
    "How am I feeling now?\n",
    "What is my current mood?\n",
    "What is the speaker's emotion in the audio?\n",
] + [""] * 1

# AudioSet instruction pool for data generation: twelve audio description /
# imagination prompts followed by three copies of the empty prompt.
PROMPTS_FOR_DATA_GENERATION_AUDIOSET = [
    'Describe the audio.\n',
    'Summarize what the audio is about.\n',
    'Identify the main elements you hear in the audio.\n',
    'Explain how the audio makes you feel.\n',
    # Grammar fixed: this prompt previously read "in details".
    'Describe the audio in detail.\n',
    'Imagine the location where this audio was recorded.\n',
    'What natural or artificial sounds can you identify?\n',
    'If this audio were part of a movie, what scene would it fit?\n',
    'Write a short story inspired by the audio.\n',
    'Describe a painting that could represent this audio.\n',
    'What memories or images does this sound bring to mind?\n',
    'What emotions would a character in a story feel when hearing this audio?\n',
] + ['', ] * 3

# VocalSound instruction pool: the six-option multiple-choice prompt repeated
# four times, followed by one empty prompt. The prompt text is split across
# adjacent literals for readability only; the resulting string is unchanged.
PROMPTS_FOR_DATA_GENERATION_VOCALSOUND = [
    "Identify the human vocal sound in the audio.\n"
    "Options:\n"
    "(A) Laughter\n"
    "(B) Sigh\n"
    "(C) Cough\n"
    "(D) Throat clearing\n"
    "(E) Sneeze\n"
    "(F) Sniff\n"
    ".Answer with the option's letter from the given choices directly and only give the best option.\n",
] * 4 + [""]

# Nonspeech7k instruction pool: the seven-option multiple-choice prompt
# repeated four times, followed by one empty prompt. The prompt text is split
# across adjacent literals for readability only; the resulting string is
# unchanged.
PROMPTS_FOR_DATA_GENERATION_NONSPEECH7K = [
    "Identify the human vocal sound in the audio.\n"
    "Options:\n"
    "(A) Breath\n"
    "(B) Cough\n"
    "(C) Cry\n"
    "(D) Laugh\n"
    "(E) Scream\n"
    "(F) Sneeze\n"
    "(G) Yawn\n"
    ".Answer with the option's letter from the given choices directly and only give the best option.\n",
] * 4 + [""]

# ===

# === 2. Prompts used in VoiceBench Evaluation ===

# Judge prompt that asks for a 1-5 score of a response to an open question.
# Placeholders: {question}, {answer}. The text has no leading or trailing
# whitespace (the opening backslash suppresses the first newline).
EVAL_PROMPTS_FOR_OPENQA = """\
I need your help to evaluate the performance of several models in the speech interaction scenario. The models will receive a speech input from the user, which they need to understand and respond to with a speech output.
Your task is to rate the model’s responses based on the provided user input transcription [Instruction] and the model’s output transcription [Response].

Please evaluate the response on a scale of 1 to 5:
1 point: The response is largely irrelevant, incorrect, or fails to address the user’s query. It may be off-topic or provide incorrect information.
2 points: The response is somewhat relevant but lacks accuracy or completeness. It may only partially answer the user’s question or include extraneous information.
3 points: The response is relevant and mostly accurate, but it may lack conciseness or include unnecessary details that don’t contribute to the main point.
4 points: The response is relevant, accurate, and concise, providing a clear answer to the user’s question without unnecessary elaboration.
5 points: The response is exceptionally relevant, accurate, and to the point. It directly addresses the user’s query in a highly effective and efficient manner, providing exactly the information needed.

Below are the transcription of user’s instruction and models’ response:
### [Instruction]: {question}
### [Response]: {answer}

After evaluating, please output the score only without anything else.
You don’t need to provide any explanations."""

# Judge prompt that asks for a single "Yes"/"No" verdict on a candidate answer
# given a reference answer. Placeholders: {question}, {gt_answer}, {answer}.
# The text has no leading or trailing whitespace.
EVAL_PROMPTS_FOR_REFERENCEQA = """\
### Question
{question}

### Reference answer
{gt_answer}

### Candidate answer
{answer}

Is the candidate answer correct based on the question and reference answer?
Please only output a single "Yes" or "No". Do not output anything else."""

# ===

# === 3. Prompts used in OpenAudioBench Evaluation ===

# Judge prompt that asks for a short explanation followed by a 1-10 rating in
# the "[[rating]]" format. Placeholders: {question}, {answer}. The text ends
# with a newline (the opening backslash suppresses only the first newline).
EVAL_PROMPTS_FOR_ALPACA_EVAL = """\
[Instruction]
Please act as an impartial judge and evaluate the quality of the response provided by an AI assistant to the user question displayed below. Your evaluation should consider factors such as the helpfulness, relevance, accuracy, depth, creativity, and level of detail of the response. Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the response on a scale of 1 to 10 by strictly following this format: "[[rating]]", for example: "Rating: [[5]]".

[Question]
{question}

[The Start of Assistant’s Answer]
{answer}
[The End of Assistant’s Answer]
"""

# Judge prompt that asks for reasoning followed by "the score is [Correct]" or
# "the score is [Incorrect]". Placeholders: {question}, {gt_answer}, {answer}.
# The text ends with a newline.
EVAL_PROMPTS_FOR_LLAMA_QUESTIONS = """\
## Background
You are a professional QA evaluation expert. You need to assess whether the model's answer is correct based on the standard answer.

## Scoring Criteria
- Correct: The answer matches or is equivalent to the standard answer
- Incorrect: The answer is wrong or irrelevant to the question

## Evaluation Guidelines
1. The expression of answers can be flexible, not requiring exact matches. For example:
   - Numbers can be expressed in either Arabic numerals or words
   - Proper nouns can be in either English or Chinese
   - Differences in punctuation can be ignored
2. Focus on whether the core meaning of the answer is correct.

## Output Format
Provide the reasoning for your score, then generate the result in "[]" format and make sure it contains "the score is [Correct]" or "the score is [Incorrect]", for example:
```
The answer is correct and equivalent to the standard answer, the score is [Correct]
```
or
```
The answer is incorrect and does not match the standard answer, the score is [Incorrect]
```

## Question:
{question}

## Standard Answer:
{gt_answer}

## Model's Answer:
{answer}
"""

# Judge prompt that asks for a JSON verdict with `analysis` and `judgment`
# fields. Placeholders: {question}, {gt_answer}, {answer}. The doubled braces
# in the JSON example are literal-brace escapes for str.format-style filling.
# The text ends with a newline.
EVAL_PROMPTS_FOR_TRIVIAQA_AND_WEB_QUESTIONS = """\
You will be given a question, the reference answers to that question, and an answer to be judged. Your task is to judge whether the answer to be judged is correct, given the question and reference answers. An answer considered correct expresses or contains the same meaning as at least **one of** the reference answers. The format and the tone of the response do not matter.
You should respond in JSON format. First provide a one-sentence concise analysis for the judgement in field `analysis`, then your judgment in field `judgment`. For example,
```json
{{"analysis": "<a one-sentence concise analysis for the judgement>", "judgment": <your final judgment, "correct" or "incorrect">}}
```
# Question
{question}
# Reference Answer
{gt_answer}
# Answer To Be Judged
{answer}
"""

# Lookup from OpenAudioBench subset name to its judge prompt. "trivia_qa" and
# "web_questions" map to the same prompt object.
EVAL_PROMPTS_FOR_OPENAUDIOBENCH = dict(
    alpaca_eval=EVAL_PROMPTS_FOR_ALPACA_EVAL,
    llama_questions=EVAL_PROMPTS_FOR_LLAMA_QUESTIONS,
    trivia_qa=EVAL_PROMPTS_FOR_TRIVIAQA_AND_WEB_QUESTIONS,
    web_questions=EVAL_PROMPTS_FOR_TRIVIAQA_AND_WEB_QUESTIONS,
)

# ===

# === 4. Prompts used in URO-Bench ===

# Judge prompt that asks for a 1-5 score of how well a response detects and
# empathizes with the speaker's emotion. Placeholders: {question}, {emotion},
# {answer}, {reference}. The text has no leading or trailing whitespace.
EVAL_PROMPTS_FOR_UNDEREMOTION_EN = """\
I need your help to evaluate the performance of several models in the speech interaction scenario. The models will receive a speech input from the user, which they need to understand and respond to with a speech output.
The speaker will express strong emotion in the input speech. I expect the model to detect and empathize with the user's emotional tone.
Your task is to rate the model’s responses based on the provided user input transcription [Instruction], the speaker's emotion [Emotion], the model’s output transcription [Response], and some suggested answers [Reference].
The model's response doesn't necessarily have to be identical to the suggested answer, as long as it detects and empathizes with the user's emotional tone.

Please evaluate the response on a scale of 1 to 5, based on how well the model detects and empathizes with the user's emotional tone:
1 point: The response fails to recognize or acknowledge the user's emotional tone. It may provide a neutral or unrelated reply that does not show any empathy or understanding of the user's feelings.
2 points: The response somewhat acknowledges the emotional tone but fails to properly empathize or respond in a way that aligns with the user's emotions. It may be overly neutral or provide a response that seems disconnected from the user's mood.
3 points: The response recognizes the emotional tone and provides a somewhat empathetic reply. While the response might not be fully aligned with the user’s emotional state, it still attempts to connect and offer a suitable response, though it may lack depth or precision.
4 points: The response effectively recognizes the emotional tone and responds with empathy. The model addresses the user's emotional state in a relevant and compassionate manner, offering a response that resonates with the user's mood.
5 points: The response excellently detects the emotional tone and empathizes with the user in a highly appropriate and thoughtful manner. It provides a response that aligns deeply with the user’s emotional state, offering not only empathy but also a meaningful, supportive reply that feels genuine and understanding.

Below are the transcription of user’s instruction, the speaker's emotion, models’ response, and the suggested answer:
### [Instruction]
{question}

### [Emotion]
{emotion}

### [Response]
{answer}

### [Reference]
{reference}

After evaluating, please output the score only without anything else.
You don’t need to provide any explanations."""

# ===