from textwrap import dedent
from collections.abc import Sequence

# Instruction header that get_prompt() prepends to the numbered questions.
# It asks for 'true' / 'false' / 'nan' answers with a brief explanation and
# spells out when 'nan' is, and is not, an acceptable answer.
#
# The literal is indented for readability only; the common indentation is
# removed by the dedent() call below. The text starts with a newline (the
# line break right after the opening quotes) and ends with one, so whatever
# is appended after it begins on its own line.
#
# NOTE(review): the wording contains a few grammatical slips ("You will be
# provide", "the videos shows", "For detect an action"). They are left
# untouched here because this is runtime text, not a comment -- confirm
# whether downstream results depend on the exact wording before rewording.
prompt = """
    You are a professional video analysis expert, specialized in answering questions based on video content. Please answer the following question based **strictly** on the video provided. Ensure that your response is based on the video itself, and not on your own guesses or general knowledge. 
    
    You will be provide some yes/no questions related to the video. Your answer should be in 'true', 'false' or 'nan'. Besides, you should provide a brief explanation or evidence for your answer.
    
    You should answer 'nan' if:
    1. The video quality is too low, or the content is too unclear to make any meaningful inference.
    2. The content in the video is not continuous or complete. The temporal and spatial discontinuities in the video make it impossible to make reasonable predictions.
    3. The question asks about something that cannot be observed or recognized in the video (e.g., an object, event, or action that is not present).
    4. The video does not provide enough context or evidence to form a conclusion.
    5. The answer is unclear or could be interpreted in multiple ways, leading to ambiguity.
    6. The question asks about an action, and the necessary prior action (for example, the ball hitting the ground before it can bounce) is not observed. Without the prior action, it is impossible to determine if the subsequent event occurred. 

    if you believe you can answer yes or no with a reasonable degree of confidence, you should not answer 'nan'. Especially, if the question asks about whether something is present, or an event has occurred, and the videos shows that it is absent or has not occurred, you should answer 'false' instead of 'nan'. For these questions, you can answer 'nan' only if the video quality is too low to make a meaningful inference.
   
    If the question asks about an object, and the object is not observed, answer 'false'. Do not answer 'nan'.
    
    For detect an action, you should refer to some continuous frames to make sure the action is happening, instead of just one frame.
    
    In addition, you should judge each question as independently as possible, and do not answer another question based on the content of another question. In particular, the content of another question itself should not be used as the basis for answering the current question.
    
    Based on the above guidelines, please answer the following questions:
"""

# Strip the shared leading indentation so the text is left-aligned.
# dedent() ignores whitespace-only lines when computing the common margin
# and normalizes them to bare newlines, so the short blank line in the
# literal does not reduce the amount of indentation removed.
prompt = dedent(prompt)

def get_prompt(questions: str | Sequence[str]) -> str:
    """Build the complete prompt text for one or more yes/no questions.

    Args:
        questions: A single question, or a sequence of questions. A bare
            string is treated as one question rather than being iterated
            character by character.

    Returns:
        The module-level ``prompt`` followed by the questions, each rendered
        as ``"<n>. <question>"`` with numbering starting at 1 and entries
        separated by newlines. No newline is added after the last question.
    """
    # A str is itself iterable, so wrap it before numbering.
    items = [questions] if isinstance(questions, str) else questions
    numbered = "\n".join(
        f"{number}. {text}" for number, text in enumerate(items, start=1)
    )
    return prompt + numbered