# Prompt templates keyed by task name.
#
# Every `{}` is a positional placeholder (presumably filled with `str.format`
# by the caller -- confirm). The number and order of placeholders in each
# template is part of the contract with callers and must not change.
# NOTE(review): the `_hn` suffix presumably stands for "hard negative" -- confirm.
prompt_templates = {
    # Placeholders: (1) the concept, (2) one extra instruction sentence, which
    # may be empty (presumably a value of `attribute_prompt` -- confirm).
    "caption": """Your task is to write an image caption that includes and visually describes a scene around a concept.
The concept must be explicitly included in the caption.
Your concept is {}. {}
Output one single grammatically correct caption that is no longer than 20 words.
Do not output any notes, word counts, facts, etc.
Output one single sentence only.""",

    # Placeholders: (1) the concept to change, (2) the original caption.
    "concept_hn": """Given the following image caption, generate a new similar caption by only changing the concept. The concept to change is {}.
Caption: {}
Alter the caption as little as possible for it to be similar to the original caption but still making sense with the new concept.
Do not output any notes, word counts, facts, etc.
Output the new caption only.""",

    # Placeholders: (1) what to change, (2) the original caption, (3) a short
    # name for the changed attribute, (4) the main concept that must be kept.
    # (1) and (3) presumably come from an `attribute_hn_prompt` tuple -- confirm.
    "attribute_hn": """Given the following image caption, generate a new similar caption by only changing the {}.
Caption: {}
Alter the caption as little as possible for it to be similar to the original caption but still making sense with the new {}.
Also, do not change the main concept of the caption being {}.
Do not output any notes, word counts, facts, etc.
Output the new caption only.""",

    # Placeholder: (1) the caption to re-order.
    "unaltered_hn": """Re-write the given caption re-ordering the words while still making sense.
Caption: {}
Do not output any notes, word counts, facts, etc.
Output the new caption only.""",

    # Placeholders: (1) the list of categories (presumably rendered from
    # `label_descriptions` -- confirm), (2) the concept of the first caption,
    # (3) the first caption, (4) the second caption.
    "classification": """Classify the semantic difference between two captions into one of the following categories:

{}

It is mandatory to choose exactly one label. You cannot avoid selecting one of the options, even if the differences seem ambiguous.

The concept of the first caption is {}.
First caption: {}
Second caption: {}

Output the label and nothing else. Do not include explanations, notes, reasoning, etc.""",

    # Placeholder: (1) the list of categories (presumably rendered from
    # `image_label_descriptions` -- confirm).
    "image_classification": """Classify what mostly differs between the two images into one of the following categories:

{}

It is mandatory to choose exactly one label. You cannot avoid selecting one of the options, even if the differences seem ambiguous.

Output the label and nothing else. Do not include explanations, notes, reasoning, etc.""",
}

# One-sentence description of every label available when classifying the
# difference between two captions. Keys are the label names; insertion
# order is the order written here.
label_descriptions = dict(
    concept="The scene's concept differs.",
    background="The scene's background differs.",
    position="The described positional relationship differs.",
    color="The described colors differ.",
    size="Size adjectives differ.",
    lighting="The described lighting differs.",
    texture="The described surface texture differs.",
    material="The described material differs.",
    perspective="The described perspective or camera angle differs.",
    style="The described artistic style differs.",
    unaltered="Both captions describe the same scene.",
)

# One-sentence description of every label available when classifying the
# difference between two images. Uses the same label names as
# `label_descriptions`, phrased for images rather than captions.
image_label_descriptions = {
    "concept": "The scene's subject differs.",
    "background": "The scene's background differs.",
    # Singular subjects ("relationship", "size") take "differs", matching the
    # "texture" and "material" entries below.
    "position": "The positional relationship of one or more elements differs.",
    "color": "The colors of one or more elements differ.",
    "size": "The size of one or more elements differs.",
    "lighting": "The lighting in the image differs.",
    "texture": "The surface texture of one or more elements differs.",
    "material": "The material of one or more elements differs.",
    "perspective": "The perspective or camera angle in the image differs.",
    "style": "The artistic style of the image differs.",
    "unaltered": "Both images show the same scene.",
}

# Extra instruction sentence per attribute, all following the pattern
# "Include <something> to the caption.".
attribute_prompt = {
    # Empty on purpose: the concept is already included in the prompt.
    "concept": "",
    **{
        attribute: "Include {} to the caption.".format(what_to_include)
        for attribute, what_to_include in (
            ("background", "a background"),
            ("position", "a positional relationship"),
            ("color", "a color"),
            ("size", "a size adjective"),
            ("lighting", "a lighting related term"),
            ("texture", "a surface texture detail"),
            ("material", "a material"),
            ("perspective", "the perspective or camera angle"),
            ("style", "an artistic style"),
        )
    },
}

# Per-attribute pair of phrases: (description of what to change, short name of
# the attribute).
# NOTE(review): presumably the two items fill the first and third `{}` of the
# "attribute_hn" prompt template -- confirm against the caller.
attribute_hn_prompt = {
    "background": ("background", "background"),
    "position": ("positional relationship", "positional relationship"),
    "color": ("color", "color"),
    "size": ("size adjective, possibly with an opposite meaning", "size"),
    "lighting": ("lighting, possibly with an opposite meaning", "lighting"),
    "texture": ("surface texture", "texture"),
    "material": ("material", "material"),
    "perspective": ("perspective", "perspective"),
    "style": ("artistic style", "style"),
}