# Coarse label sets. Neither list is referenced elsewhere in the visible part
# of this file.
# NOTE(review): presumably temporal position / length buckets for events in a
# video - confirm against the code that imports them.
locations = [
    "early",
    "mid",
    "late",
]
durations = [
    "long",
    "medium",
    "short",
]

# Closed vocabulary of entity labels that is rendered into the prompt's
# `context` sentence as a comma-separated list.
#
# Every entry ends with a trailing comma on purpose: a missing comma between
# two adjacent string literals makes Python silently concatenate them (this
# previously fused "window" and "ceiling" into a single bogus "windowceiling"
# label and dropped both real labels from the vocabulary).
#
# NOTE(review): unusual spellings such as "ballon", "vaccum" and "scissor" are
# kept as-is; they presumably mirror an upstream label set - confirm before
# "correcting" them.
entities = [
    "adult",
    "baby",
    "bag",
    "ball",
    "ballon",
    "basket",
    "bat",
    "bed",
    "bench",
    "beverage",
    "bike",
    "bird",
    "blanket",
    "board",
    "book",
    "bottle",
    "bowl",
    "box",
    "bread",
    "brush",
    "bucket",
    "cabinet",
    "cake",
    "camera",
    "can",
    "candle",
    "car",
    "card",
    "carpet",
    "cart",
    "cat",
    "cellphone",
    "chair",
    "child",
    "chopstick",
    "cloth",
    "computer",
    "condiment",
    "cookie",
    "countertop",
    "cover",
    "cup",
    "curtain",
    "dog",
    "door",
    "drawer",
    "dustbin",
    "egg",
    "fan",
    "faucet",
    "fence",
    "flower",
    "fork",
    "fridge",
    "fruit",
    "gift",
    "glass",
    "glasses",
    "glove",
    "grain",
    "guitar",
    "hat",
    "helmet",
    "horse",
    "iron",
    "knife",
    "light",
    "lighter",
    "mat",
    "meat",
    "microphone",
    "microwave",
    "mop",
    "net",
    "noodle",
    "others",
    "oven",
    "pan",
    "paper",
    "piano",
    "pillow",
    "pizza",
    "plant",
    "plate",
    "pot",
    "powder",
    "rack",
    "racket",
    "rag",
    "ring",
    "scissor",
    "shelf",
    "shoe",
    "simmering",
    "sink",
    "slide",
    "sofa",
    "spatula",
    "sponge",
    "spoon",
    "spray",
    "stairs",
    "stand",
    "stove",
    "switch",
    "table",
    "teapot",
    "towel",
    "toy",
    "tray",
    "tv",
    "vaccum",
    "vegetable",
    "washer",
    "window",
    "ceiling",
    "floor",
    "grass",
    "ground",
    "rock",
    "sand",
    "sky",
    "snow",
    "tree",
    "wall",
    "water",
]


# Role-setting preamble. It opens and closes with a newline, and the sentence
# keeps its trailing space before the final newline.
user = "\nYou are a super user in logic programming. \n"

# Sentence that spells out the allowed entity vocabulary as a ", "-separated
# list, wrapped in a leading and a trailing newline.
# NOTE(review): "entites" is a typo for "entities" in the prompt text; it is
# runtime text and is deliberately left unchanged here.
context = "\nThe entites in the video can be: " + ", ".join(entities) + ".\n"

# Few-shot example 1: a caption followed by the expected "Entity Description
# json" answer. The answer repeats the caption, lists the entities that occur
# ("adult" is a vocabulary label; "man"/"woman" are taken from the caption and
# are not in `entities`) and five unlikely entities drawn from `entities`.
# NOTE(review): unlike example2/example3, the caption on the "Caption:" line
# is not wrapped in double quotes - confirm whether that is intentional.
example1 = '''
Caption: A man carries a child and walks to the left from behind a woman holding another child.
Entity Description json:
{   
    "caption": "A man carries a child and walks to the left from behind a woman holding another child.",
    "occured entities": ["man", "child", "woman", "adult"],
    "unlikely occur entities": ["stove", "bat", "bed", "spatula", "snow"]
}
'''

# Few-shot example 2: a double-quoted caption followed by the expected "Entity
# Description json" answer. The occurring entities mix caption words
# ("woman", "kitten") with vocabulary labels ("cat", "meat", "chair"); the
# five unlikely entities are all drawn from `entities`.
example2 = '''
Caption: "A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat."
Entity Description json:
{
    "caption": "A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat.",
    "occured entities": ["woman", "cat", "kitten", "meat", "chair"],
    "unlikely occur entities": ["horse", "glove", "racket", "powder", "piano"]
}
'''


# Few-shot example 3: a double-quoted caption followed by the expected "Entity
# Description json" answer. The occurring entities are the words used in the
# caption ("television" rather than the vocabulary label "tv"); the five
# unlikely entities are all drawn from `entities`.
example3 = '''
Caption: "The young boy walks through the television to pick up a gift."
Entity Description json:
{
    "caption": "The young boy walks through the television to pick up a gift.",
    "occured entities": ["boy", "television", "gift"],
    "unlikely occur entities": ["faucet", "brush", "card", "cart", "helmet"]
}  
'''

# Task instruction placed after the (optional) few-shot examples. It asks for
# the occurring entities plus 5 unlikely ones per caption and fixes the
# top-level output shape. Written as adjacent literals, one per prompt line.
query = (
    "\n"
    "Given the caption summarize the entities that occured in the scene"
    " and infer 5 unlikely and diverse entities occuring in the scene.\n"
    "Please generate the entity description json programs"
    " for the following captions in the following format:\n"
    '{"captions": {caption_id: caption json programs}}\n'
)
    
# Worked examples, in the order they are shown to the model.
all_examples = [example1, example2, example3]

# Few-shot section: the examples glued together with a newline separator.
few_shot_prompt = "\n".join(all_examples)

# Complete caption-less prompt: preamble, vocabulary, examples, instruction.
prompt = "\n".join((user, context, few_shot_prompt, query))

def wrap_prompt(caption_ls, few_shot=True):
    """Build the prompt text for a batch of captions.

    The result consists of the module-level ``context``, optionally the
    ``few_shot_prompt`` examples, the ``query`` instruction and then one
    ``"<index>. <caption>"`` line per caption, all joined with newlines.
    The ``user`` preamble is not part of the returned text.

    Args:
        caption_ls: Iterable of captions; each one is numbered from 0 in
            iteration order.
        few_shot: When truthy, the few-shot examples are inserted between
            the context and the query.

    Returns:
        The assembled prompt as a single string.
    """
    header = [context, few_shot_prompt, query] if few_shot else [context, query]
    numbered = [f"{idx}. {text}" for idx, text in enumerate(caption_ls)]
    return "\n".join(header + numbered)

if __name__ == "__main__":
    # Manual check: dump the assembled caption-less prompt to stdout.
    print(prompt)