# Closed vocabularies the model is allowed to use when placing an event in
# the video (location) and describing how long it lasts (duration).
locations = "early mid late".split()
durations = "long medium short".split()

# Role preamble; the trailing space after the sentence is part of the text.
user = "\nYou are a super user in logic programming. \n"

# Tells the model which location/duration words it may use. Built line by
# line so that the whitespace of the prompt is explicit.
context = (
    "\n"
    "Here are some words that describes the event length and location that you can use. \n"
    f"The location in the video can be: {', '.join(locations)}.\n"
    f"The duration of the event can be: {', '.join(durations)}\n"
)

# Few-shot example 1: one caption decomposed into three sequential events.
# The JSON part is kept strictly valid (no trailing commas, every list item
# comma-separated) because the model imitates the example's syntax, and each
# entity letter (A-D) is bound to exactly one noun via a name(...) fact.
example1 = '''
Caption: A man carries a child and walks to the left from behind a woman holding another child.
Action json:
{
    "caption": "A man carries a child and walks to the left from behind a woman holding another child.",
    "sequential descriptions": [
        "man A carry child B, woman C hold child D, man A is behind woman C",
        "man A walk",
        "man A at left"
    ],
    "time stamps": {
        "1": {
            "description": [
                "man A carry child B",
                "woman C hold child D",
                "man A is behind woman C"
            ],
            "programmatic": [
                "binary(carry, A, B)",
                "name(A, man)",
                "name(B, child)",
                "binary(hold, C, D)",
                "name(C, woman)",
                "name(D, child)",
                "binary(behind, A, C)"
            ],
            "duration": "short",
            "video location": "early"
        },
        "2": {
            "description": [
                "man A walk"
            ],
            "programmatic": [
                "unary(walk, A)"
            ],
            "duration": "medium",
            "video location": "mid"
        },
        "3": {
            "description": [
                "man A at left"
            ],
            "programmatic": [
                "unary(left, A)"
            ],
            "duration": "short",
            "video location": "late"
        }
    }
}
'''

# Few-shot example 2: a caption that collapses into a single long event.
# Uses the same "description" key and name(...) facts as the other examples
# so the model sees one consistent schema, and the singing is attributed to
# the two women (A and C), not to the child (B).
example2 = '''
Caption: The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.
Action json:
{
    "caption": "The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.",
    "sequential descriptions": [
        "woman A rocks and holds the child B, woman A and woman C sings birthday song"
    ],
    "time stamps": {
        "1": {
            "description": [
                "woman A rocks and holds the child B, woman A and woman C sings birthday song"
            ],
            "programmatic": [
                "binary(rock, A, B)",
                "binary(hold, A, B)",
                "name(A, woman)",
                "name(B, child)",
                "unary(sing, A)",
                "unary(sing, C)",
                "name(C, woman)"
            ],
            "duration": "long",
            "video location": "mid"
        }
    }
}
'''

# Few-shot example 3: first-person caption split into two events.
# The cellphone is entity B and the ukulele is entity C; each name(...) fact
# must label the entity actually used in that step's predicate. The JSON part
# is strictly valid and uses the "description" key like the other examples.
example3 = '''
Caption: "I adjusted my cellphone and continued playing the ukulele."
Action json:
{
    "caption": "I adjusted my cellphone and continued playing the ukulele.",
    "sequential descriptions": [
        "person A adjust cellphone B",
        "person A play ukulele C"
    ],
    "time stamps": {
        "1": {
            "description": [
                "person A adjust cellphone B"
            ],
            "programmatic": [
                "binary(adjust, A, B)",
                "name(A, person)",
                "name(B, cellphone)"
            ],
            "duration": "short",
            "video location": "early"
        },
        "2": {
            "description": [
                "person A play ukulele C"
            ],
            "programmatic": [
                "binary(play, A, C)",
                "name(A, person)",
                "name(C, ukulele)"
            ],
            "duration": "long",
            "video location": "late"
        }
    }
}
'''

# Closing instruction: asks the model for one action-json program per
# caption, keyed by caption id, wrapped in a single "actions" object.
query = (
    "\n"
    "Please generate the action json programs for the following captions in the following format:\n"
    '{"actions": {caption_id: action json programs}}\n'
)
    
# Ordered few-shot examples, their newline-joined text, and the complete
# standalone prompt (role preamble, vocabulary, examples, closing query).
all_examples = [example1, example2, example3]
few_shot_prompt = "\n".join(all_examples)
prompt = f"{user}\n{context}\n{few_shot_prompt}\n{query}"

def wrap_prompt(caption_ls, few_shot=True):
    """Build the prompt text for a batch of captions.

    The result is the module-level ``context``, optionally followed by
    ``few_shot_prompt``, then ``query``, then one line per caption in the
    form ``"<index>. <caption>"`` with indices starting at 0. All parts are
    joined with a single newline. The ``user`` preamble is not included.

    Args:
        caption_ls: Iterable of caption strings, numbered in iteration order.
        few_shot: When true, the few-shot examples are inserted between the
            context and the query.

    Returns:
        The assembled prompt as one string.
    """
    if few_shot:
        header = [context, few_shot_prompt, query]
    else:
        header = [context, query]

    numbered = [f"{idx}. {text}" for idx, text in enumerate(caption_ls)]
    return '\n'.join(header + numbered)

# Script entry point: when the module is run directly, print the complete
# standalone prompt (the module-level `prompt`) for inspection.
if __name__ == "__main__":

    print(prompt)