# Words the model may use to say where in the video an event happens.
locations = ["early", "mid", "late"]
# Words the model may use to say how long an event lasts.
durations = ["long", "medium", "short"]

# Role preamble; it is joined into the module-level `prompt` string.
user = "\nYou are a super user in logic programming. \n"

# Vocabulary section of the prompt, rendered from the two lists above.
context = (
    "\n"
    "Here are some words that describes the event length and location that you can use. \n"
    "The location in the video can be: " + ", ".join(locations) + ".\n"
    "The duration of the event can be: " + ", ".join(durations) + ".\n"
)

# Few-shot example 1: an outdoor scene split into three time-stamped steps.
# The "Action json" payload is kept strictly valid JSON (no missing or
# trailing commas) so the model is not taught to emit output that a JSON
# parser would reject. Entity labels follow the caption: A = man, B = child,
# C = woman, D = the other child.
example1 = '''
Scene graph context: "Generic Video Scene, out door"
Caption: A man carries a child and walks to the left from behind a woman holding another child.
Action json:
{
    "caption": "A man carries a child and walks to the left from behind a woman holding another child.",
    "sequential descriptions": [
        "man A carry child B, woman C hold child D, man A is behind woman C",
        "man A walk",
        "man A at left"
        ],
    "time stamps": {
        "1": {
            "description": [
                "man A carry child B",
                "woman C hold child D",
                "man A is behind woman C"
            ],
            "programmatic": [
                "carrying(A, B)",
                "name(A, man)",
                "name(B, child)",
                "holding(C, D)",
                "name(C, woman)",
                "name(D, child)",
                "behind(A, C)"
            ],
            "duration": "short",
            "video location": "early"
        },
        "2": {
            "description": [
               "man A walk"
            ],
            "programmatic": [
                "walk(A)"
            ],
            "duration": "medium",
            "video location": "mid"
        },
        "3": {
            "description": [
               "man A at left"
            ],
            "programmatic": [
                "left(A)"
            ],
            "duration": "short",
            "video location": "late"
        }
    }
}
'''

# Few-shot example 2: a single-step indoor scene (A = woman, B = child,
# C = the other woman). The payload is strictly valid JSON and uses the
# correctly spelled "description" key, matching example1.
# NOTE(review): if any downstream parser still reads the old misspelled
# "decription" key, update it alongside this change.
example2 = '''
Scene graph context: "Generic Video Scene, in door"
Caption: The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.
Action json:
{
    "caption": "The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.",
    "sequential descriptions": [
        "woman A rocks and holds the child B, woman A and woman C sings birthday song"
        ],
    "time stamps": {
        "1": {
            "description": [
                "woman A rocks and holds the child B, woman A and woman C sings birthday song"
            ],
            "programmatic": [
                "rock(A, B)",
                "hold(A, B)",
                "sing(A)",
                "sing(C)"
            ],
            "duration": "long",
            "video location": "mid"
        }
    }
}
'''

# Few-shot example 3: two steps (A = person, B = cellphone, C = ukulele).
# The payload is strictly valid JSON, uses the correctly spelled
# "description" key, and labels the ukulele as entity C, the same letter used
# by the "play(A, C)" predicate.
# NOTE(review): if any downstream parser still reads the old misspelled
# "decription" key, update it alongside this change.
example3 = '''
Scene graph context: "Generic Video Scene"
Caption: "I adjusted my cellphone and continued playing the ukulele."
Action json:
{
    "caption": "I adjusted my cellphone and continued playing the ukulele.",
    "sequential descriptions": [
        "person A adjust cellphone B",
        "person A play ukulele C"
        ],
    "time stamps": {
        "1": {
            "description": [
                "person A adjust cellphone B"
            ],
            "programmatic": [
                "adjust(A, B)",
                "name(A, person)",
                "name(B, cellphone)"
            ],
            "duration": "short",
            "video location": "early"
        },
        "2": {
            "description": [
                "person A play ukulele C"
            ],
            "programmatic": [
                "play(A, C)",
                "name(A, person)",
                "name(C, ukulele)"
            ],
            "duration": "long",
            "video location": "late"
        }
    }
}
'''

# Few-shot example 4: two steps of an indoor scene with a woman, a kitten,
# a piece of meat and a chair. The payload is strictly valid JSON (no trailing
# commas) and uses the correctly spelled "description" key, matching example1.
# The predicates themselves are unchanged.
# NOTE(review): if any downstream parser still reads the old misspelled
# "decription" key, update it alongside this change.
example4 = '''
Scene graph context: "Generic Video Scene, indoor"
Caption: "A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat."
Action json:
{
    "caption": "A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat.",
    "sequential descriptions": [
        "woman A teasing kitten B with meat C",
        "kitten B peek at meat C from a chair D"
        ],
    "time stamps": {
        "1": {
            "description": [
                "woman A teasing kitten B with meat C"
            ],
            "programmatic": [
                "sitting on(B, D)",
                "name(B, cat)",
                "name(D, chair)",
                "name(A, adult)",
                "name(C, meat)"
            ],
            "duration": "long",
            "video location": "early"
        },
        "2": {
            "description": [
                "kitten B peek at meat C from a chair D"
            ],
            "programmatic": [
                "catching(B, C)",
                "sitting on(B, D)"
            ],
            "duration": "long",
            "video location": "late"
        }
    }
}
'''


# Few-shot example 5: two steps of an indoor scene (A = boy, B = gift,
# C = floor). The payload is strictly valid JSON and uses the correctly
# spelled "description" key. The first step's predicate is "holding(A, B)":
# the boy holds the gift; the previous "holding(B, D)" referenced an entity D
# that this example never introduces.
# NOTE(review): if any downstream parser still reads the old misspelled
# "decription" key, update it alongside this change.
example5 = '''
Scene graph context: "Generic Video Scene, indoor"
Caption: "The young boy receives another gift and sits on the floor."
Action json:
{
    "caption": "The young boy receives another gift and sits on the floor.",
    "sequential descriptions": [
        "boy A receives gift B",
        "boy A sits on the floor C"
        ],
    "time stamps": {
        "1": {
            "description": [
                "boy A receives gift B"
            ],
            "programmatic": [
                "holding(A, B)",
                "name(A, boy)",
                "name(B, gift)"
            ],
            "duration": "medium",
            "video location": "early"
        },
        "2": {
            "description": [
                "boy A sits on the floor C"
            ],
            "programmatic": [
                "sitting on(A, C)",
                "name(A, boy)",
                "name(C, floor)"
            ],
            "duration": "medium",
            "video location": "late"
        }
    }
}
'''

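# NOTE: disabled, unfinished draft of an alternative example5. Its
# "sequential descriptions" were copied from example4 and do not match the
# caption below.
# example5 = '''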
# Caption: "The young boy (9) walks through the television (5) to pick up a gift (4)."
# Action json:
# {
    
#     "sequential descriptions": [
#         "woman A teasing kitten B with meat C",
#         "kitten B peek at meat C from a chair D", 
#         ],
# }
    
# '''

# Task instructions placed after the examples: predicate arity rules and the
# required output envelope.
query = (
    "\n"
    "Note all the predicates are unary or binary.\n"
    "A unary predicate takes in one argument. For example, close(A) means A is close to the camera.\n"
    "A binary predicate takes in two arguments. For example, above(A, B) means A is above B.\n"
    "Please use as many predicates as possible to precisely describe the action.\n"
    "Please generate the action json programs for the following captions in the following format:\n"
    '{"actions": {caption_id: action json programs}}\n'
)
    
# Only examples 1, 4 and 5 are used as few-shot demonstrations.
all_examples = [example1, example4, example5]
few_shot_prompt = "\n".join(all_examples)
# Standalone prompt: role preamble, vocabulary, examples, then instructions.
prompt = "\n".join((user, context, few_shot_prompt, query))

def wrap_prompt(caption_ls, sg_context_ls, few_shot=True):
    """Build the prompt text for a batch of captions.

    The result is the module-level ``context``, then ``few_shot_prompt``
    (only when ``few_shot`` is truthy), then ``query``, then one numbered
    entry per (caption, scene-graph context) pair, all joined by newlines.

    Args:
        caption_ls: Iterable of captions.
        sg_context_ls: Iterable of scene-graph contexts, paired positionally
            with ``caption_ls``. Pairing uses ``zip``, so surplus items in the
            longer iterable are ignored.
        few_shot: Whether to include the few-shot examples.

    Returns:
        The assembled prompt as a single string. Entries are numbered from 0.

    NOTE(review): unlike the module-level ``prompt``, the ``user`` preamble is
    not included here -- presumably it is sent separately; confirm with callers.
    """
    header = [context, few_shot_prompt, query] if few_shot else [context, query]
    numbered = [
        f"{idx}. Scene graph context: {scene}\n Caption: {text} "
        for idx, (text, scene) in enumerate(zip(caption_ls, sg_context_ls))
    ]
    return "\n".join(header + numbered)

if __name__ == "__main__":
    # Manual check: print the fully assembled static prompt.
    print(prompt)