# Predicate vocabulary that is interpolated into the `context` prompt below.

# Static predicates: described in the prompt as properties of the target object.
static = [
    "is_bendable",
    "is_fluid",
    "is_holdable",
    "is_rigid",
    "is_tearable",
    "neq",
]

# Unary predicates: described in the prompt as taking one argument.
unary = [
    "broken",
    "close",
    "closed",
    "deformed",
    "empty",
    "far",
    "folded",
    "has_hole",
    "high",
    "low",
    "left",
    "nextto",
    "onsurface",
    "open",
    "right",
    "stacked",
    "stretched",
    "torn",
    "twisted",
    "upright",
]

# Binary predicates: described in the prompt as taking two arguments.
binary = [
    "above",
    "attached",
    "behind",
    "fits",
    "in",
    "infront",
    "on",
    "touching",
    "under",
    "visible",
]

# Named constants that may appear as predicate arguments.
constants = ["hand"]

# Allowed values for where in the video a time stamp occurs.
locations = ["early", "mid", "late"]

# Persona line; it is the first section of the assembled `prompt`.
user = "\nYou are a super user in logic programming. \n"

# Shared prompt preamble: explains the three predicate kinds with one example
# each, then lists the vocabulary (predicates, constants, video locations).
# Built from adjacent literals so each physical line of the prompt is explicit.
context = (
    "\n"
    "Here are some predicates you can use. \n"
    "A static predicate represents the property of the target object. "
    "For example, is_tearable(A) means the A can be teared. \n"
    "A unary predicate takes in one argument. "
    "For example, close(A) means A is close to the camera.\n"
    "A binary predicate takes in two arguments. "
    "For example, above(A, B) means A is above B.\n"
    f"Static predicates: {', '.join(static)}\n"
    f"Unary predicates: {', '.join(unary)}\n"
    f"Binary predicates: {', '.join(binary)}\n"
    f"Constants: {', '.join(constants)}\n"
    f"The location in the video can be: {', '.join(locations)}\n"
)

# Few-shot example for the action `dig(A, B)`: a header (action name,
# predicate, description) followed by a JSON object with static properties,
# their programmatic form, two time stamps ("1" early, "2" late), and a
# "time period" entry. The text is sent to the model verbatim.
dig_example = '''
Action: dig
Action predicate: dig(A, B)
Action description: Digging [A] out of [B].
Action json:
{
    "action": "dig",
    "action_pred": "dig(A, B)",
    "explanation": "Digging [A] out of [B]",
    "static properties": [
        "A is holdable", 
        "B is not rigid", 
        "entity A and B are not equvalent"
    ],
    "programmatic version": [
        "is_holdable(A)", 
        "not(is_rigid(B))",
        "neq(A, B)"
    ],
    "time stamps": {
        "1": {
            "decription": [
                "Not A and B are far away from the camera",
                "A is in B",
                "A and B are not touched by hand", 
                "A is not visible, but B is visible, and there is a hand that is visible"
            ],
            "programmatic": [
                "not(far(A)), not(far(B))",
                "in(A, B)",
                "not(touching(A, hand)), not(touching(B, hand))",
                "not(visible(A)), visible(B), visible(hand)"
            ],
            "duration": "short",
            "video location": "early"
        },
        "2": {
            "decription": [
                "A become visible", 
                "A is in a hand", 
                "A is not touching B"
            ],
            "programmatic": [
                "visible(A)", 
                "in(A, hand)", 
                "not(touching(A, B))"
            ],
            "duration": "short",
            "video location": "late"
        }
    },
    "time period": ["1", "2", "long"]
}
'''

# Few-shot example for the action `hold_over(A, B)`: a header followed by a
# JSON object with a single time stamp and an empty "time period".
# The JSON body must stay strictly valid (no trailing commas, a comma between
# every pair of members), because the model is asked to imitate this format.
hold_over_example = '''
Action: hold-over
Action predicate: hold-over(A, B)
Action description: Holding [A] over [B].
Action json:
{
    "action": "hold_over",
    "action_pred": "hold_over(A, B)",
    "explanation": "Holding [A] over [B]",
    "static properties": [
        "entity A and B are not equvalent"
    ],
    "programmatic version": [
        "neq(A, B)"
    ],
    "time stamps": {
        "1": {
            "decription": [
                "the entity A is above B and they are not touching each other",
                "the camera is not far from the entity B", 
                "the entity A is in a hand", 
                "the entity B is visible"
            ],
            "programmatic": [
                "above(A, B), not(touching(A, B))",
                "not (far (B))",
                "in(A, hand)",
                "visible(B)"
            ],
            "duration": "short",
            "video location": "early"
        }
    },
    "time period": []
}
'''

# Few-shot example keyed by a free-form caption rather than an action name.
# Every predicate in the programmatic lines must come from the vocabulary
# listed in the prompt context (`is_rigid`, `far`, `in`, ...) and variables
# are upper-case, matching the other examples.
# NOTE(review): "programmatic version" names the caption's entities while the
# time stamps use A/B -- confirm whether that mix is intended.
plug_example = '''
Caption: "plug a usbc wire into my computer"
Specification:
{
    "caption": "plug a usbc wire into my computer",
    "static properties": [
        "entity usbc wire and my computer are not equvalent",
        "entity my computer is rigid"
    ],
    "programmatic version": [
        "neq(usbc, my computer)",
        "is_rigid(my computer)"
    ],
    "time stamps": {
        "1": {
            "decription": [
                "The camera is not far from B",
                "The entity A fits B, and it is in a hand",
                "The object A and B are not touching each other",
                "The object B is not touched by a hand",
                "The object B is visible"
            ],
            "programmatic": [
                "not(far(B))",
                "fits(A, B), in(A, hand)",
                "not(touching(A, B))",
                "not(touching(B, hand))",
                "visible(B)"
            ],
            "duration": "short",
            "video location": "early"
        },
        "2": {
            "decription": [
                "The object A is attached to B",
                "A is in B",
                "A is no longer touched by a hand"
            ],
            "programmatic": [
                "attached (A, B)",
                "in (A, B)",
                "not (touching (A, hand))"
            ],
            "duration": "short",
            "video location": "late"
        }
    },
    "time period": ["1", "2", "long"]
}
'''


    
# Few-shot examples, in the order they appear in the prompt.
all_examples = [dig_example, hold_over_example, plug_example]
# The examples joined into one newline-separated block.
few_shot_prompt = "\n".join(all_examples)
# Complete prompt: persona, predicate context, then the few-shot examples.
prompt = "\n".join((user, context, few_shot_prompt))

def wrap_prompt(action_ls, few_shot=True):
    """Build the prompt asking the model to describe a batch of actions.

    The result is the shared ``context`` preamble, optionally the few-shot
    examples, one instruction line, and one numbered line per action, all
    joined with newlines.

    Args:
        action_ls: Iterable of ``(action, (action_pred, description))``
            entries. Each entry becomes one line, numbered from 0 in
            iteration order.
        few_shot: When true, ``few_shot_prompt`` is inserted right after the
            context.

    Returns:
        The assembled prompt as a single string.
    """
    sections = [context]
    if few_shot:
        sections.append(few_shot_prompt)

    # The instruction names the JSON key the reply must use; it previously
    # read "kay" instead of "key".
    sections.append("Please use as many predicates as possible to precisely describe the action. Please only use the variables, A, B, C and hand. Please follow the format and describe the following actions in json format, with the key `actions`:")
    sections.extend(
        f"{aid}. Action: {action}, Action predicate: {action_pred}, Explanation: {ad}."
        for aid, (action, (action_pred, ad)) in enumerate(action_ls)
    )

    return '\n'.join(sections)

if __name__ == "__main__":
    # When executed as a script, print the fully assembled prompt to stdout.
    print(prompt)