import json
import os
import pandas as pd
import csv
import numpy as np
# Action-annotation CSV; handed to parse_epic_csv() in the __main__ block.
DATA = "/n/fs/nlp-xxxx/datasets/epic-kitchens/epic-kitchens-55-annotations/preprocessed/validation.csv"
# TSV read line by line in the __main__ block; only its first tab-separated
# column (the frame id) is used.
GEN_CAPTIONS = (
    "/n/fs/nlp-xxxx/datasets/epic-kitchens/epic-kitchens-55-annotations/preprocessed/gen_captions/trained_detector/epic_val_gen_caption.tsv"
)
# Output TSV, opened for writing (truncated) in the __main__ block; receives
# one "<frame id>\t<JSON caption list>" line per input line.
OUT_PATH = "/n/fs/nlp-xxxx/projects/world-models/checkpoints/val_gt_actions.tsv"

def parse_epic_csv(filename):
    """Read an action-annotation CSV and group its rows by video.

    The file is read positionally with the columns
    ``id, video, start, end, verb, noun, action``; because the field names
    are supplied explicitly, the first line is treated as data, not as a
    header.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A dict mapping each whitespace-stripped video name to a list of
        dicts (in file order) with keys ``video``, ``start``, ``end``,
        ``verb``, ``noun`` and ``action``. All values except ``video`` are
        ints, and ``start`` / ``end`` are twice the values found in the file.

    Raises:
        ValueError: If a numeric column cannot be converted with ``int``.
    """
    columns = ["id", "video", "start", "end", "verb", "noun", "action"]
    by_video = {}
    with open(filename) as handle:
        for record in csv.DictReader(handle, fieldnames=columns):
            name = record["video"].strip()
            entry = {
                "video": name,
                # NOTE(review): the doubling presumably converts the stored
                # indices to the frame numbering used by the caption ids --
                # confirm against the preprocessing that produced the CSV.
                "start": 2 * int(record["start"]),
                "end": 2 * int(record["end"]),
                "verb": int(record["verb"]),
                "noun": int(record["noun"]),
                "action": int(record["action"]),
            }
            by_video.setdefault(name, []).append(entry)
    return by_video

# Noun vocabulary: position i holds the name that is looked up for an
# annotation's integer "noun" value i when a caption is built in the
# __main__ block. Order is significant -- never sort this list or insert
# entries in the middle. Index 0 is the placeholder "Nothing".
# NOTE(review): presumably mirrors the EPIC-Kitchens-55 noun class list
# (names use a "head:modifier" form, e.g. "board:chopping") -- confirm
# against the dataset's class file.
NOUNS = [
    "Nothing",
    "pan",
    "pan:dust",
    "tap",
    "plate",
    "knife",
    "bowl",
    "spoon",
    "cupboard",
    "drawer",
    "fridge",
    "lid",
    "hand",
    "onion",
    "onion:spring",
    "pot",
    "glass",
    "water",
    "fork",
    "board:chopping",
    "bag",
    "sponge",
    "spatula",
    "cup",
    "oil",
    "bin",
    "meat",
    "potato",
    "bottle",
    "container",
    "tomato",
    "salt",
    "cloth",
    "sink",
    "door:kitchen",
    "pasta",
    "dish:soap",
    "food",
    "kettle",
    "box",
    "carrot",
    "sauce",
    "colander",
    "milk",
    "rice",
    "garlic",
    "pepper",
    "hob",
    "dough",
    "dishwasher",
    "egg",
    "cheese",
    "bread",
    "table",
    "salad",
    "microwave",
    "oven",
    "cooker:slow",
    "coffee",
    "filter",
    "jar",
    "rack:drying",
    "chicken",
    "tray",
    "mixture",
    "towel",
    "towel:kitchen",
    "peach",
    "skin",
    "courgette",
    "liquid:washing",
    "liquid",
    "leaf",
    "lettuce",
    "leaf:mint",
    "cutlery",
    "scissors",
    "package",
    "top",
    "spice",
    "tortilla",
    "paper",
    "machine:washing",
    "olive",
    "sausage",
    "glove:oven",
    "peeler:potato",
    "can",
    "mat",
    "mat:sushi",
    "vegetable",
    "wrap:plastic",
    "wrap",
    "flour",
    "cucumber",
    "curry",
    "cereal",
    "napkin",
    "soap",
    "squash",
    "fish",
    "chilli",
    "cover",
    "sugar",
    "aubergine",
    "jug",
    "heat",
    "leek",
    "rubbish",
    "ladle",
    "mushroom",
    "stock",
    "freezer",
    "light",
    "pizza",
    "ball",
    "yoghurt",
    "chopstick",
    "grape",
    "ginger",
    "banana",
    "oregano",
    "tuna",
    "kitchen",
    "salmon",
    "basket",
    "maker:coffee",
    "roll",
    "brush",
    "lemon",
    "clothes",
    "grater",
    "strainer",
    "bacon",
    "avocado",
    "blueberry",
    "pesto",
    "utensil",
    "bean:green",
    "floor",
    "lime",
    "foil",
    "grill",
    "ingredient",
    "scale",
    "paste:garlic",
    "processor:food",
    "nut:pine",
    "butter",
    "butter:peanut",
    "shelf",
    "timer",
    "rinse",
    "tablecloth",
    "switch",
    "powder:coconut",
    "powder:washing",
    "capsule",
    "oat",
    "tofu",
    "lighter",
    "corn",
    "vinegar",
    "grinder",
    "cap",
    "support",
    "cream",
    "content",
    "tongs",
    "pie",
    "fan:extractor",
    "raisin",
    "toaster",
    "broccoli",
    "pin:rolling",
    "plug",
    "button",
    "tea",
    "parsley",
    "flame",
    "herb",
    "base",
    "holder:filter",
    "thyme",
    "honey",
    "celery",
    "kiwi",
    "tissue",
    "time",
    "clip",
    "noodle",
    "yeast",
    "hummus",
    "coconut",
    "cabbage",
    "spinach",
    "nutella",
    "fruit",
    "dressing:salad",
    "omelette",
    "kale",
    "paella",
    "chip",
    "opener:bottle",
    "shirt",
    "chair",
    "sandwich",
    "burger:tuna",
    "pancake",
    "leftover",
    "risotto",
    "pestle",
    "sock",
    "pea",
    "apron",
    "juice",
    "wine",
    "dust",
    "desk",
    "mesh",
    "oatmeal",
    "artichoke",
    "remover:spot",
    "coriander",
    "mocha",
    "quorn",
    "soup",
    "turmeric",
    "knob",
    "seed",
    "boxer",
    "paprika",
    "juicer:lime",
    "guard:hand",
    "apple",
    "tahini",
    "finger",
    "salami",
    "mayonnaise",
    "biscuit",
    "pear",
    "mortar",
    "berry",
    "beef",
    "squeezer:lime",
    "tail",
    "stick:crab",
    "supplement",
    "phone",
    "shell:egg",
    "pith",
    "ring:onion",
    "cherry",
    "cake",
    "sprout",
    "almond",
    "mint",
    "flake:chilli",
    "cutter:pizza",
    "nesquik",
    "blender",
    "scrap",
    "backpack",
    "melon",
    "breadcrumb",
    "sticker",
    "shrimp",
    "smoothie",
    "grass:lemon",
    "ketchup",
    "slicer",
    "stand",
    "dumpling",
    "watch",
    "beer",
    "power",
    "heater",
    "basil",
    "cinnamon",
    "crisp",
    "asparagus",
    "drink",
    "fishcakes",
    "mustard",
    "caper",
    "whetstone",
    "candle",
    "control:remote",
    "instruction",
    "cork",
    "tab",
    "masher",
    "part",
    "muffin",
    "shaker:pepper",
    "garni:bouquet",
    "popcorn",
    "envelope",
    "chocolate",
    "spot",
    "window",
    "syrup",
    "bar:cereal",
    "croissant",
    "coke",
    "stereo",
    "alarm",
    "recipe",
    "handle",
    "sleeve",
    "cumin",
    "wire",
    "label",
    "fire",
    "presser",
    "air",
    "mouse",
    "boiler",
    "rest",
    "tablet",
    "poster",
    "trousers",
    "form",
    "rubber",
    "rug",
    "sheets",
    "pepper:cayenne",
    "waffle",
    "pineapple",
    "turkey",
    "alcohol",
    "rosemary",
    "lead",
    "book",
    "rim",
    "gravy",
    "straw",
    "hat",
    "cd",
    "slipper",
    "casserole",
    "ladder",
    "jambalaya",
    "wall",
    "tube",
    "lamp",
    "tarragon",
    "heart",
    "funnel",
    "whisk",
    "driver:screw",
    "trouser",
]
# Verb vocabulary: position i holds the name that is looked up for an
# annotation's integer "verb" value i when a caption is built in the
# __main__ block. Order is significant -- never sort this list or insert
# entries in the middle.
# NOTE(review): presumably mirrors the EPIC-Kitchens-55 verb class list --
# confirm against the dataset's class file.
VERBS = [
    "take",
    "put",
    "open",
    "close",
    "wash",
    "cut",
    "mix",
    "pour",
    "throw",
    "move",
    "remove",
    "dry",
    "turn-on",
    "turn",
    "shake",
    "turn-off",
    "peel",
    "adjust",
    "empty",
    "scoop",
    "check",
    "squeeze",
    "insert",
    "press",
    "fill",
    "add",
    "scrape",
    "sharpen",
    "wrap",
    "roll",
    "sprinkle",
    "break",
    "flip",
    "hang",
    "hold",
    "sort",
    "apply",
    "crush",
    "search",
    "sample",
    "knead",
    "set",
    "walk",
    "divide",
    "spray",
    "use",
    "fold",
    "cook",
    "filter",
    "scrub",
    "look",
    "finish",
    "soak",
    "brush",
    "pull",
    "pat",
    "form",
    "measure",
    "drink",
    "choose",
    "serve",
    "drop",
    "wear",
    "rip",
    "tip",
    "turn-down",
    "gather",
    "eat",
    "stack",
    "store",
    "switch",
    "increase",
    "carry",
    "lift",
    "twist",
    "sweep",
    "rub",
    "unwrap",
    "stab",
    "attach",
    "stretch",
    "lower",
    "prepare",
    "unscrew",
    "season",
    "video",
    "tap-off",
    "set-off",
    "squirt",
    "load",
    "unroll",
    "water",
    "do",
    "flatten",
    "uncover",
    "slide",
    "unplug",
    "level",
    "tear-out",
    "feel",
    "fix",
    "spill",
    "pack",
    "bake",
    "blow",
    "sit-on",
    "count",
    "dip",
    "cool",
    "flush",
    "knife",
    "fork",
    "swirl",
    "stick",
    "pet-down",
    "realize",
    "weigh",
    "defoliate",
    "deseed",
    "tessellate",
    "unfreeze",
    "decide-if",
    "let-out",
    "save",
    "reverse",
]

def closest_preceding_action(actions, image_id):
    """Return the already-finished action nearest to frame ``image_id``.

    An action qualifies when its ``"end"`` is strictly smaller than
    ``image_id``. Among the qualifying actions the one with the smallest
    ``image_id - start`` wins; ties go to the earliest entry in ``actions``.

    The action dict itself is returned rather than an index, so the result
    can never be misread as a position in a differently filtered list.

    Args:
        actions: Iterable of dicts with integer ``"start"`` and ``"end"``
            entries (the per-video lists built by ``parse_epic_csv``).
        image_id: Integer frame number being captioned.

    Returns:
        The selected action dict, or ``None`` when no action ended before
        ``image_id``.
    """
    finished = [action for action in actions if action["end"] < image_id]
    if not finished:
        return None
    # min() keeps the first of several equally close actions.
    return min(finished, key=lambda action: image_id - action["start"])


if __name__ == "__main__":
    # Writes, for every frame id listed in GEN_CAPTIONS, one line
    # "<frame id>\t<JSON list holding one caption dict>" to OUT_PATH.
    annotations = parse_epic_csv(DATA)

    # Both files are managed by the same `with`, so they are closed even if a
    # malformed line raises part-way through.
    with open(GEN_CAPTIONS) as f_in, open(OUT_PATH, "w") as f_out:
        # Iterate lazily instead of loading the whole file with readlines().
        for line in f_in:
            video_image_id = line.split("\t")[0]
            # Frame ids must consist of exactly three "_"-separated parts,
            # the last one being the integer frame number; anything else
            # raises ValueError.
            video, sequence, image_id = video_image_id.split("_")
            image_id = int(image_id)
            video_sequence = video + "_" + sequence

            # Find the closest action which precedes this frame. A video
            # without any annotation still raises KeyError, as before.
            action = closest_preceding_action(
                annotations[video_sequence], image_id
            )

            if action is None:
                cur_caption = "Nothing Nothing"
            else:
                # The trailing space after the noun is part of the existing
                # output format and is kept so the output format is unchanged.
                cur_caption = (
                    VERBS[action["verb"]] + " " + NOUNS[action["noun"]] + " "
                )
            caption_dict = {"caption": cur_caption, "conf": 1}
            f_out.write(
                "%s\t%s\n" % (video_image_id, json.dumps([caption_dict]))
            )

