import math
import torch
import maskclip
from PIL import Image
from matplotlib import pyplot as plt
import torch.nn.functional as F


from transformers import AutoTokenizer, AutoProcessor
from maskclip import MaskCLIPModel


# Local checkpoint directory shared by the model, tokenizer and processor.
_CLIP_CHECKPOINT = (
    "/mnt/nlp01/usr/huangwenxuan/home_tencent1/vision_tower/clip-vit-large-patch14-336"
)

# NOTE(review): `device` is not referenced anywhere else in the visible script;
# the model and its inputs are never moved onto it.
device = "cpu" if not torch.cuda.is_available() else "cuda:7"

# All three components are loaded from the same checkpoint directory.
model = MaskCLIPModel.from_pretrained(_CLIP_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CLIP_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CLIP_CHECKPOINT)

# Class-name vocabulary (171 entries, order preserved). Judging by the variable
# name these are the COCO-Stuff labels; each entry is later turned into a text
# prompt.
# fmt: off
coco_stuff_classes = [
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
    "banner", "blanket", "branch", "bridge", "building",
    "bush", "cabinet", "cage", "cardboard", "carpet",
    "ceiling", "tile ceiling", "cloth", "clothes", "clouds",
    "counter", "cupboard", "curtain", "desk", "dirt",
    "door", "fence", "marble floor", "floor", "stone floor",
    "tile floor", "wood floor", "flower", "fog", "food",
    "fruit", "furniture", "grass", "gravel", "ground",
    "hill", "house", "leaves", "light", "mat",
    "metal", "mirror", "moss", "mountain", "mud",
    "napkin", "net", "paper", "pavement", "pillow",
    "plant", "plastic", "platform", "playingfield", "railing",
    "railroad", "river", "road", "rock", "roof",
    "rug", "salad", "sand", "sea", "shelf",
    "sky", "skyscraper", "snow", "solid", "stairs",
    "stone", "straw", "structural", "table", "tent",
    "textile", "towel", "tree", "vegetable", "brick wall",
    "concrete wall", "wall", "panel wall", "stone wall", "tile wall",
    "wood wall", "water", "waterdrops", "blind window", "window",
    "wood",
]
# fmt: on
# Class-name vocabulary (459 entries, order preserved). Judging by the variable
# name these are the full PASCAL-Context labels. Spellings are kept exactly as
# they were because every entry is fed to the text encoder verbatim.
# fmt: off
all_pascal_context_classes = [
    "accordion", "airplane", "air conditioner", "antenna", "artillery",
    "ashtray", "atrium", "baby carriage", "bag", "ball",
    "balloon", "bamboo weaving", "barrel", "baseball bat", "basket",
    "basketball backboard", "bathtub", "bed", "bedclothes", "beer",
    "bell", "bench", "bicycle", "binoculars", "bird",
    "bird cage", "bird feeder", "bird nest", "blackboard", "board",
    "boat", "bone", "book", "bottle", "bottle opener",
    "bowl", "box", "bracelet", "brick", "bridge",
    "broom", "brush", "bucket", "building", "bus",
    "cabinet", "cabinet door", "cage", "cake", "calculator",
    "calendar", "camel", "camera", "camera lens", "can",
    "candle", "candle holder", "cap", "car", "card",
    "cart", "case", "casette recorder", "cash register", "cat",
    "cd", "cd player", "ceiling", "cell phone", "cello",
    "chain", "chair", "chessboard", "chicken", "chopstick",
    "clip", "clippers", "clock", "closet", "cloth",
    "clothes tree", "coffee", "coffee machine", "comb", "computer",
    "concrete", "cone", "container", "control booth", "controller",
    "cooker", "copying machine", "coral", "cork", "corkscrew",
    "counter", "court", "cow", "crabstick", "crane",
    "crate", "cross", "crutch", "cup", "curtain",
    "cushion", "cutting board", "dais", "disc", "disc case",
    "dishwasher", "dock", "dog", "dolphin", "door",
    "drainer", "dray", "drink dispenser", "drinking machine", "drop",
    "drug", "drum", "drum kit", "duck", "dumbbell",
    "earphone", "earrings", "egg", "electric fan", "electric iron",
    "electric pot", "electric saw", "electronic keyboard", "engine", "envelope",
    "equipment", "escalator", "exhibition booth", "extinguisher", "eyeglass",
    "fan", "faucet", "fax machine", "fence", "ferris wheel",
    "fire extinguisher", "fire hydrant", "fire place", "fish", "fish tank",
    "fishbowl", "fishing net", "fishing pole", "flag", "flagstaff",
    "flame", "flashlight", "floor", "flower", "fly",
    "foam", "food", "footbridge", "forceps", "fork",
    "forklift", "fountain", "fox", "frame", "fridge",
    "frog", "fruit", "funnel", "furnace", "game controller",
    "game machine", "gas cylinder", "gas hood", "gas stove", "gift box",
    "glass", "glass marble", "globe", "glove", "goal",
    "grandstand", "grass", "gravestone", "ground", "guardrail",
    "guitar", "gun", "hammer", "hand cart", "handle",
    "handrail", "hanger", "hard disk drive", "hat", "hay",
    "headphone", "heater", "helicopter", "helmet", "holder",
    "hook", "horse", "horse-drawn carriage", "hot-air balloon", "hydrovalve",
    "ice", "inflator pump", "ipod", "iron", "ironing board",
    "jar", "kart", "kettle", "key", "keyboard",
    "kitchen range", "kite", "knife", "knife block", "ladder",
    "ladder truck", "ladle", "laptop", "leaves", "lid",
    "life buoy", "light", "light bulb", "lighter", "line",
    "lion", "lobster", "lock", "machine", "mailbox",
    "mannequin", "map", "mask", "mat", "match book",
    "mattress", "menu", "metal", "meter box", "microphone",
    "microwave", "mirror", "missile", "model", "money",
    "monkey", "mop", "motorbike", "mountain", "mouse",
    "mouse pad", "musical instrument", "napkin", "net", "newspaper",
    "oar", "ornament", "outlet", "oven", "oxygen bottle",
    "pack", "pan", "paper", "paper box", "paper cutter",
    "parachute", "parasol", "parterre", "patio", "pelage",
    "pen", "pen container", "pencil", "person", "photo",
    "piano", "picture", "pig", "pillar", "pillow",
    "pipe", "pitcher", "plant", "plastic", "plate",
    "platform", "player", "playground", "pliers", "plume",
    "poker", "poker chip", "pole", "pool table", "postcard",
    "poster", "pot", "potted plant", "printer", "projector",
    "pumpkin", "rabbit", "racket", "radiator", "radio",
    "rail", "rake", "ramp", "range hood", "receiver",
    "recorder", "recreational machines", "remote control", "road", "robot",
    "rock", "rocket", "rocking horse", "rope", "rug",
    "ruler", "runway", "saddle", "sand", "saw",
    "scale", "scanner", "scissors", "scoop", "screen",
    "screwdriver", "sculpture", "scythe", "sewer", "sewing machine",
    "shed", "sheep", "shell", "shelves", "shoe",
    "shopping cart", "shovel", "sidecar", "sidewalk", "sign",
    "signal light", "sink", "skateboard", "ski", "sky",
    "sled", "slippers", "smoke", "snail", "snake",
    "snow", "snowmobiles", "sofa", "spanner", "spatula",
    "speaker", "speed bump", "spice container", "spoon", "sprayer",
    "squirrel", "stage", "stair", "stapler", "stick",
    "sticky note", "stone", "stool", "stove", "straw",
    "stretcher", "sun", "sunglass", "sunshade", "surveillance camera",
    "swan", "sweeper", "swim ring", "swimming pool", "swing",
    "switch", "table", "tableware", "tank", "tap",
    "tape", "tarp", "telephone", "telephone booth", "tent",
    "tire", "toaster", "toilet", "tong", "tool",
    "toothbrush", "towel", "toy", "toy car", "track",
    "train", "trampoline", "trash bin", "tray", "tree",
    "tricycle", "tripod", "trophy", "truck", "tube",
    "turtle", "tv monitor", "tweezers", "typewriter", "umbrella",
    "unknown", "vacuum cleaner", "vending machine", "video camera", "video game console",
    "video player", "video tape", "violin", "wakeboard", "wall",
    "wallet", "wardrobe", "washing machine", "watch", "water",
    "water dispenser", "water pipe", "water skate board", "watermelon", "whale",
    "wharf", "wheel", "wheelchair", "window", "window blinds",
    "wineglass", "wire", "wood", "wool",
]
# fmt: on
# Extra generic background labels appended to the two dataset vocabularies.
bg_classes = ["building", "ground", "grass", "tree", "sky", "background"]

# Merge the three vocabularies and drop duplicates. dict.fromkeys keeps the
# first occurrence of every name in its original position, so the class order
# (and therefore every class index derived from it) is identical on each run.
# A plain set() would reorder the strings differently per interpreter run
# because string hashing is randomised.
full_classes = list(
    dict.fromkeys(coco_stuff_classes + all_pascal_context_classes + bg_classes)
)

# One text prompt per unique class name.
other_class_names = [
    f"This is a photo of the {class_name}" for class_name in full_classes
]

# The query prompt goes first (row 0 of the embeddings); the generic class
# prompts follow. Alternative query prompts are kept below for experimentation.
# classnames = ["descript the image The image captures a nighttime scene with a large boat floating on the water near a city. The boat is positioned towards the center of the scene, and its lights are on, illuminating the area. In the background, there are several buildings that create a picturesque cityscape."] + other_class_names
# classnames = ["This is a photo of the boat"] + other_class_names
classnames = ["Describe this image. There are dog and cat."] + other_class_names

inputs = tokenizer(classnames, padding=True, return_tensors="pt")
# Inference only: skip autograd bookkeeping so no graph is kept alive for the
# several hundred encoded prompts.
with torch.no_grad():
    text_embeds = model.get_text_features(**inputs)

# Cosine similarity between the query prompt (row 0) and every other prompt.
# The single query row is expanded so both operands have the same shape.
query_embed = text_embeds[:1]
candidate_embeds = text_embeds[1:]
similarity = F.cosine_similarity(
    candidate_embeds, query_embed.expand_as(candidate_embeds), dim=1
)

# Indices of the 5 prompts most similar to the query. The +1 converts
# positions within `candidate_embeds` back into row indices of `text_embeds`.
closest_indices = torch.topk(similarity, 5, largest=True).indices + 1

# Keep every row except those 5 nearest prompts; row 0 (the query) stays.
mask = torch.ones(text_embeds.size(0), dtype=torch.bool)
mask[closest_indices] = False
filtered_text_embeds = text_embeds[mask]

# Preprocess the test image. The context manager closes the underlying file
# handle once the processor has consumed the pixels.
with Image.open(
    "/mnt/nlp01/usr/huangwenxuan/home_tencent1/code/dynamic_LLaVA_v2x/cat_dog.webp"
) as image:
    image_inputs = processor(images=image, return_tensors="pt")

# Inference only: no autograd graph is needed for the forward pass.
with torch.no_grad():
    output = model(filtered_text_embeds, **image_inputs)

# The model output is unpacked as three dimensions; presumably
# (batch, classes, patches) — TODO confirm against MaskCLIPModel.
B, C, Nt = output.shape

# Fold the flat last dimension into a square grid, failing loudly if its
# length is not a perfect square instead of surfacing an opaque reshape error.
side = int(math.sqrt(Nt))
if side * side != Nt:
    raise ValueError(
        f"cannot reshape {Nt} positions into a square grid (side={side})"
    )
output = output.reshape(B, C, side, side)

# mask = output.argmax(dim=1).cpu().numpy()[0]
# mask = (mask == 0).astype(int)


# # Display the matrix with imshow
# plt.imshow(mask, cmap="gray")  # use the grayscale colormap
# plt.axis("off")  # hide the axes

# # Save the figure
# plt.savefig("test.png", bbox_inches="tight", pad_inches=0)
# plt.close()  # close the figure so it is not displayed

# softmax_output = F.softmax(output, dim=1)
# class_0_data = softmax_output[:, 0, :, :]
# # Draw the heatmap
# plt.imshow(class_0_data.squeeze(), cmap='hot', interpolation='nearest')
# plt.colorbar()
# plt.title("Heatmap of Class 0")

# # Save the figure to the local file system
# plt.savefig('class_0_heatmap.png', dpi=300)  # the dpi argument controls the image resolution
# plt.close()  # close the figure to avoid leaking memory


# Normalise the scores across dim 1 so each spatial position sums to 1.
softmax_output = F.softmax(output, dim=1)
# Scores of channel 0, which corresponds to the first row of the text
# embeddings passed to the model (the query prompt).
class_0_data = softmax_output[:, 0, :, :].squeeze()

# Number of highest-scoring positions to keep, clamped so topk cannot be asked
# for more elements than the map contains.
k = min(100, class_0_data.numel())
# reshape (rather than view) also works if the slice is not contiguous.
top_k_values, _ = torch.topk(class_0_data.reshape(-1), k)
threshold = top_k_values.min()  # the k-th largest score

# Binary mask: 1 where the score reaches the threshold, 0 elsewhere. Ties at
# the threshold are all kept, so slightly more than k positions may be set.
mask = (class_0_data >= threshold).int()

# Visualise the mask. Convert to a NumPy array explicitly so plotting does not
# depend on the tensor living on the CPU.
plt.imshow(mask.detach().cpu().numpy(), cmap="gray", interpolation="nearest")
plt.colorbar()
plt.title(f"Visualization of Top-{k} Mask")

# Save the figure to the local file system.
plt.savefig("top_k_mask.png", dpi=300)  # dpi controls the output resolution
plt.close()  # close the figure so it does not accumulate in memory
