# coding=utf-8
import os

import PIL
import cv2
import torch
import torch.utils.data as data
import torchvision.transforms as transforms

from PIL import Image, ImageDraw
import json

import random
import os.path as osp
import numpy as np
from torch.utils.data import DataLoader
#from datasets.posemap import get_coco_body25_mapping,kpoint_to_heatmap

import matplotlib.pyplot as plt
import torch.nn.functional as F


def mask2bbox(mask):
    up = np.max(np.where(mask)[0])
    down = np.min(np.where(mask)[0])
    left = np.min(np.where(mask)[1])
    right = np.max(np.where(mask)[1])
    center = ((up + down) // 2, (left + right) // 2)

    factor = random.random() * 0.1 + 0.1

    up = int(min(up * (1 + factor) - center[0] * factor + 1, mask.shape[0]))
    down = int(max(down * (1 + factor) - center[0] * factor, 0))
    left = int(max(left * (1 + factor) - center[1] * factor, 0))
    right = int(min(right * (1 + factor) - center[1] * factor + 1, mask.shape[1]))
    return (down, up, left, right)

def show(title, array):
    """Display ``array`` with matplotlib under the given ``title`` (debug helper)."""
    plt.imshow(array)
    plt.title(title)
    plt.show()

def crop_mask(mask):
    """Erase a random top portion of every connected component of ``mask``.

    For each non-background component found by ``cv2.connectedComponents``,
    a ratio is drawn from {1/8, 1/4, 1/2, 3/4}; every pixel of the component
    whose row is at or above ``top + int(height * ratio)`` is set to 0.

    Args:
        mask: 2-D mask in a format accepted by ``cv2.connectedComponents``
            (callers in this file pass uint8).

    Returns:
        The same ``mask`` object; it is modified in place.
    """
    ratios = [1/8,1/4, 1/2, 3/4]

    num_labels, labels = cv2.connectedComponents(mask)

    # Label 0 is the background, so start at 1.
    for label in range(1, num_labels):
        rows, cols = np.nonzero(labels == label)

        ratio = np.random.choice(ratios)

        top = np.min(rows)
        bottom = np.max(rows)
        # Last row (inclusive) of the part that gets erased.
        cutoff = top + int((bottom - top + 1) * ratio)

        in_upper_part = rows <= cutoff
        mask[rows[in_upper_part], cols[in_upper_part]] = 0

    return mask

def crop_main_cloth(mask, scale=0.7):
    """Keep only the central part of a mask's bounding box.

    The bounding rectangle of ``mask`` is shrunk about its center by
    ``scale`` and everything outside the shrunk rectangle is zeroed.

    Args:
        mask: 2-D mask in a format accepted by ``cv2.boundingRect``
            (callers in this file pass uint8).
        scale: fraction of the bounding-box width and height to keep.
            Intended range is (0, 1]; defaults to 0.7.

    Returns:
        A new array shaped like ``mask`` that equals ``mask`` inside the
        shrunk rectangle and is 0 elsewhere. An empty shrunk rectangle
        yields an all-zero array.
    """
    # Bounding rectangle of the mask foreground.
    x, y, w, h = cv2.boundingRect(mask)

    center_x = x + w // 2
    center_y = y + h // 2

    # Shrunk rectangle, centered on the original one.
    new_w = int(w * scale)
    new_h = int(h * scale)
    new_x = max(0, center_x - new_w // 2)
    new_y = max(0, center_y - new_h // 2)

    # Keep the rectangle inside the image.
    new_w = min(mask.shape[1] - new_x, new_w)
    new_h = min(mask.shape[0] - new_y, new_h)

    final_mask = np.zeros_like(mask)
    if new_w > 0 and new_h > 0:
        rows = slice(new_y, new_y + new_h)
        cols = slice(new_x, new_x + new_w)
        final_mask[rows, cols] = mask[rows, cols]
    return final_mask

import torch
import torch.nn.functional as F
import numpy as np

def _foreground_bbox(mask, empty_message):
    """Return the inclusive (top, bottom, left, right) box of a (1, H, W) mask's non-zero pixels."""
    rows, cols = torch.nonzero(mask.squeeze(0) != 0, as_tuple=True)
    if rows.numel() == 0:
        raise ValueError(empty_message)
    return int(rows.min()), int(rows.max()), int(cols.min()), int(cols.max())


def align_to_mask(cloth, warp_cloth_mask, cloth_mask):
    """
    Fit the foreground of ``cloth`` / ``warp_cloth_mask`` into the box of ``cloth_mask``.

    The foreground bounding box of ``warp_cloth_mask`` is cropped out of both
    ``cloth`` and ``warp_cloth_mask``, rescaled with a single aspect-preserving
    factor so that it fits inside the bounding box of ``cloth_mask``, and pasted
    centered in that box on an otherwise zero canvas.

    Args:
        cloth (torch.Tensor): image tensor of shape (3, H, W).
        warp_cloth_mask (torch.Tensor): mask of shape (1, H, W) describing the
            foreground of ``cloth``.
        cloth_mask (torch.Tensor): target mask of shape (1, H, W).

    Returns:
        aligned_cloth (torch.Tensor): aligned image, shape (3, H, W), with the
            dtype of ``cloth_mask``.
        aligned_mask (torch.Tensor): aligned mask, shape (1, H, W).

    Raises:
        ValueError: if ``cloth_mask`` or ``warp_cloth_mask`` has no foreground.
    """
    top, bottom, left, right = _foreground_bbox(
        cloth_mask, "cloth_mask 中没有前景区域（全为零）。")
    src_top, src_bottom, src_left, src_right = _foreground_bbox(
        warp_cloth_mask, "warp_cloth_mask 中没有前景区域（全为零）。")

    # Inclusive bounds, so the extent is "max - min + 1" and never zero.
    box_h = bottom - top + 1
    box_w = right - left + 1
    src_h = src_bottom - src_top + 1
    src_w = src_right - src_left + 1

    # One factor for both axes keeps the garment's aspect ratio.
    scale_factor = min(box_h / src_h, box_w / src_w)
    target_height = min(box_h, max(1, int(round(src_h * scale_factor))))
    target_width = min(box_w, max(1, int(round(src_w * scale_factor))))

    # Resize only the foreground crop: resizing the full frame would squash
    # the background into the target box together with the garment.
    cloth_crop = cloth[:, src_top:src_bottom + 1, src_left:src_right + 1]
    mask_crop = warp_cloth_mask[:, src_top:src_bottom + 1, src_left:src_right + 1]

    cloth_crop = F.interpolate(
        cloth_crop.unsqueeze(0),  # add batch dimension
        size=(target_height, target_width),
        mode='bilinear',
        align_corners=False
    ).squeeze(0)
    mask_crop = F.interpolate(
        mask_crop.unsqueeze(0),  # add batch dimension
        size=(target_height, target_width),
        mode='nearest'
    ).squeeze(0)

    # Center the resized crop inside the target box; since the crop is never
    # larger than the box, the paste region always lies inside the canvas.
    paste_y = top + (box_h - target_height) // 2
    paste_x = left + (box_w - target_width) // 2

    aligned_cloth = torch.zeros_like(cloth_mask).repeat(3, 1, 1)  # (3, H, W)
    aligned_mask = torch.zeros_like(cloth_mask)  # (1, H, W)

    aligned_cloth[:, paste_y:paste_y + target_height, paste_x:paste_x + target_width] = cloth_crop
    aligned_mask[:, paste_y:paste_y + target_height, paste_x:paste_x + target_width] = mask_crop

    return aligned_cloth, aligned_mask

class CPDataset(data.Dataset):
    """
    Try-on dataset for CP-VTON style training on a VITON-HD like folder layout.

    ``<dataroot>/<mode>_pairs.txt`` lists one ``<image name> <cloth name>`` pair
    per line. Samples are read from the sub-folders of ``<dataroot>/<mode>``:
    ``image``, ``cloth``, ``cloth-mask``, ``agnostic-mask``, ``image-parse-v3``
    and ``cloth-warp`` / ``cloth-warp-mask`` (or their ``unpaired-`` variants).
    """

    # Raw label ids of the ``image-parse-v3`` maps merged into 13 groups:
    # {group id: [group name, [raw label ids]]}.
    _PARSE_GROUPS = {
        0: ['background', [0, 10]],
        1: ['hair', [1, 2]],
        2: ['face', [4, 13]],
        3: ['upper', [5, 6, 7]],
        4: ['bottom', [9, 12]],
        5: ['left_arm', [14]],
        6: ['right_arm', [15]],
        7: ['left_leg', [16]],
        8: ['right_leg', [17]],
        9: ['left_shoe', [18]],
        10: ['right_shoe', [19]],
        11: ['socks', [8]],
        12: ['noise', [3, 11]]
    }

    def __init__(self, dataroot, image_size=512, mode='train',  unpaired=False,semantic_nc=13,
                 caption_folder=None
                 ):
        """
        Read the pair list and, optionally, the garment captions.

        Args:
            dataroot: dataset root directory.
            image_size: side length that every image and mask is resized to.
            mode: split name; selects ``<mode>_pairs.txt`` and ``<dataroot>/<mode>``.
            unpaired: if True, use the cloth listed in the pair file and the
                ``unpaired-`` warp folders; otherwise the cloth that shares
                the image's file name.
            semantic_nc: number of merged parse groups (stored, not used by
                ``__getitem__``).
            caption_folder: path, relative to ``dataroot``, of a JSON file
                with garment captions, or None.
        """
        super(CPDataset, self).__init__()
        # base setting
        self.root = dataroot
        self.unpaired = unpaired
        self.datamode = mode  # train or test or self-defined
        self.data_list = mode + '_pairs.txt'
        self.fine_height = image_size
        self.fine_width = int(image_size / 256 * 256)
        self.semantic_nc = semantic_nc
        self.data_path = osp.join(dataroot, mode)
        self.crop_size = (self.fine_height, self.fine_height)
        self.to_tensor = transforms.ToTensor()
        # Maps RGB images to [-1, 1].
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        self.clip_normalize = transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                                                   (0.26862954, 0.26130258, 0.27577711))

        # TODO: temporarily disabled; real training needs it again:
        # assert not (self.datamode == "train" and self.unpaired), f"train must use paired dataset"

        self.cloth_caption_folder = caption_folder
        if self.cloth_caption_folder is not None:
            with open(osp.join(dataroot, self.cloth_caption_folder), 'r') as f:
                self.cloth_captions_dict = json.load(f)

        # load data list
        im_names = []
        c_names = []
        with open(osp.join(dataroot, self.data_list), 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                im_name, c_name = line.split()
                im_names.append(im_name)
                c_names.append(c_name)

        self.im_names = im_names
        self.c_names = dict()
        # In paired mode the garment file carries the same name as the image.
        self.c_names['paired'] = im_names
        self.c_names['unpaired'] = c_names

    def name(self):
        """Return the dataset's display name."""
        return "CPDataset"

    def get_agnostic(self, im, im_parse, pose_data):
        """
        Build a clothing-agnostic image by painting torso, neck and arms gray.

        Head and lower-body pixels (per ``im_parse``) and the parts of the
        arms that fall outside the drawn arm shapes are pasted back from ``im``.

        Args:
            im: PIL image of the person.
            im_parse: parse map with the same pixel size as ``im``.
            pose_data: array of (x, y) keypoints indexed 0..12 at least
                (presumably OpenPose keypoints - verify against caller).
                The array is not modified.

        Returns:
            A new PIL image; ``im`` is left untouched.
        """
        # Work on a copy: the hip keypoints are rescaled below.
        pose_data = np.array(pose_data)

        parse_array = np.array(im_parse)
        parse_head = ((parse_array == 4).astype(np.float32) +
                      (parse_array == 13).astype(np.float32))
        parse_lower = ((parse_array == 9).astype(np.float32) +
                       (parse_array == 12).astype(np.float32) +
                       (parse_array == 16).astype(np.float32) +
                       (parse_array == 17).astype(np.float32) +
                       (parse_array == 18).astype(np.float32) +
                       (parse_array == 19).astype(np.float32))

        agnostic = im.copy()
        agnostic_draw = ImageDraw.Draw(agnostic)

        # Stretch the segment between keypoints 9 and 12 to the length of
        # the segment between keypoints 2 and 5, about its midpoint.
        length_a = np.linalg.norm(pose_data[5] - pose_data[2])
        length_b = np.linalg.norm(pose_data[12] - pose_data[9])
        point = (pose_data[9] + pose_data[12]) / 2
        pose_data[9] = point + (pose_data[9] - point) / length_b * length_a
        pose_data[12] = point + (pose_data[12] - point) / length_b * length_a

        # Brush radius, proportional to the 2-5 keypoint distance.
        r = int(length_a / 16) + 1

        # mask torso
        for i in [9, 12]:
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx - r * 3, pointy - r * 6, pointx + r * 3, pointy + r * 6), 'gray', 'gray')
        agnostic_draw.line([tuple(pose_data[i]) for i in [2, 9]], 'gray', width=r * 6)
        agnostic_draw.line([tuple(pose_data[i]) for i in [5, 12]], 'gray', width=r * 6)
        agnostic_draw.line([tuple(pose_data[i]) for i in [9, 12]], 'gray', width=r * 12)
        agnostic_draw.polygon([tuple(pose_data[i]) for i in [2, 5, 12, 9]], 'gray', 'gray')

        # mask neck
        pointx, pointy = pose_data[1]
        agnostic_draw.rectangle((pointx - r * 5, pointy - r * 9, pointx + r * 5, pointy), 'gray', 'gray')

        # mask arms
        agnostic_draw.line([tuple(pose_data[i]) for i in [2, 5]], 'gray', width=r * 12)
        for i in [2, 5]:
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx - r * 5, pointy - r * 6, pointx + r * 5, pointy + r * 6), 'gray', 'gray')
        for i in [3, 4, 6, 7]:
            # A keypoint at (0, 0) is treated as missing.
            if (pose_data[i - 1, 0] == 0.0 and pose_data[i - 1, 1] == 0.0) or (
                    pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                continue
            agnostic_draw.line([tuple(pose_data[j]) for j in [i - 1, i]], 'gray', width=r * 10)
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx - r * 5, pointy - r * 5, pointx + r * 5, pointy + r * 5), 'gray', 'gray')

        # Canvas for the arm shapes, sized like the parse map it is
        # multiplied with (PIL sizes are (width, height)).
        canvas_size = (parse_array.shape[1], parse_array.shape[0])
        for parse_id, pose_ids in [(14, [5, 6, 7]), (15, [2, 3, 4])]:
            mask_arm = Image.new('L', canvas_size, 'white')
            mask_arm_draw = ImageDraw.Draw(mask_arm)
            pointx, pointy = pose_data[pose_ids[0]]
            mask_arm_draw.ellipse((pointx - r * 5, pointy - r * 6, pointx + r * 5, pointy + r * 6), 'black', 'black')
            for i in pose_ids[1:]:
                if (pose_data[i - 1, 0] == 0.0 and pose_data[i - 1, 1] == 0.0) or (
                        pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                    continue
                mask_arm_draw.line([tuple(pose_data[j]) for j in [i - 1, i]], 'black', width=r * 10)
                pointx, pointy = pose_data[i]
                if i != pose_ids[-1]:
                    mask_arm_draw.ellipse((pointx - r * 5, pointy - r * 5, pointx + r * 5, pointy + r * 5), 'black',
                                          'black')
            mask_arm_draw.ellipse((pointx - r * 4, pointy - r * 4, pointx + r * 4, pointy + r * 4), 'black', 'black')

            # Arm pixels of the parse map that the drawn arm shape did not cover.
            parse_arm = (np.array(mask_arm) / 255) * (parse_array == parse_id).astype(np.float32)
            agnostic.paste(im, None, Image.fromarray(np.uint8(parse_arm * 255), 'L'))

        agnostic.paste(im, None, Image.fromarray(np.uint8(parse_head * 255), 'L'))
        agnostic.paste(im, None, Image.fromarray(np.uint8(parse_lower * 255), 'L'))
        return agnostic

    def _dilate_erode(self, mask, dilate_iterations):
        """Dilate ``mask`` ``dilate_iterations`` times, erode once; returns a uint8 array."""
        kernel_size = int(5 * (self.fine_width / 256))
        kernel = np.ones((kernel_size, kernel_size))
        mask = cv2.dilate(mask.astype(np.uint8), kernel=kernel, iterations=dilate_iterations)
        return cv2.erode(mask.astype(np.uint8), kernel=kernel, iterations=1)

    def _merged_parse(self, im_parse_pil):
        """Return a (1, H, W) float tensor holding the merged group id of every pixel."""
        parse = torch.from_numpy(np.array(im_parse_pil)[None]).long()
        parse_map = torch.FloatTensor(20, self.fine_height, self.fine_width).zero_()
        parse_map = parse_map.scatter_(0, parse, 1.0)

        parse_onehot = torch.FloatTensor(1, self.fine_height, self.fine_width).zero_()
        for group_id, (_, raw_ids) in self._PARSE_GROUPS.items():
            for raw_id in raw_ids:
                parse_onehot[0] += parse_map[raw_id] * group_id
        return parse_onehot

    def _group_mask(self, parse_onehot, group_ids, dilate_iterations):
        """Return the dilated-then-eroded uint8 mask of the given merged group ids."""
        mask = torch.isin(parse_onehot[0], torch.Tensor(group_ids)).numpy()
        return self._dilate_erode(mask, dilate_iterations)

    def _clip_reference(self, image, mask):
        """Crop ``image`` (in [-1, 1]) to the padded box of ``mask`` and CLIP-normalize it at 224x224."""
        down, up, left, right = mask2bbox(mask[0].numpy())
        ref = image[:, down:up, left:right]
        ref = (ref + 1.0) / 2.0
        ref = transforms.Resize((224, 224))(ref)
        return self.clip_normalize(ref)

    def __getitem__(self, index):
        """
        Load one sample.

        Returns:
            dict of tensors plus the string entries ``caption`` and
            ``im_name``. Images produced by ``self.transform`` are in
            [-1, 1]; masks are float tensors of shape (1, H, W).
        """
        im_name = self.im_names[index]
        key = 'unpaired' if self.unpaired else 'paired'
        c_name = self.c_names[key][index]

        bilinear = transforms.InterpolationMode.BILINEAR
        nearest = transforms.InterpolationMode.NEAREST
        half_size = (int(self.fine_height / 2), int(self.fine_height / 2))

        # load cloth
        cloth = Image.open(osp.join(self.data_path, 'cloth', c_name)).convert('RGB')
        cloth = transforms.Resize(self.crop_size, interpolation=bilinear)(cloth)
        cloth = self.transform(cloth)  # [-1,1]

        # load cloth_mask
        cloth_mask = Image.open(osp.join(self.data_path, 'cloth-mask', c_name)).convert('L')
        cloth_mask = transforms.Resize(self.crop_size, interpolation=nearest)(cloth_mask)
        cloth_mask = self.to_tensor(cloth_mask)
        cloth_mask = (cloth_mask > 0.5).float()

        # Enlarged garment mask (4 dilations, previously 3, then 1 erosion).
        aug_cloth_mask = self._dilate_erode(cloth_mask[0].numpy(), 4)
        aug_cloth_mask = self.to_tensor(aug_cloth_mask.astype(np.float32))

        # Garment image with the (enlarged) garment region blanked out.
        inpaint_cloth = (1 - aug_cloth_mask) * cloth

        # NOTE(review): the original code returned ``cloth_feat`` without ever
        # defining it, so every __getitem__ call raised NameError. The masked
        # garment is a best guess at the intended "l1 loss" feature - confirm.
        cloth_feat = cloth * cloth_mask

        # load ref_cloth
        ref_cloth = self._clip_reference(cloth, cloth_mask)

        # load image
        im_pil = Image.open(osp.join(self.data_path, 'image', im_name)).convert('RGB')
        im_pil = transforms.Resize(self.crop_size, interpolation=bilinear)(im_pil)
        im = self.transform(im_pil)

        # load agn_mask
        agn_mask_name = im_name.replace('.jpg', '_mask.png')
        agn_mask = Image.open(osp.join(self.data_path, 'agnostic-mask', agn_mask_name)).convert('L')
        agn_mask = transforms.Resize(self.crop_size, interpolation=nearest)(agn_mask)
        agn_mask = self.to_tensor(agn_mask)
        agn_mask = (agn_mask > 0.5).float()

        # load parsing image
        parse_name = im_name.replace('.jpg', '.png')
        im_parse_pil = Image.open(osp.join(self.data_path, 'image-parse-v3', parse_name))
        im_parse_pil = transforms.Resize(self.crop_size, interpolation=nearest)(im_parse_pil)
        parse_onehot = self._merged_parse(im_parse_pil)

        # Arms (groups 5 and 6) with a random top part of each component removed.
        arm_mask = self._group_mask(parse_onehot, [5, 6], 1)
        arm_mask = crop_mask(arm_mask.astype(np.uint8))
        arm_mask = self.to_tensor(arm_mask.astype(np.float32))
        arm = arm_mask * im

        # load ref_human_target_cloth: upper garment (group 3) worn by the person
        upper_body_mask = self._group_mask(parse_onehot, [3], 1)
        upper_body_mask = self.to_tensor(upper_body_mask.astype(np.float32))
        if torch.sum(upper_body_mask) == 0:
            # No upper-garment pixels in the parse map: fall back to the agnostic mask.
            upper_body_mask = agn_mask
        upper_body = upper_body_mask * im

        # Central part of the worn garment, at half resolution.
        main_cloth_mask = crop_main_cloth(upper_body_mask.numpy()[0].astype(np.uint8)).astype(np.float32)
        main_cloth = self.to_tensor(main_cloth_mask) * im
        main_cloth = transforms.Resize(half_size, interpolation=bilinear)(main_cloth)

        ref_human = self._clip_reference(upper_body, upper_body_mask)

        # warp cloth
        warp_folder = 'cloth-warp' if not self.unpaired else 'unpaired-cloth-warp'
        warped_cloth = Image.open(osp.join(self.data_path, warp_folder, im_name))
        warped_cloth = transforms.Resize(self.crop_size, interpolation=bilinear)(warped_cloth)
        warped_cloth = self.transform(warped_cloth)

        warp_mask_folder = 'cloth-warp-mask' if not self.unpaired else 'unpaired-cloth-warp-mask'
        warped_cloth_mask = Image.open(osp.join(self.data_path, warp_mask_folder, im_name))
        warped_cloth_mask = transforms.Resize(self.crop_size, interpolation=nearest)(warped_cloth_mask)
        warped_cloth_mask = self.to_tensor(warped_cloth_mask)
        warped_cloth = warped_cloth * warped_cloth_mask

        # inpaint mask: upper garment plus both arms, enlarged
        inpaint_mask = self._group_mask(parse_onehot, [3, 5, 6], 3)
        inpaint_mask = self.to_tensor(inpaint_mask.astype(np.float32))

        # l1 loss need: person with the warped garment pasted into the inpaint region
        human_feat = warped_cloth * inpaint_mask + im * (1 - inpaint_mask)

        # Union of the parsed region, the warped garment and the worn garment.
        inpaint_mask = torch.logical_or(inpaint_mask, warped_cloth_mask).float()
        inpaint_mask = torch.logical_or(upper_body_mask, inpaint_mask).float()
        inpaint_human = im * (1 - inpaint_mask)

        # pose encoder need: half-resolution warped garment
        warped_cloth = transforms.Resize(half_size, interpolation=bilinear)(warped_cloth)

        # load captions (kept for switching "caption" below back to per-garment text)
        cloth_captions = ''
        if self.cloth_caption_folder is not None:
            # Copy first so shuffling does not reorder the shared caption dict.
            cloth_captions = list(self.cloth_captions_dict[c_name.split('_')[0]])
            if self.datamode == 'train':
                random.shuffle(cloth_captions)
            cloth_captions = ", ".join(cloth_captions)

        result = {
            "ref_human":ref_human,
            "ref_cloth":ref_cloth,
            "human":im,
            "cloth":cloth,
            "inpaint_human":inpaint_human,
            "inpaint_mask":inpaint_mask,
            "inpaint_cloth":inpaint_cloth,
            "cloth_mask":cloth_mask,
            "inpaint_cloth_mask":aug_cloth_mask,
            "inpaint_skin":arm,
            "warp_cloth":warped_cloth,

            'human_feat':human_feat,
            'cloth_feat':cloth_feat,
            "caption": "upper garment" ,#cloth_captions,
            # only needed for training
            "agn_mask": agn_mask,

            "main_cloth":main_cloth,
            "warp_cloth_mask":warped_cloth_mask,
            "im_name": self.im_names[index],
        }
        return result

    def __len__(self):
        """Return the number of image/cloth pairs."""
        return len(self.im_names)



if __name__ == '__main__':
    # Smoke test: load one training batch, print every entry and display
    # each tensor entry channel-last after mapping [-1, 1] to [0, 1].
    dataset = CPDataset('DATA/vitonhd/VITON-HD', 512, mode='train', unpaired=False, caption_folder='captions.json')
    loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=1)
    # The loop variable is not called `data`: that would rebind the module
    # alias of torch.utils.data imported at the top of the file.
    for batch in loader:
        for key, value in batch.items():
            if isinstance(value, list):
                # String entries are collated into lists.
                print(f"{key}:{value}")
                continue
            print(f"{key}:{value.shape}")
            if value.shape[1] > 3:
                # More than three channels: show them one by one.
                for channel in range(value.shape[1]):
                    show(f"{key}-{channel}", ((value[:, channel, :, :].unsqueeze(3) + 1) / 2)[0])
            else:
                show(key, ((value.permute(0, 2, 3, 1) + 1) / 2)[0])

        # Only the first batch is inspected.
        break
