from PIL import Image
import os
import hashlib
import json

def image_hash(image_path):
    """Return the hexadecimal MD5 digest of the raw bytes of a file.

    The digest is computed over the file contents exactly as stored on disk,
    so two files compare equal only when they are byte-for-byte identical.

    Args:
        image_path: Path of the file to fingerprint.

    Returns:
        The 32-character lowercase hex MD5 digest string.
    """
    digest = hashlib.md5()
    with open(image_path, 'rb') as handle:
        digest.update(handle.read())
    return digest.hexdigest()

def white_ratio(image_path):
    """Return the fraction of pure-white pixels in the image at *image_path*.

    The image is converted to RGB before counting, so grayscale, palette and
    RGBA files are measured the same way as plain RGB ones. Any alpha channel
    is discarded by that conversion. A pixel counts as white only when it is
    exactly (255, 255, 255).

    Args:
        image_path: Path of an image file that Pillow can open.

    Returns:
        The number of white pixels divided by the total number of pixels,
        or 0.0 when the image contains no pixels.
    """
    # The context manager closes the underlying file; convert() loads the
    # pixel data into a new image that stays usable after the file is closed.
    with Image.open(image_path) as image:
        rgb_image = image.convert("RGB")

    width, height = rgb_image.size
    total_pixels = width * height
    if total_pixels == 0:
        return 0.0

    # With maxcolors equal to the pixel count the number of distinct colours
    # can never exceed the limit, so getcolors() does not return None here.
    white_pixels = 0
    for count, colour in rgb_image.getcolors(maxcolors=total_pixels):
        if colour == (255, 255, 255):
            white_pixels = count
            break
    return white_pixels / total_pixels

def find_duplicates_with_white_ratio(folder_path):
    """Scan a folder of images for exact duplicates and measure white ratios.

    Every entry of *folder_path* whose name ends in ``.png`` or ``.jpg``
    (compared case-insensitively) is hashed with ``image_hash`` and measured
    with ``white_ratio``. Entries are visited in sorted name order so the
    results are reproducible across runs and file systems.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A ``(duplicates, white_ratios)`` tuple. ``duplicates`` is a list of
        ``(first_filename, duplicate_filename)`` pairs for files whose hash
        equals that of an earlier file. ``white_ratios`` holds one value per
        scanned image, duplicates included, in visiting order.
    """
    image_hashes = {}
    white_ratios = []
    duplicates = []

    # os.listdir() returns names in arbitrary order; sorting fixes both the
    # order of white_ratios and which file of a duplicate pair is "first".
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(('.png', '.jpg')):
            continue

        image_path = os.path.join(folder_path, filename)
        hash_value = image_hash(image_path)

        if hash_value in image_hashes:
            duplicates.append((image_hashes[hash_value], filename))
        else:
            image_hashes[hash_value] = filename

        white_ratios.append(white_ratio(image_path))

    return duplicates, white_ratios

# Result folder to analyse; the images are read from its "img" subfolder.
folder_path = "../results/test/baseline/layouttransformer"
img_path = f"{folder_path}/img"
duplicates, white_ratios = find_duplicates_with_white_ratio(img_path)

# Report every pair of files found to have identical contents.
if duplicates:
    print("repeat:")
    for dup in duplicates:
        print(f"{dup[0]} 和 {dup[1]}")

# Number of images that were measured.
print(len(white_ratios))

# Store the complement of each white ratio (the non-white share) as JSON
# next to the image folder.
dict_100 = {'ratio': [1 - ratio for ratio in white_ratios]}
with open(f"{folder_path}/white.json", 'w') as json_file:
    json.dump(dict_100, json_file)

