import json
from PIL import Image
import os

# Input and output file paths.
# The JSON datasets to convert are listed explicitly, one path per entry
# (an earlier variant of this script listed a directory with os.listdir).
input_files = [
    '/datanfs2/shenruoyan/data/SIUdata/LLaVA-main/multi_knowledge/muti_knowledge/three/Trump_Jwwa_Elon_sample24.json',
    '/datanfs2/shenruoyan/data/SIUdata/LLaVA-main/multi_knowledge/muti_knowledge/three/Trump_Kitty_Elon_sample24.json',
]
# Directory used when building the path of each JSONL output file.
output_dir = "/datanfs4/data_ex/internvl-version/three"

# Function to get image dimensions
def get_image_dimensions(image_path, image_root="/datanfs2/shenruoyan/data/SIUdata/LLaVA-main"):
    """Return the size of an image as reported by Pillow.

    Args:
        image_path: Path of the image. It is joined onto ``image_root`` with
            ``os.path.join``, so an absolute path is used as-is.
        image_root: Directory that ``image_path`` is resolved against.
            Defaults to the dataset root this script was written for.

    Returns:
        The ``size`` attribute of the opened image, i.e. ``(width, height)``.

    Raises:
        Any exception raised by ``Image.open`` (for example when the file is
        missing or cannot be read as an image) propagates to the caller, so a
        bad record stops the conversion instead of being silently skipped.
    """
    full_path = os.path.join(image_root, image_path)
    # The context manager closes the underlying file as soon as the size
    # has been read.
    with Image.open(full_path) as img:
        return img.size
# Convert every input JSON dataset into a JSONL file inside output_dir,
# adding the pixel width and height of each record's image.
os.makedirs(output_dir, exist_ok=True)
for file in input_files:
    # Read the dataset from the JSON file; abort the script if it is
    # missing or malformed.
    try:
        with open(file, 'r', encoding='utf-8') as f:
            dataset = json.load(f)
    except FileNotFoundError:
        print(f"Error: Input file '{file}' not found.")
        raise SystemExit(1)
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in '{file}'.")
        raise SystemExit(1)

    # Build the output path from the input's base name only: os.path.join
    # drops output_dir entirely when the joined component is an absolute
    # path, which would put the result next to the input file instead.
    # splitext swaps just the extension rather than every '.json' substring.
    stem, _ext = os.path.splitext(os.path.basename(file))
    output_path = os.path.join(output_dir, stem + '.jsonl')
    print(output_path)

    # Explicit UTF-8 is required because ensure_ascii=False writes non-ASCII
    # characters verbatim; the with-block guarantees the file is flushed and
    # closed even if an image fails to load.
    with open(output_path, 'w', encoding='utf-8') as writer:
        for item in dataset:
            # Get image dimensions for the image referenced by this record.
            width, height = get_image_dimensions(item['image'])

            # Copy so the loaded dataset entry itself is left unmodified.
            new_item = item.copy()
            new_item['width'] = width
            new_item['height'] = height

            # One JSON object per line (JSONL).
            writer.write(json.dumps(new_item, ensure_ascii=False) + '\n')
    print(f"JSONL file '{output_path}' has been created.")