import json
from PIL import Image
import os

# Input and output locations for the conversion.
# Earlier single-file input, kept for reference:
# input_file = "/datanfs2/medllava/llava/mutimodal_dataset/trump/muti_knowledge/Jiwwa_Trump_sample20.json"
# Directory whose entries are each opened and parsed as a JSON dataset.
input_dir = "/datanfs4/data_ex/two"
# Names of every entry in input_dir; os.listdir returns them in arbitrary
# order and does not filter by extension or entry type.
input_files = os.listdir(input_dir)
# Directory that receives one converted output file per input entry.
output_dir = "/datanfs4/data_ex/internvl-version/two"

# Function to get image dimensions
def get_image_dimensions(image_path, image_root="/datanfs4/data_ex/train_images"):
    """Return the ``(width, height)`` of an image located under ``image_root``.

    Args:
        image_path: Image location relative to ``image_root``. The two are
            combined with ``os.path.join``, so an absolute ``image_path``
            is used as-is and ``image_root`` is ignored.
        image_root: Directory that relative image paths are resolved
            against. The default is the directory this script originally
            hard-coded, so one-argument calls behave exactly as before.

    Returns:
        The ``size`` attribute of the opened image, i.e. ``(width, height)``.

    Raises:
        Any exception raised by ``Image.open`` (for example when the file
        does not exist) propagates unchanged.
    """
    full_path = os.path.join(image_root, image_path)
    # Failures are intentionally not caught here: a record whose image
    # cannot be opened should stop the conversion rather than be written
    # without dimensions.
    with Image.open(full_path) as img:
        return img.size
# Create the destination up front so that opening the first output file
# cannot fail merely because the directory is missing.
os.makedirs(output_dir, exist_ok=True)

for file in input_files:
    input_path = os.path.join(input_dir, file)

    # Read the dataset from the JSON file. The error messages report the
    # path that was actually opened (the previous code formatted an
    # undefined name and raised NameError instead of printing).
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            dataset = json.load(f)
    except FileNotFoundError:
        print(f"Error: Input file '{input_path}' not found.")
        raise SystemExit(1)
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in '{input_path}'.")
        raise SystemExit(1)

    # Swap only the file name's own extension; a substring replace on the
    # full path could also rewrite directory names or mangle "x.jsonl".
    stem, ext = os.path.splitext(file)
    output_name = stem + '.jsonl' if ext == '.json' else file
    output_path = os.path.join(output_dir, output_name)

    # Write one JSON object per line. The context manager guarantees the
    # file is flushed and closed even if an image lookup fails part-way,
    # and UTF-8 is explicit because ensure_ascii=False emits raw non-ASCII.
    with open(output_path, 'w', encoding='utf-8') as writer:
        for item in dataset:
            # Look up the image referenced by this record.
            width, height = get_image_dimensions(item['image'])

            # Copy so the loaded dataset is left untouched, then attach
            # the dimensions alongside the original fields.
            new_item = item.copy()
            new_item['width'], new_item['height'] = width, height
            writer.write(json.dumps(new_item, ensure_ascii=False) + '\n')

    print(f"JSONL file '{output_path}' has been created.")