import os

import datasets

import os
import datasets


def read_json(file=None):
    """Load a JSON file through the ``datasets`` library and return it.

    The loaded dataset is also printed to stdout before being returned.

    Args:
        file: Value forwarded as ``data_files`` to ``datasets.load_dataset``.

    Returns:
        Whatever ``datasets.load_dataset`` yields for the ``'train'`` split
        of the "json" loader.

    Raises:
        ValueError: If ``file`` is None.
    """
    if file is None:
        raise ValueError("Please provide a valid file path.")

    loaded = datasets.load_dataset(
        "json",
        data_files=file,
        split="train",
    )
    # Echo the dataset so the caller can see what was loaded.
    print(loaded)
    return loaded

def check_image_paths(data=None, root_dir=None):
    """Return the image paths from ``data`` that exist on disk.

    Each record's ``'image_path'`` value is tested with ``os.path.exists``.
    Paths that do not exist are reported on stdout and left out of the result.

    Args:
        data: Iterable of mappings, each providing an ``'image_path'`` entry.
        root_dir: Optional directory joined in front of every image path.
            When None (the default) the paths are used exactly as stored.

    Returns:
        List of the existing paths, in the order they were encountered.

    Raises:
        ValueError: If ``data`` is None.
    """
    if data is None:
        # Fail with a clear message instead of a TypeError from iterating None.
        raise ValueError("Please provide valid data.")

    image_paths = []
    for item in data:
        image_path = item['image_path']
        if root_dir is None:
            full_path = image_path
        else:
            full_path = os.path.join(root_dir, image_path)

        if os.path.exists(full_path):
            image_paths.append(full_path)
        else:
            print(f"Image path does not exist: {full_path}")
    return image_paths

# Run the check only when this file is executed as a script, so that importing
# the module does not trigger a dataset load.
if __name__ == "__main__":
    # NOTE(review): 'path' looks like a placeholder — replace it with the
    # location of the real JSON file before running.
    data = read_json(file='path')
    check_image_paths(data)

