import h5py
import argparse
import numpy as np

def test_shapes_and_datatypes(h5_file):
    H, W = h5_file["depth_images"].shape[2:]
    datasets = {
        "depth_images": {"shape": (None, 1, H, W), "dtype": np.float32},
        "foreground_mask": {"shape": (None, 1, H, W), "dtype": np.float32},
        "image_instance_indices": {"shape": (None, 2), "dtype": np.int64},
        "instance_mask_bboxes": {"shape": (None, 4), "dtype": np.float32},
        "instance_masks": {"shape": (None, 1, H, W), "dtype": np.float32},
        "instance_masks_images": {"shape": (None, 1), "dtype": np.int64},
        "rgb_images": {"shape": (None, 3, H, W), "dtype": np.float32},
        "sequence_indices": {"shape": (None, 2), "dtype": np.int64},
    }

    for ds_name, ds_info in datasets.items():
        ds = h5_file[ds_name]
        assert ds.shape[1:] == ds_info["shape"][1:], f"Invalid shape for dataset '{ds_name}': {ds.shape}"
        assert ds.dtype == ds_info["dtype"], f"Invalid datatype for dataset '{ds_name}': {ds.dtype}"

def test_value_ranges(h5_file):
    datasets = [
        {
            "name": "depth_images",
            "min_value": 0,
            "max_value": 1
        },
        {
            "name": "foreground_mask",
            "min_value": 0,
            "max_value": 1
        },
        {
            "name": "instance_masks",
            "min_value": 0,
            "max_value": 1
        },
        {
            "name": "rgb_images",
            "min_value": 0,
            "max_value": 1
        },
        {
            "name": "instance_mask_bboxes",
            "min_value": 0,
            "max_value": 2048
        }
    ]

    for ds_info in datasets:
        ds_name = ds_info["name"]
        ds = h5_file[ds_name]
        min_value = ds_info["min_value"]
        max_value = ds_info["max_value"]

        for i in range(ds.shape[0]):
            assert np.all(ds[i] >= min_value) and np.all(ds[i] <= max_value), f"Invalid value range in dataset '{ds_name}' at index {i}"
            print(f"Processing dataset '{ds_name}': {i / ds.shape[0] * 100:.2f}%\r", end="")

        print(f"Processing dataset '{ds_name}': 100.00%")

def test_invalid_values(h5_file):
    datasets = [
        "rgb_images",
        "depth_images",
        "foreground_mask",
        "instance_mask_bboxes",
        "instance_masks",
    ]

    for ds_name in datasets:
        ds = h5_file[ds_name]
        for i in range(ds.shape[0]):
            assert not np.isnan(ds[i]).any(), f"Dataset '{ds_name}' contains NaN values"
            assert not np.isinf(ds[i]).any(), f"Dataset '{ds_name}' contains Inf values"
            assert not np.isneginf(ds[i]).any(), f"Dataset '{ds_name}' contains -Inf values"
            assert not np.isposinf(ds[i]).any(), f"Dataset '{ds_name}' contains +Inf values"
            assert np.std(ds[i]) != 0, f"Dataset '{ds_name}' has 0 std"

            print(f"Processing dataset '{ds_name}': {i / ds.shape[0] * 100:.2f}%\r", end="")

        print(f"Processing dataset '{ds_name}': 100.00%")
        

def test_instance_masks_correspondence(h5_file):
    instance_masks_images = h5_file["instance_masks_images"]
    image_instance_indices = h5_file["image_instance_indices"]

    num_images = image_instance_indices.shape[0]
    for i in range(num_images):
        start_idx, num_indices = image_instance_indices[i]
        for j in range(start_idx, start_idx + num_indices):
            assert instance_masks_images[j][0] == i, f"Invalid correspondence between instance masks and images at index {j}"

def test_sequence_indices(h5_file):
    if h5_file["sequence_indices"].shape[0] == 0:
        return

    sequence_indices = h5_file["sequence_indices"]
    num_images = h5_file["rgb_images"].shape[0]

    for i in range(sequence_indices.shape[0] - 1):
        cur_seq_end = sequence_indices[i][0] + sequence_indices[i][1]
        next_seq_start = sequence_indices[i + 1][0]
        assert cur_seq_end == next_seq_start, f"Invalid sequence indices at row {i}: sequences are not contiguous"

    # Check the first and last sequence indices
    first_seq_start = sequence_indices[0][0]
    last_seq_end = sequence_indices[-1][1]
    assert first_seq_start == 0, f"Invalid first sequence start index: {first_seq_start}"
    assert last_seq_end < num_images, f"Invalid last sequence end index: {last_seq_end}"

def main():
    """Parse the HDF5 path from the command line and run every check on it.

    The file is opened read-only. Checks run in a fixed order, each preceded
    by a banner line; the first failing check aborts the run with its
    exception, so the success message is printed only if all of them pass.
    """
    cli = argparse.ArgumentParser(description="Extensive testing of an HDF5 dataset.")
    cli.add_argument("filename", help="Path to the HDF5 file.")
    args = cli.parse_args()

    # (banner printed before the check, check to run), in execution order
    stages = (
        ("Testing dataset shapes and datatypes...", test_shapes_and_datatypes),
        ("Testing dataset value ranges...", test_value_ranges),
        ("Testing instance masks correspondence...", test_instance_masks_correspondence),
        ("Testing sequence indices...", test_sequence_indices),
        ("Testing invalid values...", test_invalid_values),
    )

    with h5py.File(args.filename, 'r') as h5_file:
        print(f"Testing dataset in {args.filename}:")

        for banner, run_check in stages:
            print(banner)
            run_check(h5_file)

        print("All tests passed successfully!")

# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
