import webdataset as wds

def streaming_dataset(path_pattern="data/laion/000{00..04}.tar"):
    """Build a streaming WebDataset pipeline yielding (image, caption) pairs.

    Args:
        path_pattern: Shard specification passed straight to
            ``wds.WebDataset``. It may use braceexpand syntax; the default
            ``000{00..04}`` range names ``data/laion/00000.tar`` through
            ``data/laion/00004.tar``.

    Returns:
        The configured pipeline. Iterating it yields 2-tuples built from the
        ``image.png`` and ``caption.txt`` entries of each sample, after
        decoding with the ``"pil"`` spec.
    """
    dataset = (
        # Shard shuffling is disabled so shards are visited in listed order;
        # wds.split_by_worker is used to divide shards among loader workers.
        wds.WebDataset(
            path_pattern,
            workersplitter=wds.split_by_worker,
            shardshuffle=False,
        )
        # Errors in decoding or field extraction are routed to
        # wds.warn_and_continue instead of the default handler.
        .decode("pil", handler=wds.warn_and_continue)
        .to_tuple("image.png", "caption.txt", handler=wds.warn_and_continue)
    )
    return dataset

if __name__ == "__main__":
    # Smoke test: dump the first few samples to disk and echo their captions.
    ds = streaming_dataset()
    num_samples = 5
    # zip() consults range() first, so iteration stops after exactly
    # num_samples items without reading and decoding an extra sample from
    # the stream just to discard it.
    for i, example in zip(range(num_samples), ds):
        image, text = example
        image.save(f"sample_{i}.png")
        print(text + "\n")