import itertools

import webdataset as wds

def streaming_dataset(path_pattern: str = "data/cc/cc3m-train-0{000..199}.tar"):
    """Build a streaming WebDataset pipeline of (image, caption) pairs.

    Args:
        path_pattern: Shard URL/path spec handed to ``wds.WebDataset``.
            Brace ranges such as ``{000..199}`` use braceexpand syntax to
            name many tar shards at once. The default is the CC3M training
            shard pattern this script was written for.

    Returns:
        The WebDataset pipeline. Iterating it yields 2-tuples built from
        each sample's ``jpg`` entry (decoded with the ``"pil"`` image
        decoder) and its ``txt`` entry.

    Nothing is read when this function is called; shards are only opened
    once the returned pipeline is iterated.
    """
    # Errors raised while decoding a sample or extracting the jpg/txt
    # fields are passed to wds.warn_and_continue instead of propagating,
    # so one bad sample does not abort the stream.
    dataset = (
        wds.WebDataset(path_pattern, workersplitter=wds.split_by_worker)
        .decode("pil", handler=wds.warn_and_continue)
        .to_tuple("jpg", "txt", handler=wds.warn_and_continue)
    )
    return dataset

if __name__ == "__main__":
    # Smoke test: stream the first few samples, write each image to the
    # current directory as sample_<i>.jpg and print its caption.
    num_samples = 5
    # islice ends the iteration after exactly num_samples items, so no
    # extra sample is fetched and decoded only to be thrown away.
    first_samples = itertools.islice(streaming_dataset(), num_samples)
    for i, (image, text) in enumerate(first_samples):
        image.save(f"sample_{i}.jpg")
        print(text + "\n")