{"task":{"0":"Load and preprocess text data from a CSV file, then batch and shuffle the data for training.","1":"Load and decode images from a tar file in a webdataset format","2":"Load and preprocess image-caption pairs from a CSV file and a directory of images","3":"Read and process data from CSV files based on the configuration settings.","4":"Load and process a dataset by filtering, mapping, and zipping the data."},"prompt":{"0":"from torchdata.datapipes.iter import *\nfrom torchdata.datapipes.utils import *\nfrom torchdata.datapipes.map import *\n\ndef load_data(file_list, train_batch_size=64):\n    \"\"\"\n    Load and preprocess text data from a CSV file, then batch and shuffle the data for training.\n    \"\"\"\n    ","1":"from torchdata.datapipes.iter import *\ndef decode(item):\n    key, value = item\n    if key.endswith(\".txt\"):\n        return key, value.read().decode(\"utf-8\")\n    if key.endswith(\".bin\"):\n        return key, value.read().decode(\"utf-8\")\n    \ndef func(file_list):\n    \"\"\"\n    Load and decode images from a tar file in a webdataset format\n    \"\"\"\n    ","2":"from torchdata.datapipes.iter import *\n\ndef func(csv_files, buffer_size):\n    \"\"\"\n    Load and preprocess image-caption pairs from a CSV file and a directory of images\n    \"\"\"\n    ","3":"from torchdata.datapipes.iter import IterableWrapper, FileLister, FileOpener, StreamReader\n\ndef read_data(file_dir):\n    \"\"\"\n    Read and process data from CSV files based on the configuration settings.\n    \"\"\"\n    ","4":"from torchdata.datapipes.iter import *\n\ndef filter_fn(data):\n    return data % 2 == 0\n\ndef map_fn(data):\n    return data + 1\n\ndef func(data_list, filter_fn=filter_fn, map_fn=map_fn):\n    \"\"\"\n    Load and process a dataset by filtering, mapping, and zipping the data.\n    \"\"\"\n    "},"canonical_solution":{"0":"['\\n    url_dp = IterableWrapper(file_list)\\n    data_dp = FileOpener(url_dp, mode=\"b\")\\n    data= data_dp.parse_csv()\\n    data = data.shuffle().batch(batch_size=train_batch_size, drop_last=False)\\n    return data\\n\\n']","1":"['\\n    dp = FileOpener(file_list, mode=\"b\").load_from_tar().map(decode).webdataset()\\n    return dp\\n']","2":"['\\n    train_dp = FileOpener(csv_files)\\n    train_dp = train_dp.parse_csv()\\n    train_dp = train_dp.shuffle(buffer_size=buffer_size)\\n    train_dp = train_dp.sharding_filter()\\n\\n    return train_dp\\n\\n']","3":"[\"\\n    datapipe = FileLister(file_dir).filter(lambda fname: fname.endswith('.csv'))\\n    datapipe = IterableWrapper(datapipe)\\n    datapipe = FileOpener(datapipe, mode='t')\\n    summary_dp = datapipe.parse_csv(skip_lines=0)\\n    dataset = summary_dp\\n    return dataset\\n\\n\"]","4":"['\\n    dp = IterableWrapper(data_list)\\n    dp = dp.filter(filter_fn=filter_fn)\\n    dp = dp.map(map_fn)\\n    dp1,dp2 = dp.fork(2)\\n    dp = dp1.zip(dp2)\\n    return dp\\n\\n']"},"test_input":{"0":"{'file_list': \"['<real_path>\/test_data.csv']\", 'train_batch_size': '64'}","1":"{'file_list': \"['<real_path>\/test_data.tar']\"}","2":"{'csv_files': \"['<real_path>\/test_data.csv']\", 'buffer_size': '1'}","3":"{'file_dir': \"'<real_path>\/'\"}","4":"{'data_list': '[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]', 'filter_fn': 'filter_fn', 'map_fn': 'map_fn'}"},"test":{"0":"\n\nMETADATA = {\n    'author': '',\n    'dataset': 'test',\n    'type': 'test'\n}\n\n\ndef check():\n    assert len(list(load_data(file_list=[\"<real_path>\/test_data.csv\"], train_batch_size=64))) == len(list(FileOpener(IterableWrapper([\"<real_path>\/test_data.csv\"]), mode=\"b\").parse_csv().shuffle().batch(batch_size=64, drop_last=False)))\n\n\n\n\n","1":"\n\nMETADATA = {\n    'author': '',\n    'dataset': 'test',\n    'type': 'test'\n}\n\n\ndef check():\n    assert list(func([\"<real_path>\/test_data.tar\"])) == list(FileOpener([\"<real_path>\/test_data.tar\"], mode=\"b\").load_from_tar().map(decode).webdataset())\n\n    assert type(func([\"<real_path>\/test_data.tar\"])) == type(FileOpener([\"<real_path>\/test_data.tar\"], mode=\"b\").load_from_tar().map(decode).webdataset())\n\n\n","2":"\n\nMETADATA = {\n    'author': '',\n    'dataset': 'test',\n    'type': 'test'\n}\n\n\ndef check():\n    assert list(func(csv_files=[\"<real_path>\/test_data.csv\"], buffer_size=1)) == list(FileOpener([\"<real_path>\/test_data.csv\"]).parse_csv().shuffle(buffer_size=1).sharding_filter())\n\n\n","3":"\n\nMETADATA = {\n    'author': '',\n    'dataset': 'test',\n    'type': 'test'\n}\n\n\ndef check():\n    assert len(list(read_data('<real_path>\/'))) == len(list(FileLister('<real_path>\/').filter(lambda fname: fname.endswith('.csv')).open_files(mode='t').parse_csv(skip_lines=0)))\n\n\n","4":"\n\nMETADATA = {\n    'author': '',\n    'dataset': 'test',\n    'type': 'test'\n}\n\n\ndef check():\n    assert len(list(func([1,2,3,4,5,6,7,8,9,10], filter_fn=filter_fn, map_fn=map_fn))) == len(list(IterableWrapper([1,2,3,4,5,6,7,8,9,10]).filter(filter_fn=filter_fn).map(map_fn).fork(2)[0].zip(IterableWrapper([1,2,3,4,5,6,7,8,9,10]).filter(filter_fn=filter_fn).map(map_fn).fork(2)[1])))\n\n    assert type(list(func([1,2,3,4,5,6,7,8,9,10], filter_fn=filter_fn, map_fn=map_fn))[0]) == tuple\n\n\n"}}