from tqdm import tqdm
import torch
from datasets import load_dataset, concatenate_datasets
import numpy as np
from transformers import AutoTokenizer
import datasets
import argparse

def build_parser():
    """Create the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: a parser accepting the string options
        ``--dataset`` and ``--output``. Both are mandatory so that a missing
        value is rejected up front instead of surfacing later as ``None``.
    """
    parser = argparse.ArgumentParser(
        description="Load the 'train_prefs' split of a dataset and save it to disk.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset identifier passed as the first argument to load_dataset().",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Directory (resolved under the current directory) to save the dataset to.",
    )
    return parser


def main(argv=None):
    """Load the ``train_prefs`` split and write it to disk.

    Args:
        argv: Optional list of command-line arguments. When ``None``,
            argparse falls back to ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    dataset_dir = args.dataset
    output_dir = args.output

    # NOTE(review): newer `datasets` releases appear to replace
    # `ignore_verifications` with `verification_mode` -- confirm against the
    # version this project pins before upgrading.
    my_dataset = load_dataset(
        dataset_dir,
        split="train_prefs",
        download_mode="force_redownload",
        ignore_verifications=True,
    )

    # The "./" prefix is kept as-is: the output path is always interpreted
    # relative to the current working directory.
    my_dataset.save_to_disk("./" + output_dir)


# Argument parsing lives inside main() so that importing this module does not
# consume the importing process's command line.
if __name__ == "__main__":
    main()
