from tqdm import tqdm
import torch
from datasets import load_dataset, concatenate_datasets, load_from_disk
import numpy as np
from transformers import AutoTokenizer
import datasets
import argparse
import os
import json
from huggingface_hub import delete_repo

def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of this script.

    Args:
        argv: Sequence of argument strings to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        Namespace with two string attributes: ``dataset`` (directory passed
        to ``load_from_disk``) and ``output`` (name the result is saved under).

    Raises:
        SystemExit: If an option is missing or unknown; argparse prints a
            usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Load a dataset saved on disk, drop its 'rejected' column, "
            "rename 'resp0' to 'rejected' and save the result."
        )
    )
    # Both options are mandatory. Without ``required=True`` a forgotten flag
    # becomes ``None`` and only fails later, inside the load/save calls.
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="directory of the dataset to load with load_from_disk",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="name of the directory (under the current one) to save to",
    )
    return parser.parse_args(argv)


def replace_rejected_with_resp0(dataset):
    """Return *dataset* with ``resp0`` taking the place of ``rejected``.

    The existing ``rejected`` column is removed first so that the rename
    does not collide with it. The result of each call is used rather than
    the input object, exactly as the original script did.

    Args:
        dataset: Object returned by ``load_from_disk``; it must provide
            ``remove_columns`` and ``rename_column`` and contain both a
            ``rejected`` and a ``resp0`` column.

    Returns:
        The dataset produced by the two column operations.
    """
    without_rejected = dataset.remove_columns(["rejected"])
    return without_rejected.rename_column("resp0", "rejected")


def main(argv=None):
    """Run the script: load, swap the columns, save.

    Args:
        argv: Optional argument list forwarded to :func:`parse_args`;
            ``None`` means "use the process command line".
    """
    args = parse_args(argv)

    my_dataset = load_from_disk(args.dataset)
    train_dataset = replace_rejected_with_resp0(my_dataset)

    # The "./" prefix is kept from the original script so existing
    # invocations write to the same place.
    # NOTE(review): this also prefixes absolute paths ("/x" -> ".//x"),
    # which presumably resolves relative to the working directory — confirm
    # whether that is intended.
    train_dataset.save_to_disk("./" + args.output)


if __name__ == "__main__":
    main()
