from tqdm import tqdm
import llm_blender
import torch
from datasets import load_dataset, concatenate_datasets
import numpy as np
from transformers import AutoTokenizer
import datasets
import argparse

# Number of leading rows copied into each "_mini" subset.
MINI_SIZE = 2000

# Dataset parts to mirror, processed in this order.
PART_NAMES = (
    "ultrafeedback_binarized_with_response_full_part0",
    "ultrafeedback_binarized_with_response_full_part1",
    "ultrafeedback_binarized_with_response_full_part2",
)


def mini_row_indices(num_rows, limit=MINI_SIZE):
    """Return the row indices that make up the "mini" subset.

    Args:
        num_rows: Number of rows available in the full dataset.
        limit: Maximum number of leading rows to keep.

    Returns:
        ``range(min(limit, num_rows))`` -- the first ``limit`` rows, clamped
        so that a dataset shorter than ``limit`` yields a smaller subset
        instead of out-of-range indices.
    """
    return range(min(limit, num_rows))


def mirror_part(part_name):
    """Re-publish one upstream dataset part together with a "mini" subset.

    Steps, in order: download ``ZHLiu627/<part_name>`` (split
    ``train_prefs``), push it to ``YYYYYYibo/<part_name>``, save it to
    ``../datasets/<part_name>``, then select the first ``MINI_SIZE`` rows and
    push/save them under the same name with a ``_mini`` suffix.

    Args:
        part_name: Dataset name without any Hub namespace.
    """
    # NOTE(review): `ignore_verifications` is the legacy spelling; newer
    # `datasets` releases appear to use `verification_mode` instead --
    # confirm against the installed version before upgrading.
    full_dataset = load_dataset(
        f"ZHLiu627/{part_name}", split="train_prefs",
        download_mode="force_redownload", ignore_verifications=True)

    full_dataset.push_to_hub(
        f"YYYYYYibo/{part_name}", split="train_prefs", private=False)
    full_dataset.save_to_disk(f"../datasets/{part_name}")

    # Clamp to the dataset length so a short split does not make `select`
    # fail on out-of-range indices.
    mini_dataset = full_dataset.select(mini_row_indices(len(full_dataset)))

    # NOTE(review): unlike the full dataset, this repo id carries no
    # "YYYYYYibo/" namespace, so the destination presumably depends on the
    # logged-in Hub account -- confirm this is intended.
    mini_dataset.push_to_hub(
        f"{part_name}_mini", split="train_prefs", private=False)
    mini_dataset.save_to_disk(f"../datasets/{part_name}_mini")


if __name__ == "__main__":

    for part_name in PART_NAMES:
        mirror_part(part_name)
