from datasets import load_dataset
from tqdm import tqdm

# Build prompts.txt from the "JeanKaddour/minipile" dataset: one line per
# sufficiently long document, holding that document's leading words.
ds = load_dataset("JeanKaddour/minipile", split='test[:200]')

# Number of leading whitespace-separated words that make up a prompt.
PROMPT_WORDS = 25

# `with` guarantees both files are flushed and closed even if iterating the
# dataset or writing raises. UTF-8 is set explicitly so that non-ASCII text
# does not fail on platforms whose locale encoding is not UTF-8.
with open("prompts.txt", "w", encoding="utf-8") as promptfile, \
        open("groundtruth.txt", "w", encoding="utf-8") as groundtruthfile:
    # NOTE(review): nothing is ever written to groundtruth.txt, so it is
    # created (or truncated) and left empty. The original code also built an
    # unused string from words 20-25 of each text, which may have been meant
    # for this file -- confirm the intended ground-truth content and write it
    # inside the loop below.
    for row in tqdm(ds):
        # Split once; str.split() with no argument collapses every run of
        # whitespace (including newlines), so each prompt is a single line.
        words = row["text"].split()
        # Skip documents that have no words beyond the prompt itself.
        if len(words) <= PROMPT_WORDS:
            continue
        promptfile.write(' '.join(words[:PROMPT_WORDS]) + "\n")