import pandas as pd
import os
import random
# Build a small, reproducible subsample of every CSV file in DIR and write
# each subsample to a file of the same name in TARGET_DIR.

# Seed the global RNG so repeated runs pick the same rows. The seed has to be
# a single supported value (int here): passing a set makes random.seed()
# raise TypeError because sets are unhashable and not an accepted seed type.
SEED = 100
random.seed(SEED)

DIR = 'ORIGINAL MMLU DATASET'
TARGET_DIR = 'MMLU DATASET'

# Files with at most SMALL_FILE_MAX_ROWS rows contribute SMALL_SAMPLE_SIZE
# rows; larger files contribute LARGE_SAMPLE_SIZE rows.
SMALL_FILE_MAX_ROWS = 108
SMALL_SAMPLE_SIZE = 8
LARGE_SAMPLE_SIZE = 9

# Make sure the output directory exists before the first write.
os.makedirs(TARGET_DIR, exist_ok=True)

# os.listdir() returns entries in arbitrary order; because the RNG state is
# shared across files, sorting is required for the seeded run to select the
# same rows on every machine.
files = sorted(os.listdir(DIR))
for file in files:
    data = pd.read_csv(os.path.join(DIR, file))
    data_size = len(data)
    sample_size = (
        SMALL_SAMPLE_SIZE if data_size <= SMALL_FILE_MAX_ROWS
        else LARGE_SAMPLE_SIZE
    )
    # random.sample() raises ValueError when asked for more items than the
    # population holds, so clamp the request for very small files.
    idxs = random.sample(range(data_size), min(sample_size, data_size))
    # Positional selection keeps the sampled order and the original column
    # dtypes (building the frame row by row goes through per-row Series,
    # which can coerce values to a common dtype).
    df = data.iloc[idxs]
    df.to_csv(os.path.join(TARGET_DIR, file), index=False)