#!/bin/sh
# Runs data_creator.py once for every dataset named in the loop below.
#
# The script first moves to the parent of the current working directory, so it
# is presumably meant to be launched from a subdirectory that sits next to
# data_creator.py -- TODO confirm.
#
# Exit status: 0 when every run succeeded, 1 when the directory change failed
# or at least one data_creator.py run returned a non-zero status.

# Stop immediately if the directory change fails; otherwise every run below
# would execute from the wrong location.
cd .. || {
    echo "error: cannot change to the parent directory" >&2
    exit 1
}

# Longer dataset list kept for reference (currently disabled):
# for dataset in stackexchange wikipedia cc github pubmed_abstracts openwebtext2 freelaw math nih uspto hackernews enron books3 pubmed_central gutenberg arxiv bookcorpus2 opensubtitles youtubesubtitles ubuntu europarl philpapers

# Remembers whether any run failed, so the remaining datasets are still
# processed and the failure is reported through the exit status at the end.
status=0

for dataset in enron pubmed_abstracts nih philpapers
do
    echo "dataset: $dataset"
    # Alternative invocation kept for reference (currently disabled):
    # CUDA_VISIBLE_DEVICES=$gpu_id python di.py --split $split_name --dataset_name $dataset --model_name $model_name --batch_size $batch_size
    if ! python data_creator.py --dataset_name "$dataset"; then
        echo "error: data_creator.py failed for dataset: $dataset" >&2
        status=1
    fi
done

exit "$status"