



# Root directory that contains the datasets processed below.
# Taken from the first positional argument; falls back to the placeholder
# path when no argument (or an empty one) is given.
# NOTE: the assignment target must not carry a leading `$` -- `$DATA_DIR=...`
# expands the variable and tries to execute the result as a command, leaving
# DATA_DIR unset.
DATA_DIR=${1:-"/path/to/home/data"}

# Run decontaminate.py on the train split of the leetcode_contests dataset,
# using the `description` column as the problem text and an n-gram size of 10.
# Results are written to the sibling leetcode_contests_cleaned/ directory.
# Paths are quoted so a DATA_DIR containing spaces or glob characters is
# passed as a single argument.
python decontaminate.py \
    --dataset "$DATA_DIR/nan_do_datasets/leetcode_contests/" \
    --split train \
    --ngram_size 10 \
    --problem_column description \
    --cleanup \
    --output_dir "$DATA_DIR/nan_do_datasets/leetcode_contests_cleaned/"















# Run decontaminate.py on the codeparrot/apps dataset, once per split.
# Both runs use the `question` column as the problem text and an n-gram size
# of 10; each split gets its own output directory (apps_cleaned_train/ and
# apps_cleaned_test/). The splits are processed in the order train, test.
# Paths are quoted so a DATA_DIR containing spaces or glob characters is
# passed as a single argument.
for split in train test; do
    python decontaminate.py \
        --dataset "$DATA_DIR/codeparrot/apps/" \
        --split "$split" \
        --ngram_size 10 \
        --problem_column question \
        --cleanup \
        --output_dir "$DATA_DIR/codeparrot/apps_cleaned_${split}/"
done
