#!/bin/bash

# This script showcases how to run ZipLM oneshot pruning on a fine-tuned SQuAD model, reproducing our results in Table 2 of the paper.

# ZipLM inputs:
# --model_name_or_path: path to any model fine-tuned on SQuAD; for this demo we use a randomly picked open-source bert-base-uncased model from the HuggingFace Hub
# --do_ziplm_oneshot: flag to trigger ZipLM oneshot pruning
# --ziplm_target: target speedup for pruning
# --timings_file: file from which the inference-environment timings are read; for this demo we provide timings for a V100 GPU

# Launch run_qa.py on SQuAD with the ZipLM oneshot flags enabled.
# The arguments are collected in an array so that each group can be
# annotated; they are handed to the script in exactly the order listed.
run_qa_args=(
    # Starting checkpoint and dataset.
    --model_name_or_path neuralmagic/oBERT-teacher-squadv1
    --dataset_name squad

    # Run modes, precision, and batch/sequence sizes.
    --do_train
    --fp16
    --do_eval
    --evaluation_strategy epoch
    --per_device_train_batch_size 32
    --per_device_eval_batch_size 32
    --max_seq_length 384

    # Output location, preprocessing workers, seed, and reporting.
    --output_dir output_dir/test
    --overwrite_output_dir
    --preprocessing_num_workers 8
    --seed 42
    --report_to none

    # ZipLM-specific options.
    --do_ziplm_oneshot
    --ziplm_target 2
    --timings_file bertbase_squad_V100.txt
)

# CUDA_VISIBLE_DEVICES is set for this one command only, so the Python
# process sees just device 0.
CUDA_VISIBLE_DEVICES=0 python run_qa.py "${run_qa_args[@]}"
