#!/bin/bash
#
# Launches iht_with_full_gradient_update.py for microsoft/phi-1_5 on GPU 0
# with the FastOBC pruning method.
#
# Optional environment overrides (defaults in parentheses):
#   SPARSITY    value passed to --sparsity    (0.50)
#   ITERATIONS  value passed to --iterations  (3)
#   LR          value passed to --lr          (1e-5)

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline, so a misspelled variable cannot silently end up
# as an empty string in the output path.
set -euo pipefail

# Clearing the screen is cosmetic; do not let it abort the run (for example
# when TERM is not set in a batch job).
clear || true

export OMP_NUM_THREADS=8

SPARSITY=${SPARSITY:-0.50}
ITERATIONS=${ITERATIONS:-3}
LR=${LR:-1e-5}

# Hugging Face cache location: the current working directory.
export TRANSFORMERS_CACHE=.

MODEL=microsoft/phi-1_5
DATA_SIZE=128
SEED=42

# Every hyper-parameter that can vary between runs is part of the directory
# name. The learning rate is included (replacing a former "alpha=" segment
# that expanded a variable this script never defined) so that runs with
# different LR values do not write into the same directory.
OUTPUT_DIR="./iht_results/${MODEL}/FastOBC/iterations=${ITERATIONS}-sp=${SPARSITY}-lr=${LR}-sequential=True_data-size=${DATA_SIZE}_seed=${SEED}"

# Build the argument list as an array so each value reaches Python as exactly
# one argument regardless of its content.
args=(
    --seed "${SEED}"
    --model_name_or_path "${MODEL}"
    --dtype bfloat16
    --dataset_name_or_path ./data/red_pajama_n=4096_2048_context_length_phi-1_5.pth
    --pre_decoder_modules 'model.embed_tokens' 'model.embed_dropout'
    --decoder_blocks 'model.layers'
    --post_decoder_modules 'model.final_layernorm' 'lm_head'
    --module_regex '.*layers.*((q|k|v)_proj|dense|fc\d+)$'
    --low_cpu_mem_usage
    --sequence_length 2048
    --calibration_dataset_size "${DATA_SIZE}"
    --iterations "${ITERATIONS}"
    --sparsity "${SPARSITY}"
    --pruning_method FastOBC
    --sequential
    --rel_damp 0.01
    --attn_implementation flash_attention_2
    --lr "${LR}"
    --finetune_batch_size 1
    --output_dir "${OUTPUT_DIR}"
)

CUDA_VISIBLE_DEVICES=0 python iht_with_full_gradient_update.py "${args[@]}"