#!/bin/bash

# Runs the end-to-end profiling sweeps one after another: profile_llama.py
# for each device/quantization combination listed below, then profile_bert.py.
#
# Every run is attempted even if an earlier one fails; failures are reported
# on stderr and the script exits non-zero at the end if any run failed.
# The profiling scripts are referenced by relative path, so this script is
# expected to be launched from the directory that contains them.

# Token range shared by every run.
readonly MIN_TOKENS=256
readonly MAX_TOKENS=30720

# NOTE(review): "reuslt" looks like a typo for "result"; the spelling is kept
# byte-for-byte because existing logs or tooling may rely on these paths.
readonly LLAMA_OUT_DIR=profiling_reuslt
readonly BERT_OUT_DIR=profiling_reuslt_bert

# Number of profiling runs that exited with a non-zero status.
failed_runs=0

#######################################
# Run one profiling script with the shared token range.
# Globals:   MIN_TOKENS, MAX_TOKENS (read), failed_runs (written)
# Arguments: $1 - profiling script to run
#            $2 - value passed to --device
#            $3 - value passed to --output-file
#            $4... - extra flags, forwarded verbatim after --output-file
# Outputs:   a diagnostic on stderr when the run fails
# Returns:   0 always, so that the remaining runs still execute
#######################################
run_profile() {
  local script=$1
  local device=$2
  local output_file=$3
  shift 3

  # Make sure the log directory exists; it is not known from here whether the
  # profiling scripts create it themselves, and mkdir -p is harmless if so.
  local out_dir
  out_dir=$(dirname -- "$output_file")
  mkdir -p -- "$out_dir" \
    || printf 'warning: cannot create output directory %s\n' "$out_dir" >&2

  local status=0
  python "$script" \
    --min-tokens "$MIN_TOKENS" \
    --max-tokens "$MAX_TOKENS" \
    --device "$device" \
    --output-file "$output_file" \
    "$@" || status=$?

  if ((status != 0)); then
    printf 'error: %s (device=%s, output=%s) exited with status %d\n' \
      "$script" "$device" "$output_file" "$status" >&2
    failed_runs=$((failed_runs + 1))
  fi
  return 0
}

# profiling for auto gpu mapping
run_profile profile_llama.py auto "${LLAMA_OUT_DIR}/end_to_end_time_auto_int8.log" --aggressive-memory --load-in-8bit
run_profile profile_llama.py auto "${LLAMA_OUT_DIR}/end_to_end_time_auto_nf4.log" --aggressive-memory --load-in-4bit

# profiling for single gpu mapping
run_profile profile_llama.py cuda:0 "${LLAMA_OUT_DIR}/end_to_end_time_gpu0_int8.log" --aggressive-memory --load-in-8bit
run_profile profile_llama.py cuda:0 "${LLAMA_OUT_DIR}/end_to_end_time_gpu0_nf4.log" --aggressive-memory --load-in-4bit

# profiling on cpu: 8-bit only, and without --aggressive-memory
run_profile profile_llama.py cpu "${LLAMA_OUT_DIR}/end_to_end_time_cpu_int8.log" --load-in-8bit

# bert
run_profile profile_bert.py cuda "${BERT_OUT_DIR}/end_to_end_time_cuda.log" --aggressive-memory

# Surface any failure to the caller instead of reporting only the last run.
if ((failed_runs > 0)); then
  printf '%d profiling run(s) failed\n' "$failed_runs" >&2
  exit 1
fi