#!/bin/bash

# Invoke awq.entry with --run_awq on meta-llama/Llama-2-7b-hf using
# w_bit=4 and q_group_size=128, writing the result via --dump_awq.
# Arguments are collected in an array so each flag sits on its own line
# without backslash continuations.
# NOTE(review): the exact effect of --no-use-cache is defined by awq.entry
# and is not visible from this script — confirm against the entry point.
awq_search_args=(
  --model_path "meta-llama/Llama-2-7b-hf"
  --w_bit 4
  --q_group_size 128
  --run_awq
  --dump_awq awq_cache/llama2-7b-w4-g128_mine_new.pt
  --no-use-cache
)
python -m awq.entry "${awq_search_args[@]}"

# Generate real quantized weights (INT4) from the AWQ results dumped above.
# Currently disabled; uncomment the command below to run it.
# python -m awq.entry --model_path "meta-llama/Llama-2-7b-hf" \
#     --w_bit 4 --q_group_size 128 \
#     --load_awq awq_cache/llama2-7b-w4-g128_mine_new.pt \
#     --q_backend real --dump_quant quant_cache/llama-2-7b-chat-w4-g128_mine_preyarn.pt