
# ---------------------- Step 1: SIMT + Activation-aware ALS ----------------------
# Compress the pretrained LLaMA-7B model using SIMT profiling and ALS factorization.
# The compressed model checkpoint is saved to the current directory (--save_path .).

# python -u ALS_ActLR.py --model jeffwan/llama-7b-hf --step 1 --ratio 0.6  --save_path .  --tau 0.003 --rho 0.003 --iter 3

# ---------------------- Step 2: UW-MOD (Uncertainty-Weighted Multi-Objective Distillation) ----------------------
# Perform knowledge distillation on top of the compressed model from Step 1
# (passed in via --student_ckpt).
# The CE, KD, and FD losses are balanced with uncertainty weighting.

# python -u ALS_ActLR.py  --model jeffwan/llama-7b-hf  \
#                         --student_ckpt  jeffwan_llama_7b_hf_ratio0.6_tau0.003_rho0.003_iter3.pt \
#                         --step 2  \
#                         --save_path ./runs \
#                         --epochs 50 \
#                         --kd_loader_batch_size 3   

# ---------------------- Step 3: Evaluation ----------------------
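# Evaluate the distilled student model on perplexity and commonsense benchmarks.
# NOTE(review): Step 2 above is run with --save_path ./runs, so --model_path may
# need to point at the checkpoint written there (e.g. ./runs/student_distilled.pt)
# rather than the current directory — confirm the actual output path.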

# python -u ALS_ActLR.py  --model_path  student_distilled.pt  --step 3  

