#!/bin/bash

# Strict mode: stop on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Environment switches exported to every child process started by this script.
# The values are passed through verbatim; how they are interpreted is up to
# the libraries that read them.
export TOKENIZERS_PARALLELISM=false \
  CUDA_LAUNCH_BLOCKING=1 \
  NCCL_P2P_DISABLE=0 \
  TORCH_USE_CUDA_DSA=1

# Checkpoint location passed to adapt_mixtral.py as --original_model_name.
MODEL="/Path/Mixtral-8x7B-v0.1"

# Root directories for the artifacts of this ablation run.
BASE_RESULTS="./output/results/mixtral/ablation"
BASE_MODELS="./output/model/mixtral/ablation"
BASE_OPTS="./output/opt/mixtral/ablation"
METRICS_SAVE_DIR="./output/metrics/mixtral"

# Concrete output directories read by the adapt_mixtral.py invocation.
# They were never assigned before, so under `set -u` the script died with
# "unbound variable" before python was started. Each one keeps a value
# already supplied through the environment and otherwise falls back to the
# matching base directory above.
RESULTS_DIR="${RESULTS_DIR:-${BASE_RESULTS}}"
MODEL_DIR="${MODEL_DIR:-${BASE_MODELS}}"
OPT_DIR="${OPT_DIR:-${BASE_OPTS}}"
METRICS_DIR="${METRICS_DIR:-${METRICS_SAVE_DIR}}"

# Expose devices 0 and 1 to the python process via CUDA_VISIBLE_DEVICES.
export CUDA_VISIBLE_DEVICES=0,1

# Flags for adapt_mixtral.py, collected in an array so each option sits on its
# own line without backslash continuations. Order matches the command line
# that is ultimately executed.
adapt_args=(
  # Model and overall mode
  --original_model_name="${MODEL}"
  --adapt_mode=adapt
  --optimize_order=prune_first
  --min_margin=1
  --max_neighbor_diff=1
  --objective_type=log
  # Pruning settings
  --prune_method=mutual_information
  --pruning_ratio=0.75
  --sigmoid_t=0.001
  # Merging settings
  --merge_method=output-cosine
  --merging_ratio=0.67
  --merge=norm_drop_fre
  --cluster=Graph_Partitioning
  # Calibration and batch sizes
  --calib_set=c4
  --train_batch_size=16
  --eval_batch_size=8
  --n_sentences=32
  # Evaluation
  --evaluation_mode=accuracy
  --eval_task=arc_challenge,arc_easy,boolq,hellaswag,mmlu,openbookqa,rte,winogrande
  # Output locations
  --result_output_dir="${RESULTS_DIR}"
  --model_output_dir="${MODEL_DIR}"
  --opt_save_dir="${OPT_DIR}"
  --metrics_save_dir="${METRICS_DIR}"
  --metrics=output-cosine,mutual_information
  # Miscellaneous switches
  --override=False
  --use_wandb=False
)

python adapt_mixtral.py "${adapt_args[@]}"

# Only reached when the python run succeeds, given the script's errexit setting.
printf '%s\n' "Done."