#!/bin/bash

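# Step 4: compute acc — run consistency_baseline.py and consistency.py on the
# mmmlu dataset for every configured seed and model.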
# Model identifiers handed to the Python scripts through --mname.
# The list is assembled one model family at a time; order is significant
# because the models are processed in array order.
models=()
models+=("Qwen/Qwen2.5-7B" "Qwen/Qwen2.5-14B")
models+=("Qwen/Qwen3-8B" "Qwen/Qwen3-14B")
models+=("CohereLabs/aya-expanse-8b")
models+=("meta-llama/Llama-3.1-8B" "meta-llama/Llama-3.2-3B")
models+=("google/gemma-3-4b-pt" "google/gemma-3-12b-pt")

# Seeds handed to the Python scripts through --seed. Only seed 0 is active;
# uncomment the lines below to include seeds 1 and 2 as well.
seeds=(0)
# seeds+=(1)
# seeds+=(2)

# Each entry holds two whitespace-separated values:
# "<lang1_learning_strength> <lang2_learning_strength>".
pairs=("1.0 1.0")

# Metric name handed to the Python scripts through --metric. Exactly one
# assignment should be active; the alternatives are kept for quick switching.
metric='rankc'
# metric='top1'
# metric='onlycorrect'

#######################################
# Run consistency_baseline.py once, and consistency.py once per strength
# pair, for every seed/model combination.
# Globals:   seeds, models, pairs, metric (all read-only here)
# Arguments: none
# Outputs:   a "Consistency results on: ..." line per seed/model on stdout,
#            followed by whatever the Python scripts print
# Returns:   status of the last command executed (0 if nothing was run)
#######################################
run_consistency_sweep() {
    local seed mname pair l1_strength l2_strength
    local -a common_args

    for seed in "${seeds[@]}"; do
        for mname in "${models[@]}"; do
            echo "Consistency results on: " "$mname"

            # Arguments shared by both scripts. Kept in an array so each value
            # reaches python as exactly one argument, even if it contains
            # whitespace or glob characters.
            common_args=(
                --seed "$seed"
                --dataset mmmlu
                --instance_num 5000
                --mname "$mname"
            )

            python consistency_baseline.py "${common_args[@]}" --metric "$metric"

            for pair in "${pairs[@]}"; do
                # Split the "<lang1> <lang2>" entry into its two fields.
                read -r l1_strength l2_strength <<< "$pair"
                python consistency.py "${common_args[@]}" \
                    --lang1_learning_strength "$l1_strength" \
                    --lang2_learning_strength "$l2_strength" \
                    --metric "$metric"
            done
        done
    done
}

run_consistency_sweep

