#!/usr/bin/env bash
#
# Launches a series of two_choices_experiment_runner.py runs one after another.
# Each run writes its combined stdout/stderr to a .log file in the current
# working directory.
#
# Usage: run from the directory that contains two_choices_experiment_runner.py.

# Exported so every python child process started below inherits it; limits the
# CUDA devices visible to those processes to device index 1.
export CUDA_VISIBLE_DEVICES=1

# Experiment run: dataset arc_easy, --selection_method threshold with
# --threshold_factor 0.6 and --sum_threshold 0.7159 0.6419 0.6285.
#
# run_tag is the shared name used for the output directory, the plot directory
# and the log file of this run.
#
# stdout and stderr are both captured in ./${run_tag}.log. The portable
# `> file 2>&1` form is used instead of `&>`: a plain POSIX sh parses `&>` as
# "run in background" followed by a separate truncating redirect, which would
# start the job asynchronously and leave the log empty.
run_tag=mixtral_7b_arc_easy_threshold_06_sum_threshold_07159_06419_06285
python two_choices_experiment_runner.py \
  --model_name mistralai/Mixtral-8x7B-Instruct-v0.1 \
  --model_family mixtral_7b \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.6 \
  --sum_threshold 0.7159 0.6419 0.6285 \
  --seed 42 \
  --num_choices 2 3 4 5 6 \
  --max_new_tokens 128 \
  --dataset_name arc_easy \
  --output_dir "OUTPUT/${run_tag}" \
  --sample_size 512 \
  --batch_size 32 \
  --plot_dir "plots/${run_tag}" \
  > "${run_tag}.log" 2>&1


# Experiment run: dataset arc_challenge, --selection_method threshold with
# --threshold_factor 0.6 and --sum_threshold 0.7159 0.6419 0.6285.
#
# run_tag is the shared name used for the output directory, the plot directory
# and the log file of this run.
#
# stdout and stderr are both captured in ./${run_tag}.log. The portable
# `> file 2>&1` form is used instead of `&>`: a plain POSIX sh parses `&>` as
# "run in background" followed by a separate truncating redirect, which would
# start the job asynchronously and leave the log empty.
run_tag=mixtral_7b_arc_challenge_threshold_06_sum_threshold_07159_06419_06285
python two_choices_experiment_runner.py \
  --model_name mistralai/Mixtral-8x7B-Instruct-v0.1 \
  --model_family mixtral_7b \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.6 \
  --sum_threshold 0.7159 0.6419 0.6285 \
  --seed 42 \
  --num_choices 2 3 4 5 6 \
  --max_new_tokens 128 \
  --dataset_name arc_challenge \
  --output_dir "OUTPUT/${run_tag}" \
  --sample_size 512 \
  --batch_size 32 \
  --plot_dir "plots/${run_tag}" \
  > "${run_tag}.log" 2>&1

# Experiment run: dataset mmlu, --selection_method threshold with
# --threshold_factor 0.4 (the other runs in this file pass 0.6) and
# --sum_threshold 0.7159 0.6419 0.6285.
#
# run_tag is the shared name used for the output directory, the plot directory
# and the log file of this run.
#
# stdout and stderr are both captured in ./${run_tag}.log. The portable
# `> file 2>&1` form is used instead of `&>`: a plain POSIX sh parses `&>` as
# "run in background" followed by a separate truncating redirect, which would
# start the job asynchronously and leave the log empty.
run_tag=mixtral_7b_mmlu_threshold_04_sum_threshold_07159_06419_06285
python two_choices_experiment_runner.py \
  --model_name mistralai/Mixtral-8x7B-Instruct-v0.1 \
  --model_family mixtral_7b \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.4 \
  --sum_threshold 0.7159 0.6419 0.6285 \
  --seed 42 \
  --num_choices 2 3 4 5 6 \
  --max_new_tokens 128 \
  --dataset_name mmlu \
  --output_dir "OUTPUT/${run_tag}" \
  --sample_size 512 \
  --batch_size 32 \
  --plot_dir "plots/${run_tag}" \
  > "${run_tag}.log" 2>&1

# Experiment run: dataset gsm8k, --selection_method threshold with
# --threshold_factor 0.6. Unlike the other runs in this file it passes
# --sum_threshold 0.72 0.75 0.8, --max_new_tokens 1024 and --sample_size 256.
#
# run_tag is the shared name used for the output directory, the plot directory
# and the log file of this run.
#
# stdout and stderr are both captured in ./${run_tag}.log. The portable
# `> file 2>&1` form is used instead of `&>`: a plain POSIX sh parses `&>` as
# "run in background" followed by a separate truncating redirect, which would
# start the job asynchronously and leave the log empty.
run_tag=mixtral_7b_gsm8k_threshold_06_sum_threshold_072_075_08
python two_choices_experiment_runner.py \
  --model_name mistralai/Mixtral-8x7B-Instruct-v0.1 \
  --model_family mixtral_7b \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.6 \
  --sum_threshold 0.72 0.75 0.8 \
  --seed 42 \
  --num_choices 2 3 4 5 6 \
  --max_new_tokens 1024 \
  --dataset_name gsm8k \
  --output_dir "OUTPUT/${run_tag}" \
  --sample_size 256 \
  --batch_size 32 \
  --plot_dir "plots/${run_tag}" \
  > "${run_tag}.log" 2>&1


# Experiment run: dataset arc_easy, --selection_method threshold with
# --threshold_factor 0.6 and --sum_threshold 0.7159 0.6419 0.6285, with the
# additional --sample_before_load flag. Its output names carry a
# _sample_before_load suffix so they do not collide with a run that omits it.
#
# run_tag is the shared name used for the output directory, the plot directory
# and the log file of this run.
#
# stdout and stderr are both captured in ./${run_tag}.log. The portable
# `> file 2>&1` form is used instead of `&>`: a plain POSIX sh parses `&>` as
# "run in background" followed by a separate truncating redirect, which would
# start the job asynchronously and leave the log empty.
run_tag=mixtral_7b_arc_easy_threshold_06_sum_threshold_07159_06419_06285_sample_before_load
python two_choices_experiment_runner.py \
  --model_name mistralai/Mixtral-8x7B-Instruct-v0.1 \
  --model_family mixtral_7b \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.6 \
  --sum_threshold 0.7159 0.6419 0.6285 \
  --seed 42 \
  --num_choices 2 3 4 5 6 \
  --max_new_tokens 128 \
  --dataset_name arc_easy \
  --output_dir "OUTPUT/${run_tag}" \
  --sample_size 512 \
  --batch_size 32 \
  --sample_before_load \
  --plot_dir "plots/${run_tag}" \
  > "${run_tag}.log" 2>&1


