#!/usr/bin/env bash
# Runs a series of two_choices_experiment_runner.py jobs one after another.
# Each job sends its output to its own .log file in the current directory.
# The shebang pins the interpreter to bash when the file is executed directly.

# Expose only CUDA device 0 to every process started from this script.
export CUDA_VISIBLE_DEVICES=0

# Run: dataset arc_easy, threshold selection
# (threshold_factor 0.8, sum_threshold 0.35), 512 samples.
# stdout and stderr are both captured in the .log file. The redirection is
# written as `> file 2>&1` because the `&>` shorthand is bash-only: a POSIX
# sh parses it as `&` followed by `> file`, which would background the job
# and leave the log empty.
python two_choices_experiment_runner.py \
  --model_name deepseek-ai/deepseek-moe-16b-chat \
  --model_family deepseek \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.8 \
  --sum_threshold 0.35 \
  --seed 42 \
  --num_choices 6 7 8 9 10 \
  --max_new_tokens 128 \
  --dataset_name arc_easy \
  --output_dir OUTPUT/ds_16b_arc_easy_threshold_08_sum_threshold_035 \
  --sample_size 512 \
  --batch_size 32 \
  --plot_dir plots/ds_16b_arc_easy_threshold_08_sum_threshold_035 \
  > ds_16b_arc_easy_threshold_08_sum_threshold_035.log 2>&1


# Run: dataset arc_challenge, threshold selection
# (threshold_factor 0.8, sum_threshold 0.4), 512 samples.
# stdout and stderr are both captured in the .log file. The redirection is
# written as `> file 2>&1` because the `&>` shorthand is bash-only: a POSIX
# sh parses it as `&` followed by `> file`, which would background the job
# and leave the log empty.
python two_choices_experiment_runner.py \
  --model_name deepseek-ai/deepseek-moe-16b-chat \
  --model_family deepseek \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.8 \
  --sum_threshold 0.4 \
  --seed 42 \
  --num_choices 6 7 8 9 10 \
  --max_new_tokens 128 \
  --dataset_name arc_challenge \
  --output_dir OUTPUT/ds_16b_arc_challenge_threshold_08_sum_threshold_04 \
  --sample_size 512 \
  --batch_size 32 \
  --plot_dir plots/ds_16b_arc_challenge_threshold_08_sum_threshold_04 \
  > ds_16b_arc_challenge_threshold_08_sum_threshold_04.log 2>&1

# Run: dataset gsm8k, threshold selection with three threshold_factor values
# (0.25 0.45 0.55) and sum_threshold 0.3. Unlike the other runs in this file
# it uses --max_new_tokens 1024 and --sample_size 256.
# stdout and stderr are both captured in the .log file. The redirection is
# written as `> file 2>&1` because the `&>` shorthand is bash-only: a POSIX
# sh parses it as `&` followed by `> file`, which would background the job
# and leave the log empty.
python two_choices_experiment_runner.py \
  --model_name deepseek-ai/deepseek-moe-16b-chat \
  --model_family deepseek \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.25 0.45 0.55 \
  --sum_threshold 0.3 \
  --seed 42 \
  --num_choices 6 7 8 9 10 \
  --max_new_tokens 1024 \
  --dataset_name gsm8k \
  --output_dir OUTPUT/ds_16b_gsm8k_threshold_025_045_055_sum_threshold_03 \
  --sample_size 256 \
  --batch_size 32 \
  --plot_dir plots/ds_16b_gsm8k_threshold_025_045_055_sum_threshold_03 \
  > ds_16b_gsm8k_threshold_025_045_055_sum_threshold_03.log 2>&1

# Run: dataset mmlu, threshold selection
# (threshold_factor 0.8, sum_threshold 0.4), 512 samples.
# stdout and stderr are both captured in the .log file. The redirection is
# written as `> file 2>&1` because the `&>` shorthand is bash-only: a POSIX
# sh parses it as `&` followed by `> file`, which would background the job
# and leave the log empty.
python two_choices_experiment_runner.py \
  --model_name deepseek-ai/deepseek-moe-16b-chat \
  --model_family deepseek \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.8 \
  --sum_threshold 0.4 \
  --seed 42 \
  --num_choices 6 7 8 9 10 \
  --max_new_tokens 128 \
  --dataset_name mmlu \
  --output_dir OUTPUT/ds_16b_mmlu_threshold_08_sum_threshold_04 \
  --sample_size 512 \
  --batch_size 32 \
  --plot_dir plots/ds_16b_mmlu_threshold_08_sum_threshold_04 \
  > ds_16b_mmlu_threshold_08_sum_threshold_04.log 2>&1


# Run: dataset arc_easy with threshold_factor 0.8 and sum_threshold 0.35,
# plus the --sample_before_load flag. Output, plot and log paths carry a
# `_sample_before_load` suffix so they do not collide with a run that uses
# the same settings without the flag.
# stdout and stderr are both captured in the .log file. The redirection is
# written as `> file 2>&1` because the `&>` shorthand is bash-only: a POSIX
# sh parses it as `&` followed by `> file`, which would background the job
# and leave the log empty.
python two_choices_experiment_runner.py \
  --model_name deepseek-ai/deepseek-moe-16b-chat \
  --model_family deepseek \
  --mode qa \
  --model_type chat \
  --selection_method threshold \
  --threshold_factor 0.8 \
  --sum_threshold 0.35 \
  --seed 42 \
  --num_choices 6 7 8 9 10 \
  --max_new_tokens 128 \
  --dataset_name arc_easy \
  --output_dir OUTPUT/ds_16b_arc_easy_threshold_08_sum_threshold_035_sample_before_load \
  --sample_size 512 \
  --batch_size 32 \
  --sample_before_load \
  --plot_dir plots/ds_16b_arc_easy_threshold_08_sum_threshold_035_sample_before_load \
  > ds_16b_arc_easy_threshold_08_sum_threshold_035_sample_before_load.log 2>&1
