#!/usr/bin/env bash
# Run commonsense_evaluate.py once per commonsense-reasoning dataset and
# append each run's output to <MODEL_DIR>/<dataset>.txt.
#
# Usage: <this-script> [MODEL_DIR] [GPU_ID]
#   MODEL_DIR  directory passed to --model_dir; also receives the per-dataset
#              log files (default: output/llama3-mog)
#   GPU_ID     value assigned to CUDA_VISIBLE_DEVICES for each run (default: 0)
#
# Exit status: 0 if every pipeline succeeded, 1 otherwise. All datasets are
# attempted even if an earlier one fails.
#
# Only POSIX sh constructs are used so that `sh <this-script>` keeps working.

OUTPUT_PATH="${1:-output/llama3-mog}"
DEVICE="${2:-0}"

# tee cannot create its log file inside a missing directory; fail once, early,
# with a clear message instead of failing once per dataset.
if [ ! -d "$OUTPUT_PATH" ]; then
    printf 'error: model directory not found: %s\n' "$OUTPUT_PATH" >&2
    exit 1
fi

# Without pipefail the status of `python ... | tee` is tee's alone, which hides
# evaluation crashes. Probe in a subshell first because shells that lack the
# option would abort on the unknown `set -o` name.
if (set -o pipefail) 2>/dev/null; then
    set -o pipefail
fi

status=0
for dataset in \
    boolq \
    piqa \
    social_i_qa \
    winogrande \
    ARC-Easy \
    ARC-Challenge \
    openbookqa \
    hellaswag
do
    # tee -a appends, so logs from earlier invocations are preserved.
    if ! CUDA_VISIBLE_DEVICES="$DEVICE" python commonsense_evaluate.py \
        --dataset "$dataset" \
        --batch_size 1 \
        --model_dir "$OUTPUT_PATH" | tee -a "$OUTPUT_PATH/$dataset.txt"; then
        printf 'warning: evaluation or logging failed for dataset: %s\n' "$dataset" >&2
        status=1
    fi
done

exit "$status"
    