#!/usr/bin/env bash
#
# Generate HumanEval completions with generate.py, then sanitize and score
# them with the evalplus command-line tools.
#
# Usage: run from the directory that contains generate.py.

# Stop at the first failing stage and treat unset variables as errors, so a
# misspelled variable name aborts the run instead of expanding to nothing.
set -eu

mkdir -p results/humaneval

# Model selection, forwarded to generate.py.
MODEL_TYPE=mixtral
MODEL_SIZE=ins

# Dataset name, shared by generate.py and evalplus.evaluate.
TASK=humaneval

# Value forwarded to generate.py's --with_keywords option.
with_keywords=1

# Generation stage: GPUs 4-7 are exposed to the process and --tp is set to 4
# to match the number of visible devices.
CUDA_VISIBLE_DEVICES=4,5,6,7 python generate.py \
    --model_type "$MODEL_TYPE" \
    --model_size "$MODEL_SIZE" \
    --greedy \
    --root outputs \
    --dataset "$TASK" \
    --backend vllm \
    --tp 4 \
    --evalperf_type instruct \
    --with_keywords "$with_keywords"

# Location of the generated samples. The "_$MODEL_SIZE" part is only added
# when MODEL_SIZE is non-empty.
# NOTE(review): the "_temp_0.0_keywords-2-rank" suffix is hard-coded and
# presumably has to match the directory name generate.py produces for the
# flags above -- confirm against generate.py.
SAVE_PATH="outputs/$TASK/${MODEL_TYPE}${MODEL_SIZE:+_$MODEL_SIZE}_temp_0.0_keywords-2-rank"

# NOTE(review): assumes evalplus.sanitize writes its output next to the input
# with a "-sanitized" suffix -- confirm against the installed evalplus version.
SAVE_SANTH_PATH="$SAVE_PATH-sanitized"

evalplus.sanitize --samples "$SAVE_PATH"

evalplus.evaluate \
  --dataset "$TASK" \
  --samples "$SAVE_SANTH_PATH" \
  --i-just-wanna-run
