#!/bin/bash
# Driver script: sets up shared configuration, then runs the benchmark
# evaluations (humaneval, mbpp, gsm8k, math500) that follow in this file.

# Abort as soon as any command exits with a non-zero status.
set -e

# --- Paths -------------------------------------------------------------------
# Placeholders: replace both values with real locations before running.
# They are plain shell variables (not exported) and are only expanded into
# the command lines further down.
MODEL_PATH="your llada-instruct path or your llada1.5 path"
BASE_OUTPUT_PATH="your output path"

# --- Environment inherited by the launched processes -------------------------
# NOTE(review): HF_DATASETS_TRUST_REMOTE_CODE is presumably read by the
# Hugging Face `datasets` library to allow dataset loading scripts - confirm.
export HF_DATASETS_TRUST_REMOTE_CODE=true
# Two device indices, matching the --num_processes=2 used by every
# `accelerate launch` call in this file.
export CUDA_VISIBLE_DEVICES=3,4

# --- Tuning knobs -------------------------------------------------------------
# Exported so that the evaluation processes can read them from their
# environment. Their exact meaning is defined by the evaluation code, which is
# not visible from this script.
export BASE_RATE=0.95 RATE_FLEX=0.05 COLD_START=6
export DEAN_TOPP=0.85 DEAN_MODE="lrd"
export DIFFTHRESHOLD=0.1 KLTHRESHOLD=0.1
export GLOBAL_HS=0
# --- humaneval -----------------------------------------------------------------
# Runs evaluation_script.py on the "humaneval" task through `accelerate launch`
# (2 processes), writing results under OUTPUT_PATH, then runs
# metrics/humaneval.py on that same directory.
#
# `task` and `length` must be assigned here: OUTPUT_PATH is built from them and
# nothing else in this script sets them. Left unset they expand to empty
# strings, so the path becomes "<base>/_" and every benchmark in this file
# would write to the same directory.
task="humaneval"
length=512
OUTPUT_PATH="${BASE_OUTPUT_PATH}/${task}_${length}"

# --gen_kwargs is one properly quoted string (no nested double quotes). The
# trailing space in --model_args and --gen_kwargs is kept on purpose so the
# argument values handed to the script stay exactly what they were.
# NOTE(review): --confirm_run_unsafe_code is presumably needed because this
# task executes model-generated code - confirm against the harness.
accelerate launch --num_processes=2 --main_process_port 12345 evaluation_script.py \
    -m dllm_eval \
    --model LLaDA_ZHEN \
    --tasks "${task}" \
    --batch_size 1 \
    --model_args "pretrained=${MODEL_PATH},assistant_prefix=<reasoning> " \
    --gen_kwargs "block_length=32,gen_length=${length},steps=512,cfg_scale=0.0,remasking=low_confidence " \
    --num_fewshot 0 \
    --output_path "${OUTPUT_PATH}" \
    --log_samples \
    --apply_chat_template \
    --fewshot_as_multiturn \
    --confirm_run_unsafe_code

# Post-process the results that the launch above wrote to OUTPUT_PATH.
python metrics/humaneval.py \
    --model_path "${MODEL_PATH}" \
    --res_path "${OUTPUT_PATH}"

# --- mbpp ----------------------------------------------------------------------
# Runs evaluation_script.py on the "mbpp" task through `accelerate launch`
# (2 processes), writing results under OUTPUT_PATH, then runs metrics/mbpp.py
# on that same directory.
#
# `task` and `length` must be assigned here: OUTPUT_PATH is built from them.
# If they are unset or stale, this benchmark's results land in a directory
# shared with another benchmark instead of its own.
task="mbpp"
length=512
OUTPUT_PATH="${BASE_OUTPUT_PATH}/${task}_${length}"

# --gen_kwargs is one properly quoted string (no nested double quotes). The
# trailing space in --model_args and --gen_kwargs is kept on purpose so the
# argument values handed to the script stay exactly what they were.
# NOTE(review): --confirm_run_unsafe_code is presumably needed because this
# task executes model-generated code - confirm against the harness.
accelerate launch --num_processes=2 --main_process_port 12345 evaluation_script.py \
    -m dllm_eval \
    --model LLaDA_ZHEN \
    --tasks "${task}" \
    --batch_size 1 \
    --model_args "pretrained=${MODEL_PATH},assistant_prefix=<reasoning> " \
    --gen_kwargs "block_length=32,gen_length=${length},steps=512,cfg_scale=0.0,remasking=low_confidence " \
    --num_fewshot 0 \
    --output_path "${OUTPUT_PATH}" \
    --log_samples \
    --apply_chat_template \
    --fewshot_as_multiturn \
    --confirm_run_unsafe_code

# Post-process the results that the launch above wrote to OUTPUT_PATH.
python metrics/mbpp.py \
    --model_path "${MODEL_PATH}" \
    --res_path "${OUTPUT_PATH}"

# --- gsm8k ---------------------------------------------------------------------
# Runs evaluation_script.py on the "gsm8k" task through `accelerate launch`
# (2 processes), writing results under OUTPUT_PATH, then runs metrics/gsm8k.py
# on that same directory.
#
# `task` and `length` must be assigned here: OUTPUT_PATH is built from them.
# If they are unset or stale, this benchmark's results land in a directory
# shared with another benchmark instead of its own.
task="gsm8k"
length=512
OUTPUT_PATH="${BASE_OUTPUT_PATH}/${task}_${length}"

# --gen_kwargs is one properly quoted string (no nested double quotes). The
# trailing space in --model_args and --gen_kwargs is kept on purpose so the
# argument values handed to the script stay exactly what they were.
accelerate launch --num_processes=2 --main_process_port 12345 evaluation_script.py \
    -m dllm_eval \
    --model LLaDA_ZHEN \
    --tasks "${task}" \
    --batch_size 1 \
    --model_args "pretrained=${MODEL_PATH},assistant_prefix=<reasoning> " \
    --gen_kwargs "block_length=32,gen_length=${length},steps=512,cfg_scale=0.0,remasking=low_confidence " \
    --num_fewshot 0 \
    --output_path "${OUTPUT_PATH}" \
    --log_samples \
    --apply_chat_template \
    --fewshot_as_multiturn

# Post-process the results that the launch above wrote to OUTPUT_PATH.
python metrics/gsm8k.py \
    --model_path "${MODEL_PATH}" \
    --res_path "${OUTPUT_PATH}"


# --- math500 -------------------------------------------------------------------
# Runs evaluation_script.py on the "math500" task through `accelerate launch`
# (2 processes), writing results under OUTPUT_PATH, then runs
# metrics/math500.py on that same directory.
#
# `task` and `length` must be assigned here: OUTPUT_PATH is built from them.
# If they are unset or stale, this benchmark's results land in a directory
# shared with another benchmark instead of its own.
task="math500"
length=512
OUTPUT_PATH="${BASE_OUTPUT_PATH}/${task}_${length}"

# --gen_kwargs is one properly quoted string (no nested double quotes). The
# trailing space in --model_args and --gen_kwargs is kept on purpose so the
# argument values handed to the script stay exactly what they were.
accelerate launch --num_processes=2 --main_process_port 12345 evaluation_script.py \
    -m dllm_eval \
    --model LLaDA_ZHEN \
    --tasks "${task}" \
    --batch_size 1 \
    --model_args "pretrained=${MODEL_PATH},assistant_prefix=<reasoning> " \
    --gen_kwargs "block_length=32,gen_length=${length},steps=512,cfg_scale=0.0,remasking=low_confidence " \
    --num_fewshot 0 \
    --output_path "${OUTPUT_PATH}" \
    --log_samples \
    --apply_chat_template \
    --fewshot_as_multiturn

# Post-process the results that the launch above wrote to OUTPUT_PATH.
python metrics/math500.py \
    --model_path "${MODEL_PATH}" \
    --res_path "${OUTPUT_PATH}"