# Experiment-level settings.
experiment:
    # Must match the name of this config file
    # (presumably without the extension - TODO confirm).
    project: "sdar_eval"
    # Number of machines (nodes) you have.
    num_node: 1
    # No need to change.
    node_index: 0




# Absolute path to your model.
model: "/abs/path/of/model"
# Set to "sdar" for both TraDo and SDAR models.
model_base: "sdar"


# Dataset to evaluate on. Download it first; you can also adapt your own
# dataset - see the instructions in ./data.
dataset:
    # Options: "MBPP", "MATH500", "GSM8K", "AIME2024", "GPQA",
    # "LiveCodeBench", "HumanEval", "LiveBench"
    eval_dataset: "MATH500"
    # Options: "code", "math"
    data_type: "math"

execute:
    # NOTE(review): undocumented in this file; presumably the number of chunks
    # the work is split into - confirm against the code that reads it.
    num_chunk: 128

# Generation (rollout) settings.
rollout:
    # 1 by default. On OOM, first try reducing max_active; if it still OOMs,
    # set tensor_parallel_size to 8.
    tensor_parallel_size: 1
    max_active: 256
    num_response_per_task: 3
    temperature: 1.0
    # Maximum number of generated tokens.
    max_token: 2000
    block_size: 4
    denoising_steps_per_block: 4
    top_p: 1.0
    top_k: 0
    # Options: "low_confidence_static", "low_confidence_dynamic"
    remasking_strategy: "low_confidence_dynamic"
    dynamic_threshold: 0.9
    # Set to true for reasoning models, false otherwise.
    start_with_think: false
    output_unmasking_history: true


