# main.py
# --- Run-time switches ------------------------------------------------
record_time = False
statistics = False

# --- Model and generation settings ------------------------------------
# The choices noted alongside this setting are "Dream" or "LLaDA".
# NOTE(review): the value in use is "LLaDA-Instruct" -- confirm the full
# set of accepted names against the code that reads this setting.
model_name = "LLaDA-Instruct"
gen_length = 256
token_per_step = 64
block_length = 64
parallel_mode = True
# Threshold for confidence-aware parallel decoding.
threshold = 0.9
# Whether to limit the generation of the <eos> token (only generation
# after unmasked tokens).
delay_eos_generation = True

# --- Adaptive decoding configuration ----------------------------------
use_kvcache = True
block_update_freq = 8
# Bound to block_length at import time; later changes to block_length do
# not propagate here.
prompt_update_freq = block_length
# One of "Key", "Value", "Query" or "HiddenState".
# NOTE(review): None was previously kept as a commented-out alternative;
# presumably it turns this mode off -- confirm before relying on it.
ESdLLM_mode = "HiddenState"
importance_score_alpha = 0.5
proportion_steps = [
    (1, 0),
    (0.5, 0.125),
    (0.25, 0.25),
]