# Default configuration for model inference.
# Flat set of top-level key/value defaults; the program that reads this file
# is not part of this file, so notes on how values are consumed are hedged.

# Python environment
python_vllm: python  # Python interpreter to use

# Generation parameters
max_new_tokens: 8192
# NOTE(review): do_sample is enabled while temperature is 0. vLLM documents a
# temperature of 0 as greedy decoding, so sampling may be effectively off.
# Confirm which of the two settings the inference script actually honors.
do_sample: true
top_p: 1
temperature: 0

# vLLM parameters
vllm_max_num_seqs: 64
tensor_parallel_size: 8  # Will be overridden based on GPU count

# File paths
# NOTE(review): relative path; presumably resolved from the repository root.
# Verify against the launcher.
inference_script: exps/eval_vlms/build_responses_vlms.py
