# General
# Runtime device and base-model selection.
# NOTE(review): how these strings are interpreted depends on the consuming
# loader, which is not visible in this file.
# Device identifier string.
device: "cuda"
# Looks like a Hugging Face Hub model ID (org/name) -- TODO confirm.
large_model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Dtype given as a string; presumably mapped to a framework dtype by the
# consumer -- verify.
model_dtype: "float32"

# training parameters
# Keep `lr` in plain decimal notation: some YAML 1.1 loaders (e.g. PyYAML)
# read exponent forms without a dot, such as `1e-5`, as a string, not a float.
lr: 0.00001
# NOTE(review): what counts as one "iteration" (step vs. epoch) is not
# visible here -- confirm against the training loop.
iterations: 2000
# Ambiguous from the name: presumably either the evaluation cadence or the
# number of evaluation batches -- TODO confirm.
eval_iterations: 20

# dataset
# Dataset identifier; "wic" presumably refers to the Word-in-Context task --
# TODO confirm against the data loader.
dataset: "wic"
# Batch sizes for the train and test splits (per the key names).
train_batch_size: 32
test_batch_size: 32

# zo parameters
# NOTE(review): "zo" presumably stands for zeroth-order optimization; the
# meanings below are inferred from the key names -- confirm against the
# estimator code.
# String values use double quotes to match the rest of this file.
estimator_type: "vanilla"
# Presumably randomized gradient estimation with central differences -- verify.
grad_estimate_method: "rge-central"
# Name of the distribution the perturbations are drawn from (per the key name).
pert_distribution: "gaussian"
# Presumably the number of perturbations per gradient estimate -- confirm.
num_pert: 5
# Presumably the perturbation scale / smoothing parameter -- confirm.
mu: 0.001

# MISC
# Seed value; presumably used to seed random number generators for
# reproducibility -- confirm where it is consumed.
seed: 1234