---
# General runtime settings: target device, model identifier, and the dtype
# name for the model.
device: 'cuda'
large_model: 'opt-125m'
model_dtype: 'float32'

# Training parameters.
# NOTE(review): whether `eval_iterations` is an evaluation interval or a number
# of evaluation batches is not visible from this file -- confirm in the consumer.
lr: 0.00001
iterations: 1000
eval_iterations: 20

# Dataset name and per-split batch sizes.
dataset: 'squad'
train_batch_size: 8
test_batch_size: 8

# ZO parameters.
# NOTE(review): "zo" presumably stands for zeroth-order optimization, with
# `num_pert` the number of perturbations and `mu` the perturbation scale --
# TODO confirm against the code that reads these keys.
estimator_type: 'vanilla'
grad_estimate_method: 'rge-forward'
num_pert: 5
mu: 0.001

# Miscellaneous.
seed: 1234