---
# Experiment configuration: one flat mapping of scalar settings.
# Key names and values are consumed by code outside this file; notes on
# meaning below are marked as assumptions wherever this file alone cannot
# confirm them.

# General
device: 'cuda'
large_model: 'gpt2'  # presumably a pretrained-model identifier -- TODO confirm
model_dtype: 'float32'

# Training parameters
lr: 0.0000005  # plain decimal float (5e-7); presumably the learning rate
iterations: 1000
# NOTE(review): unclear from this file whether this is an evaluation interval
# or a number of evaluation steps -- confirm against the consumer.
eval_iterations: 20

# Dataset
dataset: 'rte'
train_batch_size: 32
test_batch_size: 32

# ZO parameters
# NOTE(review): "zo" presumably stands for zeroth-order optimization -- confirm.
estimator_type: 'vanilla'
grad_estimate_method: 'rge-central'
pert_distribution: 'gaussian'
num_pert: 5  # presumably the number of perturbations per estimate
mu: 0.001  # presumably the perturbation scale -- TODO confirm

# Misc
seed: 1234