# Training run configuration: base model, optimisation/batching
# hyperparameters, and the datasets to train on.
model_name: bigscience/bloomz-560m
# Written with an explicit mantissa dot on purpose: YAML 1.1 loaders
# (e.g. PyYAML) resolve a bare `3e-5` as the string "3e-5", not a float.
learning_rate: 3.0e-5
gradient_accumulation_steps: 16
per_device_train_batch_size: 2
# NOTE(review): presumably a token-count cap per sample; confirm with the
# consuming training script.
max_length: 600
# NOTE(review): exact semantics (layer index vs. number of frozen layers)
# are defined by the consumer, not visible here; confirm before changing.
freeze_layer: 12
num_train_epochs: 2
datasets:
  - webgpt
  - hfsummary
