---
# Run configuration.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that consumes this file.

# Model selection.
model: 'hf_gen_rand'
model_name: 'llama2-7b'

# A single comma-separated string, not a YAML list.
# Presumably "<dataset>,<config>" -- TODO confirm with the dataset loader.
dataset_name: 'wikitext,wikitext-2-v1'

use_flash_attn: false
seq_max_length: 1024

# Looks like dynamic loss-scaling settings (scale bounds plus growth/backoff
# multipliers) -- TODO confirm with the consumer of booster_args.
booster_args:
  initial_scale: 0.125
  min_scale: 0.000001
  growth_factor: 2.0
  backoff_factor: 0.5
  hysteresis: 2

# Quoted on purpose: an unquoted [PAD] would be parsed as a flow sequence.
special_tokens:
  pad_token: '[PAD]'

