# Top-level run switches.
# presumably the random seed for the run — TODO confirm which RNGs the consumer seeds
seed: 555
# presumably toggles Weights & Biases logging; the `wandb:` section further down
# holds project/run names — confirm whether that section is ignored when this is false
use_wandb: true

# Per-layer compression settings. Each entry is keyed by a label (layer8,
# layer16, layer24) and carries a `layer_idx` equal to the number in the label.
# All three entries use identical settings apart from `layer_idx`: topk in both
# directions, EF enabled with method EF21, and a topk parameter of 0.5.
# NOTE(review): "EF" presumably means error feedback (EF21 being the specific
# variant), and `topk: 0.5` presumably is the fraction of entries kept rather
# than an absolute count — confirm against the code that reads this file.
compression_config:
  layer8:
    # presumably the index of the model layer this entry applies to — confirm
    # whether the consumer uses `layer_idx` or the mapping key
    layer_idx: 8
    # Settings for the forward direction (compressor name, EF switch, EF
    # variant, compressor parameters).
    forward: topk
    forward-EF: true
    forward-EF-method: EF21
    forward-params:
      topk: 0.5
    # Settings for the backward direction; same shape as the forward ones.
    backward: topk
    backward-EF: true
    backward-EF-method: EF21
    backward-params:
      topk: 0.5
  # Same settings as layer8, with layer_idx 16.
  layer16:
    layer_idx: 16
    forward: topk
    forward-EF: true
    forward-EF-method: EF21
    forward-params:
      topk: 0.5
    backward: topk
    backward-EF: true
    backward-EF-method: EF21
    backward-params:
      topk: 0.5
  # Same settings as layer8, with layer_idx 24.
  layer24:
    layer_idx: 24
    forward: topk
    forward-EF: true
    forward-EF-method: EF21
    forward-params:
      topk: 0.5
    backward: topk
    backward-EF: true
    backward-EF-method: EF21
    backward-params:
      topk: 0.5

# Training run settings (optimizer step size, model, dataset, batch shape and
# feature switches). Values are unchanged from the original; only trailing
# whitespace and comment spacing were normalized.
training:
  # Kept in plain decimal form: a dot-less exponent such as `5e-5` is read as a
  # string by YAML 1.1 loaders (e.g. PyYAML), so do not "simplify" this value.
  learning_rate: 0.00005
  # presumably a Hugging Face Hub model id (org/name) — confirm how it is loaded
  model: NousResearch/Llama-2-7b-hf
  dataset: gsm8k  # gsm8k/wikitext
  epochs: 4
  batch_size: 2
  # presumably the sequence length in tokens — TODO confirm
  block_size: 1024
  gradient_checkpointing: false
  # presumably switches on the AQ-SGD method — confirm against the consumer
  aq_sgd: false
  lazy_sampling: false
  # NOTE(review): `lazy_sampling` is false above, so these parameters are
  # presumably unused for this run — verify that the consumer ignores them.
  lazy_sampling_params:
    schedule: constant
    # Parsed as the integer 1, not the float 1.0; presumably a probability —
    # confirm the consumer accepts an int here.
    p_t: 1

# Names used for experiment tracking; presumably passed to Weights & Biases as
# the project and run name — confirm against the code that reads this section.
wandb:
  # NOTE(review): the trailing comment looks like an alternative or earlier
  # project name — confirm before relying on it.
  project: llama-test  # gpt2-compression
  # The name mirrors settings elsewhere in this file: dataset `gsm8k` and a
  # topk parameter of 0.5. Keep it in sync by hand if those change.
  name: gsm8k-topk05

# presumably the directory where run outputs are written — confirm.
# The value is the literal string `~/llama2_output`: YAML does not expand `~`,
# so the consumer must do it (e.g. os.path.expanduser in Python) — verify,
# otherwise a directory literally named `~` may be created in the working dir.
output_dir: '~/llama2_output'
