# Experiment configuration; the top-level `task` key at the bottom is `edit`.
# NOTE(review): the meaning of each key is defined by the code that loads
# this file, which is not visible here. Comments marked "presumably" are
# inferred from key names only and should be confirmed against that loader.
experiment:
  # Dataset `qa`, `test` split, n_edits = 1000.
  dataset:
    name: qa
    split: test
    n_edits: 1000
  model:
    name: gpt2-xl
    # Relative path with no leading `/` or `./`; presumably resolved against
    # the process working directory or a project root -- TODO confirm.
    sim: checkpoints/qa
  # Same `name` and `n_edits` as `dataset` above, but the `train` split.
  # Presumably the data used for the gate rather than for evaluation --
  # TODO confirm. Keep the two sections in sync if the dataset changes.
  gate:
    name: qa
    split: train
    n_edits: 1000
  editor:
    n_experts: 5
    n_epochs: 25
    # NOTE(review): presumably has to equal the embedding width of
    # `model.name`; re-check this value if the model is swapped -- confirm.
    n_embed: 1600
    threshold: 0.6
    seq: true
    # Plain (unquoted) scalar: loads as the string "transformer.h[0]" because
    # `[` is not the first character. A value that *starts* with `[` would be
    # parsed as a flow sequence and would need quoting.
    layer: transformer.h[0]
    # NOTE(review): 3 is below n_experts (5); presumably a number of experts
    # selected per input -- confirm against the loader.
    top_k: 3
task: edit
