# Hydra-style defaults list. Entries are composed in the order listed: the
# `ppo` config is loaded first and this file (`_self_`) is merged after it,
# so keys set below take precedence over same-named keys from `ppo`.
defaults:
  - ppo
  - _self_

# Algorithm identifier for this config.
# NOTE(review): presumably used by the consumer to look up the algorithm
# implementation registered under "gail" - confirm against the loader.
alg_id: gail

# Settings stored under the `imdb` key.
# NOTE(review): presumably the per-task section for the IMDB task - confirm
# against the code that reads this config.
imdb:
  id: gail
  build_reward: true
  args:
    # NOTE(review): "discrim" presumably abbreviates "discriminator" - confirm.
    discrim_batch_size: 28
    discrim_epochs: 1

  # Reward function definition: an `id`, its `args`, and the optimizer
  # settings nested under it.
  reward_fn:
    id: learned_reward
    args:
      model_name: lvwerra/distilbert-imdb
      label_ix: 1
      # Booleans are written in canonical lowercase form, matching
      # `build_reward` above.
      include_prompt_for_eval: true
      is_trainable: true
    optimizer:
      id: adamw
      args:
        # `lr` and `weight_decay` are interpolated from `alg.optimizer_kwargs`,
        # which is not defined in this file (presumably supplied by the `ppo`
        # default - verify).
        lr: ${alg.optimizer_kwargs.lr}
        weight_decay: ${alg.optimizer_kwargs.weight_decay}
        # Written with an explicit decimal point so that YAML 1.1 loaders
        # (e.g. PyYAML) also parse it as a float; a bare `1e-5` is read as
        # a string by those loaders.
        eps: 1.0e-5
