# Composition list (Hydra-style `defaults`): entries are applied in order,
# so `ppo` is loaded first and `_self_` (this file) is applied after it.
# Values defined below therefore take precedence over those from `ppo`.
defaults:
  - ppo
  - _self_

# Identifier of the algorithm variant configured by this file.
alg_id: ppo_pp

# Settings for the `imdb` section.
imdb:
  id: ppo_pp

  policy:
    id: multi_actor_critic
    args:
      # Looks like a Hugging Face Hub model id for the guide model
      # (assumption from the `org/name` form; verify against the loader).
      guide_model_name: rajkumarrrk/gpt2-fine-tuned-on-imdb-positive-reviews
      alg_type: 'ppo_pp'
      # NOTE(review): the meaning of `beta` is defined by the policy
      # implementation, which is not visible from this file.
      beta: 0.6
      # Sampling options for the guide model; presumably forwarded to its
      # generation call — TODO confirm.
      guide_gen_kwargs:
        do_sample: true
        top_p: 0.92
        top_k: 50
        # min_length and max_new_tokens are deliberately the same value here.
        min_length: 48
        max_new_tokens: 48

# Settings for the `commongen` section.
commongen:
  id: ppo_pp

  policy:
    id: multi_actor_critic
    args:
      # Looks like a Hugging Face Hub model id for the guide model
      # (assumption from the `org/name` form; verify against the loader).
      guide_model_name: rajkumarrrk/t5-common-gen
      alg_type: 'ppo_pp'
      # NOTE(review): the meaning of `beta` is defined by the policy
      # implementation, which is not visible from this file.
      beta: 0.8
      # Sampling options for the guide model; presumably forwarded to its
      # generation call — TODO confirm.
      guide_gen_kwargs:
        do_sample: true
        top_p: 0.92
        # Assuming Hugging Face `generate` semantics, 0 disables top-k
        # filtering so only top_p restricts sampling — TODO confirm.
        top_k: 0
        min_length: 5
        max_new_tokens: 20

# Settings for the `tldr` section.
tldr:
  id: ppo_pp

  policy:
    id: multi_actor_critic
    args:
      # Looks like a Hugging Face Hub model id for the guide model
      # (assumption from the `org/name` form; verify against the loader).
      guide_model_name: CarperAI/openai_summarize_tldr_sft
      alg_type: 'ppo_pp'
      # NOTE(review): the meaning of `beta` is defined by the policy
      # implementation, which is not visible from this file.
      beta: 0.8
      # Sampling options for the guide model; presumably forwarded to its
      # generation call — TODO confirm.
      # NOTE(review): min_length / max_new_tokens are identical to the
      # `commongen` section (5 / 20); confirm these lengths are intended
      # for this task and are not a copy-paste leftover.
      guide_gen_kwargs:
        do_sample: true
        top_p: 0.92
        # Assuming Hugging Face `generate` semantics, 0 disables top-k
        # filtering so only top_p restricts sampling — TODO confirm.
        top_k: 0
        min_length: 5
        max_new_tokens: 20
