# Model entry: Tulu 2 DPO 13B completions with PairRM best-of-16 selection.
pairrm-tulu-2-13b:
  # Prompt template path points at the tulu-2-dpo-70b directory.
  prompt_template: "tulu-2-dpo-70b/prompt.txt"
  fn_completions: "vllm_local_completions"
  completions_kwargs:
    model_name: "allenai/tulu-2-dpo-13b"
    model_kwargs:
      torch_dtype: "bfloat16"
      # presumably the tensor-parallel degree (number of GPUs) - TODO confirm
      tp: 2
    max_new_tokens: 7500
    # NOTE(review): temperature 0.0 with do_sample disabled looks like greedy
    # decoding; confirm the best_of candidates can actually differ.
    temperature: 0.0
    top_p: 1.0
    do_sample: false
    # number of completions to generate, using PairRM to select the best one
    best_of: 16
    batch_size: 800
  pretty_name: "PairRM 0.4B+Tulu 2+DPO 13B (best-of-16)"
  link: "https://huggingface.co/llm-blender/PairRM"