# Model configuration for OpenBuddy-LLaMA2-70B v10.1 (bf16 weights).
# Completions are produced by the `vllm_local_completions` function using
# weights loaded from a local directory.
openbuddy-llama2-70b-v10.1:
  # Prompt template file, given as a relative path.
  prompt_template: "openbuddy-llama2-70b-v10.1/prompt.txt"
  fn_completions: "vllm_local_completions"
  completions_kwargs:
    # Local path to the model weights (not a hub identifier).
    model_name: "./openbuddy-llama2-70b-v10.1-bf16"
    model_kwargs:
      dtype: "bfloat16"
      # Tensor parallelism degree: 4 for 4 GPUs.
      tensor_parallel_size: 4
    # On vLLM, prompt_tokens + max_new_tokens must be <= model_max_length;
    # otherwise an error is raised and nothing is generated.
    max_new_tokens: 2000
    # NOTE(review): `do_sample` is false while `temperature` is 0.7 and
    # `top_p` is 1.0 -- confirm which of these settings the completion
    # function actually honors before relying on deterministic output.
    temperature: 0.7
    top_p: 1.0
    do_sample: false
    batch_size: 16
  # Human-readable display name for this model.
  pretty_name: "OpenBuddy-LLaMA2-70B-v10.1"
  # Upstream model card.
  link: "https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16"
