{
  "inference_settings": [
    {
      "use_vllm": true,
      "batch": 512,
      "tensor_parallel_size": 1,
      "model_settings": {
        "model_path": "/app/models/llama",
        "model_type": "causal",
        "transformers_settings": {},
        "model_kwargs": {},
        "embeddings_initialization_strategy": {}
      },
      "tokenizer_settings": {
        "use_fast": true
      },
      "generation_settings": [
        {
          "transformers_settings": {
            "num_beams": 1,
            "do_sample": true,
            "num_return_sequences": 1,
            "max_new_tokens": 4096,
            "temperature": 0.9,
            "top_p": 1,
            "top_k": 40,
            "stop_strings": [
              "<|eot_id|>",
              "<|end_of_text|>"
            ],
            "stop_token_ids": [
              128009,
              128001
            ]
          },
          "custom_settings": {
            "skip_special_tokens": true
          }
        }
      ]
    }
  ],
  "dataset_settings": {
    "sources": [
      {
        "name": "alpaca_eval",
        "records_path": "data/alpaca_eval/alpaca_eval.jsonl",
        "sample_rate": 1.0
      }
    ],
    "prompt_template": {
      "role_tag_mapping": {
        "bot": "assistant",
        "user": "user",
        "system": "system"
      },
      "prefix_template": "<|start_header_id|>{role}<|end_header_id|>\n\n",
      "suffix_template": "<|eot_id|>"
    },
    "dataset_type": "chat",
    "max_tokens_count": 4096,
    "keep_end": true,
    "only_answer_loss": true
  },
  "save_path": "inference_output"
}