# 评估模型配置文件
# 默认配置 - 所有模型的通用设置
defaults:
  # Shared settings. The field notes in this file state that azure_endpoint
  # and api_version fall back to these values when a model entry omits them.
  # Replace azure_endpoint with your own OpenAI-compatible service or
  # Azure OpenAI endpoint.
  azure_endpoint: 'https://YOUR_OPENAI_COMPAT_ENDPOINT'
  # Quoted so the value is always read as a string.
  api_version: '2024-03-01-preview'
  temperature: 1.0

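# Model configuration - each entry is keyed by its model_name.
# Required fields: api_key, max_tokens
# Optional fields:
# - temperature: several entries below omit it; presumably the value in defaults is used (TODO confirm against the loader)
# - azure_endpoint: falls back to the value in defaults when omitted
# - api_version: falls back to the value in defaults when omitted
# - thinking_enabled: whether the endpoint can return thinking output; treated as unsupported when omitted
# - thinking_budget: maximum number of thinking tokens the claude and openai interfaces may return; 2000 when omitted; not supported by gemini
# Model list: <internal_doc_link>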
models:
  # --- Gemini family -------------------------------------------------------
  # All four entries set thinking_enabled: true.
  gemini-2.5-pro:
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 0.6
    max_tokens: 32000

  gemini-2.5-pro-preview-05-06:
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 0.6
    max_tokens: 32000

  # The only Gemini entry that sets its own azure_endpoint.
  gemini-2.5-flash:
    azure_endpoint: 'https://YOUR_OPENAI_COMPAT_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 1.0
    max_tokens: 32000

  gemini-3-pro-preview:
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 0.6
    max_tokens: 32000

  # --- Claude family -------------------------------------------------------
  # Every entry sets its own azure_endpoint, temperature 1.0 and
  # thinking_enabled: true; only max_tokens differs (opus 32000, sonnet 64000).
  gcp-claude4-opus:
    azure_endpoint: 'https://YOUR_OPENAI_COMPAT_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 1.0
    max_tokens: 32000

  gcp-claude4.1-opus:
    azure_endpoint: 'https://YOUR_OPENAI_COMPAT_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 1.0
    max_tokens: 32000

  gcp-claude4-sonnet:
    azure_endpoint: 'https://YOUR_OPENAI_COMPAT_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 1.0
    max_tokens: 64000

  gcp-claude37-sonnet:
    azure_endpoint: 'https://YOUR_OPENAI_COMPAT_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true
    temperature: 1.0
    max_tokens: 64000

  # --- GPT family ----------------------------------------------------------
  # The gpt-4.x entries set only api_key and max_tokens.
  gpt-4.1-2025-04-14:
    max_tokens: 12000
    api_key: 'YOUR_API_KEY'

  gpt-4o-2024-11-20:
    max_tokens: 32000
    api_key: 'YOUR_API_KEY'

  gpt-4.1-mini-2025-04-14:
    max_tokens: 32000
    api_key: 'YOUR_API_KEY'

  gpt-4.1-nano-2025-04-14:
    max_tokens: 32000
    api_key: 'YOUR_API_KEY'

  # The gpt-5 entries additionally set thinking_enabled: true.
  gpt-5-2025-08-07:
    azure_endpoint: 'https://YOUR_OPENAI_COMPAT_ENDPOINT'
    max_tokens: 128000
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true

  gpt-5-mini-2025-08-07:
    max_tokens: 128000
    api_key: 'YOUR_API_KEY'
    thinking_enabled: true

  
  # o3-2025-04-16 and o4-mini-2025-04-16 used to be defined at this position
  # AND again further down in this mapping. Duplicate mapping keys are invalid
  # YAML: strict loaders reject the file, and lenient ones typically keep only
  # the last occurrence, which silently shadowed the copies that lived here.
  # They are preserved as comments only to record the values they carried.
  #
  # o3-2025-04-16:
  #   api_key: YOUR_API_KEY
  #   max_tokens: 32768
  #   temperature: 1.0
  #
  # o4-mini-2025-04-16:
  #   api_key: YOUR_API_KEY
  #   max_tokens: 32768
  #   temperature: 1.0

  # seed_thinking_v1.5, addressed through the endpoint's /bots path.
  bot-20250515142114-xmfh2:
    azure_endpoint: https://YOUR_ARK_ENDPOINT/bots
    api_key: YOUR_API_KEY
    max_tokens: 32000
    # Canonical lowercase boolean, consistent with every other flag in this file.
    is_ark_bot: true

  # Both entries use temperature 0.6 with thinking_enabled: true.
  grok-3:
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 32768

  gpt-oss-120b:
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 64000

  # o-series entries with a 64000-token output cap.
  # o3 sets no temperature of its own.
  o3-2025-04-16:
    max_tokens: 64000
    api_key: 'YOUR_API_KEY'

  o4-mini-2025-04-16:
    max_tokens: 64000
    temperature: 0.6
    api_key: 'YOUR_API_KEY'

  # Endpoint-id keyed entries; the model each id refers to is named above it.
  # deepseekv3-0324
  ep-20250328185550-g5zcd:
    azure_endpoint: 'https://YOUR_ARK_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 64000

  # deepseekv3.1
  ep-20250826141533-cf9bk:
    azure_endpoint: 'https://YOUR_ARK_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 32000

  # All four entries use temperature 0.6 with thinking_enabled: true.
  glm-4.6:
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 32000

  azure-grok-4:
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 32000

  kimi-k2-thinking:
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 64000

  # Doubao-Seed-1.6 | 250615
  ep-20250930155653-5qv4q:
    azure_endpoint: 'https://YOUR_ARK_ENDPOINT'
    api_key: 'YOUR_API_KEY'
    temperature: 0.6
    thinking_enabled: true
    max_tokens: 32000

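  # ToolACE-2:  # example entry for a vLLM OpenAI-compatible service
  #   azure_endpoint: http://YOUR_VLLM_HOST:18901/v1  # vLLM server address
  #   api_key: EMPTY  # vLLM accepts "EMPTY" by default
  #   max_tokens: 32000
  #   temperature: 0.7
  #   thinking_enabled: false
  #   is_vllm: true
  #   use_plain_content: false      # original note: plain-text content is the more reliable choice for vLLM, yet this example sets false - TODO confirm
  #   disable_proxy: true           # keep proxies from interfering with intranet access
  #   client_timeout: 60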

  # ToolACE-2 used to be defined at this position AND again at the end of this
  # mapping. Duplicate mapping keys are invalid YAML: strict loaders reject the
  # file, and lenient ones typically keep only the last occurrence. The copy
  # that lived here was identical to the final definition except for
  # max_tokens: 32000, so it has been removed to leave a single definition.

  # --- Entries flagged is_vllm: true ---------------------------------------
  # Shared by all four: api_key EMPTY, max_tokens 4096, temperature 0.7,
  # disable_proxy true, and use_vllm_toolcall_format false (disables
  # text-embedded <tool_calls>; uses OpenAI function-calling with the tools
  # parameter instead).
  # NOTE(review): watt-tool-70B, qwen2.5-32b-rl-20 and Qwen3-32b-rl all point
  # at port 18908 - confirm this is intended.
  qwen3-32b:
    azure_endpoint: 'http://YOUR_VLLM_HOST:18902/v1'
    api_key: 'EMPTY'
    is_vllm: true
    disable_proxy: true
    use_plain_content: true
    use_vllm_toolcall_format: false
    thinking_enabled: false
    temperature: 0.7
    max_tokens: 4096

  watt-tool-70B:
    azure_endpoint: 'http://YOUR_VLLM_HOST:18908/v1'
    api_key: 'EMPTY'
    is_vllm: true
    disable_proxy: true
    use_plain_content: false
    use_vllm_toolcall_format: false
    thinking_enabled: false
    temperature: 0.7
    max_tokens: 4096

  qwen2.5-32b-rl-20:
    azure_endpoint: 'http://YOUR_VLLM_HOST:18908/v1'
    api_key: 'EMPTY'
    is_vllm: true
    disable_proxy: true
    use_plain_content: true
    use_vllm_toolcall_format: false
    thinking_enabled: true
    temperature: 0.7
    max_tokens: 4096

  Qwen3-32b-rl:
    azure_endpoint: 'http://YOUR_VLLM_HOST:18908/v1'
    api_key: 'EMPTY'
    is_vllm: true
    disable_proxy: true
    use_plain_content: true
    use_vllm_toolcall_format: false
    thinking_enabled: true
    temperature: 0.7
    max_tokens: 4096

  # Entry flagged is_vllm: true.
  ToolACE-2:
    azure_endpoint: 'http://YOUR_VLLM_HOST:18901/v1'
    api_key: 'EMPTY'
    is_vllm: true
    disable_proxy: true
    use_plain_content: false
    # Enables the recently added parsing branch.
    use_vllm_toolcall_format: true
    thinking_enabled: false
    temperature: 0.7
    max_tokens: 4096
