# Model configurations, keyed by a short name. Each entry sets a `provider`
# and a `model_name` plus generation settings such as `max_tokens` and
# `temperature`. Commented-out entries below appear to be kept as
# ready-to-enable examples -- uncomment one to add it to this mapping.
models:
  # qwen_vl_3b:
  #   provider: vllm
  #   model_name: Qwen/Qwen2.5-VL-3B-Instruct
  #   max_tokens: 150
  #   temperature: 0.7
  #   tensor_parallel_size: 2
  #   gpu_memory_utilization: 0.9

  # gpt4o:
  #   provider: openai
  #   model_name: gpt-4o
  #   max_tokens: 150
  #   temperature: 0.7
  #   presence_penalty: 0.0
  #   frequency_penalty: 0.0
  #   max_retries: 3
  #   timeout: 60

  # The only entry in this section that is not commented out.
  # NOTE(review): the key says "claude_3_sonnet" but model_name pins
  # claude-3-7-sonnet-20250219. The key is left unchanged because other
  # code may reference it by this name -- confirm before renaming.
  claude_3_sonnet:
    # Same provider value as the commented-out claude_3_haiku_batch example.
    provider: claude
    # Dated model identifier (includes a 20250219 suffix).
    model_name: claude-3-7-sonnet-20250219
    # Presumably the cap on generated tokens per response -- confirm with
    # the code that loads this file.
    max_tokens: 150
    # Presumably the sampling temperature passed to the provider -- confirm.
    temperature: 0.7
  
  # gemini_2_5_flash:
  #   provider: gemini
  #   model_name: gemini-2.5-flash-preview-04-17
  #   max_tokens: 150
  #   temperature: 0.7
  
  # # Batch processing configuration example
  # claude_3_haiku_batch:
  #   provider: claude
  #   model_name: claude-3-haiku-20240307
  #   max_tokens: 150
  #   temperature: 0.7
  #   use_batch_api: true  # Use batch API for cost optimization
  #   batch_poll_interval: 10  # Poll every 10 seconds
  #   batch_max_wait_time: 7200  # Wait up to 2 hours