# Model registry: each key under `models` names one model configuration,
# giving its provider, the provider-side model_name, and generation settings
# such as max_tokens and temperature. Commented-out entries are disabled;
# uncomment an entry to enable it.
models:
  # qwen_vl_3b:
  #   provider: vllm
  #   model_name: Qwen/Qwen2.5-VL-3B-Instruct
  #   max_tokens: 150
  #   temperature: 0.7
  #   tensor_parallel_size: 2
  #   gpu_memory_utilization: 0.9

  # qwen_vl_3b_api:
  #   provider: routerapi
  #   model_name: qwen/qwen2.5-vl-3b-instruct:free
  #   max_tokens: 150
  #   temperature: 0.7
  
  # qwen_vl_72b:
  #   provider: together
  #   model_name: Qwen/Qwen2.5-VL-72B-Instruct
  #   max_tokens: 150
  #   temperature: 0.7

  # VLM-R1-MATH:
  #   provider: vllm
  #   model_name: omlab/VLM-R1-Qwen2.5VL-3B-Math-0305
  #   max_tokens: 150
  #   temperature: 0.7

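  # NOTE: same key and settings as the commented qwen_vl_3b_api entry earlier
  # in this file; enable at most one, since mapping keys must be unique.
  # qwen_vl_3b_api: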
  #   provider: routerapi
  #   model_name: qwen/qwen2.5-vl-3b-instruct:free
  #   max_tokens: 150
  #   temperature: 0.7

  # gpt4o:
  #   provider: openai
  #   model_name: gpt-4o
  #   max_tokens: 150
  #   temperature: 0.7
  #   presence_penalty: 0.0
  #   frequency_penalty: 0.0
  #   max_retries: 3
  #   timeout: 60

  # claude_3_sonnet:
  #   provider: claude
  #   model_name: claude-3-7-sonnet-20250219
  #   max_tokens: 150
  #   temperature: 0.7
  
  # Active (uncommented) entry: gemini-2.0-flash served via the `gemini`
  # provider, using the same max_tokens/temperature values as the
  # commented-out examples in this file.
  # NOTE(review): the key is spelled "flask" while model_name is "flash".
  # This looks like a typo, but the key is left unchanged because other
  # config or code may refer to it by this name - confirm before renaming.
  Gemini_2.0_flask:
    provider: gemini
    model_name: gemini-2.0-flash
    max_tokens: 150
    temperature: 0.7
  
  # Batch processing configuration example
  # claude_3_haiku_batch:
  #   provider: claude
  #   model_name: claude-3-haiku-20240307
  #   max_tokens: 150
  #   temperature: 0.7
  #   use_batch_api: true  # Use batch API for cost optimization
  #   batch_poll_interval: 10  # Poll every 10 seconds
  #   batch_max_wait_time: 7200  # Wait up to 2 hours
  
  # VLM-R1-OVD:
  #   provider: vllm
  #   model_name: omlab/VLM-R1-Qwen2.5VL-3B-OVD-0321
  #   max_tokens: 150
  #   temperature: 0.7
  
  # VLM-R1-REC:
  #   provider: vllm
  #   model_name: omlab/Qwen2.5VL-3B-VLM-R1-REC-500steps
  #   max_tokens: 150
  #   temperature: 0.7
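  # NOTE: same key and settings as the commented VLM-R1-REC entry directly
  # above; enable at most one, since mapping keys must be unique.
  # VLM-R1-REC: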
  #   provider: vllm
  #   model_name: omlab/Qwen2.5VL-3B-VLM-R1-REC-500steps
  #   max_tokens: 150
  #   temperature: 0.7

    