# Configuration for evolving prompts with Qwen3-8B
# Optimized for GEPA benchmark comparison

# General settings
# Iteration budget; a value given on the command line can override it.
max_iterations: 100
checkpoint_interval: 10
log_level: 'INFO'
# Prompt evolution works with full rewrites, so diff-based evolution is off.
diff_based_evolution: false
max_code_length: 10000
language: 'text'

# LLM settings: Qwen3-8B, reached through the OpenRouter endpoint
llm:
  api_base: 'https://openrouter.ai/api/v1'
  models:
    - name: 'qwen/qwen3-8b'
      weight: 1.0

  # Temperature is set on the high side for more creative evolution.
  temperature: 0.8
  max_tokens: 4096
  # NOTE(review): the timeout unit is not stated in this file
  # (presumably seconds) - confirm against the consumer.
  timeout: 60
  retries: 3

# Prompt settings used during evolution
prompt:
  template_dir: 'templates'
  # The top 5 prompts are shown for inspiration.
  num_top_programs: 5
  # 3 diverse prompts are included as well.
  num_diverse_programs: 3
  include_artifacts: true

  # Literal block scalar: line breaks below are preserved as written.
  system_message: |
    You are an expert at creating effective prompts for language models.
    Your goal is to evolve prompts that maximize accuracy on the given task.

    When creating new prompts:
    1. Build on successful patterns from the examples
    2. Be creative but maintain clarity
    3. Consider different reasoning strategies (direct, step-by-step, few-shot)
    4. Optimize for the specific task requirements

# MAP-Elites database settings
database:
  # Moderate population size, chosen as a balance.
  population_size: 50
  archive_size: 500
  # Several islands, to keep the population diverse.
  num_islands: 4

  feature_dimensions:
    - 'prompt_length'
    - 'reasoning_strategy'
  feature_bins: 10

  # Selection split: 40% elites, 30% exploration, 30% exploitation.
  elite_selection_ratio: 0.4
  exploration_ratio: 0.3
  exploitation_ratio: 0.3

  migration_interval: 20
  migration_rate: 0.1

# Evaluator settings
evaluator:
  # Complex evaluations are allowed up to 30 minutes.
  timeout: 1800
  max_retries: 3
  # Evaluations run in parallel for speed.
  parallel_evaluations: 4
  # Cascading is used to save API calls.
  cascade_evaluation: true
  # A single threshold means only 2 stages: a candidate must achieve 90%
  # in stage 1 to proceed.
  cascade_thresholds:
    - 0.9

  # LLM feedback is enabled for better guidance, with a 20% weight on the
  # qualitative feedback.
  use_llm_feedback: true
  llm_feedback_weight: 0.2