# Hydra configuration for Novelty Bench inference runs

# Defaults list: only this file itself (`_self_`) is listed, so no other
# config groups are pulled in from here.
defaults:
  - _self_

# Hydra output-directory settings for single runs and multirun sweeps.
hydra:
  run:
    # Single-run output directory:
    #   outputs/<experiment.name>/YYYY-MM-DD_HH-MM-SS
    dir: outputs/${experiment.name}/${now:%Y-%m-%d_%H-%M-%S}
  sweep:
    # Multirun (sweep) output directory:
    #   multirun/<experiment.name>/<model.name>/YYYY-MM-DD_HH-MM-SS
    dir: multirun/${experiment.name}/${model.name}/${now:%Y-%m-%d_%H-%M-%S}
    subdir: ${hydra.job.num} # Subdirectory for each run in a multirun sweep, taken from hydra.job.num

# Experiment-level settings: naming, prompt source, sampling strategy, and
# request volume.
experiment:
  # Name for the experiment; interpolated into hydra.run.dir and
  # hydra.sweep.dir as ${experiment.name}
  name: "novelty_bench_inference"
  
  # Directory to save evaluation results
  eval_dir: null # Will be auto-generated if null
  
  # Source of prompts
  data: "curated" # choices: ["curated", "wildchat"]
  
  # Sampling strategy (distinct from the top-level `sampling` section, which
  # holds the generation parameters)
  sampling: "regenerate" # choices: ["regenerate", "in-context", "paraphrase", "system-prompt"]
  
  # Number of generations per prompt
  num_generations: 8
  
  # Number of concurrent requests
  concurrent_requests: 20

  # System prompt type
  # NOTE(review): the set of valid values is not listed here; confirm against
  # the code that consumes this key.
  prompt: rsm

# Model selection and API connection settings.
model:
  # Model to run inference with.
  # `name` is interpolated into hydra.sweep.dir as ${model.name}.
  name: "deepseek_r1"
  # NOTE(review): presumably the model identifier passed to LiteLLM — confirm
  # against the caller.
  litellm_model_name: "openrouter/deepseek/deepseek-r1-0528"
  
  # API configuration
  # Both are null in this file.
  # NOTE(review): presumably supplied via command-line overrides or the
  # environment at run time — confirm. Keep real keys out of version control.
  api_base: null
  api_key: null

# Generation/request parameters. This top-level section is separate from
# `experiment.sampling`, which selects the sampling strategy.
sampling:
  # Temperature for sampling
  # NOTE(review): null leaves this unset here; presumably the provider default
  # applies — confirm.
  temperature: null
  
  # Maximum tokens per generation
  # NOTE(review): null leaves this unset here; presumably no explicit cap is
  # sent — confirm.
  max_tokens: null
  
  # Maximum retries for failed requests
  max_retries: 1
