# Teacher-Student Few-Shot Evaluation Configuration for Simple Geometry
# Tests teacher-student few-shot accuracy with specified parameters

evaluation:
  # Metrics to compute during evaluation.
  # Every entry must sit at the same indentation level: YAML rejects a block
  # sequence whose "-" markers are at different columns, so do not mix flush
  # and indented entries here.
  # NOTE(review): all four uncommented entries are assumed to be intended as
  # active metrics — confirm.
  metrics:
    - "generalization"  # Test with modified parameters
    # - "perplexity"
    - "filler"  # Usefulness
    - "shuffle"  # Usefulness
    - "answer_removed_explanation_only"  # Informativeness
  # Filesystem locations of model weights.
  # NOTE(review): checkpoint_dir presumably holds the trained checkpoints to
  # evaluate and base_model_path the model they were trained from — confirm.
  # base_model_path is the same path as student_model.model_path below.
  checkpoint_dir: /nlp/scr/qinanyu/rl-explanations/checkpoints/think/simple_geometry
  base_model_path: /nlp/scr/qinanyu/models/qwen2.5-3b-instruct

  # Dataset specification for the teacher side.
  # No min_sides/max_sides are set here, unlike the generalization and student
  # datasets; presumably the task's defaults apply — confirm.
  teacher_dataset:
    task_name: simple_geometry
    seed: 42
    size: 21000
    # NOTE(review): with size 21000 this presumably marks the index where the
    # validation split starts (the last 1000 examples) — confirm in the loader.
    val_start: 20000

  # Dataset specification for the "generalization" metric: same task, its own
  # seed, 500 examples, and a side-count range (6-11) that mostly lies above
  # the student dataset's range (3-6).
  # NOTE(review): whether min_sides/max_sides are inclusive is not visible
  # here — confirm; if inclusive, the two ranges overlap at 6.
  generalization_dataset:
    task_name: simple_geometry
    seed: 45
    size: 500
    min_sides: 6
    max_sides: 11
  # Dataset specification for the student side. Uses the same task, size and
  # val_start as teacher_dataset but a different seed (44 vs 42) and an
  # explicit side-count range.
  student_dataset:
    task_name: simple_geometry
    seed: 44
    size: 21000
    # NOTE(review): presumably the index where the validation split starts —
    # confirm in the loader.
    val_start: 20000
    min_sides: 3
    max_sides: 6

  # Few-shot prompting settings. Currently disabled.
  # NOTE(review): n_shot and seed are presumably ignored while enabled is
  # false — confirm in the evaluation script.
  few_shot:
    enabled: false  # canonical lowercase boolean, matching the rest of the file
    n_shot: 3
    seed: 144
  
  # Generation settings for the teacher model. No model_path is given here
  # (unlike student_model); presumably the teacher weights come from
  # checkpoint_dir / base_model_path above — confirm.
  teacher_model:
    temperature: 1.0
    top_p: 1
    # NOTE(review): if forwarded to vLLM sampling parameters, -1 disables
    # top-k filtering — confirm.
    top_k: -1
    max_tokens: 1024
    use_chat_template: true
    # NOTE(review): looks like the name of a prompt preset resolved elsewhere
    # in the project — confirm.
    developer_prompt: DeepSeekZero
    developer_role: system
    # NOTE(review): presumably a token placed at the start of the teacher's
    # response — confirm.
    preappend_token: <think>
  
  # Generation settings for the student model. model_path is the same path as
  # base_model_path above. Sampling settings match teacher_model except for a
  # smaller max_tokens (512 vs 1024) and a different developer_prompt.
  student_model:
    model_path: /nlp/scr/qinanyu/models/qwen2.5-3b-instruct
    temperature: 1.0
    top_p: 1
    # NOTE(review): if forwarded to vLLM sampling parameters, -1 disables
    # top-k filtering — confirm.
    top_k: -1
    max_tokens: 512
    use_chat_template: true
    # NOTE(review): "empty" looks like a preset name meaning no developer
    # prompt — confirm.
    developer_prompt: empty
    developer_role: system
  
  # Inference-engine settings. The key names match vLLM engine arguments;
  # presumably they are passed through to the vLLM engine constructor — confirm.
  vllm:
    tensor_parallel_size: 1
    # Fraction of GPU memory vLLM may use (per the vLLM argument of this name).
    # NOTE(review): 0.4 presumably leaves room for a second model on the same
    # GPU — confirm.
    gpu_memory_utilization: 0.4
    max_model_len: 8192
    # In vLLM, enforce_eager disables CUDA graph capture.
    enforce_eager: true
    disable_log_stats: true
    dtype: bfloat16
  
  # Run-size controls for the evaluation loop.
  batch_size: 100
  # NOTE(review): presumably an upper bound on how many checkpoints from
  # checkpoint_dir are evaluated — confirm.
  max_checkpoints: 20
  # NOTE(review): -1 looks like a sentinel value (e.g. "no starting step
  # filter") — confirm its meaning in the evaluation script.
  start_step: -1

  # Where results are written. Note this path is under /sailhome/alexart,
  # while every other path in this file is under /nlp/scr/qinanyu; the trailing
  # comment on the line keeps the alternative /nlp/scr location for reference.
  output_dir: /sailhome/alexart/rl-explanations/evaluate/results/grpo/simple_geometry  #/nlp/scr/qinanyu/rl-explanations/evaluate/results/grpo/simple_geometry
  save_intermediate_results: true
  plot_results: true
  # Teacher-response files from external "expert" models, keyed by model name.
  # Despite the "_dir" suffix, each value is the path of a JSON file.
  # ${evaluation.teacher_dataset.task_name} is a config interpolation that
  # refers to teacher_dataset.task_name in this file (simple_geometry).
  # NOTE(review): interpolation syntax looks like OmegaConf/Hydra — confirm the
  # loader resolves it.
  expert_thinking_dir:
    o3-mini: /nlp/scr/qinanyu/rl-explanations/evaluate/results/o3-mini_gpt-4.1-mini/${evaluation.teacher_dataset.task_name}/teacher/step_0/teacher_responses_step_0.json
    gpt-oss: /nlp/scr/qinanyu/rl-explanations/evaluate/results/gpt-oss-20b_gpt-4.1-mini/${evaluation.teacher_dataset.task_name}/teacher/step_0/teacher_responses_step_0.json
    qwen3-30b-a3b-thinking-2507: /nlp/scr/qinanyu/rl-explanations/evaluate/results/qwen3-30b-a3b-thinking-2507_gpt-4.1-mini/${evaluation.teacher_dataset.task_name}/teacher/step_0/teacher_responses_step_0.json
