# Experiment metadata: a short identifier plus a free-text description.
experiment:
  name: 'mmlu_experiment'
  # NOTE(review): the description mentions embeddings analysis, but the steps
  # visible in this file only cover prompt optimisation, generation and
  # evaluation — confirm the wording is still accurate.
  description: 'MMLU experiment with embeddings analysis for prompt similarity'

# Filesystem locations. The `data_path` anchor defined here is reused by alias
# (`*data_path`) under components.task.
paths:
  # NOTE(review): absolute path inside one user's home directory — this config
  # will not work unchanged on another machine; consider a relative or
  # environment-driven location.
  data: &data_path '/Users/gerschld/Desktop/dev/RnD/prompt_optimisation_dataset/data_preprocess'

# Component settings. Every component carries a `default` entry whose value
# matches one of its sibling keys; the init_* steps reference that entry as
# ${components.<type>.default}.
components:
  llm:
    default: 'OpenAI'
    OpenAI:
      version_name: 'gpt-4o_v2024-11-20_USEAST'
      # Sampling settings — presumably forwarded to the model client; confirm.
      temperature: 0.5
      top_p: 0.95

  # Task component. `default` matches the `question_answer` key below and is
  # referenced by the init_task step as ${components.task.default}.
  task:
    default: question_answer
    question_answer:
      # Alias of the `data_path` anchor defined under paths.data.
      data_path: *data_path
      # To run a single MMLU subset, name it 'mmlu_<subset_name>', where
      # <subset_name> is one of the entries in the subset list further down.
      dataset_name: mmlu_anatomy_test  # subset: anatomy_test
      train_test_flag: test
      # id_lst selects which examples to run:
      #   []              runs the full dataset
      #   [1, 2, 3, 4, 5] runs the first 5 examples
      id_lst: []
      # Available MMLU subsets (reference only; this list is commented out and
      # is not read as configuration):
      #   - abstract_algebra_test
      #   - anatomy_test
      #   - astronomy_test
      #   - business_ethics_test
      #   - clinical_knowledge_test
      #   - college_biology_test
      #   - college_chemistry_test
      #   - college_computer_science_test
      #   - college_mathematics_test
      #   - college_medicine_test
      #   - college_physics_test
      #   - computer_security_test
      #   - conceptual_physics_test
      #   - econometrics_test
      #   - electrical_engineering_test
      #   - elementary_mathematics_test
      #   - formal_logic_test
      #   - global_facts_test
      #   - high_school_biology_test
      #   - high_school_chemistry_test
      #   - high_school_computer_science_test
      #   - high_school_european_history_test
      #   - high_school_geography_test
      #   - high_school_government_and_politics_test
      #   - high_school_macroeconomics_test
      #   - high_school_mathematics_test
      #   - high_school_microeconomics_test
      #   - high_school_physics_test
      #   - high_school_psychology_test
      #   - high_school_statistics_test
      #   - high_school_us_history_test
      #   - high_school_world_history_test
      #   - human_aging_test
      #   - human_sexuality_test
      #   - international_law_test
      #   - jurisprudence_test
      #   - logical_fallacies_test
      #   - machine_learning_test
      #   - management_test
      #   - marketing_test
      #   - medical_genetics_test
      #   - miscellaneous_test
      #   - moral_disputes_test
      #   - moral_scenarios_test
      #   - nutrition_test
      #   - philosophy_test
      #   - prehistory_test
      #   - professional_accounting_test
      #   - professional_law_test
      #   - professional_medicine_test
      #   - professional_psychology_test
      #   - public_relations_test
      #   - security_studies_test
      #   - sociology_test
      #   - us_foreign_policy_test
      #   - virology_test
      #   - world_religions_test
      # Message template with one system entry and one user entry. The system
      # `content` uses a folded block scalar (`>`): its two source lines are
      # joined by a single space and one trailing newline is kept.
      # `{question}` is presumably substituted with the question text at
      # runtime — confirm against the task implementation.
      prompt_msg_template:
        - role: system
          content: >
            You are an expert at answering multiple choice questions. Take a deep breath and work on this problem step-by-step.
            Always provide your final answer as a single letter (A, B, C, or D).
        - role: user
          content: "{question}"

  # Prompt optimiser component. `default` matches the `psao` key below and is
  # referenced by the init_prompt_optimiser step as
  # ${components.prompt_optimiser.default}.
  prompt_optimiser:
    default: 'psao'
    psao:
      # Empty string, i.e. no intro prompt text is configured.
      psao_intro_prompt: ''
      psao_struct_ann: '(importance ann_var)'
      # Presumably the random seed for the optimiser — confirm.
      r_seed: 42
      optimise_user_prompt_flag: false
      # Optuna settings (database name, study name, number of trials) —
      # presumably passed to the Optuna study used by the optimiser; confirm.
      optuna_db_name: 'prompt_opt_psao_db.db'
      optuna_study_name: 'mmlu_psao'
      optuna_n_trials: 20

# Pipeline steps. `component` steps name a component type plus the entry
# selected by that component's `default`; `function` steps name a function and
# list the steps they depend on.
steps:
  # Component initialisation steps.
  - name: init_llm
    type: component
    component_type: llm
    component_name: '${components.llm.default}'

  - name: init_task
    type: component
    component_type: task
    component_name: '${components.task.default}'

  - name: init_prompt_optimiser
    type: component
    component_type: prompt_optimiser
    component_name: '${components.prompt_optimiser.default}'

  # Function steps; depends_on lists the prerequisite step names.
  - name: optimise_prompt
    type: function
    function: optimise_prompt
    depends_on: [init_llm, init_task, init_prompt_optimiser]

  - name: run_generation
    type: function
    function: run_generation
    depends_on: [init_llm, init_task, optimise_prompt]

  - name: evaluate_results
    type: function
    function: evaluate_results
    depends_on: [run_generation]

# Output locations.
output:
  # Relative path of the results CSV; presumably resolved against the working
  # directory of the run — confirm.
  results_dataframe: 'output/mmlu_results.csv'