# Experiment metadata: a short identifier plus a human-readable description.
experiment:
  name: multiarith_experiment
  description: MultiArith experiment for mathematical word problems

# Filesystem locations. The `&data_path` anchor defined here is reused through
# the `*data_path` alias in components.task.question_answer.data_path.
# NOTE(review): this is an absolute path inside one user's home directory, so
# the config will not work unchanged on another machine — consider a
# repo-relative path or an environment-specific override.
paths:
  data: &data_path /Users/gerschld/Desktop/dev/RnD/prompt_optimisation_dataset/data_preprocess

# Component configurations, grouped by component type (llm, task,
# prompt_optimiser). Each group has a `default` key naming one of its sibling
# entries; the `steps` section refers to it via `${components.<type>.default}`.
components:
  llm:
    default: OpenAI
    OpenAI:
      # NOTE(review): looks like a deployment-specific model identifier —
      # confirm it exists in the target environment.
      version_name: gpt-4o_v2024-11-20_USEAST
      # Sampling settings (presumably forwarded to the model API — confirm).
      temperature: 0.5
      top_p: 0.95

  task:
    default: question_answer
    question_answer:
      # Alias of the `&data_path` anchor defined under `paths.data`.
      data_path: *data_path
      dataset_name: multiarith
      train_test_flag: test
      # [] to run full dataset
      # [1, 2, 3, 4, 5] to run first 5 examples
      id_lst: []
      # Chat-style message list: one system message followed by one user message.
      prompt_msg_template:
        - role: system
          # Folded scalar (`>`): the parsed value is a single line that ends
          # with one newline.
          # NOTE(review): the text line below also ends with a trailing space,
          # which becomes part of the prompt; trim it and use `>-` if that
          # trailing whitespace/newline is not intended.
          content: >
            Let's work together to solve math word problems! First, we will read and discuss the problem together to make sure we understand it. Then, we will work together to find the solution. I will give you hints and help you work through the problem if you get stuck. 
        - role: user
          # `{question}` is presumably a placeholder filled in per example —
          # confirm against the task implementation.
          content: "{question}"

  prompt_optimiser:
    default: psao
    psao:
      psao_intro_prompt: ""
      psao_struct_ann: "(importance ann_var)"
      # Presumably the random seed used by the optimiser — confirm.
      r_seed: 42
      optimise_user_prompt_flag: false
      # NOTE(review): the `optuna_*` names suggest an Optuna study stored in
      # the given database file and run for the given number of trials —
      # confirm, and confirm where the relative db path is resolved from.
      optuna_db_name: prompt_opt_psao_multiarith_db.db
      optuna_study_name: multiarith_psao
      optuna_n_trials: 5

# Pipeline steps. Each entry has a unique `name`; `depends_on` lists the names
# of other steps in this list that the step relies on.
# NOTE(review): ordering by `depends_on` and resolution of the `${...}`
# references are done by whatever consumes this file, not by YAML itself —
# confirm the loader supports this interpolation syntax.
steps:
  # Component steps: `component_name` references the `default` entry of the
  # matching group under `components`. The references stay quoted so they are
  # always read as literal strings.
  - name: init_llm
    type: component
    component_type: llm
    component_name: '${components.llm.default}'

  - name: init_task
    type: component
    component_type: task
    component_name: '${components.task.default}'

  - name: init_prompt_optimiser
    type: component
    component_type: prompt_optimiser
    component_name: '${components.prompt_optimiser.default}'

  # Function steps: `function` names the routine to invoke.
  - name: optimise_prompt
    type: function
    function: optimise_prompt
    depends_on: [init_llm, init_task, init_prompt_optimiser]

  - name: run_generation
    type: function
    function: run_generation
    depends_on: [init_llm, init_task, optimise_prompt]

  - name: evaluate_results
    type: function
    function: evaluate_results
    depends_on: [run_generation]

# Output locations.
# NOTE(review): the path is relative; presumably resolved against the working
# directory of the run — confirm, and confirm the `output/` directory exists
# or is created by the consumer.
output:
  results_dataframe: output/multiarith_results.csv