# ------------------------- Qwen3-4B ------------------------------
# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test").
# This entry sets no custom_prompt_file / custom_prompt_id; presumably it is the
# reference run without a custom prompt — TODO confirm against the runner.
# NOTE(review): every wandb_run_name in the Qwen3-4B section is reused verbatim
# in the Qwen3-8B section, with the same wandb_project and output_dir. Confirm
# the runner disambiguates by model so runs/outputs do not collide.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file first_order.json, prompt id zero_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "first_order.json"
    custom_prompt_id: "zero_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-first-order-zero"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file first_order.json, prompt id one_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "first_order.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-first-order-one"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file first_order.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "first_order.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-first-order-few"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file knowledge_graph.json, prompt id zero_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "knowledge_graph.json"
    custom_prompt_id: "zero_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-knowledge-graph-zero"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file knowledge_graph.json, prompt id one_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "knowledge_graph.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-knowledge-graph-one"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file knowledge_graph.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "knowledge_graph.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-knowledge-graph-few"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file propositional.json, prompt id zero_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "propositional.json"
    custom_prompt_id: "zero_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-propositional-zero"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file propositional.json, prompt id one_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "propositional.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-propositional-one"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file propositional.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "propositional.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-propositional-few"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file alist.json, prompt id one_shot.
# NOTE(review): unlike first_order / knowledge_graph / propositional, there is
# no zero_shot entry for alist.json in this section — confirm that is intended.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "alist.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-alist-one"

# Qwen/Qwen3-4B on openai/gsm8k (subset "main", split "test"),
# custom prompt file alist.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-4B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "alist.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-alist-few"

# ------------------------- Qwen3-8B ------------------------------
# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test").
# This entry sets no custom_prompt_file / custom_prompt_id; presumably it is the
# reference run without a custom prompt — TODO confirm against the runner.
# NOTE(review): every wandb_run_name in the Qwen3-8B section is identical to the
# matching Qwen3-4B entry, with the same wandb_project and output_dir. Confirm
# the runner disambiguates by model so runs/outputs do not collide.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file first_order.json, prompt id zero_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "first_order.json"
    custom_prompt_id: "zero_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-first-order-zero"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file first_order.json, prompt id one_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "first_order.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-first-order-one"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file first_order.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "first_order.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-first-order-few"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file knowledge_graph.json, prompt id zero_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "knowledge_graph.json"
    custom_prompt_id: "zero_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-knowledge-graph-zero"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file knowledge_graph.json, prompt id one_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "knowledge_graph.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-knowledge-graph-one"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file knowledge_graph.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "knowledge_graph.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-knowledge-graph-few"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file propositional.json, prompt id zero_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "propositional.json"
    custom_prompt_id: "zero_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-propositional-zero"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file propositional.json, prompt id one_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "propositional.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-propositional-one"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file propositional.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "propositional.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-propositional-few"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file alist.json, prompt id one_shot.
# NOTE(review): unlike first_order / knowledge_graph / propositional, there is
# no zero_shot entry for alist.json in this section — confirm that is intended.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "alist.json"
    custom_prompt_id: "one_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-alist-one"

# Qwen/Qwen3-8B on openai/gsm8k (subset "main", split "test"),
# custom prompt file alist.json, prompt id few_shot.
- hyperparam_set:
    model_name_or_path: "Qwen/Qwen3-8B"
    dataset_name: "openai/gsm8k"
    split: "test"
    input_columns: ["question"]
    subset_name: "main"
    answer_key: "answer"
    eval_batch_size: 64
    custom_prompt_file: "alist.json"
    custom_prompt_id: "few_shot"
    output_dir: "/app/evaluation_output"
    wandb_project: "run-3-prompt-finetuning"
    wandb_run_name: "gsm8k-alist-few"