# Dataset settings are pulled in from a shared file.
data: !include ../base_data_setting.yml
  
environment:
  # Path of the dotenv file.
  dotenv_path: '.env'
  
model:
  # Base model to use as the reasoner. Candidates:
  #   meta-llama/Llama-3.2-1B-Instruct
  #   meta-llama/Llama-3.2-3B-Instruct
  # Keep this model name identical to the one used by the generator.
  model_name: &model_name meta-llama/Llama-3.2-1B-Instruct
  model_type: &model_type llama3.2

  # Learner configuration, included from an external file.
  learner: !include ../QwenConcept/Qwen2.5-0.5B_all-MiniLM-L6-v1_FT_on_GPT4o-DSynt-Latent.yml

  reasoner:
    learner_ckpt: ICMLPlan/checkpoints/Qwen2.5-0.5BReasoning__Qwen2.5-0.5B-Instruct__zeroshot_cot__MATH/checkpoint-6408

    # Reuse the name/type anchored above so the reasoner always matches them.
    # An alias must refer to an anchor defined earlier in this document.
    model_name: *model_name
    model_type: *model_type

    chat_template: llama-3.1
    system_message: You should answer the question using plan-based reasoning in which you first generate the plan before generating each reasoning step.

    max_seq_length: 2048

  load_in_4bit: false



train:
  # Settings of the synthetic training data, included from an external file.
  data_synthetic: !include ../gpt4o-DSynt_data_setting.yml

  # Type of plan generated from the dataset. One of:
  #   explicit - the plan gives problem-related guidance
  #   skeleton - the plan gives generalized guidance
  plan_type: explicit

  # Form the plan takes during reasoning. One of:
  #   language - the plan is used directly as language
  #   latent   - the plan is replaced by the latent ones obtained from the
  #              concept learner
  reason_plan_space: language

  epoch: 4
  # Equivalent to 2e-4.
  learning_rate: 0.0002
  per_device_train_batch_size: 2
  gradient_accumulation_steps: 4
  weight_decay: 0.01
  lr_scheduler: linear

  lora:
    # LoRA rank. A smaller value means fewer parameters are modified.
    # Any number > 0 works; suggested values: 8, 16, 32, 64, 128.
    r: 16

    # Modules whose names end with one of these terms are made trainable.
    # The selection is motivated by
    # https://medium.com/@simon.gsponer/a-comprehensive-guide-ii-finetuning-a-bert-llm-with-lora-and-make-it-pipeline-compatible-9508e3822907
    target_modules:
      - q_proj
      - k_proj
      - v_proj
      - o_proj
      - gate_proj
      - up_proj
      - down_proj

    # LoRA alpha. Determines the strength of the applied LoRA.
    lora_alpha: 16
    # Any value is supported, but 0 is the optimized setting.
    lora_dropout: 0
    # Any value is supported, but "none" is the optimized setting.
    bias: none
    use_rslora: false



logging:
  log_steps: 1

  # Where to save; leave empty to disable saving.
  checkpoint_path: experiments/checkpoints
  result_path: experiments/results
  logging_path: experiments/loggings
  visualization_path: experiments/visualizations


evaluation:
  # Batch size per device during evaluation.
  per_device_eval_batch_size: 4
  



   




