# Dataset settings are taken from ../base_data_setting.yml through the custom
# `!include` tag (resolved by the project's config loader, not by YAML itself).
data: !include ../base_data_setting.yml
  
environment:
  # Path of the dotenv file. NOTE(review): presumably resolved relative to the
  # working directory of the run — confirm against the code that reads it.
  dotenv_path: .env
  
model:
  # Candidate checkpoints for the reasoner model:
  #   Qwen/Qwen2.5-0.5B-Instruct
  #   Qwen/Qwen2.5-1.5B-Instruct
  #   Qwen/Qwen2.5-3B-Instruct
  #   Qwen/Qwen2.5-7B-Instruct
  #   Qwen/Qwen2.5-14B-Instruct
  #   Qwen/Qwen2.5-32B-Instruct
  #   Qwen/Qwen2.5-72B-Instruct
  # Keep this model name identical to the one used by the generator.
  # Both values are anchored so the `reasoner` section can alias them.
  model_name: &model_name Qwen/Qwen2.5-0.5B-Instruct
  model_type: &model_type qwen2.5

  # Model-level flag: a sibling of `reasoner`, not one of its keys.
  load_in_4bit: false

  reasoner:
    # Aliases of the anchors above, so the two sections cannot drift apart.
    model_name: *model_name
    model_type: *model_type

    chat_template: qwen-2.5

    max_seq_length: 2048

    # Deepseek's system prompt (disabled, kept for reference)
    # system_prompt: "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. "

    # Active system prompt. The `{}` pairs are stored literally in this file;
    # NOTE(review): presumably they are filled in by the code with the opening
    # and closing plan / reasoning-step tags — confirm before editing them.
    system_prompt: "You are a helpful AI assistant that provides well-reasoned and detailed responses. When the user asks a question, you solve it through a step-by-step reasoning process. In each step, you begin by proposing a high-level plan before executing it to generate that specific step. The plan and reasoning step in each step block are enclosed within {} {} and {} {} tags, respectively, i.e.,\n{} plan description here {}\n{} reasoning step here {}\n\n."



train:
  # Synthetic-data settings, taken from another file through the custom
  # `!include` tag.
  data_synthetic: !include ../gpt4o-DSynt_data_setting.yml

  # The type of plan generated from the dataset:
  #   - explicit: the plan gives problem-specific guidance
  #   - skeleton: the plan gives generalized guidance
  plan_type: explicit  # explicit or skeleton

  # The form the plan takes during reasoning:
  #   - language: the plan is used directly as natural language
  #   - latent: the plan is replaced with the latent representation obtained
  #     from the concept learner
  reason_plan_space: language  # language or latent

  # Where the plan appears:
  #   - none: the plans do not appear in the reasoning
  #   - head: the plans are placed together at the head of the reasoning
  #   - step: each plan is placed in front of its reasoning step
  plan_appear: head

  epoch: 4
  # 2e-4, written in decimal form. Some YAML 1.1 loaders (e.g. PyYAML) read a
  # bare `2e-4` as a string because it has no decimal point.
  learning_rate: 0.0002
  per_device_train_batch_size: 2
  gradient_accumulation_steps: 4
  weight_decay: 0.01
  lr_scheduler: linear

  lora:
    # Rank parameter for LoRA. The smaller this value, the fewer parameters
    # are modified. Any number > 0 works; suggested: 8, 16, 32, 64, 128.
    r: 16
    # The choice of target modules follows
    # https://medium.com/@simon.gsponer/a-comprehensive-guide-ii-finetuning-a-bert-llm-with-lora-and-make-it-pipeline-compatible-9508e3822907
    # Modules whose names end with one of the following terms are made
    # trainable.
    target_modules:
      - q_proj
      - k_proj
      - v_proj
      - o_proj
      - gate_proj
      - up_proj
      - down_proj
    # NOTE(review): presumably these modules are trained and saved in full
    # alongside the LoRA adapters — confirm against the training code.
    modules_to_save:
      - plan_embeddings

    # Alpha parameter for LoRA. This value determines the strength of the
    # applied LoRA.
    lora_alpha: 16
    # Any value is supported, but 0 is the optimized setting.
    lora_dropout: 0
    # Any value is supported, but "none" is the optimized setting.
    # (`none` is a plain string here, not a YAML null.)
    bias: none
    use_rslora: false



logging:
  # NOTE(review): presumably the logging interval, in training steps — confirm.
  log_steps: 1

  # Output locations. Per the original note, leaving a path empty means
  # nothing is saved for it.
  checkpoint_path: experiments/checkpoints
  result_path: experiments/results
  logging_path: experiments/loggings
  visualization_path: experiments/visualizations


# Evaluation-time settings.
evaluation:
  # Counterpart of `train.per_device_train_batch_size` for evaluation.
  per_device_eval_batch_size: 4