# Dataset settings come from a separate file through the custom !include tag
# (presumably resolved by the project's YAML loader, relative to this file —
# confirm against the loader).
data: !include ../base_data_setting.yml
  
environment:
  # Path to the dotenv file (presumably read at startup to populate
  # environment variables — confirm against the loader).
  dotenv_path: .env
  
model:

  # Base model settings.
  # The commented identifiers below are presumably the alternative values
  # for model_name (same model family, different sizes):
  # Qwen/Qwen2.5-0.5B-Instruct
  # Qwen/Qwen2.5-1.5B-Instruct
  # Qwen/Qwen2.5-3B-Instruct
  # Qwen/Qwen2.5-7B-Instruct
  # Qwen/Qwen2.5-14B-Instruct
  # Qwen/Qwen2.5-32B-Instruct
  # Qwen/Qwen2.5-72B-Instruct
  model_name: Qwen/Qwen2.5-0.5B-Instruct
  # NOTE(review): model_type and chat_template presumably have to match the
  # family of model_name — confirm before switching to another model.
  model_type: qwen2.5

  chat_template: qwen-2.5

  # Same values are repeated under evaluation.reasoning_model; keep them in
  # sync if they are meant to agree.
  max_seq_length: 2048
  load_in_4bit: false



train:
  # Synthetic-data settings come from a separate file through the custom
  # !include tag (presumably resolved by the project's YAML loader).
  data_synthetic: !include ../gpt4o-DSynt_data_setting.yml

  plan_type: explicit

  epoch: 4
  # Equivalent to 2e-4. Keep the decimal form: YAML 1.1 parsers such as
  # PyYAML read a bare `2e-4` (no decimal point) as a string, not a float.
  learning_rate: 0.0002
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  weight_decay: 0.01
  lr_scheduler: linear

  lora:
    # Rank parameter for LoRA. The smaller this value, the fewer parameters
    # will be modified.
    # Choose any number > 0; suggested values: 8, 16, 32, 64, 128.
    r: 16
    # Names of the modules that receive LoRA adapters.
    target_modules:
      - q_proj
      - k_proj
      - v_proj
      - o_proj
      - gate_proj
      - up_proj
      - down_proj
    # Alpha parameter for LoRA. This value determines the strength of the
    # applied LoRA.
    lora_alpha: 16
    # Any value is supported, but 0 is optimized.
    lora_dropout: 0
    # Any value is supported, but "none" is optimized.
    # This is the literal string "none", not YAML null.
    bias: none
    # Whether to use gradient checkpointing to improve memory efficiency.
    # Use true, or "unsloth" for very long context.
    use_gradient_checkpointing: unsloth
    # Seed value for random number generation.
    random_state: 3407
    use_rslora: false
    loftq_config: null

logging:
  # Paths where run outputs are saved; leave a value empty to skip saving it.
  # NOTE(review): the paths are relative — presumably to the working
  # directory the run is launched from; confirm.

  checkpoint_path: experiments/checkpoints
  result_path: experiments/results
  logging_path: experiments/loggings
  visualization_path: experiments/visualizations


evaluation:
  per_device_eval_batch_size: 4
  # Where the fine-tuned model is saved.
  # The model should be placed under the root folder of the project.
  finetuned_model_path: ICML25Plan/pretrained_models/Qwen2.5-0.5B-Instruct-finetuned-DirectSynth-GPT4o-Plan
  # Checkpoint folder to load (presumably a sub-folder of
  # finetuned_model_path — confirm against the evaluation code).
  model_folder_name: checkpoint-2136

  reasoning_model:
    name_or_path: Qwen/Qwen2.5-0.5B-Instruct
    # Supports RoPE scaling internally, so any value may be chosen.
    max_seq_length: 2048
    load_in_4bit: false