# Training mode configuration

# Model identifier.
# NOTE(review): the previous inline comment listed 'llada' or 'dream' as the
# options, but the value set here is 'llada_base' — confirm the accepted names.
model: 'llada_base'
finetuning_method: 'nara'
task_name: 'code_feedback'

# Fine-tuning hyperparameters.
finetuning_parameters:
  r_ab: 32
  lora_dropout: 0.05

  # Active target modules. A wider alternative is kept below for reference:
  # target_modules: ["q_proj", "k_proj", "v_proj", "up_proj", "ff_out","attn_out"]
  target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'attn_out'

  fnn_hidden_size_1: 256
  fnn_hidden_size_2: 512

  # One of: "zero_last", "kaiming_uniform_m", or "zero_all"
  init_c: 'zero_last'
  # Default 64 for NARA
  embedding_dim: 64
  c_scale: 0.1
  input_mode: 'nl'
  embedding_type: 'fourier'
# Dataset settings.
data:
  # presumably the RNG seed and the size of the held-out validation split —
  # TODO confirm against the data-loading code
  val_split_seed: 42
  val_split_size: 128
  batch_size: 1
  # Disabled setting, kept for reference:
  # max_length: 512

# Training-run settings.
train:
  # Will use paths.experiment path
  decoder_resume_path: null
  head_resume_path: null
  skipped_keys: null

  # Left unset (null) here.
  # NOTE(review): presumably progress counters restored when resuming — confirm.
  global_step: null
  global_epoch: null
  global_sample_number: null
  global_update_number: null
  global_token_number: null

  stage_1: 0
  random_length: false
  output_dir: 'ckpts'
  logging_dir: 'logs'
  mixed_precision: 'fp16'
  gradient_accumulation_steps: 32
  report_to: 'wandb'
  epoch_num: 1
  # Written with a mantissa dot on purpose: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-4` as a string rather than a float.
  lr: 1.0e-4
  warmup_ratio: 0.05
  eval_every: 64
  eval_from_start: true
  save_every: 64
  per_example_ratio: true

  # Updated experiment name to include CLoRA specific params
  # NOTE(review): the name is currently empty and finetuning_method is 'nara',
  # so the CLoRA remark above may be stale — confirm.
  exp_name: &exp_name ''
  # Empty project name. This value was previously a lone, unterminated double
  # quote, which made the whole document unparseable.
  wandb_proj: ''
  # Evaluation settings nested under `train`.
  eval:
    # Active metric is 'loss'; alternative kept for reference:
    # metric: 'accuracy'
    metric: 'loss'
    # Unset (null) here.
    noise_levels: null
    eval_epoches_num: 1

    # Generation settings.
    # NOTE(review): presumably only used by generation-based metrics — confirm.
    steps: 32
    gen_length: 512
    block_length: 16
    temperature: 0.0
    cfg_scale: 0.0
    remasking: 'low_confidence'
    observe_steps: true
# Filesystem locations.
paths:
  # Experiment path; a comment in the `train` section says it uses
  # paths.experiment. Empty here.
  experiment: ''