data:
  # Use a fast KV store for note info and user last-N info when one is available.
  # Connection settings for the note-info store used during training.
  train_noteinfo_kv:
    ip: ''      # left empty here — presumably must be filled in per deployment
    port: ''    # left empty here — presumably must be filled in per deployment
    prefix: ''
    ex: 1       # NOTE(review): presumably a key-expiry setting — confirm meaning and units

  # Connection settings for the engage last-N store (same fields as above).
  engage_lastn_kv:
    ip: ''
    port: ''
    prefix: ''
    ex: 1

  # Placeholder value: replace with the real click last-N data root.
  click_lastn_data_root: YOUR_CLICK_LASTN_DATA_ROOT

  # Item prompt, English variant (disabled):
  # item_prompt: 'Summary:\n'
  # Item prompt for Chinese notes (active):
  # NOTE(review): single-quoted YAML scalars do not process escapes, so `\n`
  # below is a literal backslash followed by `n`, not a newline. Only switch to
  # double quotes if a real newline is intended AND existing checkpoints were
  # trained that way — confirm against the data loader before changing.
  item_prompt: '总结:\n'
  max_text_len: 230         # Maximum text length
  max_topic_nums: 8         # Maximum number of topics
  max_input_token_len: 300  # Maximum input token length
  add_item_hour_embed: false

  train_batch_size: 1         # Batch size per GPU for training
  neg_samples_per_gpu: 400    # Number of negative samples per GPU
  neg_pool_size_per_gpu: 1000000  # Negative sample pool size per GPU

  # Mix of engage / click notes in each user's last-N sequence, set through the
  # per-source caps below (0 disables that source).
  # NOTE(review): the original comment also said "with Windows version tail
  # training" — presumably a reference to the window NCE setup; confirm.
  lastn_max_engage_note_num: 0            # Maximum number of engage notes
  lastn_max_click_note_num_homefeed: 128  # Maximum number of homefeed click notes
  lastn_max_click_note_num_ads: 0         # Maximum number of ads click notes
  train_num_workers: 4                    # Number of data loader workers

model:
  model_name: REDRec
  # The item LLM and the user LLM are each initialized from their own
  # pretrain directory; both point at the same Qwen2.5-1.5B checkpoint here.
  item_pretrain_dir: pre_trained_ckpts/Qwen2.5-1.5B
  item_llm_init: true
  user_pretrain_dir: pre_trained_ckpts/Qwen2.5-1.5B
  user_llm_init: true
  use_ft_flash_attn: true
  use_lora: false
  # NOTE(review): `training.gradient_checkpointing` is also set in this file;
  # which of the two the code reads is not visible here — keep them in sync.
  gradient_checkpointing: true

  # Engage action settings
  engage_action_n: 5
  add_item_action_embed: false
  add_hour_embed: false
  add_position_embed: false
  learnable_interest_query: false

  # Compression dim settings
  AE_compress_dim: -1            # -1 -> disable autoencoder compression
  # Decay rate for the autoencoder compression.
  # NOTE(review): the original comment read "Resume autoencoder decay rate";
  # confirm what quantity is decayed and whether it applies when AE is disabled.
  AE_compress_decay_rate: 0.9995

  # Action prediction
  predict_action: false

  # Window NCE loss
  # These two values match the "win10_query3" fragment of the experiment names
  # used under `saver` and `eval` — presumably by convention; keep them aligned.
  window_pos: 10
  query_nums: 3

training:
  # Training target; `eval.target` in this file uses the same value.
  target: homefeed
  optim_args:
    # Keep the `1.0e-5` spelling (decimal point + signed exponent): YAML 1.1
    # loaders such as PyYAML read forms like `1e-5` as a string, not a float.
    learning_rate: 1.0e-5
    weight_decay: 0.01
    # Parameter-name prefixes that get a different learning rate.
    # NOTE(review): presumably lr = learning_rate * lr_mult_rate for these
    # prefixes — confirm in the optimizer setup.
    lr_mult_prefix:
      - 'latent_proj_encoder'
      - 'latent_proj_decoder'
    lr_mult_rate: 5.0

  scheduler_args:
    type: cosine
    warmup_steps: 1000

  eval_step: 5000
  clip_grad_norm: 1.0
  accumulation_steps: 2
  freeze_item: false
  total_step: 100000
  strategy: deepspeed
  stage: 3  # NOTE(review): presumably the DeepSpeed ZeRO stage — confirm
  # NOTE(review): `model.gradient_checkpointing` is also set in this file;
  # which of the two the code reads is not visible here — keep them in sync.
  gradient_checkpointing: true
  # Checkpoint loaded before training starts. The directory name here has no
  # `_0608` fragment, unlike `saver.saved_model_name`, so this points at a
  # different experiment — presumably intentional (warm start); confirm.
  load_pretrained_model: expr/win10_query3_train_v3_qwen2_1.5b_homefeed/checkpoint-100001/

saver:
  # Checkpoints and logs share the same root directory.
  checkpoint_dir: expr
  log_dir: expr
  # Experiment name. `eval.model_path` in this file is
  # `expr/<saved_model_name>/checkpoint-50001/`, so outputs presumably land
  # under `<checkpoint_dir>/<saved_model_name>/` — confirm in the saver code.
  saved_model_name: win10_query3_train_v3_qwen2_1.5b_0608_homefeed

eval:
  # Evaluation target. Written as a plain scalar to match `training.target`;
  # the parsed value is the same string as the previously quoted form.
  target: homefeed
  # target: ads
  # Checkpoint to evaluate; the directory name matches `saver.saved_model_name`.
  model_path: expr/win10_query3_train_v3_qwen2_1.5b_0608_homefeed/checkpoint-50001/
  # Note-side evaluation inputs.
  note_eval:
    lastn_note_info_path: eval/lastn_noteid_info.json
    basepool_note_info_path: eval/base_target_noteid_info.json

  # User-side evaluation inputs.
  user_eval:
    user_lastn_path: eval/user_lastn.json
    max_lastn_len: 72  # NOTE(review): presumably caps the user last-N length at eval time — confirm
    user_eval_batch_size: 16

# Global run settings (top-level keys, not nested under any section).
show_progress: true
# NOTE(review): looks like a mixed-precision mode string (bfloat16) — confirm
# which trainer consumes it.
precision: bf16-mixed
loss: nce
# NOTE(review): threshold used with the NCE loss; exact semantics are not
# visible in this file — confirm in the loss implementation.
nce_thres: 0.98

# When true, presumably skips training and runs validation only — confirm.
val_only: false
online_compute_embedding: false
seed: 1112
update_interval: 1
# NOTE(review): presumably toggles deterministic execution alongside `seed` — confirm.
reproducibility: false
