# Reinforcement Learning Configuration

# Model settings
base_model_name: 'OpenDFM/ChemDFM-v1.5-8B'
model_mol_type: 'SMILES'
use_alpaca: false
use_chemllm: true
# Accepted values for rl_task_type: "cap2mol" or "mol2cap".
rl_task_type: 'cap2mol'
rl_task: 'molecule_chemdfm'
# Data settings
# Datasets to use, each with a numeric limit (presumably a cap on the number
# of examples taken from that dataset — confirm against the data loader).
target_datasets:
  - 'chebi'
dataset_limits:
  chebi: 20000
# dataset_processing:
#   "./hf_local_data/caption_chemdfm/ChemDFM-v1.5-8B/$TIMESTAMP": "swap_gen_mol"
# Tasks to train on
tasks:
  - 'caption_chemdfm'

# Training parameters
epochs: 1
batch_size: 4
num_generations: 12
gradient_accumulation_steps: 8
# Keep the decimal point in the mantissa: YAML 1.1 loaders such as PyYAML
# resolve a bare `2e-5` to the string "2e-5" instead of a float.
learning_rate: 2.0e-5
weight_decay: 0.001
max_grad_norm: 0.5
warmup_ratio: 0.03
lr_scheduler_type: "cosine"
logging_steps: 10
save_steps: 100
# Canonical lowercase boolean, matching the other flags in this file.
add_gt: false

# LoRA settings
lora_r: 32
lora_alpha: 32
lora_dropout: 0.05
# Names of the modules targeted by LoRA.
lora_target_modules:
  - 'q_proj'
  - 'v_proj'

# Sampling settings (presumably applied during generation — confirm against the consumer)
temperature: 0.9
top_k: 40
top_p: 0.9

# Paths
exp_save_dir: './molrl/'
lora_dir: './mollora/'
# Explicitly null in this config.
cache_dir: null
load_directory: null
judge_dir: null

# vLLM
use_vllm: false
vllm_temp_directory: './tmp'

# Experiment settings
# Name identifying this run (presumably used to label outputs — confirm against the consumer).
exp_name: 'molgen_rl'