# Optimizer and learning-rate-schedule settings.
# Target class for this configuration.
_target_: verl.workers.config.FSDPOptimizerConfig

# Learning rate.
# Written with an explicit decimal point in the mantissa: YAML 1.1 loaders
# (e.g. PyYAML) only resolve exponent notation as a float when the dot is
# present, and would otherwise load a bare `1e-3` as the string "1e-3".
lr: 1.0e-3

# LR warmup steps ratio.
# NOTE(review): presumably the fraction of total_training_steps spent in
# warmup - confirm against the consumer of this config.
lr_warmup_steps_ratio: 0.0

# Total training steps.
# NOTE(review): -1 looks like a "not set here" sentinel that is expected to
# be overridden at runtime - confirm.
total_training_steps: -1

# Weight decay coefficient passed to the optimizer.
weight_decay: 0.01

# LR warmup steps (absolute count, as opposed to lr_warmup_steps_ratio).
# NOTE(review): -1 looks like a "not set" sentinel; which of lr_warmup_steps
# and lr_warmup_steps_ratio takes precedence is not visible here - confirm.
lr_warmup_steps: -1

# Betas for Adam optimizer, as a two-element list.
betas: [0.9, 0.999]

# Clip gradient.
# NOTE(review): presumably the maximum gradient norm - confirm.
clip_grad: 1.0

# Minimum LR ratio for cosine schedule.
# NOTE(review): presumably relative to `lr` - confirm.
min_lr_ratio: 0.0

# Number of cosine cycles in LR schedule.
num_cycles: 0.5

# LR warmup style: "constant" or "cosine".
# NOTE(review): min_lr_ratio and num_cycles are described as cosine-schedule
# options, so they presumably only take effect with "cosine" - confirm.
warmup_style: constant

