# Rollout Correction: corrects off-policy distribution shifts
# See documentation: docs/algo/rollout_corr.md
# Presets are available as constructors, e.g.
# RolloutCorrectionConfig.decoupled_seq_is(), RolloutCorrectionConfig.pg_is(), etc.

# Aggregation level for IS (importance sampling) weights.
#   null       - IS disabled (default)
#   "token"    - per-token
#   "sequence" - per-sequence
rollout_is: null

# Upper threshold used to truncate IS weights (typical range: 2.0-5.0).
# Also acts as the rejection-sampling upper threshold whenever
# rollout_rs_threshold is left as null.
rollout_is_threshold: 2.0

# Aggregation level for RS (rejection sampling).
#   null        - RS disabled (default)
#   "token"
#   "sequence"
#   "geometric"
rollout_rs: null

# Upper threshold for rejection sampling.
# null (default) = fall back to the value of rollout_is_threshold.
rollout_rs_threshold: null

# Lower threshold for rejection sampling.
# null (default) = computed automatically as 1 / upper.
# NOTE(review): "upper" presumably means the effective rejection-sampling
# upper threshold (rollout_rs_threshold or its fallback) - confirm.
rollout_rs_threshold_lower: null

# Per-token veto threshold, intended to catch catastrophic outliers.
# null (default) = veto disabled.
rollout_token_veto_threshold: null

# Operating mode selector.
#   false (default) - Decoupled mode (3 policies)
#   true            - Bypass mode (2 policies)
bypass_mode: false

# Loss function selector.
#   false (default) - PPO with clipping
#   true            - policy gradient (no clipping)
use_policy_gradient: false

# Whether IS weights are batch-normalized.
#   false (default) - use raw weights
#   true            - normalize weights to mean = 1.0
rollout_is_batch_normalize: false