LICENSE
README.md
pyproject.toml
setup.py
./scripts/__init__.py
./scripts/bon1_verify.py
./scripts/bon2_compute_logp_for_responses.py
./scripts/bon3_print.py
./scripts/comvert_kto_dataset.py
./scripts/construct_validation.py
./scripts/contrastive_decoding.py
./scripts/convert_lf_to_rolecontent.py
./scripts/converter_hf_to_mcore.py
./scripts/diagnose.py
./scripts/dsfilter_1_out_length.py
./scripts/dsfilter_2_scored.py
./scripts/dsfilter_3_save_datasetdict.py
./scripts/dsfilter_41_single_prepare_for_pairdpo_trainset.py
./scripts/dsfilter_4_prepare_for_pairdpo_trainset.py
./scripts/dsfilter_5_prepare_for_kto_trainset.py
./scripts/dsfilter_6_prepare_for_verl_dataset.py
./scripts/filter_rl_dataset.py
./scripts/gen_vllm.py
./scripts/init_random_model.py
./scripts/legacy_model_merger.py
./scripts/merge_file.py
./scripts/out_code_task.py
./scripts/prepare_bob_testset.py
./scripts/shuffle_dataset.py
./scripts/split_rm.py
./scripts/test_vr.py
./scripts/utils.py
./scripts/valid_everyce.py
./tests/__init__.py
./tests/test_base_config_on_cpu.py
./tests/test_protocol_on_cpu.py
./tests/interactions/__init__.py
./tests/interactions/test_gsm8k_interaction.py
./tests/interactions/test_interaction_registry.py
./tests/single_controller/__init__.py
./tests/single_controller/test_auto_padding_on_cpu.py
./tests/single_controller/test_colocated_workers.py
./tests/single_controller/test_colocated_workers_fused.py
./tests/single_controller/test_data_transfer.py
./tests/single_controller/test_decorator_on_cpu.py
./tests/single_controller/test_driverfunc_to_worker.py
./tests/single_controller/test_fused_workers_on_cpu.py
./tests/single_controller/test_high_level_scheduling_api.py
./tests/single_controller/test_ray_collectives.py
./tests/single_controller/test_ray_local_envs_on_cpu.py
./tests/single_controller/test_ray_utils_on_cpu.py
./tests/single_controller/test_rvdz.py
./tests/single_controller/test_worker_group_basics.py
./tests/single_controller/test_worker_group_torch.py
./tests/special_e2e/__init__.py
./tests/special_e2e/check_custom_rwd_fn.py
./tests/special_e2e/check_results.py
./tests/special_e2e/envs/__init__.py
./tests/special_e2e/envs/digit_completion/__init__.py
./tests/special_e2e/envs/digit_completion/task.py
./tests/special_e2e/envs/digit_completion/tokenizer.py
./tests/trainer/__init__.py
./tests/trainer/config/__init__.py
./tests/trainer/config/test_algo_config_on_cpu.py
./tests/trainer/config/test_legacy_config_on_cpu.py
./tests/trainer/ppo/__init__.py
./tests/trainer/ppo/test_core_algos_on_cpu.py
./tests/trainer/ppo/test_metric_utils_on_cpu.py
./verl/__init__.py
./verl/base_config.py
./verl/protocol.py
./verl/py.typed
./verl/experimental/__init__.py
./verl/experimental/agent_loop/__init__.py
./verl/experimental/agent_loop/agent_loop.py
./verl/experimental/agent_loop/single_turn_agent_loop.py
./verl/experimental/agent_loop/tool_agent_loop.py
./verl/interactions/__init__.py
./verl/interactions/base.py
./verl/interactions/gsm8k_interaction.py
./verl/interactions/utils/__init__.py
./verl/interactions/utils/interaction_registry.py
./verl/model_merger/__init__.py
./verl/model_merger/__main__.py
./verl/model_merger/base_model_merger.py
./verl/model_merger/fsdp_model_merger.py
./verl/model_merger/megatron_model_merger.py
./verl/models/__init__.py
./verl/models/registry.py
./verl/models/weight_loader_registry.py
./verl/models/llama/__init__.py
./verl/models/llama/megatron/__init__.py
./verl/models/llama/megatron/modeling_llama_megatron.py
./verl/models/llama/megatron/checkpoint_utils/__init__.py
./verl/models/llama/megatron/checkpoint_utils/llama_loader.py
./verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
./verl/models/llama/megatron/checkpoint_utils/llama_saver.py
./verl/models/llama/megatron/layers/__init__.py
./verl/models/llama/megatron/layers/parallel_attention.py
./verl/models/llama/megatron/layers/parallel_decoder.py
./verl/models/llama/megatron/layers/parallel_linear.py
./verl/models/llama/megatron/layers/parallel_mlp.py
./verl/models/llama/megatron/layers/parallel_rmsnorm.py
./verl/models/mcore/__init__.py
./verl/models/mcore/config_converter.py
./verl/models/mcore/loader.py
./verl/models/mcore/mbridge.py
./verl/models/mcore/model_forward.py
./verl/models/mcore/model_forward_fused.py
./verl/models/mcore/model_initializer.py
./verl/models/mcore/patch_v012.py
./verl/models/mcore/registry.py
./verl/models/mcore/saver.py
./verl/models/mcore/util.py
./verl/models/mcore/weight_converter.py
./verl/models/mcore/qwen2_5_vl/__init__.py
./verl/models/mcore/qwen2_5_vl/attention.py
./verl/models/mcore/qwen2_5_vl/model.py
./verl/models/mcore/qwen2_5_vl/rope_utils.py
./verl/models/mcore/qwen2_5_vl/vision_config.py
./verl/models/mcore/qwen2_5_vl/vision_model.py
./verl/models/mcore/qwen2_5_vl/vision_transformer_block.py
./verl/models/qwen2/__init__.py
./verl/models/qwen2/megatron/__init__.py
./verl/models/qwen2/megatron/modeling_qwen2_megatron.py
./verl/models/qwen2/megatron/checkpoint_utils/__init__.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
./verl/models/qwen2/megatron/layers/__init__.py
./verl/models/qwen2/megatron/layers/parallel_attention.py
./verl/models/qwen2/megatron/layers/parallel_decoder.py
./verl/models/qwen2/megatron/layers/parallel_linear.py
./verl/models/qwen2/megatron/layers/parallel_mlp.py
./verl/models/qwen2/megatron/layers/parallel_rmsnorm.py
./verl/models/transformers/__init__.py
./verl/models/transformers/dense_common.py
./verl/models/transformers/kimi_vl.py
./verl/models/transformers/llama.py
./verl/models/transformers/monkey_patch.py
./verl/models/transformers/npu_patch.py
./verl/models/transformers/qwen2.py
./verl/models/transformers/qwen2_5_vl.py
./verl/models/transformers/qwen2_vl.py
./verl/single_controller/__init__.py
./verl/single_controller/base/__init__.py
./verl/single_controller/base/decorator.py
./verl/single_controller/base/worker.py
./verl/single_controller/base/worker_group.py
./verl/single_controller/base/megatron/__init__.py
./verl/single_controller/base/megatron/worker.py
./verl/single_controller/base/megatron/worker_group.py
./verl/single_controller/base/register_center/__init__.py
./verl/single_controller/base/register_center/ray.py
./verl/single_controller/ray/__init__.py
./verl/single_controller/ray/base.py
./verl/single_controller/ray/megatron.py
./verl/third_party/__init__.py
./verl/third_party/sglang/__init__.py
./verl/third_party/sglang/parallel_state.py
./verl/third_party/vllm/__init__.py
./verl/tools/__init__.py
./verl/tools/base_tool.py
./verl/tools/geo3k_tool.py
./verl/tools/gsm8k_tool.py
./verl/tools/mcp_base_tool.py
./verl/tools/mcp_search_tool.py
./verl/tools/sandbox_fusion_tools.py
./verl/tools/schemas.py
./verl/tools/search_tool.py
./verl/tools/utils/__init__.py
./verl/tools/utils/search_r1_like_utils.py
./verl/tools/utils/tool_registry.py
./verl/trainer/__init__.py
./verl/trainer/constants_ppo.py
./verl/trainer/fsdp_sft_trainer.py
./verl/trainer/main_eval.py
./verl/trainer/main_generation.py
./verl/trainer/main_ppo.py
./verl/trainer/config/__init__.py
./verl/trainer/config/algorithm.py
./verl/trainer/config/evaluation.yaml
./verl/trainer/config/generation.yaml
./verl/trainer/config/ppo_megatron_trainer.yaml
./verl/trainer/config/ppo_trainer.yaml
./verl/trainer/config/sft_trainer.yaml
./verl/trainer/config/actor/actor.yaml
./verl/trainer/config/actor/dp_actor.yaml
./verl/trainer/config/actor/megatron_actor.yaml
./verl/trainer/config/critic/critic.yaml
./verl/trainer/config/critic/dp_critic.yaml
./verl/trainer/config/critic/megatron_critic.yaml
./verl/trainer/config/data/legacy_data.yaml
./verl/trainer/config/npu_profile/npu_profile.yaml
./verl/trainer/config/ref/dp_ref.yaml
./verl/trainer/config/ref/megatron_ref.yaml
./verl/trainer/config/ref/ref.yaml
./verl/trainer/config/reward_model/dp_reward_model.yaml
./verl/trainer/config/reward_model/megatron_reward_model.yaml
./verl/trainer/config/reward_model/reward_model.yaml
./verl/trainer/config/rollout/rollout.yaml
./verl/trainer/ppo/__init__.py
./verl/trainer/ppo/core_algos.py
./verl/trainer/ppo/metric_utils.py
./verl/trainer/ppo/ray_trainer.py
./verl/trainer/ppo/reward.py
./verl/utils/__init__.py
./verl/utils/activation_offload.py
./verl/utils/config.py
./verl/utils/device.py
./verl/utils/distributed.py
./verl/utils/flops_counter.py
./verl/utils/fs.py
./verl/utils/fsdp_utils.py
./verl/utils/hdfs_io.py
./verl/utils/import_utils.py
./verl/utils/logging_utils.py
./verl/utils/megatron_utils.py
./verl/utils/memory_buffer.py
./verl/utils/model.py
./verl/utils/net_utils.py
./verl/utils/py_functional.py
./verl/utils/ray_utils.py
./verl/utils/rollout_trace.py
./verl/utils/seqlen_balancing.py
./verl/utils/tokenizer.py
./verl/utils/torch_dtypes.py
./verl/utils/torch_functional.py
./verl/utils/tracking.py
./verl/utils/ulysses.py
./verl/utils/vllm_utils.py
./verl/utils/checkpoint/__init__.py
./verl/utils/checkpoint/checkpoint_manager.py
./verl/utils/checkpoint/fsdp_checkpoint_manager.py
./verl/utils/checkpoint/megatron_checkpoint_manager.py
./verl/utils/dataset/__init__.py
./verl/utils/dataset/multiturn_sft_dataset.py
./verl/utils/dataset/my_rl_dataset.py
./verl/utils/dataset/rl_dataset.py
./verl/utils/dataset/rm_dataset.py
./verl/utils/dataset/sft_dataset.py
./verl/utils/dataset/vision_utils.py
./verl/utils/debug/__init__.py
./verl/utils/debug/performance.py
./verl/utils/debug/trajectory_tracker.py
./verl/utils/experimental/__init__.py
./verl/utils/experimental/torch_functional.py
./verl/utils/kernel/__init__.py
./verl/utils/kernel/kernels.py
./verl/utils/kernel/linear_cross_entropy.py
./verl/utils/logger/__init__.py
./verl/utils/logger/aggregate_logger.py
./verl/utils/megatron/__init__.py
./verl/utils/megatron/dist_checkpointing.py
./verl/utils/megatron/memory.py
./verl/utils/megatron/optimizer.py
./verl/utils/megatron/pipeline_parallel.py
./verl/utils/megatron/sequence_parallel.py
./verl/utils/megatron/tensor_parallel.py
./verl/utils/metric/__init__.py
./verl/utils/metric/utils.py
./verl/utils/profiler/__init__.py
./verl/utils/profiler/config.py
./verl/utils/profiler/empty_annotations.py
./verl/utils/profiler/mstx_profile.py
./verl/utils/profiler/nvtx_profile.py
./verl/utils/profiler/performance.py
./verl/utils/profiler/profile.py
./verl/utils/rendezvous/__init__.py
./verl/utils/rendezvous/ray_backend.py
./verl/utils/reward_score/__init__.py
./verl/utils/reward_score/geo3k.py
./verl/utils/reward_score/gsm8k.py
./verl/utils/reward_score/math.py
./verl/utils/reward_score/math_batch.py
./verl/utils/reward_score/math_dapo.py
./verl/utils/reward_score/math_verify.py
./verl/utils/reward_score/search_r1_like_qa_em.py
./verl/utils/reward_score/prime_code/__init__.py
./verl/utils/reward_score/prime_code/testing_util.py
./verl/utils/reward_score/prime_code/utils.py
./verl/utils/reward_score/prime_math/__init__.py
./verl/utils/reward_score/prime_math/grader.py
./verl/utils/reward_score/prime_math/math_normalize.py
./verl/utils/reward_score/sandbox_fusion/__init__.py
./verl/utils/reward_score/sandbox_fusion/utils.py
./verl/version/version
./verl/workers/__init__.py
./verl/workers/fsdp_workers.py
./verl/workers/megatron_workers.py
./verl/workers/actor/__init__.py
./verl/workers/actor/base.py
./verl/workers/actor/dp_actor.py
./verl/workers/actor/megatron_actor.py
./verl/workers/critic/__init__.py
./verl/workers/critic/base.py
./verl/workers/critic/dp_critic.py
./verl/workers/critic/megatron_critic.py
./verl/workers/reward_manager/__init__.py
./verl/workers/reward_manager/batch.py
./verl/workers/reward_manager/dapo.py
./verl/workers/reward_manager/naive.py
./verl/workers/reward_manager/prime.py
./verl/workers/reward_manager/prime_backup.py
./verl/workers/reward_manager/registry.py
./verl/workers/reward_model/__init__.py
./verl/workers/reward_model/base.py
./verl/workers/reward_model/megatron/__init__.py
./verl/workers/reward_model/megatron/reward_model.py
./verl/workers/rollout/__init__.py
./verl/workers/rollout/async_server.py
./verl/workers/rollout/base.py
./verl/workers/rollout/chat_scheduler.py
./verl/workers/rollout/hf_rollout.py
./verl/workers/rollout/schemas.py
./verl/workers/rollout/tokenizer.py
./verl/workers/rollout/naive/__init__.py
./verl/workers/rollout/naive/naive_rollout.py
./verl/workers/rollout/sglang_rollout/__init__.py
./verl/workers/rollout/sglang_rollout/async_sglang_server.py
./verl/workers/rollout/sglang_rollout/sglang_rollout.py
./verl/workers/rollout/sglang_rollout/utils.py
./verl/workers/rollout/vllm_rollout/__init__.py
./verl/workers/rollout/vllm_rollout/vllm_async_server.py
./verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py
./verl/workers/sharding_manager/__init__.py
./verl/workers/sharding_manager/base.py
./verl/workers/sharding_manager/fsdp_sglang.py
./verl/workers/sharding_manager/fsdp_ulysses.py
./verl/workers/sharding_manager/fsdp_vllm.py
./verl/workers/sharding_manager/megatron_sglang.py
./verl/workers/sharding_manager/megatron_vllm.py
scripts/__init__.py
scripts/bon1_verify.py
scripts/bon2_compute_logp_for_responses.py
scripts/bon3_print.py
scripts/comvert_kto_dataset.py
scripts/construct_validation.py
scripts/contrastive_decoding.py
scripts/convert_lf_to_rolecontent.py
scripts/converter_hf_to_mcore.py
scripts/diagnose.py
scripts/dsfilter_1_out_length.py
scripts/dsfilter_2_scored.py
scripts/dsfilter_3_save_datasetdict.py
scripts/dsfilter_41_single_prepare_for_pairdpo_trainset.py
scripts/dsfilter_4_prepare_for_pairdpo_trainset.py
scripts/dsfilter_5_prepare_for_kto_trainset.py
scripts/dsfilter_6_prepare_for_verl_dataset.py
scripts/filter_rl_dataset.py
scripts/gen_vllm.py
scripts/init_random_model.py
scripts/legacy_model_merger.py
scripts/merge_file.py
scripts/out_code_task.py
scripts/prepare_bob_testset.py
scripts/shuffle_dataset.py
scripts/split_rm.py
scripts/test_vr.py
scripts/utils.py
scripts/valid_everyce.py
tests/__init__.py
tests/test_base_config_on_cpu.py
tests/test_protocol_on_cpu.py
tests/interactions/__init__.py
tests/interactions/test_gsm8k_interaction.py
tests/interactions/test_interaction_registry.py
tests/single_controller/__init__.py
tests/single_controller/test_auto_padding_on_cpu.py
tests/single_controller/test_colocated_workers.py
tests/single_controller/test_colocated_workers_fused.py
tests/single_controller/test_data_transfer.py
tests/single_controller/test_decorator_on_cpu.py
tests/single_controller/test_driverfunc_to_worker.py
tests/single_controller/test_fused_workers_on_cpu.py
tests/single_controller/test_high_level_scheduling_api.py
tests/single_controller/test_ray_collectives.py
tests/single_controller/test_ray_local_envs_on_cpu.py
tests/single_controller/test_ray_utils_on_cpu.py
tests/single_controller/test_rvdz.py
tests/single_controller/test_worker_group_basics.py
tests/single_controller/test_worker_group_torch.py
tests/special_e2e/__init__.py
tests/special_e2e/check_custom_rwd_fn.py
tests/special_e2e/check_results.py
tests/special_e2e/envs/__init__.py
tests/special_e2e/envs/digit_completion/__init__.py
tests/special_e2e/envs/digit_completion/task.py
tests/special_e2e/envs/digit_completion/tokenizer.py
tests/trainer/__init__.py
tests/trainer/config/__init__.py
tests/trainer/config/test_algo_config_on_cpu.py
tests/trainer/config/test_legacy_config_on_cpu.py
tests/trainer/ppo/__init__.py
tests/trainer/ppo/test_core_algos_on_cpu.py
tests/trainer/ppo/test_metric_utils_on_cpu.py
verl/__init__.py
verl/base_config.py
verl/protocol.py
verl/py.typed
verl.egg-info/PKG-INFO
verl.egg-info/SOURCES.txt
verl.egg-info/dependency_links.txt
verl.egg-info/requires.txt
verl.egg-info/top_level.txt
verl/experimental/__init__.py
verl/experimental/agent_loop/__init__.py
verl/experimental/agent_loop/agent_loop.py
verl/experimental/agent_loop/single_turn_agent_loop.py
verl/experimental/agent_loop/tool_agent_loop.py
verl/interactions/__init__.py
verl/interactions/base.py
verl/interactions/gsm8k_interaction.py
verl/interactions/utils/__init__.py
verl/interactions/utils/interaction_registry.py
verl/model_merger/__init__.py
verl/model_merger/__main__.py
verl/model_merger/base_model_merger.py
verl/model_merger/fsdp_model_merger.py
verl/model_merger/megatron_model_merger.py
verl/models/__init__.py
verl/models/registry.py
verl/models/weight_loader_registry.py
verl/models/llama/__init__.py
verl/models/llama/megatron/__init__.py
verl/models/llama/megatron/modeling_llama_megatron.py
verl/models/llama/megatron/checkpoint_utils/__init__.py
verl/models/llama/megatron/checkpoint_utils/llama_loader.py
verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
verl/models/llama/megatron/checkpoint_utils/llama_saver.py
verl/models/llama/megatron/layers/__init__.py
verl/models/llama/megatron/layers/parallel_attention.py
verl/models/llama/megatron/layers/parallel_decoder.py
verl/models/llama/megatron/layers/parallel_linear.py
verl/models/llama/megatron/layers/parallel_mlp.py
verl/models/llama/megatron/layers/parallel_rmsnorm.py
verl/models/mcore/__init__.py
verl/models/mcore/config_converter.py
verl/models/mcore/loader.py
verl/models/mcore/mbridge.py
verl/models/mcore/model_forward.py
verl/models/mcore/model_forward_fused.py
verl/models/mcore/model_initializer.py
verl/models/mcore/patch_v012.py
verl/models/mcore/registry.py
verl/models/mcore/saver.py
verl/models/mcore/util.py
verl/models/mcore/weight_converter.py
verl/models/mcore/qwen2_5_vl/__init__.py
verl/models/mcore/qwen2_5_vl/attention.py
verl/models/mcore/qwen2_5_vl/model.py
verl/models/mcore/qwen2_5_vl/rope_utils.py
verl/models/mcore/qwen2_5_vl/vision_config.py
verl/models/mcore/qwen2_5_vl/vision_model.py
verl/models/mcore/qwen2_5_vl/vision_transformer_block.py
verl/models/qwen2/__init__.py
verl/models/qwen2/megatron/__init__.py
verl/models/qwen2/megatron/modeling_qwen2_megatron.py
verl/models/qwen2/megatron/checkpoint_utils/__init__.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
verl/models/qwen2/megatron/layers/__init__.py
verl/models/qwen2/megatron/layers/parallel_attention.py
verl/models/qwen2/megatron/layers/parallel_decoder.py
verl/models/qwen2/megatron/layers/parallel_linear.py
verl/models/qwen2/megatron/layers/parallel_mlp.py
verl/models/qwen2/megatron/layers/parallel_rmsnorm.py
verl/models/transformers/__init__.py
verl/models/transformers/dense_common.py
verl/models/transformers/kimi_vl.py
verl/models/transformers/llama.py
verl/models/transformers/monkey_patch.py
verl/models/transformers/npu_patch.py
verl/models/transformers/qwen2.py
verl/models/transformers/qwen2_5_vl.py
verl/models/transformers/qwen2_vl.py
verl/single_controller/__init__.py
verl/single_controller/base/__init__.py
verl/single_controller/base/decorator.py
verl/single_controller/base/worker.py
verl/single_controller/base/worker_group.py
verl/single_controller/base/megatron/__init__.py
verl/single_controller/base/megatron/worker.py
verl/single_controller/base/megatron/worker_group.py
verl/single_controller/base/register_center/__init__.py
verl/single_controller/base/register_center/ray.py
verl/single_controller/ray/__init__.py
verl/single_controller/ray/base.py
verl/single_controller/ray/megatron.py
verl/third_party/__init__.py
verl/third_party/sglang/__init__.py
verl/third_party/sglang/parallel_state.py
verl/third_party/vllm/__init__.py
verl/tools/__init__.py
verl/tools/base_tool.py
verl/tools/geo3k_tool.py
verl/tools/gsm8k_tool.py
verl/tools/mcp_base_tool.py
verl/tools/mcp_search_tool.py
verl/tools/sandbox_fusion_tools.py
verl/tools/schemas.py
verl/tools/search_tool.py
verl/tools/utils/__init__.py
verl/tools/utils/search_r1_like_utils.py
verl/tools/utils/tool_registry.py
verl/trainer/__init__.py
verl/trainer/constants_ppo.py
verl/trainer/fsdp_sft_trainer.py
verl/trainer/main_eval.py
verl/trainer/main_generation.py
verl/trainer/main_ppo.py
verl/trainer/config/__init__.py
verl/trainer/config/algorithm.py
verl/trainer/config/evaluation.yaml
verl/trainer/config/generation.yaml
verl/trainer/config/ppo_megatron_trainer.yaml
verl/trainer/config/ppo_trainer.yaml
verl/trainer/config/sft_trainer.yaml
verl/trainer/config/actor/actor.yaml
verl/trainer/config/actor/dp_actor.yaml
verl/trainer/config/actor/megatron_actor.yaml
verl/trainer/config/critic/critic.yaml
verl/trainer/config/critic/dp_critic.yaml
verl/trainer/config/critic/megatron_critic.yaml
verl/trainer/config/data/legacy_data.yaml
verl/trainer/config/npu_profile/npu_profile.yaml
verl/trainer/config/ref/dp_ref.yaml
verl/trainer/config/ref/megatron_ref.yaml
verl/trainer/config/ref/ref.yaml
verl/trainer/config/reward_model/dp_reward_model.yaml
verl/trainer/config/reward_model/megatron_reward_model.yaml
verl/trainer/config/reward_model/reward_model.yaml
verl/trainer/config/rollout/rollout.yaml
verl/trainer/ppo/__init__.py
verl/trainer/ppo/core_algos.py
verl/trainer/ppo/metric_utils.py
verl/trainer/ppo/ray_trainer.py
verl/trainer/ppo/reward.py
verl/utils/__init__.py
verl/utils/activation_offload.py
verl/utils/config.py
verl/utils/device.py
verl/utils/distributed.py
verl/utils/flops_counter.py
verl/utils/fs.py
verl/utils/fsdp_utils.py
verl/utils/hdfs_io.py
verl/utils/import_utils.py
verl/utils/logging_utils.py
verl/utils/megatron_utils.py
verl/utils/memory_buffer.py
verl/utils/model.py
verl/utils/net_utils.py
verl/utils/py_functional.py
verl/utils/ray_utils.py
verl/utils/rollout_trace.py
verl/utils/seqlen_balancing.py
verl/utils/tokenizer.py
verl/utils/torch_dtypes.py
verl/utils/torch_functional.py
verl/utils/tracking.py
verl/utils/ulysses.py
verl/utils/vllm_utils.py
verl/utils/checkpoint/__init__.py
verl/utils/checkpoint/checkpoint_manager.py
verl/utils/checkpoint/fsdp_checkpoint_manager.py
verl/utils/checkpoint/megatron_checkpoint_manager.py
verl/utils/dataset/__init__.py
verl/utils/dataset/multiturn_sft_dataset.py
verl/utils/dataset/my_rl_dataset.py
verl/utils/dataset/rl_dataset.py
verl/utils/dataset/rm_dataset.py
verl/utils/dataset/sft_dataset.py
verl/utils/dataset/vision_utils.py
verl/utils/debug/__init__.py
verl/utils/debug/performance.py
verl/utils/debug/trajectory_tracker.py
verl/utils/experimental/__init__.py
verl/utils/experimental/torch_functional.py
verl/utils/kernel/__init__.py
verl/utils/kernel/kernels.py
verl/utils/kernel/linear_cross_entropy.py
verl/utils/logger/__init__.py
verl/utils/logger/aggregate_logger.py
verl/utils/megatron/__init__.py
verl/utils/megatron/dist_checkpointing.py
verl/utils/megatron/memory.py
verl/utils/megatron/optimizer.py
verl/utils/megatron/pipeline_parallel.py
verl/utils/megatron/sequence_parallel.py
verl/utils/megatron/tensor_parallel.py
verl/utils/metric/__init__.py
verl/utils/metric/utils.py
verl/utils/profiler/__init__.py
verl/utils/profiler/config.py
verl/utils/profiler/empty_annotations.py
verl/utils/profiler/mstx_profile.py
verl/utils/profiler/nvtx_profile.py
verl/utils/profiler/performance.py
verl/utils/profiler/profile.py
verl/utils/rendezvous/__init__.py
verl/utils/rendezvous/ray_backend.py
verl/utils/reward_score/__init__.py
verl/utils/reward_score/geo3k.py
verl/utils/reward_score/gsm8k.py
verl/utils/reward_score/math.py
verl/utils/reward_score/math_batch.py
verl/utils/reward_score/math_dapo.py
verl/utils/reward_score/math_verify.py
verl/utils/reward_score/search_r1_like_qa_em.py
verl/utils/reward_score/prime_code/__init__.py
verl/utils/reward_score/prime_code/testing_util.py
verl/utils/reward_score/prime_code/utils.py
verl/utils/reward_score/prime_math/__init__.py
verl/utils/reward_score/prime_math/grader.py
verl/utils/reward_score/prime_math/math_normalize.py
verl/utils/reward_score/sandbox_fusion/__init__.py
verl/utils/reward_score/sandbox_fusion/utils.py
verl/version/version
verl/workers/__init__.py
verl/workers/fsdp_workers.py
verl/workers/megatron_workers.py
verl/workers/actor/__init__.py
verl/workers/actor/base.py
verl/workers/actor/dp_actor.py
verl/workers/actor/megatron_actor.py
verl/workers/critic/__init__.py
verl/workers/critic/base.py
verl/workers/critic/dp_critic.py
verl/workers/critic/megatron_critic.py
verl/workers/reward_manager/__init__.py
verl/workers/reward_manager/batch.py
verl/workers/reward_manager/dapo.py
verl/workers/reward_manager/naive.py
verl/workers/reward_manager/prime.py
verl/workers/reward_manager/prime_backup.py
verl/workers/reward_manager/registry.py
verl/workers/reward_model/__init__.py
verl/workers/reward_model/base.py
verl/workers/reward_model/megatron/__init__.py
verl/workers/reward_model/megatron/reward_model.py
verl/workers/rollout/__init__.py
verl/workers/rollout/async_server.py
verl/workers/rollout/base.py
verl/workers/rollout/chat_scheduler.py
verl/workers/rollout/hf_rollout.py
verl/workers/rollout/schemas.py
verl/workers/rollout/tokenizer.py
verl/workers/rollout/naive/__init__.py
verl/workers/rollout/naive/naive_rollout.py
verl/workers/rollout/sglang_rollout/__init__.py
verl/workers/rollout/sglang_rollout/async_sglang_server.py
verl/workers/rollout/sglang_rollout/sglang_rollout.py
verl/workers/rollout/sglang_rollout/utils.py
verl/workers/rollout/vllm_rollout/__init__.py
verl/workers/rollout/vllm_rollout/vllm_async_server.py
verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py
verl/workers/sharding_manager/__init__.py
verl/workers/sharding_manager/base.py
verl/workers/sharding_manager/fsdp_sglang.py
verl/workers/sharding_manager/fsdp_ulysses.py
verl/workers/sharding_manager/fsdp_vllm.py
verl/workers/sharding_manager/megatron_sglang.py
verl/workers/sharding_manager/megatron_vllm.py