README.md
setup.py
./experimental/__init__.py
./experimental/agent_loop/__init__.py
./experimental/agent_loop/agent_loop.py
./experimental/agent_loop/single_turn_agent_loop.py
./experimental/agent_loop/tool_agent_loop.py
./experimental/agent_loop/tool_parser.py
./experimental/dataset/__init__.py
./experimental/dataset/sampler.py
./experimental/dynamic_dataset/__init__.py
./experimental/dynamic_dataset/dynamicgen_dataset.py
./model_merger/__init__.py
./model_merger/__main__.py
./model_merger/base_model_merger.py
./model_merger/fsdp_model_merger.py
./model_merger/megatron_model_merger.py
./models/__init__.py
./models/registry.py
./models/weight_loader_registry.py
./models/llama/__init__.py
./models/llama/megatron/__init__.py
./models/llama/megatron/modeling_llama_megatron.py
./models/llama/megatron/checkpoint_utils/__init__.py
./models/llama/megatron/checkpoint_utils/llama_loader.py
./models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
./models/llama/megatron/checkpoint_utils/llama_saver.py
./models/llama/megatron/layers/__init__.py
./models/llama/megatron/layers/parallel_attention.py
./models/llama/megatron/layers/parallel_decoder.py
./models/llama/megatron/layers/parallel_linear.py
./models/llama/megatron/layers/parallel_mlp.py
./models/llama/megatron/layers/parallel_rmsnorm.py
./models/mcore/__init__.py
./models/mcore/config_converter.py
./models/mcore/loader.py
./models/mcore/mbridge.py
./models/mcore/model_forward.py
./models/mcore/model_forward_fused.py
./models/mcore/model_initializer.py
./models/mcore/patch_v012.py
./models/mcore/registry.py
./models/mcore/saver.py
./models/mcore/util.py
./models/mcore/weight_converter.py
./models/mcore/qwen2_5_vl/__init__.py
./models/mcore/qwen2_5_vl/attention.py
./models/mcore/qwen2_5_vl/model.py
./models/mcore/qwen2_5_vl/rope_utils.py
./models/mcore/qwen2_5_vl/vision_config.py
./models/mcore/qwen2_5_vl/vision_model.py
./models/mcore/qwen2_5_vl/vision_transformer_block.py
./models/qwen2/__init__.py
./models/qwen2/megatron/__init__.py
./models/qwen2/megatron/modeling_qwen2_megatron.py
./models/qwen2/megatron/checkpoint_utils/__init__.py
./models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
./models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
./models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
./models/qwen2/megatron/layers/__init__.py
./models/qwen2/megatron/layers/parallel_attention.py
./models/qwen2/megatron/layers/parallel_decoder.py
./models/qwen2/megatron/layers/parallel_linear.py
./models/qwen2/megatron/layers/parallel_mlp.py
./models/qwen2/megatron/layers/parallel_rmsnorm.py
./models/transformers/__init__.py
./models/transformers/dense_common.py
./models/transformers/kimi_vl.py
./models/transformers/llama.py
./models/transformers/monkey_patch.py
./models/transformers/npu_patch.py
./models/transformers/qwen2.py
./models/transformers/qwen2_5_vl.py
./models/transformers/qwen2_vl.py
./single_controller/__init__.py
./single_controller/base/__init__.py
./single_controller/base/decorator.py
./single_controller/base/worker.py
./single_controller/base/worker_group.py
./single_controller/ray/__init__.py
./single_controller/ray/base.py
./third_party/__init__.py
./third_party/sglang/__init__.py
./third_party/sglang/parallel_state.py
./third_party/torch/__init__.py
./third_party/torch/distributed/__init__.py
./third_party/torch/distributed/_state_dict_utils.py
./third_party/torch/distributed/checkpoint/__init__.py
./third_party/torch/distributed/checkpoint/state_dict.py
./third_party/vllm/__init__.py
./tools/__init__.py
./tools/base_tool.py
./tools/geo3k_tool.py
./tools/gsm8k_tool.py
./tools/image_zoom_in_tool.py
./tools/mcp_base_tool.py
./tools/mcp_search_tool.py
./tools/sandbox_fusion_tools.py
./tools/schemas.py
./tools/search_tool.py
./tools/utils/__init__.py
./tools/utils/search_r1_like_utils.py
./tools/utils/tool_registry.py
./trainer/__init__.py
./trainer/constants_ppo.py
./trainer/fsdp_sft_trainer.py
./trainer/main_eval.py
./trainer/main_generation.py
./trainer/main_ppo.py
./trainer/sft_trainer.py
./trainer/config/__init__.py
./trainer/config/algorithm.py
./trainer/config/config.py
./trainer/ppo/__init__.py
./trainer/ppo/core_algos.py
./trainer/ppo/metric_utils.py
./trainer/ppo/ray_trainer.py
./trainer/ppo/reward.py
./trainer/ppo/utils.py
./utils/__init__.py
./utils/activation_offload.py
./utils/config.py
./utils/device.py
./utils/distributed.py
./utils/flops_counter.py
./utils/fs.py
./utils/fsdp_utils.py
./utils/hdfs_io.py
./utils/import_utils.py
./utils/logging_utils.py
./utils/megatron_utils.py
./utils/memory_buffer.py
./utils/memory_utils.py
./utils/model.py
./utils/net_utils.py
./utils/py_functional.py
./utils/ray_utils.py
./utils/rollout_skip.py
./utils/rollout_trace.py
./utils/seqlen_balancing.py
./utils/tensordict_utils.py
./utils/tokenizer.py
./utils/torch_dtypes.py
./utils/torch_functional.py
./utils/tracking.py
./utils/transformers_compat.py
./utils/ulysses.py
./utils/checkpoint/__init__.py
./utils/checkpoint/checkpoint_manager.py
./utils/checkpoint/fsdp_checkpoint_manager.py
./utils/checkpoint/megatron_checkpoint_manager.py
./utils/dataset/__init__.py
./utils/dataset/multiturn_sft_dataset.py
./utils/dataset/rl_dataset.py
./utils/dataset/rm_dataset.py
./utils/dataset/sft_dataset.py
./utils/dataset/vision_utils.py
./utils/debug/__init__.py
./utils/debug/metrics.py
./utils/debug/performance.py
./utils/debug/trajectory_tracker.py
./utils/experimental/__init__.py
./utils/experimental/torch_functional.py
./utils/kernel/__init__.py
./utils/kernel/kernels.py
./utils/kernel/linear_cross_entropy.py
./utils/logger/__init__.py
./utils/logger/aggregate_logger.py
./utils/megatron/__init__.py
./utils/megatron/dist_checkpointing.py
./utils/megatron/memory.py
./utils/megatron/optimizer.py
./utils/megatron/pipeline_parallel.py
./utils/megatron/sequence_parallel.py
./utils/megatron/tensor_parallel.py
./utils/metric/__init__.py
./utils/metric/utils.py
./utils/profiler/__init__.py
./utils/profiler/config.py
./utils/profiler/empty_annotations.py
./utils/profiler/mstx_profile.py
./utils/profiler/nvtx_profile.py
./utils/profiler/performance.py
./utils/profiler/profile.py
./utils/rendezvous/__init__.py
./utils/rendezvous/ray_backend.py
./utils/reward_score/__init__.py
./utils/reward_score/geo3k.py
./utils/reward_score/gsm8k.py
./utils/reward_score/math_batch.py
./utils/reward_score/math_dapo.py
./utils/reward_score/math_reward.py
./utils/reward_score/math_verify.py
./utils/reward_score/search_r1_like_qa_em.py
./utils/reward_score/prime_code/__init__.py
./utils/reward_score/prime_code/testing_util.py
./utils/reward_score/prime_code/utils.py
./utils/reward_score/prime_math/__init__.py
./utils/reward_score/prime_math/grader.py
./utils/reward_score/prime_math/math_normalize.py
./utils/reward_score/sandbox_fusion/__init__.py
./utils/reward_score/sandbox_fusion/utils.py
./utils/vllm/__init__.py
./utils/vllm/patch.py
./utils/vllm/utils.py
./workers/__init__.py
./workers/fsdp_workers.py
./workers/megatron_workers.py
./workers/actor/__init__.py
./workers/actor/base.py
./workers/actor/dp_actor.py
./workers/actor/megatron_actor.py
./workers/config/__init__.py
./workers/config/actor.py
./workers/config/critic.py
./workers/config/engine.py
./workers/config/model.py
./workers/config/optimizer.py
./workers/config/reward_model.py
./workers/config/rollout.py
./workers/critic/__init__.py
./workers/critic/base.py
./workers/critic/dp_critic.py
./workers/critic/megatron_critic.py
./workers/engine/__init__.py
./workers/engine/base.py
./workers/engine/utils.py
./workers/engine/fsdp/__init__.py
./workers/engine/fsdp/transformer_impl.py
./workers/engine/fsdp/utils.py
./workers/engine/megatron/__init__.py
./workers/engine/megatron/transformer_impl.py
./workers/engine/megatron/utils.py
./workers/reward_manager/__init__.py
./workers/reward_manager/abstract.py
./workers/reward_manager/batch.py
./workers/reward_manager/dapo.py
./workers/reward_manager/naive.py
./workers/reward_manager/prime.py
./workers/reward_manager/registry.py
./workers/reward_model/__init__.py
./workers/reward_model/base.py
./workers/reward_model/megatron/__init__.py
./workers/reward_model/megatron/reward_model.py
./workers/roles/__init__.py
./workers/roles/actor.py
./workers/roles/critic.py
./workers/roles/hybrid_engine.py
./workers/roles/reward_model.py
./workers/roles/reward_model_engine/__init__.py
./workers/roles/reward_model_engine/base.py
./workers/roles/reward_model_engine/sglang_reward_model.py
./workers/roles/utils/__init__.py
./workers/roles/utils/losses.py
./workers/rollout/__init__.py
./workers/rollout/async_server.py
./workers/rollout/base.py
./workers/rollout/hf_rollout.py
./workers/rollout/schemas.py
./workers/rollout/tokenizer.py
./workers/rollout/naive/__init__.py
./workers/rollout/naive/naive_rollout.py
./workers/rollout/sglang_rollout/__init__.py
./workers/rollout/sglang_rollout/async_sglang_server.py
./workers/rollout/sglang_rollout/http_server_engine.py
./workers/rollout/sglang_rollout/sglang_rollout.py
./workers/rollout/sglang_rollout/utils.py
./workers/rollout/vllm_rollout/__init__.py
./workers/rollout/vllm_rollout/vllm_async_server.py
./workers/rollout/vllm_rollout/vllm_rollout_spmd.py
./workers/sharding_manager/__init__.py
./workers/sharding_manager/base.py
./workers/sharding_manager/fsdp_sglang.py
./workers/sharding_manager/fsdp_ulysses.py
./workers/sharding_manager/fsdp_vllm.py
./workers/sharding_manager/megatron_sglang.py
./workers/sharding_manager/megatron_vllm.py
experimental/__init__.py
experimental/agent_loop/__init__.py
experimental/agent_loop/agent_loop.py
experimental/agent_loop/single_turn_agent_loop.py
experimental/agent_loop/tool_agent_loop.py
experimental/agent_loop/tool_parser.py
experimental/dataset/__init__.py
experimental/dataset/sampler.py
experimental/dynamic_dataset/__init__.py
experimental/dynamic_dataset/dynamicgen_dataset.py
model_merger/__init__.py
model_merger/__main__.py
model_merger/base_model_merger.py
model_merger/fsdp_model_merger.py
model_merger/megatron_model_merger.py
models/__init__.py
models/registry.py
models/weight_loader_registry.py
models/llama/__init__.py
models/llama/megatron/__init__.py
models/llama/megatron/modeling_llama_megatron.py
models/llama/megatron/checkpoint_utils/__init__.py
models/llama/megatron/checkpoint_utils/llama_loader.py
models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
models/llama/megatron/checkpoint_utils/llama_saver.py
models/llama/megatron/layers/__init__.py
models/llama/megatron/layers/parallel_attention.py
models/llama/megatron/layers/parallel_decoder.py
models/llama/megatron/layers/parallel_linear.py
models/llama/megatron/layers/parallel_mlp.py
models/llama/megatron/layers/parallel_rmsnorm.py
models/mcore/__init__.py
models/mcore/config_converter.py
models/mcore/loader.py
models/mcore/mbridge.py
models/mcore/model_forward.py
models/mcore/model_forward_fused.py
models/mcore/model_initializer.py
models/mcore/patch_v012.py
models/mcore/registry.py
models/mcore/saver.py
models/mcore/util.py
models/mcore/weight_converter.py
models/mcore/qwen2_5_vl/__init__.py
models/mcore/qwen2_5_vl/attention.py
models/mcore/qwen2_5_vl/model.py
models/mcore/qwen2_5_vl/rope_utils.py
models/mcore/qwen2_5_vl/vision_config.py
models/mcore/qwen2_5_vl/vision_model.py
models/mcore/qwen2_5_vl/vision_transformer_block.py
models/qwen2/__init__.py
models/qwen2/megatron/__init__.py
models/qwen2/megatron/modeling_qwen2_megatron.py
models/qwen2/megatron/checkpoint_utils/__init__.py
models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
models/qwen2/megatron/layers/__init__.py
models/qwen2/megatron/layers/parallel_attention.py
models/qwen2/megatron/layers/parallel_decoder.py
models/qwen2/megatron/layers/parallel_linear.py
models/qwen2/megatron/layers/parallel_mlp.py
models/qwen2/megatron/layers/parallel_rmsnorm.py
models/transformers/__init__.py
models/transformers/dense_common.py
models/transformers/kimi_vl.py
models/transformers/llama.py
models/transformers/monkey_patch.py
models/transformers/npu_patch.py
models/transformers/qwen2.py
models/transformers/qwen2_5_vl.py
models/transformers/qwen2_vl.py
single_controller/__init__.py
single_controller/base/__init__.py
single_controller/base/decorator.py
single_controller/base/worker.py
single_controller/base/worker_group.py
single_controller/ray/__init__.py
single_controller/ray/base.py
third_party/__init__.py
third_party/sglang/__init__.py
third_party/sglang/parallel_state.py
third_party/torch/__init__.py
third_party/torch/distributed/__init__.py
third_party/torch/distributed/_state_dict_utils.py
third_party/torch/distributed/checkpoint/__init__.py
third_party/torch/distributed/checkpoint/state_dict.py
third_party/vllm/__init__.py
tools/__init__.py
tools/base_tool.py
tools/geo3k_tool.py
tools/gsm8k_tool.py
tools/image_zoom_in_tool.py
tools/mcp_base_tool.py
tools/mcp_search_tool.py
tools/sandbox_fusion_tools.py
tools/schemas.py
tools/search_tool.py
tools/utils/__init__.py
tools/utils/search_r1_like_utils.py
tools/utils/tool_registry.py
trainer/__init__.py
trainer/constants_ppo.py
trainer/fsdp_sft_trainer.py
trainer/main_eval.py
trainer/main_generation.py
trainer/main_ppo.py
trainer/sft_trainer.py
trainer/config/__init__.py
trainer/config/algorithm.py
trainer/config/config.py
trainer/ppo/__init__.py
trainer/ppo/core_algos.py
trainer/ppo/metric_utils.py
trainer/ppo/ray_trainer.py
trainer/ppo/reward.py
trainer/ppo/utils.py
utils/__init__.py
utils/activation_offload.py
utils/config.py
utils/device.py
utils/distributed.py
utils/flops_counter.py
utils/fs.py
utils/fsdp_utils.py
utils/hdfs_io.py
utils/import_utils.py
utils/logging_utils.py
utils/megatron_utils.py
utils/memory_buffer.py
utils/memory_utils.py
utils/model.py
utils/net_utils.py
utils/py_functional.py
utils/ray_utils.py
utils/rollout_skip.py
utils/rollout_trace.py
utils/seqlen_balancing.py
utils/tensordict_utils.py
utils/tokenizer.py
utils/torch_dtypes.py
utils/torch_functional.py
utils/tracking.py
utils/transformers_compat.py
utils/ulysses.py
utils/checkpoint/__init__.py
utils/checkpoint/checkpoint_manager.py
utils/checkpoint/fsdp_checkpoint_manager.py
utils/checkpoint/megatron_checkpoint_manager.py
utils/dataset/__init__.py
utils/dataset/multiturn_sft_dataset.py
utils/dataset/rl_dataset.py
utils/dataset/rm_dataset.py
utils/dataset/sft_dataset.py
utils/dataset/vision_utils.py
utils/debug/__init__.py
utils/debug/metrics.py
utils/debug/performance.py
utils/debug/trajectory_tracker.py
utils/experimental/__init__.py
utils/experimental/torch_functional.py
utils/kernel/__init__.py
utils/kernel/kernels.py
utils/kernel/linear_cross_entropy.py
utils/logger/__init__.py
utils/logger/aggregate_logger.py
utils/megatron/__init__.py
utils/megatron/dist_checkpointing.py
utils/megatron/memory.py
utils/megatron/optimizer.py
utils/megatron/pipeline_parallel.py
utils/megatron/sequence_parallel.py
utils/megatron/tensor_parallel.py
utils/metric/__init__.py
utils/metric/utils.py
utils/profiler/__init__.py
utils/profiler/config.py
utils/profiler/empty_annotations.py
utils/profiler/mstx_profile.py
utils/profiler/nvtx_profile.py
utils/profiler/performance.py
utils/profiler/profile.py
utils/rendezvous/__init__.py
utils/rendezvous/ray_backend.py
utils/reward_score/__init__.py
utils/reward_score/geo3k.py
utils/reward_score/gsm8k.py
utils/reward_score/math_batch.py
utils/reward_score/math_dapo.py
utils/reward_score/math_reward.py
utils/reward_score/math_verify.py
utils/reward_score/search_r1_like_qa_em.py
utils/reward_score/prime_code/__init__.py
utils/reward_score/prime_code/testing_util.py
utils/reward_score/prime_code/utils.py
utils/reward_score/prime_math/__init__.py
utils/reward_score/prime_math/grader.py
utils/reward_score/prime_math/math_normalize.py
utils/reward_score/sandbox_fusion/__init__.py
utils/reward_score/sandbox_fusion/utils.py
utils/vllm/__init__.py
utils/vllm/patch.py
utils/vllm/utils.py
verl.egg-info/PKG-INFO
verl.egg-info/SOURCES.txt
verl.egg-info/dependency_links.txt
verl.egg-info/requires.txt
verl.egg-info/top_level.txt
workers/__init__.py
workers/fsdp_workers.py
workers/megatron_workers.py
workers/actor/__init__.py
workers/actor/base.py
workers/actor/dp_actor.py
workers/actor/megatron_actor.py
workers/config/__init__.py
workers/config/actor.py
workers/config/critic.py
workers/config/engine.py
workers/config/model.py
workers/config/optimizer.py
workers/config/reward_model.py
workers/config/rollout.py
workers/critic/__init__.py
workers/critic/base.py
workers/critic/dp_critic.py
workers/critic/megatron_critic.py
workers/engine/__init__.py
workers/engine/base.py
workers/engine/utils.py
workers/engine/fsdp/__init__.py
workers/engine/fsdp/transformer_impl.py
workers/engine/fsdp/utils.py
workers/engine/megatron/__init__.py
workers/engine/megatron/transformer_impl.py
workers/engine/megatron/utils.py
workers/reward_manager/__init__.py
workers/reward_manager/abstract.py
workers/reward_manager/batch.py
workers/reward_manager/dapo.py
workers/reward_manager/naive.py
workers/reward_manager/prime.py
workers/reward_manager/registry.py
workers/reward_model/__init__.py
workers/reward_model/base.py
workers/reward_model/megatron/__init__.py
workers/reward_model/megatron/reward_model.py
workers/roles/__init__.py
workers/roles/actor.py
workers/roles/critic.py
workers/roles/hybrid_engine.py
workers/roles/reward_model.py
workers/roles/reward_model_engine/__init__.py
workers/roles/reward_model_engine/base.py
workers/roles/reward_model_engine/sglang_reward_model.py
workers/roles/utils/__init__.py
workers/roles/utils/losses.py
workers/rollout/__init__.py
workers/rollout/async_server.py
workers/rollout/base.py
workers/rollout/hf_rollout.py
workers/rollout/schemas.py
workers/rollout/tokenizer.py
workers/rollout/naive/__init__.py
workers/rollout/naive/naive_rollout.py
workers/rollout/sglang_rollout/__init__.py
workers/rollout/sglang_rollout/async_sglang_server.py
workers/rollout/sglang_rollout/http_server_engine.py
workers/rollout/sglang_rollout/sglang_rollout.py
workers/rollout/sglang_rollout/utils.py
workers/rollout/vllm_rollout/__init__.py
workers/rollout/vllm_rollout/vllm_async_server.py
workers/rollout/vllm_rollout/vllm_rollout_spmd.py
workers/sharding_manager/__init__.py
workers/sharding_manager/base.py
workers/sharding_manager/fsdp_sglang.py
workers/sharding_manager/fsdp_ulysses.py
workers/sharding_manager/fsdp_vllm.py
workers/sharding_manager/megatron_sglang.py
workers/sharding_manager/megatron_vllm.py