LICENSE
README.md
pyproject.toml
setup.py
./graph/__init__.py
./graph/core_graph.py
./graph/try.py
./tests/__init__.py
./tests/test_protocol.py
./tests/e2e/__init__.py
./tests/e2e/check_custom_rwd_fn.py
./tests/e2e/check_results.py
./tests/e2e/envs/__init__.py
./tests/e2e/envs/digit_completion/__init__.py
./tests/e2e/envs/digit_completion/task.py
./tests/e2e/envs/digit_completion/tokenizer.py
./tests/trainer/__init__.py
./tests/trainer/ppo/__init__.py
./tests/trainer/ppo/test_metric_utils.py
./verl/__init__.py
./verl/protocol.py
./verl/models/__init__.py
./verl/models/registry.py
./verl/models/weight_loader_registry.py
./verl/models/llama/__init__.py
./verl/models/llama/megatron/__init__.py
./verl/models/llama/megatron/modeling_llama_megatron.py
./verl/models/llama/megatron/checkpoint_utils/__init__.py
./verl/models/llama/megatron/checkpoint_utils/llama_loader.py
./verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
./verl/models/llama/megatron/checkpoint_utils/llama_saver.py
./verl/models/llama/megatron/layers/__init__.py
./verl/models/llama/megatron/layers/parallel_attention.py
./verl/models/llama/megatron/layers/parallel_decoder.py
./verl/models/llama/megatron/layers/parallel_linear.py
./verl/models/llama/megatron/layers/parallel_mlp.py
./verl/models/llama/megatron/layers/parallel_rmsnorm.py
./verl/models/mcore/__init__.py
./verl/models/mcore/config_converter.py
./verl/models/mcore/loader.py
./verl/models/mcore/model_forward.py
./verl/models/mcore/model_initializer.py
./verl/models/mcore/patch_v012.py
./verl/models/mcore/registry.py
./verl/models/mcore/saver.py
./verl/models/mcore/util.py
./verl/models/mcore/weight_converter.py
./verl/models/qwen2/__init__.py
./verl/models/qwen2/megatron/__init__.py
./verl/models/qwen2/megatron/modeling_qwen2_megatron.py
./verl/models/qwen2/megatron/checkpoint_utils/__init__.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
./verl/models/qwen2/megatron/layers/__init__.py
./verl/models/qwen2/megatron/layers/parallel_attention.py
./verl/models/qwen2/megatron/layers/parallel_decoder.py
./verl/models/qwen2/megatron/layers/parallel_linear.py
./verl/models/qwen2/megatron/layers/parallel_mlp.py
./verl/models/qwen2/megatron/layers/parallel_rmsnorm.py
./verl/models/transformers/__init__.py
./verl/models/transformers/kimi_vl.py
./verl/models/transformers/llama.py
./verl/models/transformers/monkey_patch.py
./verl/models/transformers/qwen2.py
./verl/models/transformers/qwen2_5_vl.py
./verl/models/transformers/qwen2_vl.py
./verl/single_controller/__init__.py
./verl/single_controller/base/__init__.py
./verl/single_controller/base/decorator.py
./verl/single_controller/base/worker.py
./verl/single_controller/base/worker_group.py
./verl/single_controller/base/megatron/__init__.py
./verl/single_controller/base/megatron/worker.py
./verl/single_controller/base/megatron/worker_group.py
./verl/single_controller/base/register_center/__init__.py
./verl/single_controller/base/register_center/ray.py
./verl/single_controller/ray/__init__.py
./verl/single_controller/ray/base.py
./verl/single_controller/ray/megatron.py
./verl/third_party/__init__.py
./verl/third_party/sglang/__init__.py
./verl/third_party/sglang/parallel_state.py
./verl/third_party/vllm/__init__.py
./verl/third_party/vllm/vllm_v_0_5_4/__init__.py
./verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py
./verl/third_party/vllm/vllm_v_0_5_4/config.py
./verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py
./verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py
./verl/third_party/vllm/vllm_v_0_5_4/llm.py
./verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py
./verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py
./verl/third_party/vllm/vllm_v_0_5_4/model_loader.py
./verl/third_party/vllm/vllm_v_0_5_4/model_runner.py
./verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py
./verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py
./verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py
./verl/third_party/vllm/vllm_v_0_5_4/worker.py
./verl/third_party/vllm/vllm_v_0_6_3/__init__.py
./verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py
./verl/third_party/vllm/vllm_v_0_6_3/config.py
./verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py
./verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py
./verl/third_party/vllm/vllm_v_0_6_3/llm.py
./verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py
./verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py
./verl/third_party/vllm/vllm_v_0_6_3/model_loader.py
./verl/third_party/vllm/vllm_v_0_6_3/model_runner.py
./verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py
./verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py
./verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py
./verl/third_party/vllm/vllm_v_0_6_3/worker.py
./verl/tools/__init__.py
./verl/tools/base_tool.py
./verl/tools/gsm8k_tool.py
./verl/tools/sandbox_fusion_tools.py
./verl/tools/schemas.py
./verl/tools/search_tool.py
./verl/tools/utils/__init__.py
./verl/tools/utils/search_r1_like_utils.py
./verl/trainer/__init__.py
./verl/trainer/fsdp_sft_trainer.py
./verl/trainer/main_eval.py
./verl/trainer/main_generation.py
./verl/trainer/main_ppo.py
./verl/trainer/config/evaluation.yaml
./verl/trainer/config/generation.yaml
./verl/trainer/config/ppo_megatron_trainer.yaml
./verl/trainer/config/ppo_trainer.yaml
./verl/trainer/config/sft_trainer.yaml
./verl/trainer/ppo/__init__.py
./verl/trainer/ppo/core_algos.py
./verl/trainer/ppo/metric_utils.py
./verl/trainer/ppo/ray_trainer.py
./verl/trainer/ppo/reward.py
./verl/utils/__init__.py
./verl/utils/activation_offload.py
./verl/utils/config.py
./verl/utils/device.py
./verl/utils/distributed.py
./verl/utils/flops_counter.py
./verl/utils/fs.py
./verl/utils/fsdp_utils.py
./verl/utils/hdfs_io.py
./verl/utils/import_utils.py
./verl/utils/logging_utils.py
./verl/utils/megatron_utils.py
./verl/utils/memory_buffer.py
./verl/utils/model.py
./verl/utils/net_utils.py
./verl/utils/py_functional.py
./verl/utils/ray_utils.py
./verl/utils/seqlen_balancing.py
./verl/utils/tokenizer.py
./verl/utils/torch_dtypes.py
./verl/utils/torch_functional.py
./verl/utils/tracking.py
./verl/utils/ulysses.py
./verl/utils/vllm_utils.py
./verl/utils/checkpoint/__init__.py
./verl/utils/checkpoint/checkpoint_manager.py
./verl/utils/checkpoint/fsdp_checkpoint_manager.py
./verl/utils/checkpoint/megatron_checkpoint_manager.py
./verl/utils/dataset/__init__.py
./verl/utils/dataset/multiturn_sft_dataset.py
./verl/utils/dataset/rl_dataset.py
./verl/utils/dataset/rm_dataset.py
./verl/utils/dataset/sft_dataset.py
./verl/utils/dataset/vision_utils.py
./verl/utils/debug/__init__.py
./verl/utils/debug/performance.py
./verl/utils/debug/profile.py
./verl/utils/debug/trajectory_tracker.py
./verl/utils/experimental/__init__.py
./verl/utils/experimental/torch_functional.py
./verl/utils/logger/__init__.py
./verl/utils/logger/aggregate_logger.py
./verl/utils/megatron/__init__.py
./verl/utils/megatron/memory.py
./verl/utils/megatron/optimizer.py
./verl/utils/megatron/pipeline_parallel.py
./verl/utils/megatron/sequence_parallel.py
./verl/utils/megatron/tensor_parallel.py
./verl/utils/metric/__init__.py
./verl/utils/metric/utils.py
./verl/utils/rendezvous/__init__.py
./verl/utils/rendezvous/ray_backend.py
./verl/utils/reward_score/__init__.py
./verl/utils/reward_score/geo3k.py
./verl/utils/reward_score/gsm8k.py
./verl/utils/reward_score/math.py
./verl/utils/reward_score/math_batch.py
./verl/utils/reward_score/math_dapo.py
./verl/utils/reward_score/math_verify.py
./verl/utils/reward_score/search_r1_like_qa_em.py
./verl/utils/reward_score/prime_code/__init__.py
./verl/utils/reward_score/prime_code/testing_util.py
./verl/utils/reward_score/prime_code/utils.py
./verl/utils/reward_score/prime_math/__init__.py
./verl/utils/reward_score/prime_math/grader.py
./verl/utils/reward_score/prime_math/math_normalize.py
./verl/utils/reward_score/sandbox_fusion/__init__.py
./verl/utils/reward_score/sandbox_fusion/utils.py
./verl/version/version
./verl/workers/__init__.py
./verl/workers/fsdp_workers.py
./verl/workers/megatron_workers.py
./verl/workers/actor/__init__.py
./verl/workers/actor/base.py
./verl/workers/actor/dp_actor.py
./verl/workers/actor/megatron_actor.py
./verl/workers/critic/__init__.py
./verl/workers/critic/base.py
./verl/workers/critic/dp_critic.py
./verl/workers/critic/megatron_critic.py
./verl/workers/reward_manager/__init__.py
./verl/workers/reward_manager/batch.py
./verl/workers/reward_manager/dapo.py
./verl/workers/reward_manager/naive.py
./verl/workers/reward_manager/prime.py
./verl/workers/reward_model/__init__.py
./verl/workers/reward_model/base.py
./verl/workers/reward_model/megatron/__init__.py
./verl/workers/reward_model/megatron/reward_model.py
./verl/workers/rollout/__init__.py
./verl/workers/rollout/async_server.py
./verl/workers/rollout/base.py
./verl/workers/rollout/hf_rollout.py
./verl/workers/rollout/schemas.py
./verl/workers/rollout/tokenizer.py
./verl/workers/rollout/naive/__init__.py
./verl/workers/rollout/naive/naive_rollout.py
./verl/workers/rollout/sglang_rollout/__init__.py
./verl/workers/rollout/sglang_rollout/async_sglang_server.py
./verl/workers/rollout/sglang_rollout/sglang_rollout.py
./verl/workers/rollout/sglang_rollout/utils.py
./verl/workers/rollout/vllm_rollout/__init__.py
./verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py
./verl/workers/rollout/vllm_rollout/vllm_async_server.py
./verl/workers/rollout/vllm_rollout/vllm_rollout.py
./verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py
./verl/workers/sharding_manager/__init__.py
./verl/workers/sharding_manager/base.py
./verl/workers/sharding_manager/fsdp_sglang.py
./verl/workers/sharding_manager/fsdp_ulysses.py
./verl/workers/sharding_manager/fsdp_vllm.py
./verl/workers/sharding_manager/megatron_sglang.py
./verl/workers/sharding_manager/megatron_vllm.py
graph/__init__.py
graph/core_graph.py
graph/try.py
tests/__init__.py
tests/test_protocol.py
tests/e2e/__init__.py
tests/e2e/check_custom_rwd_fn.py
tests/e2e/check_results.py
tests/e2e/envs/__init__.py
tests/e2e/envs/digit_completion/__init__.py
tests/e2e/envs/digit_completion/task.py
tests/e2e/envs/digit_completion/tokenizer.py
tests/trainer/__init__.py
tests/trainer/ppo/__init__.py
tests/trainer/ppo/test_metric_utils.py
verl/__init__.py
verl/protocol.py
verl.egg-info/PKG-INFO
verl.egg-info/SOURCES.txt
verl.egg-info/dependency_links.txt
verl.egg-info/requires.txt
verl.egg-info/top_level.txt
verl/models/__init__.py
verl/models/registry.py
verl/models/weight_loader_registry.py
verl/models/llama/__init__.py
verl/models/llama/megatron/__init__.py
verl/models/llama/megatron/modeling_llama_megatron.py
verl/models/llama/megatron/checkpoint_utils/__init__.py
verl/models/llama/megatron/checkpoint_utils/llama_loader.py
verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
verl/models/llama/megatron/checkpoint_utils/llama_saver.py
verl/models/llama/megatron/layers/__init__.py
verl/models/llama/megatron/layers/parallel_attention.py
verl/models/llama/megatron/layers/parallel_decoder.py
verl/models/llama/megatron/layers/parallel_linear.py
verl/models/llama/megatron/layers/parallel_mlp.py
verl/models/llama/megatron/layers/parallel_rmsnorm.py
verl/models/mcore/__init__.py
verl/models/mcore/config_converter.py
verl/models/mcore/loader.py
verl/models/mcore/model_forward.py
verl/models/mcore/model_initializer.py
verl/models/mcore/patch_v012.py
verl/models/mcore/registry.py
verl/models/mcore/saver.py
verl/models/mcore/util.py
verl/models/mcore/weight_converter.py
verl/models/qwen2/__init__.py
verl/models/qwen2/megatron/__init__.py
verl/models/qwen2/megatron/modeling_qwen2_megatron.py
verl/models/qwen2/megatron/checkpoint_utils/__init__.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
verl/models/qwen2/megatron/layers/__init__.py
verl/models/qwen2/megatron/layers/parallel_attention.py
verl/models/qwen2/megatron/layers/parallel_decoder.py
verl/models/qwen2/megatron/layers/parallel_linear.py
verl/models/qwen2/megatron/layers/parallel_mlp.py
verl/models/qwen2/megatron/layers/parallel_rmsnorm.py
verl/models/transformers/__init__.py
verl/models/transformers/kimi_vl.py
verl/models/transformers/llama.py
verl/models/transformers/monkey_patch.py
verl/models/transformers/qwen2.py
verl/models/transformers/qwen2_5_vl.py
verl/models/transformers/qwen2_vl.py
verl/single_controller/__init__.py
verl/single_controller/base/__init__.py
verl/single_controller/base/decorator.py
verl/single_controller/base/worker.py
verl/single_controller/base/worker_group.py
verl/single_controller/base/megatron/__init__.py
verl/single_controller/base/megatron/worker.py
verl/single_controller/base/megatron/worker_group.py
verl/single_controller/base/register_center/__init__.py
verl/single_controller/base/register_center/ray.py
verl/single_controller/ray/__init__.py
verl/single_controller/ray/base.py
verl/single_controller/ray/megatron.py
verl/third_party/__init__.py
verl/third_party/sglang/__init__.py
verl/third_party/sglang/parallel_state.py
verl/third_party/vllm/__init__.py
verl/third_party/vllm/vllm_v_0_5_4/__init__.py
verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py
verl/third_party/vllm/vllm_v_0_5_4/config.py
verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py
verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py
verl/third_party/vllm/vllm_v_0_5_4/llm.py
verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py
verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py
verl/third_party/vllm/vllm_v_0_5_4/model_loader.py
verl/third_party/vllm/vllm_v_0_5_4/model_runner.py
verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py
verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py
verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py
verl/third_party/vllm/vllm_v_0_5_4/worker.py
verl/third_party/vllm/vllm_v_0_6_3/__init__.py
verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py
verl/third_party/vllm/vllm_v_0_6_3/config.py
verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py
verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py
verl/third_party/vllm/vllm_v_0_6_3/llm.py
verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py
verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py
verl/third_party/vllm/vllm_v_0_6_3/model_loader.py
verl/third_party/vllm/vllm_v_0_6_3/model_runner.py
verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py
verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py
verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py
verl/third_party/vllm/vllm_v_0_6_3/worker.py
verl/tools/__init__.py
verl/tools/base_tool.py
verl/tools/gsm8k_tool.py
verl/tools/sandbox_fusion_tools.py
verl/tools/schemas.py
verl/tools/search_tool.py
verl/tools/utils/__init__.py
verl/tools/utils/search_r1_like_utils.py
verl/trainer/__init__.py
verl/trainer/fsdp_sft_trainer.py
verl/trainer/main_eval.py
verl/trainer/main_generation.py
verl/trainer/main_ppo.py
verl/trainer/config/evaluation.yaml
verl/trainer/config/generation.yaml
verl/trainer/config/ppo_megatron_trainer.yaml
verl/trainer/config/ppo_trainer.yaml
verl/trainer/config/sft_trainer.yaml
verl/trainer/ppo/__init__.py
verl/trainer/ppo/core_algos.py
verl/trainer/ppo/metric_utils.py
verl/trainer/ppo/ray_trainer.py
verl/trainer/ppo/reward.py
verl/utils/__init__.py
verl/utils/activation_offload.py
verl/utils/config.py
verl/utils/device.py
verl/utils/distributed.py
verl/utils/flops_counter.py
verl/utils/fs.py
verl/utils/fsdp_utils.py
verl/utils/hdfs_io.py
verl/utils/import_utils.py
verl/utils/logging_utils.py
verl/utils/megatron_utils.py
verl/utils/memory_buffer.py
verl/utils/model.py
verl/utils/net_utils.py
verl/utils/py_functional.py
verl/utils/ray_utils.py
verl/utils/seqlen_balancing.py
verl/utils/tokenizer.py
verl/utils/torch_dtypes.py
verl/utils/torch_functional.py
verl/utils/tracking.py
verl/utils/ulysses.py
verl/utils/vllm_utils.py
verl/utils/checkpoint/__init__.py
verl/utils/checkpoint/checkpoint_manager.py
verl/utils/checkpoint/fsdp_checkpoint_manager.py
verl/utils/checkpoint/megatron_checkpoint_manager.py
verl/utils/dataset/__init__.py
verl/utils/dataset/multiturn_sft_dataset.py
verl/utils/dataset/rl_dataset.py
verl/utils/dataset/rm_dataset.py
verl/utils/dataset/sft_dataset.py
verl/utils/dataset/vision_utils.py
verl/utils/debug/__init__.py
verl/utils/debug/performance.py
verl/utils/debug/profile.py
verl/utils/debug/trajectory_tracker.py
verl/utils/experimental/__init__.py
verl/utils/experimental/torch_functional.py
verl/utils/logger/__init__.py
verl/utils/logger/aggregate_logger.py
verl/utils/megatron/__init__.py
verl/utils/megatron/memory.py
verl/utils/megatron/optimizer.py
verl/utils/megatron/pipeline_parallel.py
verl/utils/megatron/sequence_parallel.py
verl/utils/megatron/tensor_parallel.py
verl/utils/metric/__init__.py
verl/utils/metric/utils.py
verl/utils/rendezvous/__init__.py
verl/utils/rendezvous/ray_backend.py
verl/utils/reward_score/__init__.py
verl/utils/reward_score/geo3k.py
verl/utils/reward_score/gsm8k.py
verl/utils/reward_score/math.py
verl/utils/reward_score/math_batch.py
verl/utils/reward_score/math_dapo.py
verl/utils/reward_score/math_verify.py
verl/utils/reward_score/search_r1_like_qa_em.py
verl/utils/reward_score/prime_code/__init__.py
verl/utils/reward_score/prime_code/testing_util.py
verl/utils/reward_score/prime_code/utils.py
verl/utils/reward_score/prime_math/__init__.py
verl/utils/reward_score/prime_math/grader.py
verl/utils/reward_score/prime_math/math_normalize.py
verl/utils/reward_score/sandbox_fusion/__init__.py
verl/utils/reward_score/sandbox_fusion/utils.py
verl/version/version
verl/workers/__init__.py
verl/workers/fsdp_workers.py
verl/workers/megatron_workers.py
verl/workers/actor/__init__.py
verl/workers/actor/base.py
verl/workers/actor/dp_actor.py
verl/workers/actor/megatron_actor.py
verl/workers/critic/__init__.py
verl/workers/critic/base.py
verl/workers/critic/dp_critic.py
verl/workers/critic/megatron_critic.py
verl/workers/reward_manager/__init__.py
verl/workers/reward_manager/batch.py
verl/workers/reward_manager/dapo.py
verl/workers/reward_manager/naive.py
verl/workers/reward_manager/prime.py
verl/workers/reward_model/__init__.py
verl/workers/reward_model/base.py
verl/workers/reward_model/megatron/__init__.py
verl/workers/reward_model/megatron/reward_model.py
verl/workers/rollout/__init__.py
verl/workers/rollout/async_server.py
verl/workers/rollout/base.py
verl/workers/rollout/hf_rollout.py
verl/workers/rollout/schemas.py
verl/workers/rollout/tokenizer.py
verl/workers/rollout/naive/__init__.py
verl/workers/rollout/naive/naive_rollout.py
verl/workers/rollout/sglang_rollout/__init__.py
verl/workers/rollout/sglang_rollout/async_sglang_server.py
verl/workers/rollout/sglang_rollout/sglang_rollout.py
verl/workers/rollout/sglang_rollout/utils.py
verl/workers/rollout/vllm_rollout/__init__.py
verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py
verl/workers/rollout/vllm_rollout/vllm_async_server.py
verl/workers/rollout/vllm_rollout/vllm_rollout.py
verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py
verl/workers/sharding_manager/__init__.py
verl/workers/sharding_manager/base.py
verl/workers/sharding_manager/fsdp_sglang.py
verl/workers/sharding_manager/fsdp_ulysses.py
verl/workers/sharding_manager/fsdp_vllm.py
verl/workers/sharding_manager/megatron_sglang.py
verl/workers/sharding_manager/megatron_vllm.py