# Identifier and human-readable summary for the DAG workflow defined below.
dag_id: 'dapo_training_pipeline'
description: 'This is DAPO DAG workflow configured via YAML.'

# Flat mapping of dot-delimited setting names to values, anchored as
# `actor1_config` so it can be reused through a YAML alias.
# NOTE(review): no `*actor1_config` alias appears in the visible part of this
# file, and no node below refers to this key; confirm it is actually consumed.
actor1_config: &actor1_config
  model.path: Qwen/Qwen2.5-0.5B
  rollout.log_prob_micro_batch_size_per_gpu: 16
  rollout.tensor_model_parallel_size: 2
  # Fractional value; presumably a 0-1 share of GPU memory (TODO confirm).
  rollout.gpu_memory_utilization: 0.7
  rollout.n: 16

# Nodes of the DAG. Each entry lists the node_ids it depends on.
# As written, the dependencies form one linear chain:
#   rollout_actor -> function_reward -> calculate_advantages
#     -> dynamic_sampling -> data_rebalance -> actor_old_log_prob
#     -> reference_log_prob -> actor_train
nodes:
  # Entry point of the chain: the only node with no dependencies.
  - node_id: 'rollout_actor'
    node_type: 'MODEL_INFERENCE'
    node_role: 'ROLLOUT'
    dependencies: []

  - node_id: 'function_reward'
    node_type: 'COMPUTE'
    node_role: 'REWARD'
    dependencies:
      - 'rollout_actor'

  - node_id: 'calculate_advantages'
    node_type: 'COMPUTE'
    node_role: 'ADVANTAGE'
    dependencies:
      - 'function_reward'

  # The only node that names an `executable_ref` and has no `node_role`.
  - node_id: 'dynamic_sampling'
    node_type: 'COMPUTE'
    executable_ref: 'distflow.algorithm.dapo.dynamic_sampling'
    dependencies:
      - 'calculate_advantages'

  - node_id: 'data_rebalance'
    node_type: 'COMPUTE'
    node_role: 'DATA_REBALANCE'
    dependencies:
      - 'dynamic_sampling'

  # Same node_type and node_role as `actor_train`; this entry additionally
  # sets `only_forward_compute: true`.
  - node_id: 'actor_old_log_prob'
    node_type: 'MODEL_TRAIN'
    node_role: 'ACTOR'
    only_forward_compute: true
    dependencies:
      - 'data_rebalance'

  - node_id: 'reference_log_prob'
    node_type: 'MODEL_TRAIN'
    node_role: 'REFERENCE'
    dependencies:
      - 'actor_old_log_prob'

  # Final node of the chain: no other node lists it as a dependency.
  - node_id: 'actor_train'
    node_type: 'MODEL_TRAIN'
    node_role: 'ACTOR'
    dependencies:
      - 'reference_log_prob'
