{
  "model": "Qwen/Qwen3-1.7B",
  "total_steps": 50,
  "batch_size": 2,
  "start_time": "2026-01-25T01:19:49.628785",
  "end_time": "2026-01-25T02:17:59.538046",
  "individual_results": [
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 960.265622138977,
      "avg_throughput": 0.05206892639624624,
      "train_metrics": [
        {
          "policy_loss": 0.2828420400619507,
          "kl_loss": 0.0,
          "total_loss": 0.2828420400619507,
          "approx_kl": 30.88092041015625,
          "clip_fraction": 1.0,
          "grad_norm": 0.00015643723600078374,
          "mean_reward": 0.5,
          "mean_advantage": 0.0,
          "std_advantage": 1.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.5,
          "avg_reward": 0.5,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -580.6595458984375,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 18.51063632965088,
          "cumulative_gpu_hours": 0.005141843424903022,
          "collection_time": 16.877974033355713,
          "train_time": 1.6115829944610596,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 18.510634660720825,
          "throughput": 0.10804599824143024
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 57.35659432411194,
          "cumulative_gpu_hours": 0.015932387312253316,
          "collection_time": 38.19233727455139,
          "train_time": 0.6327617168426514,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.84585213661194,
          "throughput": 0.051485548391793784
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 96.23747730255127,
          "cumulative_gpu_hours": 0.02673263258404202,
          "collection_time": 38.148733615875244,
          "train_time": 0.7110843658447266,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.88078188896179,
          "throughput": 0.051439294757799035
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 134.91944360733032,
          "cumulative_gpu_hours": 0.037477623224258426,
          "collection_time": 38.105422258377075,
          "train_time": 0.5557897090911865,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.68187355995178,
          "throughput": 0.0517038037700078
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 174.05006957054138,
          "cumulative_gpu_hours": 0.048347241547372605,
          "collection_time": 38.48775029182434,
          "train_time": 0.6214618682861328,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 39.130534172058105,
          "throughput": 0.051110981291641495
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 213.04245567321777,
          "cumulative_gpu_hours": 0.05917845990922716,
          "collection_time": 38.413246154785156,
          "train_time": 0.558671236038208,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.99229598045349,
          "throughput": 0.05129218348677347
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 251.8762767314911,
          "cumulative_gpu_hours": 0.06996563242541419,
          "collection_time": 38.31081461906433,
          "train_time": 0.5021169185638428,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 38.83373165130615,
          "throughput": 0.05150161766472244
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 290.7578184604645,
          "cumulative_gpu_hours": 0.08076606068346236,
          "collection_time": 38.22652554512024,
          "train_time": 0.6336605548858643,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.881449699401855,
          "throughput": 0.051438411259412675
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 330.139075756073,
          "cumulative_gpu_hours": 0.09170529882113139,
          "collection_time": 38.36374235153198,
          "train_time": 0.9967336654663086,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.38116407394409,
          "throughput": 0.05078570039841122
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 370.1383991241455,
          "cumulative_gpu_hours": 0.10281622197892931,
          "collection_time": 39.29782223701477,
          "train_time": 0.6806340217590332,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 39.999234676361084,
          "throughput": 0.05000095667285275
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 408.47034072875977,
          "cumulative_gpu_hours": 0.1134639835357666,
          "collection_time": 37.82807445526123,
          "train_time": 0.4841127395629883,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.33178472518921,
          "throughput": 0.05217602087506579
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 446.8871600627899,
          "cumulative_gpu_hours": 0.12413532223966386,
          "collection_time": 37.956007957458496,
          "train_time": 0.43991804122924805,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.41673278808594,
          "throughput": 0.052060647922153695
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 485.4092471599579,
          "cumulative_gpu_hours": 0.13483590198887718,
          "collection_time": 37.92720699310303,
          "train_time": 0.5740511417388916,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.521995306015015,
          "throughput": 0.051918390626243344
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 523.6751320362091,
          "cumulative_gpu_hours": 0.14546531445450253,
          "collection_time": 37.75641942024231,
          "train_time": 0.48902463912963867,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.26579260826111,
          "throughput": 0.05226600218306271
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 562.070454120636,
          "cumulative_gpu_hours": 0.15613068170017666,
          "collection_time": 37.89005470275879,
          "train_time": 0.48412322998046875,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.39523148536682,
          "throughput": 0.052089801848498804
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 600.8139936923981,
          "cumulative_gpu_hours": 0.16689277602566613,
          "collection_time": 38.18233919143677,
          "train_time": 0.5404636859893799,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.74345111846924,
          "throughput": 0.051621627456068
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 639.1991028785706,
          "cumulative_gpu_hours": 0.17755530635515848,
          "collection_time": 37.9100387096405,
          "train_time": 0.45412540435791016,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.38501691818237,
          "throughput": 0.05210366337112728
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 677.530846118927,
          "cumulative_gpu_hours": 0.18820301281081306,
          "collection_time": 37.62280035018921,
          "train_time": 0.6885111331939697,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.33163833618164,
          "throughput": 0.05217622013594391
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 715.9476685523987,
          "cumulative_gpu_hours": 0.1988743523756663,
          "collection_time": 37.5972683429718,
          "train_time": 0.7986955642700195,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.416733741760254,
          "throughput": 0.052060646629776705
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 754.1670370101929,
          "cumulative_gpu_hours": 0.20949084361394246,
          "collection_time": 37.730090856552124,
          "train_time": 0.4684126377105713,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.21927452087402,
          "throughput": 0.05232961706030475
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 792.789039850235,
          "cumulative_gpu_hours": 0.22021917773617639,
          "collection_time": 37.88833975791931,
          "train_time": 0.7120444774627686,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.621851205825806,
          "throughput": 0.0517841568323974
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 831.004159450531,
          "cumulative_gpu_hours": 0.23083448873625861,
          "collection_time": 37.70484375953674,
          "train_time": 0.4887208938598633,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.215020179748535,
          "throughput": 0.052335442728874165
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 869.084459066391,
          "cumulative_gpu_hours": 0.24141234974066417,
          "collection_time": 37.58259415626526,
          "train_time": 0.4771440029144287,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 38.0802104473114,
          "throughput": 0.05252071815010694
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 907.4584760665894,
          "cumulative_gpu_hours": 0.25207179890738596,
          "collection_time": 37.85832333564758,
          "train_time": 0.49481773376464844,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.37392473220825,
          "throughput": 0.052118724210696826
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 945.9228675365448,
          "cumulative_gpu_hours": 0.2627563520934847,
          "collection_time": 37.89361333847046,
          "train_time": 0.549940824508667,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.464298486709595,
          "throughput": 0.05199626871372817
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        }
      },
      "method": "sync",
      "seed": 42,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 988.7453982830048,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 969.0753214359283,
      "avg_throughput": 0.05159557662237487,
      "train_metrics": [
        {
          "policy_loss": 0.2821888029575348,
          "kl_loss": 0.0,
          "total_loss": 0.2821888029575348,
          "approx_kl": 21.816896438598633,
          "clip_fraction": 1.0,
          "grad_norm": 0.17889602482318878,
          "mean_reward": 0.5,
          "mean_advantage": 0.0,
          "std_advantage": 1.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.5,
          "avg_reward": 0.5,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -591.099609375,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 21.885319232940674,
          "cumulative_gpu_hours": 0.00607925534248352,
          "collection_time": 20.685489177703857,
          "train_time": 1.179173469543457,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 21.8853178024292,
          "throughput": 0.09138546755661033
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 61.02167844772339,
          "cumulative_gpu_hours": 0.01695046623547872,
          "collection_time": 38.396355390548706,
          "train_time": 0.7185966968536377,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.136253356933594,
          "throughput": 0.05110351217730118
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 100.15199494361877,
          "cumulative_gpu_hours": 0.02781999859544966,
          "collection_time": 38.21966290473938,
          "train_time": 0.8897771835327148,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.13021421432495,
          "throughput": 0.05111139921303655
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 139.2343955039978,
          "cumulative_gpu_hours": 0.038676220973332724,
          "collection_time": 38.4845290184021,
          "train_time": 0.5769860744476318,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.08231043815613,
          "throughput": 0.051174047224377926
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 178.93081855773926,
          "cumulative_gpu_hours": 0.04970300515492757,
          "collection_time": 38.77894163131714,
          "train_time": 0.8971993923187256,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.696327209472656,
          "throughput": 0.050382494819892154
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 218.11775469779968,
          "cumulative_gpu_hours": 0.06058826519383324,
          "collection_time": 38.74178647994995,
          "train_time": 0.42467832565307617,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.18684387207031,
          "throughput": 0.05103753715224467
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 257.1421854496002,
          "cumulative_gpu_hours": 0.07142838484711117,
          "collection_time": 38.515586614608765,
          "train_time": 0.4879295825958252,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.02433776855469,
          "throughput": 0.05125006891498296
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 296.55890011787415,
          "cumulative_gpu_hours": 0.08237747225496504,
          "collection_time": 38.7746205329895,
          "train_time": 0.6220705509185791,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.416624307632446,
          "throughput": 0.050740012244344566
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 335.56354546546936,
          "cumulative_gpu_hours": 0.09321209596263037,
          "collection_time": 38.51198434829712,
          "train_time": 0.47264623641967773,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.00455355644226,
          "throughput": 0.05127606439863138
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 374.6687169075012,
          "cumulative_gpu_hours": 0.104074643585417,
          "collection_time": 38.507242918014526,
          "train_time": 0.5770013332366943,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.105074405670166,
          "throughput": 0.051144257628877025
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 413.0561249256134,
          "cumulative_gpu_hours": 0.11473781247933705,
          "collection_time": 37.843390226364136,
          "train_time": 0.5227103233337402,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.38724136352539,
          "throughput": 0.05210064409317911
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 451.62598419189453,
          "cumulative_gpu_hours": 0.12545166227552626,
          "collection_time": 37.98268938064575,
          "train_time": 0.5662698745727539,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.56976389884949,
          "throughput": 0.05185408977988737
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 489.95884561538696,
          "cumulative_gpu_hours": 0.1360996793376075,
          "collection_time": 37.826868772506714,
          "train_time": 0.4853968620300293,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.33276414871216,
          "throughput": 0.05217468774860559
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 528.4824523925781,
          "cumulative_gpu_hours": 0.1468006812201606,
          "collection_time": 38.074812173843384,
          "train_time": 0.4283139705657959,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.52351522445679,
          "throughput": 0.051916342222329
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 566.9206206798553,
          "cumulative_gpu_hours": 0.15747795018884872,
          "collection_time": 37.952162742614746,
          "train_time": 0.46550536155700684,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.43807625770569,
          "throughput": 0.052031740261690634
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 605.8297023773193,
          "cumulative_gpu_hours": 0.16828602843814425,
          "collection_time": 38.396629095077515,
          "train_time": 0.4921376705169678,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.90898942947388,
          "throughput": 0.05140200322152247
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 644.8200747966766,
          "cumulative_gpu_hours": 0.1791166874435213,
          "collection_time": 37.833172082901,
          "train_time": 1.1355257034301758,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.99028301239014,
          "throughput": 0.05129483157032869
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 683.3637588024139,
          "cumulative_gpu_hours": 0.18982326633400387,
          "collection_time": 38.059645891189575,
          "train_time": 0.4636976718902588,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.54358911514282,
          "throughput": 0.051889303666695884
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 722.3746366500854,
          "cumulative_gpu_hours": 0.2006596212916904,
          "collection_time": 38.48990440368652,
          "train_time": 0.5003740787506104,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.010786294937134,
          "throughput": 0.051267872041317004
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 761.7536354064941,
          "cumulative_gpu_hours": 0.21159823205735948,
          "collection_time": 38.81078505516052,
          "train_time": 0.5474839210510254,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.3788959980011,
          "throughput": 0.05078862546328169
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 800.6436975002289,
          "cumulative_gpu_hours": 0.2224010270833969,
          "collection_time": 38.266472816467285,
          "train_time": 0.6023449897766113,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.889896631240845,
          "throughput": 0.05142723877525994
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 839.274749994278,
          "cumulative_gpu_hours": 0.23313187499841054,
          "collection_time": 38.06584072113037,
          "train_time": 0.5446760654449463,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.63095808029175,
          "throughput": 0.05177194921863288
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 877.5081186294556,
          "cumulative_gpu_hours": 0.24375225517484878,
          "collection_time": 37.76785469055176,
          "train_time": 0.44309353828430176,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.2332763671875,
          "throughput": 0.05231045283151398
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 916.2367298603058,
          "cumulative_gpu_hours": 0.25451020273897385,
          "collection_time": 37.99492049217224,
          "train_time": 0.7134323120117188,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.728511810302734,
          "throughput": 0.051641540211931174
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 954.6283321380615,
          "cumulative_gpu_hours": 0.2651745367050171,
          "collection_time": 37.9132022857666,
          "train_time": 0.45764946937561035,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.39150905609131,
          "throughput": 0.0520948524601607
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        }
      },
      "method": "sync",
      "seed": 123,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 997.4154725074768,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 977.5520894527435,
      "avg_throughput": 0.0511481695343633,
      "train_metrics": [
        {
          "policy_loss": -0.0,
          "kl_loss": 0.0,
          "total_loss": 0.0,
          "approx_kl": 41.778480529785156,
          "clip_fraction": 1.0,
          "grad_norm": 0.0,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -442.9190979003906,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 23.819485902786255,
          "cumulative_gpu_hours": 0.006616523861885071,
          "collection_time": 22.145203828811646,
          "train_time": 1.6511852741241455,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 23.819483995437622,
          "throughput": 0.08396487515779433
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 62.959304332733154,
          "cumulative_gpu_hours": 0.01748869564798143,
          "collection_time": 38.49792671203613,
          "train_time": 0.6209430694580078,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.139713525772095,
          "throughput": 0.05109899434197626
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 101.68884515762329,
          "cumulative_gpu_hours": 0.028246901432673135,
          "collection_time": 38.239357709884644,
          "train_time": 0.4691133499145508,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.729440689086914,
          "throughput": 0.05164030165205962
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 140.56981945037842,
          "cumulative_gpu_hours": 0.03904717206954956,
          "collection_time": 38.36141872406006,
          "train_time": 0.4990241527557373,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.880879163742065,
          "throughput": 0.05143916606353588
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 179.97584629058838,
          "cumulative_gpu_hours": 0.04999329063627455,
          "collection_time": 38.71121525764465,
          "train_time": 0.6739330291748047,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.40593409538269,
          "throughput": 0.05075377721433955
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 219.37338972091675,
          "cumulative_gpu_hours": 0.06093705270025465,
          "collection_time": 38.7275915145874,
          "train_time": 0.6491148471832275,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.39744710922241,
          "throughput": 0.05076471057770205
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 258.40976452827454,
          "cumulative_gpu_hours": 0.07178049014674293,
          "collection_time": 38.580002546310425,
          "train_time": 0.4356272220611572,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.03628373146057,
          "throughput": 0.051234385264705334
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 297.46763157844543,
          "cumulative_gpu_hours": 0.08262989766067928,
          "collection_time": 38.5379741191864,
          "train_time": 0.4987630844116211,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.05777645111084,
          "throughput": 0.05120619199875415
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 336.6616449356079,
          "cumulative_gpu_hours": 0.09351712359322442,
          "collection_time": 38.671722173690796,
          "train_time": 0.49620485305786133,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.19391632080078,
          "throughput": 0.05102832755037982
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 375.6518998146057,
          "cumulative_gpu_hours": 0.10434774994850159,
          "collection_time": 38.49421191215515,
          "train_time": 0.4734020233154297,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 38.990150928497314,
          "throughput": 0.05129500533782828
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 414.9552512168884,
          "cumulative_gpu_hours": 0.11526534756024678,
          "collection_time": 38.676543951034546,
          "train_time": 0.6057777404785156,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 39.30317187309265,
          "throughput": 0.05088647823279678
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 454.1327555179596,
          "cumulative_gpu_hours": 0.12614798764387766,
          "collection_time": 38.51832365989685,
          "train_time": 0.6382937431335449,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.17739152908325,
          "throughput": 0.051049850996723566
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 493.10462951660156,
          "cumulative_gpu_hours": 0.136973508199056,
          "collection_time": 38.44527339935303,
          "train_time": 0.5053610801696777,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.971779346466064,
          "throughput": 0.05131918617878962
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 532.4013271331787,
          "cumulative_gpu_hours": 0.14788925753699408,
          "collection_time": 38.80428123474121,
          "train_time": 0.47029638290405273,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.29659676551819,
          "throughput": 0.05089499255963437
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 571.6451110839844,
          "cumulative_gpu_hours": 0.1587903086344401,
          "collection_time": 38.75216841697693,
          "train_time": 0.47032976150512695,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.243693351745605,
          "throughput": 0.050963602790231204
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 611.2558319568634,
          "cumulative_gpu_hours": 0.16979328665468427,
          "collection_time": 38.80823493003845,
          "train_time": 0.7816312313079834,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.610626459121704,
          "throughput": 0.05049150136678617
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 650.2199988365173,
          "cumulative_gpu_hours": 0.18061666634347703,
          "collection_time": 38.35211396217346,
          "train_time": 0.5910260677337646,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.96407222747803,
          "throughput": 0.051329337147403474
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 688.8457968235016,
          "cumulative_gpu_hours": 0.19134605467319488,
          "collection_time": 38.0527184009552,
          "train_time": 0.5517182350158691,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.62570095062256,
          "throughput": 0.051778995611153164
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 727.8446724414825,
          "cumulative_gpu_hours": 0.2021790756781896,
          "collection_time": 38.44845390319824,
          "train_time": 0.5289967060089111,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.99878454208374,
          "throughput": 0.05128364956712413
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 766.8252601623535,
          "cumulative_gpu_hours": 0.21300701671176486,
          "collection_time": 38.34877610206604,
          "train_time": 0.6102538108825684,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.98049354553223,
          "throughput": 0.05130771363025061
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 805.8630182743073,
          "cumulative_gpu_hours": 0.2238508384095298,
          "collection_time": 38.52127289772034,
          "train_time": 0.4951457977294922,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.037595987319946,
          "throughput": 0.051232663011565385
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 845.3924140930176,
          "cumulative_gpu_hours": 0.23483122613694932,
          "collection_time": 38.96177363395691,
          "train_time": 0.5466482639312744,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.52930545806885,
          "throughput": 0.050595374161620986
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 884.3068497180939,
          "cumulative_gpu_hours": 0.24564079158835941,
          "collection_time": 38.41788578033447,
          "train_time": 0.47539734840393066,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.914342641830444,
          "throughput": 0.05139493215671404
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 923.5768752098083,
          "cumulative_gpu_hours": 0.25654913200272456,
          "collection_time": 38.54384922981262,
          "train_time": 0.7009778022766113,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.269930362701416,
          "throughput": 0.05092955300729538
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 962.429007768631,
          "cumulative_gpu_hours": 0.26734139104684196,
          "collection_time": 38.37596583366394,
          "train_time": 0.455319881439209,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.85204529762268,
          "throughput": 0.0514773414032434
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        }
      },
      "method": "sync",
      "seed": 456,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 1004.5981786251068,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 995.575311422348,
      "avg_throughput": 0.05022221767288155,
      "train_metrics": [
        {
          "policy_loss": 0.2828427255153656,
          "kl_loss": 0.0,
          "total_loss": 0.2828427255153656,
          "approx_kl": 284.47119140625,
          "clip_fraction": 1.0,
          "grad_norm": 0.0,
          "mean_reward": 0.5,
          "mean_advantage": 0.0,
          "std_advantage": 1.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.5,
          "avg_reward": 0.5,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -413.8135986328125,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 33.85216426849365,
          "cumulative_gpu_hours": 0.009403378963470458,
          "collection_time": 30.609025716781616,
          "train_time": 3.2221062183380127,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 33.85216236114502,
          "throughput": 0.05908042088016122
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 73.0932445526123,
          "cumulative_gpu_hours": 0.020303679042392307,
          "collection_time": 38.649242639541626,
          "train_time": 0.5702180862426758,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 39.240962743759155,
          "throughput": 0.050967149125771084
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 112.48282957077026,
          "cumulative_gpu_hours": 0.031245230436325072,
          "collection_time": 38.69786787033081,
          "train_time": 0.6709532737731934,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.38947892189026,
          "throughput": 0.050774979886533164
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 151.84613943099976,
          "cumulative_gpu_hours": 0.04217948317527771,
          "collection_time": 38.86155581474304,
          "train_time": 0.48082590103149414,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.36322355270386,
          "throughput": 0.05080884692591748
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 191.58280205726624,
          "cumulative_gpu_hours": 0.053217445015907285,
          "collection_time": 38.84697389602661,
          "train_time": 0.8690309524536133,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.736573934555054,
          "throughput": 0.050331465498106104
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 230.9663233757019,
          "cumulative_gpu_hours": 0.06415731204880608,
          "collection_time": 38.73987650871277,
          "train_time": 0.6226847171783447,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.38343000411987,
          "throughput": 0.05078277843729664
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 270.3879044055939,
          "cumulative_gpu_hours": 0.07510775122377608,
          "collection_time": 38.875139474868774,
          "train_time": 0.5270190238952637,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.42149019241333,
          "throughput": 0.05073374928847566
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 310.5870568752289,
          "cumulative_gpu_hours": 0.08627418246534135,
          "collection_time": 39.46035027503967,
          "train_time": 0.7172908782958984,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 40.199066400527954,
          "throughput": 0.04975239922422012
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 351.66604566574097,
          "cumulative_gpu_hours": 0.09768501268492805,
          "collection_time": 40.32106947898865,
          "train_time": 0.7372472286224365,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 41.078895807266235,
          "throughput": 0.04868680038001971
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 392.4877998828888,
          "cumulative_gpu_hours": 0.109024388856358,
          "collection_time": 40.088101625442505,
          "train_time": 0.7126927375793457,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 40.82166147232056,
          "throughput": 0.048993596239489554
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 433.42918133735657,
          "cumulative_gpu_hours": 0.12039699481593238,
          "collection_time": 39.655367851257324,
          "train_time": 1.2636280059814453,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 40.941219091415405,
          "throughput": 0.048850523857980624
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 473.77963185310364,
          "cumulative_gpu_hours": 0.1316054532925288,
          "collection_time": 39.70883655548096,
          "train_time": 0.6191215515136719,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 40.35035848617554,
          "throughput": 0.04956585455579586
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 513.9097356796265,
          "cumulative_gpu_hours": 0.1427527043554518,
          "collection_time": 39.63489031791687,
          "train_time": 0.47441792488098145,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 40.13000679016113,
          "throughput": 0.04983801798135628
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 552.8903486728668,
          "cumulative_gpu_hours": 0.15358065240912966,
          "collection_time": 38.39722394943237,
          "train_time": 0.5625729560852051,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.980517625808716,
          "throughput": 0.05130768193482928
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 591.713775396347,
          "cumulative_gpu_hours": 0.1643649376100964,
          "collection_time": 38.308119773864746,
          "train_time": 0.49369025230407715,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.82333993911743,
          "throughput": 0.0515154029286607
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 630.7706983089447,
          "cumulative_gpu_hours": 0.17521408286359574,
          "collection_time": 38.39999508857727,
          "train_time": 0.6354458332061768,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.05681228637695,
          "throughput": 0.05120745608564684
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 669.508288860321,
          "cumulative_gpu_hours": 0.18597452468342252,
          "collection_time": 38.202197790145874,
          "train_time": 0.5150489807128906,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.737502336502075,
          "throughput": 0.05162955480780737
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 708.6133143901825,
          "cumulative_gpu_hours": 0.19683703177505069,
          "collection_time": 38.455729484558105,
          "train_time": 0.6289200782775879,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.10493612289429,
          "throughput": 0.05114443848507106
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 747.4094114303589,
          "cumulative_gpu_hours": 0.20761372539732192,
          "collection_time": 38.24004626274109,
          "train_time": 0.5356240272521973,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.79600715637207,
          "throughput": 0.0515516968521723
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 786.4943242073059,
          "cumulative_gpu_hours": 0.21847064561314053,
          "collection_time": 38.4444739818573,
          "train_time": 0.6197903156280518,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.08482313156128,
          "throughput": 0.05117075733636838
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 825.591254234314,
          "cumulative_gpu_hours": 0.2293309039539761,
          "collection_time": 38.51547193527222,
          "train_time": 0.5609323978424072,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.096771240234375,
          "throughput": 0.05115511937573519
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 864.2622046470642,
          "cumulative_gpu_hours": 0.2400728346241845,
          "collection_time": 38.213104009628296,
          "train_time": 0.43722081184387207,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.67085862159729,
          "throughput": 0.05171853098919867
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 903.0923352241516,
          "cumulative_gpu_hours": 0.2508589820067088,
          "collection_time": 38.33436393737793,
          "train_time": 0.47365665435791016,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.83003544807434,
          "throughput": 0.05150652006677949
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 942.5964162349701,
          "cumulative_gpu_hours": 0.26183233784304727,
          "collection_time": 38.45828557014465,
          "train_time": 1.0251390933990479,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.50397872924805,
          "throughput": 0.05062781178846766
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 981.5071859359741,
          "cumulative_gpu_hours": 0.27264088498221506,
          "collection_time": 38.43752908706665,
          "train_time": 0.4524405002593994,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.91067814826965,
          "throughput": 0.05139977238070675
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        },
        "curriculum": {
          "current_step": 50,
          "progress": 1.0
        }
      },
      "method": "sync_curriculum",
      "seed": 42,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 1023.9635426998138,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 1012.0563995838165,
      "avg_throughput": 0.04940436127923432,
      "train_metrics": [
        {
          "policy_loss": -0.0,
          "kl_loss": 0.0,
          "total_loss": 0.0,
          "approx_kl": 40.526649475097656,
          "clip_fraction": 1.0,
          "grad_norm": 0.0,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -543.6922607421875,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 41.447509765625,
          "cumulative_gpu_hours": 0.011513197157118056,
          "collection_time": 36.29312610626221,
          "train_time": 5.130678415298462,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 41.447508096694946,
          "throughput": 0.048253805640959185
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 80.6310522556305,
          "cumulative_gpu_hours": 0.022397514515452913,
          "collection_time": 38.67457985877991,
          "train_time": 0.48790621757507324,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 39.18343496322632,
          "throughput": 0.05104197735285335
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 120.11341667175293,
          "cumulative_gpu_hours": 0.03336483796437582,
          "collection_time": 38.90828037261963,
          "train_time": 0.5532681941986084,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.48226284980774,
          "throughput": 0.05065565789904413
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 159.99281644821167,
          "cumulative_gpu_hours": 0.04444244901339213,
          "collection_time": 39.02956032752991,
          "train_time": 0.8288445472717285,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.87930774688721,
          "throughput": 0.05015132190092018
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 199.6528890132904,
          "cumulative_gpu_hours": 0.055459135837025116,
          "collection_time": 38.86193656921387,
          "train_time": 0.7777419090270996,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 39.65997934341431,
          "throughput": 0.05042866973484967
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 239.3427791595459,
          "cumulative_gpu_hours": 0.06648410532209609,
          "collection_time": 38.82939839363098,
          "train_time": 0.8397009372711182,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.68979573249817,
          "throughput": 0.05039078592088575
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 278.6243646144867,
          "cumulative_gpu_hours": 0.07739565683735741,
          "collection_time": 38.70534634590149,
          "train_time": 0.5550310611724854,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.28149461746216,
          "throughput": 0.050914559628566725
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 318.284729719162,
          "cumulative_gpu_hours": 0.08841242492198945,
          "collection_time": 38.75081491470337,
          "train_time": 0.8882565498352051,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.66026735305786,
          "throughput": 0.05042830352594174
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 358.26651310920715,
          "cumulative_gpu_hours": 0.09951847586366866,
          "collection_time": 38.94044208526611,
          "train_time": 1.020009994506836,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.98168587684631,
          "throughput": 0.05002290314021537
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 398.73348808288574,
          "cumulative_gpu_hours": 0.11075930224524604,
          "collection_time": 39.485069274902344,
          "train_time": 0.9608893394470215,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 40.466883420944214,
          "throughput": 0.04942312901133551
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 440.1478052139282,
          "cumulative_gpu_hours": 0.12226327922609118,
          "collection_time": 40.51067304611206,
          "train_time": 0.8823792934417725,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 41.41415500640869,
          "throughput": 0.04829266707700561
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 480.65059757232666,
          "cumulative_gpu_hours": 0.13351405488120185,
          "collection_time": 40.028098821640015,
          "train_time": 0.45420289039611816,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 40.50270080566406,
          "throughput": 0.04937942310554046
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 521.4042592048645,
          "cumulative_gpu_hours": 0.1448345164457957,
          "collection_time": 40.20435452461243,
          "train_time": 0.526233434677124,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 40.7535617351532,
          "throughput": 0.04907546518258895
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 561.938351392746,
          "cumulative_gpu_hours": 0.156093986497985,
          "collection_time": 40.004608392715454,
          "train_time": 0.506528377532959,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 40.53390049934387,
          "throughput": 0.049341414849340105
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 602.4374470710754,
          "cumulative_gpu_hours": 0.16734373529752095,
          "collection_time": 40.01622748374939,
          "train_time": 0.4619138240814209,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 40.49898409843445,
          "throughput": 0.04938395479597507
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 642.3742880821228,
          "cumulative_gpu_hours": 0.17843730224503412,
          "collection_time": 39.03699588775635,
          "train_time": 0.8781254291534424,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.9367516040802,
          "throughput": 0.05007918570411888
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 682.5441107749939,
          "cumulative_gpu_hours": 0.1895955863263872,
          "collection_time": 39.15393686294556,
          "train_time": 0.9950098991394043,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 40.16972374916077,
          "throughput": 0.04978874170230718
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 721.5788419246674,
          "cumulative_gpu_hours": 0.2004385672012965,
          "collection_time": 38.54448127746582,
          "train_time": 0.4694240093231201,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.034637212753296,
          "throughput": 0.051236546380571076
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 761.4223501682281,
          "cumulative_gpu_hours": 0.21150620838006337,
          "collection_time": 38.8619601726532,
          "train_time": 0.9608285427093506,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.84341502189636,
          "throughput": 0.05019650044808858
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 800.7430644035339,
          "cumulative_gpu_hours": 0.22242862900098165,
          "collection_time": 38.86814570426941,
          "train_time": 0.4314284324645996,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.32062482833862,
          "throughput": 0.05086389162764747
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 840.2194256782532,
          "cumulative_gpu_hours": 0.2333942849106259,
          "collection_time": 38.9593346118927,
          "train_time": 0.4963223934173584,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.476200580596924,
          "throughput": 0.05066343697176943
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 879.7821838855743,
          "cumulative_gpu_hours": 0.24438393996821509,
          "collection_time": 38.78478407859802,
          "train_time": 0.7569880485534668,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.56266808509827,
          "throughput": 0.05055270781278078
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 919.1231114864349,
          "cumulative_gpu_hours": 0.2553119754128986,
          "collection_time": 38.83051013946533,
          "train_time": 0.4894406795501709,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.34083533287048,
          "throughput": 0.05083776140180069
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 958.3379137516022,
          "cumulative_gpu_hours": 0.26620497604211174,
          "collection_time": 38.724732875823975,
          "train_time": 0.4694955348968506,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.214709997177124,
          "throughput": 0.05100126968027993
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 997.8276841640472,
          "cumulative_gpu_hours": 0.27717435671223534,
          "collection_time": 39.02595567703247,
          "train_time": 0.4407515525817871,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.489681243896484,
          "throughput": 0.05064614190344014
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        },
        "curriculum": {
          "current_step": 50,
          "progress": 1.0
        }
      },
      "method": "sync_curriculum",
      "seed": 123,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 1040.5272896289825,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 931.9767556190491,
      "avg_throughput": 0.05364940670305493,
      "train_metrics": [
        {
          "policy_loss": -0.0,
          "kl_loss": 0.0,
          "total_loss": 0.0,
          "approx_kl": 2.8644962310791016,
          "clip_fraction": 0.5,
          "grad_norm": 0.0,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -581.8814697265625,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 2.8926761150360107,
          "cumulative_gpu_hours": 0.0008035211430655585,
          "collection_time": 2.4440295696258545,
          "train_time": 0.4279470443725586,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 2.892674446105957,
          "throughput": 0.6914016897727112
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 40.787099838256836,
          "cumulative_gpu_hours": 0.011329749955071343,
          "collection_time": 37.36449360847473,
          "train_time": 0.5091276168823242,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 37.89431428909302,
          "throughput": 0.05277836629374377
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 78.81943678855896,
          "cumulative_gpu_hours": 0.021894287996821932,
          "collection_time": 37.355435848236084,
          "train_time": 0.6559512615203857,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.032238483428955,
          "throughput": 0.05258696515776795
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 116.80654621124268,
          "cumulative_gpu_hours": 0.0324462628364563,
          "collection_time": 37.48507738113403,
          "train_time": 0.48134732246398926,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 37.98701214790344,
          "throughput": 0.052649573812569066
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 154.87841367721558,
          "cumulative_gpu_hours": 0.04302178157700433,
          "collection_time": 37.60809588432312,
          "train_time": 0.4433140754699707,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.07177805900574,
          "throughput": 0.05253235078488559
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 192.87908720970154,
          "cumulative_gpu_hours": 0.05357752422491709,
          "collection_time": 37.32064771652222,
          "train_time": 0.6597826480865479,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.00058078765869,
          "throughput": 0.05263077454462308
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 230.9667329788208,
          "cumulative_gpu_hours": 0.06415742582745022,
          "collection_time": 37.58119797706604,
          "train_time": 0.4864058494567871,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.08755326271057,
          "throughput": 0.052510592796678544
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 269.22487807273865,
          "cumulative_gpu_hours": 0.07478468835353852,
          "collection_time": 37.54963707923889,
          "train_time": 0.6876733303070068,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 38.25805401802063,
          "throughput": 0.0522765742099152
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 306.9954912662506,
          "cumulative_gpu_hours": 0.08527652535173628,
          "collection_time": 37.26327395439148,
          "train_time": 0.4865903854370117,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 37.77052044868469,
          "throughput": 0.052951348730214474
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 345.11454463005066,
          "cumulative_gpu_hours": 0.09586515128612519,
          "collection_time": 37.62630367279053,
          "train_time": 0.47197532653808594,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.118961572647095,
          "throughput": 0.05246732642987667
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 383.4321222305298,
          "cumulative_gpu_hours": 0.10650892284181383,
          "collection_time": 37.83719277381897,
          "train_time": 0.45926785469055176,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 38.31742024421692,
          "throughput": 0.052195580685050195
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 421.81491327285767,
          "cumulative_gpu_hours": 0.11717080924246047,
          "collection_time": 37.87348008155823,
          "train_time": 0.48899197578430176,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.382694482803345,
          "throughput": 0.05210681602606255
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 459.82657647132874,
          "cumulative_gpu_hours": 0.12772960457536908,
          "collection_time": 37.342872858047485,
          "train_time": 0.6486361026763916,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.01155734062195,
          "throughput": 0.05261557641740326
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 498.1526765823364,
          "cumulative_gpu_hours": 0.13837574349509346,
          "collection_time": 37.75869655609131,
          "train_time": 0.5469915866851807,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.326009035110474,
          "throughput": 0.05218388374766074
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 536.4422216415405,
          "cumulative_gpu_hours": 0.14901172823376127,
          "collection_time": 37.74401879310608,
          "train_time": 0.5244853496551514,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.28944993019104,
          "throughput": 0.052233709380687915
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 574.53564620018,
          "cumulative_gpu_hours": 0.15959323505560558,
          "collection_time": 37.58288025856018,
          "train_time": 0.4895782470703125,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.093332052230835,
          "throughput": 0.05250262689695257
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 612.8409552574158,
          "cumulative_gpu_hours": 0.1702335986826155,
          "collection_time": 37.8127760887146,
          "train_time": 0.47194814682006836,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.305213928222656,
          "throughput": 0.05221221329680221
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 650.8866713047028,
          "cumulative_gpu_hours": 0.1808018531401952,
          "collection_time": 37.578712701797485,
          "train_time": 0.446270227432251,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.045559883117676,
          "throughput": 0.05256855218176141
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 689.1160094738007,
          "cumulative_gpu_hours": 0.19142111374272242,
          "collection_time": 37.73352313041687,
          "train_time": 0.47563815116882324,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.229241371154785,
          "throughput": 0.0523159740624376
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 727.2510359287262,
          "cumulative_gpu_hours": 0.20201417664686838,
          "collection_time": 37.648786544799805,
          "train_time": 0.46559667587280273,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.13493537902832,
          "throughput": 0.05244534912991795
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 765.2583401203156,
          "cumulative_gpu_hours": 0.2125717611445321,
          "collection_time": 37.52528095245361,
          "train_time": 0.46112966537475586,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.0071439743042,
          "throughput": 0.052621686105963565
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 803.32279753685,
          "cumulative_gpu_hours": 0.2231452215380139,
          "collection_time": 37.466025829315186,
          "train_time": 0.5773169994354248,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.06436491012573,
          "throughput": 0.05254258161727448
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 841.5460603237152,
          "cumulative_gpu_hours": 0.23376279453436533,
          "collection_time": 37.71944737434387,
          "train_time": 0.48369336128234863,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.22317695617676,
          "throughput": 0.052324274413218436
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 879.3400771617889,
          "cumulative_gpu_hours": 0.24426113254494136,
          "collection_time": 37.24717426300049,
          "train_time": 0.5257387161254883,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 37.79392743110657,
          "throughput": 0.052918554274247916
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 917.734884262085,
          "cumulative_gpu_hours": 0.25492635673946806,
          "collection_time": 37.53956055641174,
          "train_time": 0.8342750072479248,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.3947114944458,
          "throughput": 0.052090507316074536
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        },
        "curriculum": {
          "current_step": 50,
          "progress": 1.0
        }
      },
      "method": "sync_curriculum",
      "seed": 456,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 960.547337770462,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 971.9605810642242,
      "avg_throughput": 0.051442415437520865,
      "train_metrics": [
        {
          "policy_loss": 0.2828420400619507,
          "kl_loss": 0.0,
          "total_loss": 0.2828420400619507,
          "approx_kl": 30.88092041015625,
          "clip_fraction": 1.0,
          "grad_norm": 0.00015643723600078374,
          "mean_reward": 0.5,
          "mean_advantage": 0.0,
          "std_advantage": 1.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.5,
          "avg_reward": 0.5,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -580.6595458984375,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 18.725044012069702,
          "cumulative_gpu_hours": 0.005201401114463806,
          "collection_time": 17.07735252380371,
          "train_time": 1.6254186630249023,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 18.725042581558228,
          "throughput": 0.1068088358832222
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 57.74206209182739,
          "cumulative_gpu_hours": 0.016039461692174274,
          "collection_time": 38.35756039619446,
          "train_time": 0.6383144855499268,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.016911029815674,
          "throughput": 0.05125982419447951
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 96.96778464317322,
          "cumulative_gpu_hours": 0.026935495734214783,
          "collection_time": 38.49288773536682,
          "train_time": 0.7117583751678467,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.22562098503113,
          "throughput": 0.05098708318125082
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 135.73616671562195,
          "cumulative_gpu_hours": 0.03770449075433943,
          "collection_time": 38.191030979156494,
          "train_time": 0.5558805465698242,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.7682900428772,
          "throughput": 0.051588553371531926
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 175.1147677898407,
          "cumulative_gpu_hours": 0.04864299105273353,
          "collection_time": 38.73393130302429,
          "train_time": 0.623746395111084,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.37851524353027,
          "throughput": 0.05078911654315336
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 214.25575041770935,
          "cumulative_gpu_hours": 0.059515486227141486,
          "collection_time": 38.56089735031128,
          "train_time": 0.559765100479126,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.14088821411133,
          "throughput": 0.051097460769399375
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 253.0358440876007,
          "cumulative_gpu_hours": 0.07028773446877798,
          "collection_time": 38.252339124679565,
          "train_time": 0.5054082870483398,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.780004262924194,
          "throughput": 0.05157297009149917
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 292.26939821243286,
          "cumulative_gpu_hours": 0.08118594394789802,
          "collection_time": 38.578418016433716,
          "train_time": 0.6341679096221924,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.2334566116333,
          "throughput": 0.050976900144122664
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 331.78660798072815,
          "cumulative_gpu_hours": 0.09216294666131337,
          "collection_time": 38.64693284034729,
          "train_time": 0.8492600917816162,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.517120599746704,
          "throughput": 0.05061097493051707
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 371.13228130340576,
          "cumulative_gpu_hours": 0.10309230036205716,
          "collection_time": 38.69123816490173,
          "train_time": 0.6334519386291504,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.345582008361816,
          "throughput": 0.05083162830263777
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 410.3897194862366,
          "cumulative_gpu_hours": 0.11399714430173238,
          "collection_time": 38.53904318809509,
          "train_time": 0.6971850395202637,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.257282733917236,
          "throughput": 0.05094596112410128
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 449.04214882850647,
          "cumulative_gpu_hours": 0.12473393023014069,
          "collection_time": 38.1888382434845,
          "train_time": 0.44304704666137695,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 38.65232038497925,
          "throughput": 0.05174333597775992
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 488.1622576713562,
          "cumulative_gpu_hours": 0.13560062713093227,
          "collection_time": 38.522053956985474,
          "train_time": 0.5765895843505859,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.12001657485962,
          "throughput": 0.05112472271510475
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 527.1740293502808,
          "cumulative_gpu_hours": 0.146437230375078,
          "collection_time": 38.503329277038574,
          "train_time": 0.4874989986419678,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.01167821884155,
          "throughput": 0.051266699904082974
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 566.144583940506,
          "cumulative_gpu_hours": 0.15726238442791832,
          "collection_time": 38.46689486503601,
          "train_time": 0.48258185386657715,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 38.97046113014221,
          "throughput": 0.05132092210356407
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 605.4802830219269,
          "cumulative_gpu_hours": 0.1681889675060908,
          "collection_time": 38.7626748085022,
          "train_time": 0.5503106117248535,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.335609436035156,
          "throughput": 0.05084451540663839
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 644.7695517539978,
          "cumulative_gpu_hours": 0.17910265326499938,
          "collection_time": 38.81250238418579,
          "train_time": 0.45638132095336914,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.28916001319885,
          "throughput": 0.050904626093510714
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 683.9757542610168,
          "cumulative_gpu_hours": 0.1899932650725047,
          "collection_time": 38.50837969779968,
          "train_time": 0.676119327545166,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.206111669540405,
          "throughput": 0.05101245481463592
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 722.9205613136292,
          "cumulative_gpu_hours": 0.20081126703156366,
          "collection_time": 38.12396025657654,
          "train_time": 0.7998149394989014,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.94471526145935,
          "throughput": 0.05135484972923269
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 761.8200418949127,
          "cumulative_gpu_hours": 0.21161667830414244,
          "collection_time": 38.41260123252869,
          "train_time": 0.4661111831665039,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.89939308166504,
          "throughput": 0.05141468392067758
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 801.1586720943451,
          "cumulative_gpu_hours": 0.22254407558176253,
          "collection_time": 38.604900598526,
          "train_time": 0.712317943572998,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.338473081588745,
          "throughput": 0.050840814178322626
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 840.0960772037506,
          "cumulative_gpu_hours": 0.2333600214454863,
          "collection_time": 38.42869973182678,
          "train_time": 0.48761558532714844,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.937312841415405,
          "throughput": 0.051364612862362546
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 879.3033916950226,
          "cumulative_gpu_hours": 0.24425094213750628,
          "collection_time": 38.695995807647705,
          "train_time": 0.490556001663208,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.20722436904907,
          "throughput": 0.05101100708314455
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 918.3165776729584,
          "cumulative_gpu_hours": 0.25508793824248843,
          "collection_time": 38.49410367012024,
          "train_time": 0.49844884872436523,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.01309657096863,
          "throughput": 0.05126483606246956
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 957.361396074295,
          "cumulative_gpu_hours": 0.2659337211317486,
          "collection_time": 38.469578981399536,
          "train_time": 0.5544013977050781,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.04472541809082,
          "throughput": 0.05122330810587103
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        }
      },
      "method": "async",
      "seed": 42,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 999.6050074100494,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 965.8208720684052,
      "avg_throughput": 0.05176943411144122,
      "train_metrics": [
        {
          "policy_loss": 0.2821888029575348,
          "kl_loss": 0.0,
          "total_loss": 0.2821888029575348,
          "approx_kl": 21.816896438598633,
          "clip_fraction": 1.0,
          "grad_norm": 0.17889602482318878,
          "mean_reward": 0.5,
          "mean_advantage": 0.0,
          "std_advantage": 1.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.5,
          "avg_reward": 0.5,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -591.099609375,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 1.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 21.628735065460205,
          "cumulative_gpu_hours": 0.006007981962627835,
          "collection_time": 20.439167976379395,
          "train_time": 1.1687414646148682,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 21.62873363494873,
          "throughput": 0.092469583922764
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 60.347792863845825,
          "cumulative_gpu_hours": 0.01676327579551273,
          "collection_time": 37.981048822402954,
          "train_time": 0.7172932624816895,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.718950271606445,
          "throughput": 0.05165429294881088
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 99.2721962928772,
          "cumulative_gpu_hours": 0.027575610081354777,
          "collection_time": 38.016358852386475,
          "train_time": 0.8872931003570557,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.924304723739624,
          "throughput": 0.05138177840798312
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 137.93100380897522,
          "cumulative_gpu_hours": 0.03831416772471534,
          "collection_time": 38.05691385269165,
          "train_time": 0.5801301002502441,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.65871739387512,
          "throughput": 0.05173477380594291
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 177.19448614120483,
          "cumulative_gpu_hours": 0.04922069059477912,
          "collection_time": 38.34739947319031,
          "train_time": 0.8958802223205566,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.263394594192505,
          "throughput": 0.0509380307197336
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 215.74127411842346,
          "cumulative_gpu_hours": 0.05992813169956207,
          "collection_time": 38.098915338516235,
          "train_time": 0.4274272918701172,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.54669427871704,
          "throughput": 0.05188512367724018
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 254.21230912208557,
          "cumulative_gpu_hours": 0.07061453031169043,
          "collection_time": 37.96096920967102,
          "train_time": 0.48999857902526855,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.470947265625,
          "throughput": 0.05198728240796563
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 293.028596162796,
          "cumulative_gpu_hours": 0.08139683226744333,
          "collection_time": 38.17286825180054,
          "train_time": 0.6226062774658203,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.816197872161865,
          "throughput": 0.05152488161222912
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 331.4479088783264,
          "cumulative_gpu_hours": 0.09206886357731289,
          "collection_time": 37.92111396789551,
          "train_time": 0.4777200222015381,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.419227600097656,
          "throughput": 0.052057267283398385
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 370.0625047683716,
          "cumulative_gpu_hours": 0.10279514021343655,
          "collection_time": 38.01673436164856,
          "train_time": 0.5774643421173096,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.61450743675232,
          "throughput": 0.05179400522655509
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 408.63329315185547,
          "cumulative_gpu_hours": 0.11350924809773763,
          "collection_time": 38.029701471328735,
          "train_time": 0.5206747055053711,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.57063674926758,
          "throughput": 0.0518529163259919
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 447.1461675167084,
          "cumulative_gpu_hours": 0.12420726875464122,
          "collection_time": 37.93659806251526,
          "train_time": 0.5560815334320068,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.51277780532837,
          "throughput": 0.051930816574941877
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 485.69953894615173,
          "cumulative_gpu_hours": 0.13491653859615327,
          "collection_time": 38.014965534210205,
          "train_time": 0.518242597579956,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.55327892303467,
          "throughput": 0.05187626204226815
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 524.3851573467255,
          "cumulative_gpu_hours": 0.14566254370742374,
          "collection_time": 38.233699798583984,
          "train_time": 0.43190765380859375,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 38.6855309009552,
          "throughput": 0.051698915677815274
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 562.8779499530792,
          "cumulative_gpu_hours": 0.15635498609807758,
          "collection_time": 38.00675439834595,
          "train_time": 0.46547389030456543,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.492703914642334,
          "throughput": 0.05195789842238688
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 601.90469622612,
          "cumulative_gpu_hours": 0.1671957489517,
          "collection_time": 38.31376838684082,
          "train_time": 0.6931636333465576,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.026657819747925,
          "throughput": 0.05124702220818862
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 641.1015939712524,
          "cumulative_gpu_hours": 0.17808377610312567,
          "collection_time": 38.04315900802612,
          "train_time": 1.1334238052368164,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.196810483932495,
          "throughput": 0.051024559787073424
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 679.7013876438141,
          "cumulative_gpu_hours": 0.18880594101217058,
          "collection_time": 38.09280800819397,
          "train_time": 0.48702120780944824,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.599703311920166,
          "throughput": 0.051813869755376336
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 718.4929270744324,
          "cumulative_gpu_hours": 0.19958136863178677,
          "collection_time": 38.27747297286987,
          "train_time": 0.49361681938171387,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.791441440582275,
          "throughput": 0.051557764437896565
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 757.1426832675934,
          "cumulative_gpu_hours": 0.21031741201877593,
          "collection_time": 38.08735799789429,
          "train_time": 0.5419549942016602,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.64966869354248,
          "throughput": 0.051746886004592235
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 795.9821655750275,
          "cumulative_gpu_hours": 0.2211061571041743,
          "collection_time": 38.21381902694702,
          "train_time": 0.6048882007598877,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.839324951171875,
          "throughput": 0.05149420085221268
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 834.9914910793304,
          "cumulative_gpu_hours": 0.23194208085536958,
          "collection_time": 38.44797158241272,
          "train_time": 0.5414001941680908,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.009230852127075,
          "throughput": 0.05126991628164709
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 873.3397541046143,
          "cumulative_gpu_hours": 0.24259437614017063,
          "collection_time": 37.87140893936157,
          "train_time": 0.4557347297668457,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.3481764793396,
          "throughput": 0.052153718471529324
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 912.0273025035858,
          "cumulative_gpu_hours": 0.2533409173621072,
          "collection_time": 37.955846548080444,
          "train_time": 0.7117671966552734,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.687437772750854,
          "throughput": 0.05169636748104011
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 951.5200040340424,
          "cumulative_gpu_hours": 0.2643111122316784,
          "collection_time": 39.02614450454712,
          "train_time": 0.44436001777648926,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.492613792419434,
          "throughput": 0.05064238114277202
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        }
      },
      "method": "async",
      "seed": 123,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 995.8305740356445,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 965.2586200237274,
      "avg_throughput": 0.05179958921140837,
      "train_metrics": [
        {
          "policy_loss": -0.0,
          "kl_loss": 0.0,
          "total_loss": 0.0,
          "approx_kl": 41.778480529785156,
          "clip_fraction": 1.0,
          "grad_norm": 0.0,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -442.9190979003906,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.5,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.5
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.5,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.5,
          "difficulty_4_ratio": 0.5,
          "difficulty_5_ratio": 0.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 23.917839288711548,
          "cumulative_gpu_hours": 0.006643844246864319,
          "collection_time": 22.207587003707886,
          "train_time": 1.6893551349639893,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 23.917838096618652,
          "throughput": 0.08361959772119817
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 62.38726186752319,
          "cumulative_gpu_hours": 0.017329794963200886,
          "collection_time": 37.82693433761597,
          "train_time": 0.6223857402801514,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 38.46931028366089,
          "throughput": 0.05198949461928518
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 100.83105039596558,
          "cumulative_gpu_hours": 0.02800862510999044,
          "collection_time": 37.94864225387573,
          "train_time": 0.4735856056213379,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.44368600845337,
          "throughput": 0.0520241477250704
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 139.36541724205017,
          "cumulative_gpu_hours": 0.03871261590056949,
          "collection_time": 38.02277660369873,
          "train_time": 0.491163969039917,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 38.53426957130432,
          "throughput": 0.05190185313618501
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 178.29797625541687,
          "cumulative_gpu_hours": 0.04952721562650469,
          "collection_time": 38.238250732421875,
          "train_time": 0.6734859943389893,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.93246388435364,
          "throughput": 0.05137101021761352
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 217.17544651031494,
          "cumulative_gpu_hours": 0.06032651291953193,
          "collection_time": 38.2056086063385,
          "train_time": 0.6505601406097412,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.87737846374512,
          "throughput": 0.051443797885320094
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 255.70731663703918,
          "cumulative_gpu_hours": 0.07102981017695532,
          "collection_time": 38.07561707496643,
          "train_time": 0.43633222579956055,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.53176927566528,
          "throughput": 0.051905221005854485
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 294.0749032497406,
          "cumulative_gpu_hours": 0.08168747312492794,
          "collection_time": 37.83862090110779,
          "train_time": 0.5079543590545654,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.367493867874146,
          "throughput": 0.052127459950534825
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 332.56583881378174,
          "cumulative_gpu_hours": 0.09237939967049492,
          "collection_time": 37.97752499580383,
          "train_time": 0.4920496940612793,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.49084281921387,
          "throughput": 0.05196041067205833
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 371.4493019580841,
          "cumulative_gpu_hours": 0.10318036165502337,
          "collection_time": 38.39027667045593,
          "train_time": 0.4723939895629883,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.88336682319641,
          "throughput": 0.05143587511580588
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 410.46977519989014,
          "cumulative_gpu_hours": 0.11401938199996949,
          "collection_time": 38.39515471458435,
          "train_time": 0.604816198348999,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.02028465270996,
          "throughput": 0.051255392363241
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 449.08549404144287,
          "cumulative_gpu_hours": 0.12474597056706746,
          "collection_time": 37.96833062171936,
          "train_time": 0.6268258094787598,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.615625619888306,
          "throughput": 0.05179250544033488
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 487.4442021846771,
          "cumulative_gpu_hours": 0.13540116727352142,
          "collection_time": 37.83487105369568,
          "train_time": 0.5033059120178223,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.358612060546875,
          "throughput": 0.05213952988818037
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 526.0141758918762,
          "cumulative_gpu_hours": 0.1461150488588545,
          "collection_time": 38.08783769607544,
          "train_time": 0.45960330963134766,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.56988215446472,
          "throughput": 0.05185393079476875
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 564.7285220623016,
          "cumulative_gpu_hours": 0.1568690339061949,
          "collection_time": 38.220680713653564,
          "train_time": 0.47229862213134766,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.71425104141235,
          "throughput": 0.05166056287284531
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 603.7329776287079,
          "cumulative_gpu_hours": 0.1677036048968633,
          "collection_time": 38.207154750823975,
          "train_time": 0.7764232158660889,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.00436019897461,
          "throughput": 0.051276318590980974
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 642.293506860733,
          "cumulative_gpu_hours": 0.1784148630168703,
          "collection_time": 37.964823722839355,
          "train_time": 0.5748758316040039,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.560431241989136,
          "throughput": 0.05186663985806685
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 680.7536513805389,
          "cumulative_gpu_hours": 0.18909823649459415,
          "collection_time": 37.89448285102844,
          "train_time": 0.5447854995727539,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.46005439758301,
          "throughput": 0.05200200653189114
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 719.3823711872101,
          "cumulative_gpu_hours": 0.19982843644089168,
          "collection_time": 38.07900094985962,
          "train_time": 0.5288305282592773,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.62863087654114,
          "throughput": 0.051775068249042815
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 757.9720461368561,
          "cumulative_gpu_hours": 0.21054779059357112,
          "collection_time": 37.9624810218811,
          "train_time": 0.6061818599700928,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.58958029747009,
          "throughput": 0.051827461832517484
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 796.4015390872955,
          "cumulative_gpu_hours": 0.221222649746471,
          "collection_time": 37.93090319633484,
          "train_time": 0.4782383441925049,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.429335832595825,
          "throughput": 0.05204357443782821
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 835.4044137001038,
          "cumulative_gpu_hours": 0.23205678158336215,
          "collection_time": 38.44152760505676,
          "train_time": 0.5407295227050781,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.00278091430664,
          "throughput": 0.05127839485072149
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 873.6562068462372,
          "cumulative_gpu_hours": 0.24268227967951034,
          "collection_time": 37.75590229034424,
          "train_time": 0.47273826599121094,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.25170063972473,
          "throughput": 0.052285257035682806
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 912.1759874820709,
          "cumulative_gpu_hours": 0.2533822187450197,
          "collection_time": 37.79752516746521,
          "train_time": 0.7010798454284668,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.51969122886658,
          "throughput": 0.051921496154185266
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 950.6350984573364,
          "cumulative_gpu_hours": 0.2640653051270379,
          "collection_time": 37.97555088996887,
          "train_time": 0.462785005569458,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.459017515182495,
          "throughput": 0.05200340854288486
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "eta": 8.0,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "avg_staleness": 0.0
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        }
      },
      "method": "async",
      "seed": 456,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 997.9148745536804,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 983.7102336883545,
      "avg_throughput": 0.05082797584866878,
      "train_metrics": [
        {
          "policy_loss": 0.2683838903903961,
          "kl_loss": 0.0,
          "total_loss": 0.2683838903903961,
          "approx_kl": 63.463134765625,
          "clip_fraction": 1.0,
          "grad_norm": 1.4173862934112549,
          "mean_reward": 0.5,
          "mean_advantage": 0.0,
          "std_advantage": 1.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.5,
          "avg_reward": 0.5,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -418.28338623046875,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 28.486684799194336,
          "cumulative_gpu_hours": 0.007912967999776204,
          "collection_time": 26.176807403564453,
          "train_time": 2.2886486053466797,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 28.48668336868286,
          "throughput": 0.07020824341378826
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 68.67975163459778,
          "cumulative_gpu_hours": 0.01907770878738827,
          "collection_time": 39.618979930877686,
          "train_time": 0.5518584251403809,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 40.19296669960022,
          "throughput": 0.04975994966850489
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 108.93854594230652,
          "cumulative_gpu_hours": 0.030260707206196256,
          "collection_time": 39.6918089389801,
          "train_time": 0.5439674854278564,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 40.258699893951416,
          "throughput": 0.04967870311928493
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 149.19651651382446,
          "cumulative_gpu_hours": 0.04144347680939568,
          "collection_time": 39.75099277496338,
          "train_time": 0.48549485206604004,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 40.25786876678467,
          "throughput": 0.049679728740387986
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 188.6359465122223,
          "cumulative_gpu_hours": 0.05239887403117286,
          "collection_time": 38.510833740234375,
          "train_time": 0.9079840183258057,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.43933391571045,
          "throughput": 0.050710795579722266
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 227.57577180862427,
          "cumulative_gpu_hours": 0.06321549216906229,
          "collection_time": 38.248607873916626,
          "train_time": 0.6710553169250488,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.93973970413208,
          "throughput": 0.05136141163747354
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 266.2869830131531,
          "cumulative_gpu_hours": 0.07396860639254252,
          "collection_time": 38.15528464317322,
          "train_time": 0.5355300903320312,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.711127042770386,
          "throughput": 0.0516647318945346
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 305.6305799484253,
          "cumulative_gpu_hours": 0.08489738331900702,
          "collection_time": 38.47445797920227,
          "train_time": 0.8483366966247559,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.34351205825806,
          "throughput": 0.050834302668213566
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 344.30269479751587,
          "cumulative_gpu_hours": 0.09563963744375441,
          "collection_time": 38.10075569152832,
          "train_time": 0.5500402450561523,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.67202949523926,
          "throughput": 0.051716965106426885
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 383.4129512310028,
          "cumulative_gpu_hours": 0.10650359756416744,
          "collection_time": 38.37712049484253,
          "train_time": 0.7115130424499512,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.11017107963562,
          "throughput": 0.05113759272307007
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 423.57145380973816,
          "cumulative_gpu_hours": 0.11765873716937171,
          "collection_time": 38.56651711463928,
          "train_time": 1.5714514255523682,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 40.15829586982727,
          "throughput": 0.04980291012554369
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 462.30913186073303,
          "cumulative_gpu_hours": 0.12841920329464807,
          "collection_time": 38.04477882385254,
          "train_time": 0.6711521148681641,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 38.737590312957764,
          "throughput": 0.05162943755257275
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 501.12339997291565,
          "cumulative_gpu_hours": 0.13920094443692102,
          "collection_time": 38.31622910499573,
          "train_time": 0.4774744510650635,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.81418251991272,
          "throughput": 0.05152755694323708
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 540.0239977836609,
          "cumulative_gpu_hours": 0.15000666605101692,
          "collection_time": 38.31860327720642,
          "train_time": 0.5613610744476318,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.900513648986816,
          "throughput": 0.05141320287044824
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 578.7446596622467,
          "cumulative_gpu_hours": 0.1607624054617352,
          "collection_time": 38.19952988624573,
          "train_time": 0.500485897064209,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.72057557106018,
          "throughput": 0.05165212475546988
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 618.1049420833588,
          "cumulative_gpu_hours": 0.17169581724537744,
          "collection_time": 38.53133797645569,
          "train_time": 0.8071658611297607,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.36019730567932,
          "throughput": 0.050812753413495164
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 657.1031119823456,
          "cumulative_gpu_hours": 0.1825286422173182,
          "collection_time": 38.43309020996094,
          "train_time": 0.5442638397216797,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.99808478355408,
          "throughput": 0.05128456977054991
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 696.175936460495,
          "cumulative_gpu_hours": 0.19338220457235972,
          "collection_time": 38.20397210121155,
          "train_time": 0.848534107208252,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.07273292541504,
          "throughput": 0.05118659101265709
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 735.0286555290222,
          "cumulative_gpu_hours": 0.2041746265358395,
          "collection_time": 38.15866470336914,
          "train_time": 0.6723759174346924,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.85263252258301,
          "throughput": 0.05147656336639491
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 774.1130993366241,
          "cumulative_gpu_hours": 0.2150314164823956,
          "collection_time": 38.365288496017456,
          "train_time": 0.6984374523162842,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.084357261657715,
          "throughput": 0.05117136727132589
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 813.1167669296265,
          "cumulative_gpu_hours": 0.2258657685915629,
          "collection_time": 38.42447876930237,
          "train_time": 0.5578773021697998,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.003515005111694,
          "throughput": 0.05127742973262501
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 852.1765003204346,
          "cumulative_gpu_hours": 0.23671569453345404,
          "collection_time": 38.55256414413452,
          "train_time": 0.48642897605895996,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.05963897705078,
          "throughput": 0.05120375027467832
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 890.9607605934143,
          "cumulative_gpu_hours": 0.2474891001648373,
          "collection_time": 38.26281476020813,
          "train_time": 0.5007526874542236,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 38.784170627593994,
          "throughput": 0.05156742989824433
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 930.892320394516,
          "cumulative_gpu_hours": 0.2585812001095878,
          "collection_time": 38.342641830444336,
          "train_time": 1.5685136318206787,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.9314751625061,
          "throughput": 0.050085803037847994
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 969.6644740104675,
          "cumulative_gpu_hours": 0.26935124278068545,
          "collection_time": 38.27599740028381,
          "train_time": 0.4757530689239502,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.77206635475159,
          "throughput": 0.051583528762709246
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "by_difficulty": {
            "1": {
              "max_staleness": 4.852245277701067,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "2": {
              "max_staleness": 2.9430355293715387,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "3": {
              "max_staleness": 1.7850412811874385,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "4": {
              "max_staleness": 1.0826822658929016,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "5": {
              "max_staleness": 1,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            }
          }
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        },
        "curriculum": {
          "level_1": {
            "num_pulls": 44,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 0.2955907693914501,
            "combined_score": NaN
          },
          "level_2": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 8.0,
            "combined_score": NaN
          },
          "level_3": {
            "num_pulls": 2,
            "success_rate": 0.5,
            "mean_reward": 0.5,
            "recent_gradient": 0.999999304649451,
            "ucb_score": 8.0,
            "combined_score": 5.999984948245937
          },
          "level_4": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 8.0,
            "combined_score": NaN
          },
          "level_5": {
            "num_pulls": 0,
            "success_rate": 0.5,
            "mean_reward": 0.5,
            "recent_gradient": 1.0,
            "ucb_score": 10.0,
            "combined_score": 7.349984948350595
          },
          "total_steps": 50
        }
      },
      "method": "aceas",
      "seed": 42,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 1012.1280241012573,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 966.4831073284149,
      "avg_throughput": 0.05173396163975559,
      "train_metrics": [
        {
          "policy_loss": -0.0,
          "kl_loss": 0.0,
          "total_loss": 0.0,
          "approx_kl": 50.88136672973633,
          "clip_fraction": 1.0,
          "grad_norm": 0.0,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -537.9049682617188,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 39.70850348472595,
          "cumulative_gpu_hours": 0.01103013985686832,
          "collection_time": 34.901065826416016,
          "train_time": 4.786993980407715,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 39.70850229263306,
          "throughput": 0.05036704696795002
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 77.41043639183044,
          "cumulative_gpu_hours": 0.02150289899773068,
          "collection_time": 37.19734477996826,
          "train_time": 0.48159170150756836,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 37.70182394981384,
          "throughput": 0.05304783139039286
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 115.66160011291504,
          "cumulative_gpu_hours": 0.03212822225358751,
          "collection_time": 37.7083101272583,
          "train_time": 0.522148847579956,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.25106406211853,
          "throughput": 0.05228612717157522
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 154.75688815116882,
          "cumulative_gpu_hours": 0.04298802448643579,
          "collection_time": 38.24657392501831,
          "train_time": 0.8279335498809814,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.09519982337952,
          "throughput": 0.051157175536521236
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 193.03898000717163,
          "cumulative_gpu_hours": 0.05362193889088101,
          "collection_time": 37.486812591552734,
          "train_time": 0.774707555770874,
          "broadcast_time": 2.1457672119140625e-06,
          "update_time": 38.28200054168701,
          "throughput": 0.052243873666479604
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 230.9890148639679,
          "cumulative_gpu_hours": 0.06416361523999108,
          "collection_time": 37.081695318222046,
          "train_time": 0.8476877212524414,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 37.949928998947144,
          "throughput": 0.052701020865032096
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 268.5659329891205,
          "cumulative_gpu_hours": 0.07460164805253347,
          "collection_time": 37.00433969497681,
          "train_time": 0.5525307655334473,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 37.57682919502258,
          "throughput": 0.05322428855346101
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 306.750990152359,
          "cumulative_gpu_hours": 0.08520860837565528,
          "collection_time": 37.2626736164093,
          "train_time": 0.9026780128479004,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.18496632575989,
          "throughput": 0.05237663385474256
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 344.90949416160583,
          "cumulative_gpu_hours": 0.09580819282266828,
          "collection_time": 37.10297465324402,
          "train_time": 1.0346004962921143,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.158411502838135,
          "throughput": 0.052413083281814404
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 383.33634996414185,
          "cumulative_gpu_hours": 0.10648231943448384,
          "collection_time": 37.43346929550171,
          "train_time": 0.9723725318908691,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.42671275138855,
          "throughput": 0.0520471270321641
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 421.7917456626892,
          "cumulative_gpu_hours": 0.11716437379519144,
          "collection_time": 37.5359001159668,
          "train_time": 0.8986945152282715,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.455241441726685,
          "throughput": 0.05200851496487699
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 459.3815929889679,
          "cumulative_gpu_hours": 0.1276059980524911,
          "collection_time": 37.146113872528076,
          "train_time": 0.42357802391052246,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 37.58975863456726,
          "throughput": 0.05320598143348585
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 497.0344204902649,
          "cumulative_gpu_hours": 0.13806511680285136,
          "collection_time": 37.08117413520813,
          "train_time": 0.5513873100280762,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 37.65273857116699,
          "throughput": 0.05311698633075052
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 534.8705401420593,
          "cumulative_gpu_hours": 0.14857515003946092,
          "collection_time": 37.33279204368591,
          "train_time": 0.4826195240020752,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 37.83602571487427,
          "throughput": 0.05285967440321701
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 572.568156003952,
          "cumulative_gpu_hours": 0.1590467100010978,
          "collection_time": 37.19763898849487,
          "train_time": 0.4791748523712158,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 37.69751596450806,
          "throughput": 0.053053893574393225
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 610.8516163825989,
          "cumulative_gpu_hours": 0.1696810045507219,
          "collection_time": 37.261874198913574,
          "train_time": 1.0008654594421387,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 38.283363342285156,
          "throughput": 0.05224201390348946
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 649.4035561084747,
          "cumulative_gpu_hours": 0.18038987669679854,
          "collection_time": 37.438443183898926,
          "train_time": 1.092810869216919,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.55184841156006,
          "throughput": 0.05187818697171172
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 687.0815372467041,
          "cumulative_gpu_hours": 0.19085598256852893,
          "collection_time": 37.188536643981934,
          "train_time": 0.46912288665771484,
          "broadcast_time": 1.9073486328125e-06,
          "update_time": 37.67788791656494,
          "throughput": 0.053081531651372305
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 725.4269549846649,
          "cumulative_gpu_hours": 0.20150748749574027,
          "collection_time": 37.28272294998169,
          "train_time": 1.0414113998413086,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 38.34532594680786,
          "throughput": 0.05215759549871538
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 763.1270263195038,
          "cumulative_gpu_hours": 0.2119797295331955,
          "collection_time": 37.23005032539368,
          "train_time": 0.4495728015899658,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 37.699979305267334,
          "throughput": 0.05305042699905582
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 800.9208602905273,
          "cumulative_gpu_hours": 0.2224780167473687,
          "collection_time": 37.24700999259949,
          "train_time": 0.5264859199523926,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 37.79367470741272,
          "throughput": 0.05291890813696735
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 839.0156192779541,
          "cumulative_gpu_hours": 0.23305989424387613,
          "collection_time": 37.2758469581604,
          "train_time": 0.7982096672058105,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 38.09467148780823,
          "throughput": 0.052500780867478475
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 876.8120677471161,
          "cumulative_gpu_hours": 0.24355890770753225,
          "collection_time": 37.24733805656433,
          "train_time": 0.5286769866943359,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 37.79635548591614,
          "throughput": 0.052915154762613284
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 914.6287152767181,
          "cumulative_gpu_hours": 0.2540635320213106,
          "collection_time": 37.30453944206238,
          "train_time": 0.49123430252075195,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 37.81655526161194,
          "throughput": 0.05288689004495936
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 952.4761488437653,
          "cumulative_gpu_hours": 0.264576708012157,
          "collection_time": 37.39167022705078,
          "train_time": 0.43484926223754883,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 37.8473379611969,
          "throughput": 0.05284387509764904
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "by_difficulty": {
            "1": {
              "max_staleness": 4.852245277701067,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "2": {
              "max_staleness": 2.9430355293715387,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "3": {
              "max_staleness": 1.7850412811874385,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "4": {
              "max_staleness": 1.0826822658929016,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "5": {
              "max_staleness": 1,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            }
          }
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        },
        "curriculum": {
          "level_1": {
            "num_pulls": 44,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 0.2955907693914501,
            "combined_score": NaN
          },
          "level_2": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 8.0,
            "combined_score": NaN
          },
          "level_3": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": 0.0,
            "ucb_score": 8.0,
            "combined_score": 5.8425062437563176
          },
          "level_4": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 8.0,
            "combined_score": NaN
          },
          "level_5": {
            "num_pulls": 0,
            "success_rate": 0.5,
            "mean_reward": 0.5,
            "recent_gradient": 1.0,
            "ucb_score": 10.0,
            "combined_score": 7.349984948350595
          },
          "total_steps": 50
        }
      },
      "method": "aceas",
      "seed": 123,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 994.9485988616943,
      "device": "NVIDIA A10G"
    },
    {
      "total_timesteps": 50,
      "total_updates": 25,
      "total_time": 975.8461065292358,
      "avg_throughput": 0.05123758722349529,
      "train_metrics": [
        {
          "policy_loss": -0.0,
          "kl_loss": 0.0,
          "total_loss": 0.0,
          "approx_kl": 27.59598159790039,
          "clip_fraction": 1.0,
          "grad_norm": 0.0,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 2,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 1.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": -587.095703125,
          "clip_fraction": 1.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 4,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 1.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 6,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 0.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 1.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 8,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 10,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 12,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 14,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 16,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 18,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 20,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 22,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 24,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 26,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 28,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 30,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 32,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 34,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 36,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 38,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 40,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 42,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 44,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 46,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 48,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        },
        {
          "policy_loss": NaN,
          "kl_loss": 0.0,
          "total_loss": NaN,
          "approx_kl": NaN,
          "clip_fraction": 0.0,
          "grad_norm": NaN,
          "mean_reward": 0.0,
          "mean_advantage": 0.0,
          "std_advantage": 0.0,
          "timestep": 50,
          "num_experiences": 2,
          "success_rate": 0.0,
          "avg_reward": 0.0,
          "difficulty_1_ratio": 1.0,
          "difficulty_2_ratio": 0.0,
          "difficulty_3_ratio": 0.0,
          "difficulty_4_ratio": 0.0,
          "difficulty_5_ratio": 0.0
        }
      ],
      "eval_metrics": [],
      "timing_metrics": [
        {
          "timestep": 2,
          "update": 1,
          "cumulative_wall_time": 7.533906936645508,
          "cumulative_gpu_hours": 0.0020927519268459744,
          "collection_time": 6.897461652755737,
          "train_time": 0.6159405708312988,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 7.533905029296875,
          "throughput": 0.26546657971167126
        },
        {
          "timestep": 4,
          "update": 2,
          "cumulative_wall_time": 46.59180688858032,
          "cumulative_gpu_hours": 0.0129421685801612,
          "collection_time": 38.53165078163147,
          "train_time": 0.5047502517700195,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.057796001434326,
          "throughput": 0.051206166367568556
        },
        {
          "timestep": 6,
          "update": 3,
          "cumulative_wall_time": 85.87119889259338,
          "cumulative_gpu_hours": 0.02385311080349816,
          "collection_time": 38.673258543014526,
          "train_time": 0.5851125717163086,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.27929949760437,
          "throughput": 0.05091740498381289
        },
        {
          "timestep": 8,
          "update": 4,
          "cumulative_wall_time": 125.05779337882996,
          "cumulative_gpu_hours": 0.03473827593856388,
          "collection_time": 38.67570781707764,
          "train_time": 0.4899609088897705,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.186506271362305,
          "throughput": 0.051037976852292395
        },
        {
          "timestep": 10,
          "update": 5,
          "cumulative_wall_time": 164.19628286361694,
          "cumulative_gpu_hours": 0.04561007857322693,
          "collection_time": 38.664082765579224,
          "train_time": 0.4533967971801758,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.1384015083313,
          "throughput": 0.0511007073085053
        },
        {
          "timestep": 12,
          "update": 6,
          "cumulative_wall_time": 203.87892317771912,
          "cumulative_gpu_hours": 0.056633034216033086,
          "collection_time": 39.201639890670776,
          "train_time": 0.4605998992919922,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.68255138397217,
          "throughput": 0.05039998513824901
        },
        {
          "timestep": 14,
          "update": 7,
          "cumulative_wall_time": 243.21719646453857,
          "cumulative_gpu_hours": 0.06756033235126072,
          "collection_time": 38.82981014251709,
          "train_time": 0.4881255626678467,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.33818507194519,
          "throughput": 0.0508411864030387
        },
        {
          "timestep": 16,
          "update": 8,
          "cumulative_wall_time": 282.7857689857483,
          "cumulative_gpu_hours": 0.07855160249604119,
          "collection_time": 38.739505767822266,
          "train_time": 0.8079512119293213,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.568482398986816,
          "throughput": 0.05054527944319673
        },
        {
          "timestep": 18,
          "update": 9,
          "cumulative_wall_time": 323.50278306007385,
          "cumulative_gpu_hours": 0.08986188418335385,
          "collection_time": 40.18816614151001,
          "train_time": 0.5045099258422852,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 40.71692681312561,
          "throughput": 0.049119620672237845
        },
        {
          "timestep": 20,
          "update": 10,
          "cumulative_wall_time": 364.3433072566986,
          "cumulative_gpu_hours": 0.10120647423797184,
          "collection_time": 40.30570673942566,
          "train_time": 0.5123772621154785,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 40.840420961380005,
          "throughput": 0.04897109170082412
        },
        {
          "timestep": 22,
          "update": 11,
          "cumulative_wall_time": 405.45840191841125,
          "cumulative_gpu_hours": 0.11262733386622535,
          "collection_time": 40.381558895111084,
          "train_time": 0.7126352787017822,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 41.114940881729126,
          "throughput": 0.0486441171289333
        },
        {
          "timestep": 24,
          "update": 12,
          "cumulative_wall_time": 446.09227108955383,
          "cumulative_gpu_hours": 0.12391451974709829,
          "collection_time": 40.087648153305054,
          "train_time": 0.5248570442199707,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 40.633779525756836,
          "throughput": 0.04922013219893181
        },
        {
          "timestep": 26,
          "update": 13,
          "cumulative_wall_time": 486.93142890930176,
          "cumulative_gpu_hours": 0.1352587302525838,
          "collection_time": 40.11626124382019,
          "train_time": 0.7010507583618164,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 40.839051961898804,
          "throughput": 0.04897273330110404
        },
        {
          "timestep": 28,
          "update": 14,
          "cumulative_wall_time": 527.197179555893,
          "cumulative_gpu_hours": 0.14644366098774805,
          "collection_time": 39.62077617645264,
          "train_time": 0.6247601509094238,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 40.26565623283386,
          "throughput": 0.04967012057211024
        },
        {
          "timestep": 30,
          "update": 15,
          "cumulative_wall_time": 566.6765706539154,
          "cumulative_gpu_hours": 0.1574101585149765,
          "collection_time": 38.93233060836792,
          "train_time": 0.5264718532562256,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.47930407524109,
          "throughput": 0.05065945428491666
        },
        {
          "timestep": 32,
          "update": 16,
          "cumulative_wall_time": 606.3154878616333,
          "cumulative_gpu_hours": 0.16842096885045368,
          "collection_time": 39.098795890808105,
          "train_time": 0.5181403160095215,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.638829469680786,
          "throughput": 0.05045557668471955
        },
        {
          "timestep": 34,
          "update": 17,
          "cumulative_wall_time": 646.0199046134949,
          "cumulative_gpu_hours": 0.17944997350374858,
          "collection_time": 39.0007426738739,
          "train_time": 0.6828820705413818,
          "broadcast_time": 9.5367431640625e-07,
          "update_time": 39.704328536987305,
          "throughput": 0.050372341598394314
        },
        {
          "timestep": 36,
          "update": 18,
          "cumulative_wall_time": 685.1123020648956,
          "cumulative_gpu_hours": 0.19030897279580433,
          "collection_time": 38.59080123901367,
          "train_time": 0.4810338020324707,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.092308044433594,
          "throughput": 0.05116095979103446
        },
        {
          "timestep": 38,
          "update": 19,
          "cumulative_wall_time": 724.4899458885193,
          "cumulative_gpu_hours": 0.20124720719125536,
          "collection_time": 38.86122512817383,
          "train_time": 0.49582529067993164,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.37755608558655,
          "throughput": 0.050790353663722275
        },
        {
          "timestep": 40,
          "update": 20,
          "cumulative_wall_time": 763.8791041374207,
          "cumulative_gpu_hours": 0.2121886400381724,
          "collection_time": 38.880664348602295,
          "train_time": 0.48734164237976074,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.389068603515625,
          "throughput": 0.05077550881265297
        },
        {
          "timestep": 42,
          "update": 21,
          "cumulative_wall_time": 803.3369557857513,
          "cumulative_gpu_hours": 0.22314915438493094,
          "collection_time": 38.95671606063843,
          "train_time": 0.48007655143737793,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.457701206207275,
          "throughput": 0.05068719005062998
        },
        {
          "timestep": 44,
          "update": 22,
          "cumulative_wall_time": 842.9186849594116,
          "cumulative_gpu_hours": 0.23414407915539212,
          "collection_time": 38.91073822975159,
          "train_time": 0.649930477142334,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.58164072036743,
          "throughput": 0.050528476424951854
        },
        {
          "timestep": 46,
          "update": 23,
          "cumulative_wall_time": 882.3525667190552,
          "cumulative_gpu_hours": 0.24509793519973755,
          "collection_time": 38.881173849105835,
          "train_time": 0.532292366027832,
          "broadcast_time": 1.1920928955078125e-06,
          "update_time": 39.43379092216492,
          "throughput": 0.050717923720487175
        },
        {
          "timestep": 48,
          "update": 24,
          "cumulative_wall_time": 921.9145359992981,
          "cumulative_gpu_hours": 0.2560873711109161,
          "collection_time": 38.870237588882446,
          "train_time": 0.671231746673584,
          "broadcast_time": 1.6689300537109375e-06,
          "update_time": 39.561882734298706,
          "throughput": 0.050553711344634086
        },
        {
          "timestep": 50,
          "update": 25,
          "cumulative_wall_time": 961.672122001648,
          "cumulative_gpu_hours": 0.26713114500045776,
          "collection_time": 38.84033226966858,
          "train_time": 0.8967452049255371,
          "broadcast_time": 1.430511474609375e-06,
          "update_time": 39.757489919662476,
          "throughput": 0.0503049866589007
        }
      ],
      "scheduler_stats": {
        "total_experiences": 0,
        "total_updates": 25,
        "policy_version": 25,
        "buffer_size": 0,
        "time_predictor": {
          "global": {
            "count": 0,
            "mean": 0.0,
            "std": 0.0
          },
          "by_difficulty": {}
        },
        "staleness": {
          "current_policy_version": 25,
          "total_samples_seen": 0,
          "samples_discarded": 0,
          "discard_rate": 0.0,
          "by_difficulty": {
            "1": {
              "max_staleness": 4.852245277701067,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "2": {
              "max_staleness": 2.9430355293715387,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "3": {
              "max_staleness": 1.7850412811874385,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "4": {
              "max_staleness": 1.0826822658929016,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            },
            "5": {
              "max_staleness": 1,
              "seen": 0,
              "discarded": 0,
              "discard_rate": 0.0,
              "avg_staleness": 0.0
            }
          }
        },
        "workers": {
          "0": {
            "completed_tasks": 0,
            "avg_throughput": 1.0
          }
        },
        "curriculum": {
          "level_1": {
            "num_pulls": 44,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 0.2955907693914501,
            "combined_score": NaN
          },
          "level_2": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 8.0,
            "combined_score": NaN
          },
          "level_3": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": 0.0,
            "ucb_score": 8.0,
            "combined_score": 5.8425062437563176
          },
          "level_4": {
            "num_pulls": 2,
            "success_rate": 0.0,
            "mean_reward": 0.0,
            "recent_gradient": NaN,
            "ucb_score": 8.0,
            "combined_score": NaN
          },
          "level_5": {
            "num_pulls": 0,
            "success_rate": 0.5,
            "mean_reward": 0.5,
            "recent_gradient": 1.0,
            "ucb_score": 10.0,
            "combined_score": 7.349984948350595
          },
          "total_steps": 50
        }
      },
      "method": "aceas",
      "seed": 456,
      "model": "Qwen/Qwen3-1.7B",
      "wall_time": 1003.8942251205444,
      "device": "NVIDIA A10G"
    }
  ],
  "aggregated_results": {
    "sync": {
      "n_runs": 3,
      "reward_mean": null,
      "reward_std": null,
      "pass_at_1_mean": 0.0,
      "pass_at_1_std": 0.0,
      "wall_time_mean": 996.9196831385294,
      "seeds_completed": [
        42,
        123,
        456
      ]
    },
    "sync_curriculum": {
      "n_runs": 3,
      "reward_mean": null,
      "reward_std": null,
      "pass_at_1_mean": 0.0,
      "pass_at_1_std": 0.0,
      "wall_time_mean": 1008.3460566997528,
      "seeds_completed": [
        42,
        123,
        456
      ]
    },
    "async": {
      "n_runs": 3,
      "reward_mean": null,
      "reward_std": null,
      "pass_at_1_mean": 0.0,
      "pass_at_1_std": 0.0,
      "wall_time_mean": 997.7834853331248,
      "seeds_completed": [
        42,
        123,
        456
      ]
    },
    "aceas": {
      "n_runs": 3,
      "reward_mean": null,
      "reward_std": null,
      "pass_at_1_mean": 0.0,
      "pass_at_1_std": 0.0,
      "wall_time_mean": 1003.6569493611654,
      "seeds_completed": [
        42,
        123,
        456
      ]
    }
  }
}