Step,GRPO-Qwen2.5-math-7b-math - val-core/hiyouga/math/reward/mean@1
0,0.476
5,0.7
10,0.72
15,0.74
20,0.734
25,0.758
30,0.766
35,0.748
40,0.758
45,0.76
50,0.766
55,0.776
60,0.786
65,0.78
70,0.764
75,0.79
80,0.778
85,0.784
90,0.784
95,0.794
100,0.794
105,0.786
110,0.788
115,0.788
120,0.784
125,0.776
130,0.786
135,0.78
140,0.792
145,0.776
150,0.782
155,0.784
160,0.786
165,0.788
170,0.792
175,0.808
180,0.812
185,0.792
190,0.784
195,0.792
200,0.8
205,0.782
210,0.798
215,0.806
220,0.792