Step,GRPO-Qwen2.5-math-1.5b-math - val-core/hiyouga/math/reward/mean@1
0,0.32
5,0.614
10,0.648
15,0.656
20,0.658
25,0.686
30,0.676
35,0.696
40,0.68
45,0.704
50,0.696
55,0.688
60,0.686
65,0.698
70,0.708
75,0.718
80,0.71
85,0.692
90,0.692
95,0.7
100,0.704
105,0.708
110,0.72
115,0.724
120,0.728
125,0.746
130,0.748
135,0.758
140,0.746
145,0.742
150,0.736
155,0.748
160,0.75
165,0.746
170,0.748
175,0.758
180,0.76
185,0.742
190,0.736
195,0.748
200,0.746
205,0.75
210,0.741
215,0.754
220,0.748