Step,GRPO-Qwen2.5-math-1.5b-math - actor/entropy
1,0.45470195
2,0.38079175
3,0.31794711
4,0.29794711
5,0.27794711
6,0.23794711
7,0.248281599
8,0.223704081
9,0.218848184
10,0.204221547
11,0.190957606
12,0.189484
13,0.184227303
14,0.181331366
15,0.173856318
16,0.180128634
17,0.180188328
18,0.174669981
19,0.169067442
20,0.173108399
21,0.171382502
22,0.169144288
23,0.16626595
24,0.16350992
25,0.158443183
26,0.162523389
27,0.159792587
28,0.157823831
29,0.156132162
30,0.152095199
31,0.150455311
32,0.150277972
33,0.14726761
34,0.150919214
35,0.146668911
36,0.149561256
37,0.145749837
38,0.145141974
39,0.139093876
40,0.139480174
41,0.137317538
42,0.136133805
43,0.136680618
44,0.134507328
45,0.133194581
46,0.132055655
47,0.135345995
48,0.134152949
49,0.127789259
50,0.127523378
51,0.128797352
52,0.125817105
53,0.125961572
54,0.126681596
55,0.126319483
56,0.121518813
57,0.124167256
58,0.121930033
59,0.121602654
60,0.118999541
61,0.118914507
62,0.121699378
63,0.117702067
64,0.119397871
65,0.114839591
66,0.109981202
67,0.11542324
68,0.114887282
69,0.11258921
70,0.106780268
71,0.104943313
72,0.107063659
73,0.10524632
74,0.103814639
75,0.105469391
76,0.104549751
77,0.102115571
78,0.102775373
79,0.101013668
80,0.096074186
81,0.097463459
82,0.100310169
83,0.09700875
84,0.094454132
85,0.093433723
86,0.09364745
87,0.092298128
88,0.0948838
89,0.092043623
90,0.092952244
91,0.091845743
92,0.093339927
93,0.091095962
94,0.08871983
95,0.090041749
96,0.089385733
97,0.087602027
98,0.086754493
99,0.085305624
100,0.08695697
101,0.083116606
102,0.083270364
103,0.08182551
104,0.081548877
105,0.080996208
106,0.082140312
107,0.078722179
108,0.07877554
109,0.078912571
110,0.07895074
111,0.077086374
112,0.075961612
113,0.077016748
114,0.075132184
115,0.077885054
116,0.076421142
117,0.075395308
118,0.072747506
119,0.073174931
120,0.07244195
121,0.073005959
122,0.071677372
123,0.070885807
124,0.067692645
125,0.068505682
126,0.07064341
127,0.068439357
128,0.066929445
129,0.06578055
130,0.066017255
131,0.065271661
132,0.064252794
133,0.065732352
134,0.063226677
135,0.06307137
136,0.061976846
137,0.061617661
138,0.059929192
139,0.060474165
140,0.05855418
141,0.059469074
142,0.058501892
143,0.058305472
144,0.05663611
145,0.058172867
146,0.059816957
147,0.058934864
148,0.056808464
149,0.055569172
150,0.0564119
151,0.054748274
152,0.054504715
153,0.054715618
154,0.054215632
155,0.052500851
156,0.054720886
157,0.053117022
158,0.053028077
159,0.051654451
160,0.0505432
161,0.053315759
162,0.050717484
163,0.051913202
164,0.049771409
165,0.048771409
166,0.049501409
167,0.046707141
168,0.045431409
169,0.045771409
170,0.044971409
171,0.045001409
172,0.043871409
173,0.043771409
174,0.044001409
175,0.043121409
176,0.042101409
177,0.041071409
178,0.040371409
179,0.040071409
180,0.039771409
181,0.039085963
182,0.037747987
183,0.038250056
184,0.038151312
185,0.038444158
186,0.037598035
187,0.038821597
188,0.038089742
189,0.038388804
190,0.037538334
191,0.038001369
192,0.037293487
193,0.037156232
194,0.037428408
195,0.037179026
196,0.037054767
197,0.036821053
198,0.036441429
199,0.036789088
200,0.036336194
201,0.036788289
202,0.037016307
203,0.036731691
204,0.035520098
205,0.036183023
206,0.03602188
207,0.036209044
208,0.03602703
209,0.035678354
210,0.035611975
211,0.035808309
212,0.035571446
213,0.035867679
214,0.035876225
215,0.035024402
216,0.035246602
217,0.035064089
218,0.035136406
219,0.034638506