Step,GRPO-Qwen2.5-math-7b-math - actor/entropy
1,0.354701949
2,0.317917535
3,0.29794711
4,0.255338639
5,0.20526813
6,0.177296847
7,0.153671876
8,0.140259057
9,0.132240474
10,0.127538919
11,0.122181438
12,0.117554456
13,0.11265929
14,0.110520877
15,0.105808198
16,0.106499635
17,0.102925546
18,0.100364603
19,0.096134089
20,0.094999395
21,0.092485897
22,0.090561911
23,0.087972164
24,0.085912615
25,0.081636757
26,0.081570707
27,0.080484949
28,0.077421874
29,0.075567767
30,0.072330847
31,0.070740841
32,0.070073798
33,0.066877998
34,0.067281954
35,0.064195924
36,0.062860295
37,0.060751367
38,0.060223818
39,0.057928458
40,0.05601253
41,0.053939402
42,0.053193204
43,0.052282102
44,0.051915385
45,0.049521606
46,0.04925685
47,0.048567053
48,0.047008619
49,0.044629969
50,0.043611296
51,0.043296594
52,0.041547503
53,0.040384069
54,0.040296379
55,0.039035704
56,0.03853656
57,0.038960557
58,0.036625355
59,0.03673638
60,0.034899756
61,0.03474443
62,0.034886718
63,0.032871883
64,0.03399507
65,0.031469051
66,0.030132456
67,0.031751741
68,0.030737
69,0.029931793
70,0.029091608
71,0.027968923
72,0.029476505
73,0.027595233
74,0.027279092
75,0.02761062
76,0.027517842
77,0.027716225
78,0.027143527
79,0.026995879
80,0.025692634
81,0.025937496
82,0.026075322
83,0.02490608
84,0.024328692
85,0.023515923
86,0.02283572
87,0.023200179
88,0.02334442
89,0.02349484
90,0.022761537
91,0.02174291
92,0.02273383
93,0.021713341
94,0.02217715
95,0.022187715
96,0.021814687
97,0.020989399
98,0.021336714
99,0.020275451
100,0.021085963
101,0.019747987
102,0.020250056
103,0.020151312
104,0.020444158
105,0.019598035
106,0.020821597
107,0.020089742
108,0.020388804
109,0.019538334
110,0.020001369
111,0.019293487
112,0.019156232
113,0.019428408
114,0.019179026
115,0.019054767
116,0.018821053
117,0.018441429
118,0.018789088
119,0.018336194
120,0.018788289
121,0.019016307
122,0.018731691
123,0.017520098
124,0.018183023
125,0.01802188
126,0.018209044
127,0.01802703
128,0.017678354
129,0.017611975
130,0.017808309
131,0.017571446
132,0.017867679
133,0.017876225
134,0.017024402
135,0.017246602
136,0.017064089
137,0.017136406
138,0.016638506
139,0.017061096
140,0.016939517
141,0.01684301
142,0.016968496
143,0.017118024
144,0.016400743
145,0.016687948
146,0.016379898
147,0.016422939
148,0.016051063
149,0.016271558
150,0.016187385
151,0.016489295
152,0.015863089
153,0.015586107
154,0.01606426
155,0.015952392
156,0.016013436
157,0.016432781
158,0.015654299
159,0.015718764
160,0.015729267
161,0.015672101
162,0.016183021
163,0.016385933
164,0.016665503
165,0.015955709
166,0.015701711
167,0.015811387
168,0.016055832
169,0.015998423
170,0.015926335
171,0.015403329
172,0.01615417
173,0.016272895
174,0.015105614
175,0.01618829
176,0.015539962
177,0.015155662
178,0.015117247
179,0.015301499
180,0.015053093
181,0.015005905
182,0.015046734
183,0.015113305
184,0.015018577
185,0.015560765
186,0.015633743
187,0.015257893
188,0.014620256
189,0.014923365
190,0.015290814
191,0.015217398
192,0.014698748
193,0.013898748
194,0.014098748
195,0.013798748
196,0.013588748
197,0.013298748
198,0.013898748
199,0.013698748
200,0.013198748
201,0.013078748
202,0.012898748
203,0.012698748
204,0.012998748
205,0.013008748
206,0.012878748
207,0.012698748
208,0.012708748
209,0.012558748
210,0.012538748
211,0.012888748
212,0.012498748
213,0.012378748
214,0.012188748
215,0.012698748
216,0.012098748
217,0.011998748
218,0.012008748
219,0.012198748
220,0.011798748