_step,avg_score/16384/four_sets,critic/acc/mean,critic/score/mean,response_length/mean
1,,0.125,0.037529151886701584,2881.4619140625
2,,0.09765625,-0.003507521701976657,2994.697265625
3,,0.1083984375,0.019244639202952385,3099.1064453125
4,,0.125,0.02760167233645916,2921.009765625
5,,0.1357421875,0.03823510929942131,3342.0302734375
6,,0.1103515625,0.013289814814925194,3110.53515625
7,,0.0966796875,0.010577903129160404,3546.4482421875
8,,0.1455078125,0.06039663776755333,3583.24609375
9,,0.123046875,0.042074356228113174,3783.6611328125
10,0.14183578759482374,0.107421875,0.03750212490558624,4397.5888671875
11,,0.1064453125,0.035410381853580475,4576.0576171875
12,,0.119140625,0.05465089902281761,4936.591796875
13,,0.1376953125,0.09051895886659622,5394.9716796875
14,,0.126953125,0.07495149970054626,6151.798828125
15,,0.1357421875,0.09279178082942963,6384.248046875
16,,0.12109375,0.07190480083227158,6641.373046875
17,,0.16796875,0.1254752278327942,6870.7685546875
18,,0.126953125,0.08846697211265564,7298.833984375
19,,0.1884765625,0.15186622738838196,7397.6982421875
20,0.19974230254350736,0.15625,0.12308067083358765,7497.984375
21,,0.1220703125,0.09520607441663742,7504.744140625
22,,0.1416015625,0.1100585013628006,7582.771484375
23,,0.16015625,0.12608695030212402,7729.1826171875
24,,0.1572265625,0.11819079518318176,7564.916015625
25,,0.13671875,0.09824059158563614,7560.9462890625
26,,0.1240234375,0.08728186041116714,7587.98828125
27,,0.1552734375,0.11399474740028381,7459.5751953125
28,,0.181640625,0.142524853348732,7409.298828125
29,,0.1328125,0.09705261886119843,7607.435546875
30,0.206045515394913,0.1240234375,0.09565260261297226,7573.03515625
31,,0.16015625,0.13040806353092194,7592.787109375
32,,0.1787109375,0.1424541175365448,7568.267578125
33,,0.1484375,0.11172223091125488,7792.5029296875
34,,0.1591796875,0.13165223598480225,7831.376953125
35,,0.1240234375,0.09368579089641571,7744.8857421875
36,,0.16015625,0.12936192750930786,7474.916015625
37,,0.189453125,0.15722061693668365,7330.1533203125
38,,0.2216796875,0.18117685616016388,6900.431640625
39,,0.212890625,0.1741369068622589,6890.748046875
40,0.21597450914770192,0.2470703125,0.21924787759780884,6898.21875
41,,0.208984375,0.18549744784832,6826.73046875
42,,0.1767578125,0.14185240864753723,6813.1767578125
43,,0.2119140625,0.17759013175964355,6876.83984375
44,,0.2412109375,0.20360395312309265,6121.3232421875
45,,0.1904296875,0.14638105034828186,6446.6884765625
46,,0.294921875,0.26456916332244873,6046.4853515625
47,,0.294921875,0.2501734495162964,5790.2060546875
48,,0.2783203125,0.24746277928352356,6430.9404296875
49,,0.2763671875,0.24936021864414215,6421.4111328125
50,0.23589814814814813,0.2978515625,0.2750938832759857,6408.8828125
51,,0.2861328125,0.2597227096557617,6161.66015625
52,,0.24609375,0.21732467412948608,6509.490234375
53,,0.34375,0.32083484530448914,6104.8212890625
54,,0.3330078125,0.2977584898471832,5179.1708984375
55,,0.267578125,0.23796287178993225,6016.005859375
56,,0.2568359375,0.21742337942123413,5485.9296875
57,,0.2705078125,0.2230101227760315,5703.4228515625
58,,0.3193359375,0.2653917670249939,5222.689453125
59,,0.2744140625,0.21289753913879395,5561.78125
60,0.1877946229361892,0.2880859375,0.22107836604118347,5530.71484375
61,,0.2705078125,0.21497488021850586,5724.8935546875
62,,0.244140625,0.19515077769756317,5712.51171875
63,,0.2646484375,0.21670012176036835,6582.42578125
64,,0.3212890625,0.27369359135627747,5802.3994140625
65,,0.2685546875,0.2357277125120163,5890.0849609375
66,,0.2919921875,0.25876590609550476,5773.1259765625
67,,0.298828125,0.2764207720756531,6251.22265625
68,,0.33984375,0.3205288052558899,5906.1806640625
69,,0.3076171875,0.2840428352355957,6291.701171875
70,0.2491078759482374,0.3154296875,0.2923267185688019,6017.0732421875
71,,0.2626953125,0.23674800992012024,6287.3876953125
72,,0.2998046875,0.27445998787879944,6421.56640625
73,,0.326171875,0.29872721433639526,6126.634765625
74,,0.310546875,0.2883082628250122,6698.9423828125
75,,0.3310546875,0.30439990758895874,6027.0732421875
76,,0.291015625,0.2602299451828003,6247.328125
77,,0.3076171875,0.28210383653640747,6144.962890625
78,,0.2666015625,0.2414214015007019,6777.083984375
79,,0.2451171875,0.21913623809814453,6412.818359375
80,0.25364513609995537,0.314453125,0.2885490655899048,6298.7431640625
81,,0.3134765625,0.2958044707775116,6278.2607421875
82,,0.2841796875,0.2652549147605896,6400.7607421875
83,,0.3505859375,0.32791024446487427,5869.005859375
84,,0.25,0.2340652346611023,6889.3203125
85,,0.2685546875,0.24114295840263367,6339.970703125
86,,0.294921875,0.26858383417129517,6384.1826171875
87,,0.384765625,0.37061578035354614,5607.8740234375
88,,0.3037109375,0.27838027477264404,6192.1943359375
89,,0.2939453125,0.2720814347267151,6523.533203125
90,0.2611807786702365,0.3251953125,0.3044203519821167,6432.416015625
91,,0.265625,0.23510463535785675,6614.1640625
92,,0.306640625,0.2813871502876282,6254.6005859375
93,,0.3505859375,0.33020347356796265,5723.3046875
94,,0.353515625,0.3292648494243622,6286.8173828125
95,,0.3505859375,0.3174581229686737,6044.6435546875
96,,0.3544921875,0.3205222487449646,6138.6884765625
97,,0.330078125,0.3037545084953308,6269.58984375
98,,0.34375,0.3221079707145691,6382.0361328125
99,,0.4013671875,0.3755296468734741,5704.69921875
100,0.281726182507809,0.3173828125,0.2909078896045685,6226.62109375
101,,0.3994140625,0.3729717433452606,5026.50390625
102,,0.3798828125,0.35125869512557983,5345.0439453125
103,,0.3408203125,0.3070819675922394,6178.818359375
104,,0.25390625,0.22736528515815735,6716.322265625
105,,0.31640625,0.2892308235168457,6085.6455078125
106,,0.357421875,0.32901227474212646,5568.216796875
107,,0.35546875,0.3305279612541199,5175.3291015625
108,,0.3603515625,0.3404693007469177,6059.3525390625
109,,0.3525390625,0.32414600253105164,5409.26171875
110,0.2837324297188755,0.3056640625,0.27466124296188354,6186.232421875
111,,0.37109375,0.34584206342697144,5900.2958984375
112,,0.3203125,0.29304346442222595,6028.060546875
113,,0.2958984375,0.27418655157089233,6230.7861328125
114,,0.3828125,0.358262836933136,5649.501953125
115,,0.412109375,0.39433032274246216,5552.7236328125
116,,0.3330078125,0.30730879306793213,5849.70703125
117,,0.4033203125,0.3832758963108063,5459.9189453125
118,,0.3466796875,0.3249105215072632,5292.5185546875
119,,0.3271484375,0.29776862263679504,5840.3623046875
120,0.2959095269968764,0.3681640625,0.3505028784275055,5786.365234375
121,,0.3583984375,0.3316696882247925,5819.72265625
122,,0.392578125,0.3709806203842163,5549.6181640625
123,,0.3525390625,0.32747581601142883,6012.5126953125
124,,0.3291015625,0.29698240756988525,6012.9794921875
125,,0.369140625,0.34328243136405945,5809.2802734375
126,,0.3623046875,0.3454953730106354,5869.1796875
127,,0.3583984375,0.3369852304458618,5940.6025390625
128,,0.349609375,0.3271465301513672,6166.6240234375
129,,0.3818359375,0.36575397849082947,6093.7607421875
130,0.2947606537260152,0.3544921875,0.3302916884422302,5922.1396484375
131,,0.3837890625,0.3682529926300049,6554.279296875
132,,0.3935546875,0.3713325262069702,5961.6953125
133,,0.376953125,0.353156179189682,5989.3642578125
134,,0.2783203125,0.2546400725841522,6604.224609375
135,,0.44140625,0.41940584778785706,6046.1181640625
136,,0.3662109375,0.35069090127944946,6117.6337890625
137,,0.29296875,0.2769029140472412,6457.19921875
138,,0.3388671875,0.3061472773551941,6026.794921875
139,,0.35546875,0.3322247564792633,6709.4541015625
140,0.3032877621597501,0.361328125,0.33453887701034546,6087.0888671875
141,,0.416015625,0.3923317790031433,5907.0048828125
142,,0.3662109375,0.34508562088012695,5987.7451171875
143,,0.3525390625,0.3261871039867401,5618.828125
144,,0.2822265625,0.2589218020439148,6276.892578125
145,,0.3037109375,0.28444939851760864,6425.248046875
146,,0.384765625,0.35732537508010864,5757.966796875
147,,0.3544921875,0.32476603984832764,6235.9111328125
148,,0.4345703125,0.41396021842956543,6030.8974609375
149,,0.361328125,0.33440735936164856,5627.6181640625
150,0.29250881302989734,0.3505859375,0.32820141315460205,6497.1826171875
151,,0.3828125,0.3596668839454651,6110.244140625
152,,0.3857421875,0.3655335307121277,6171.6552734375
153,,0.376953125,0.3524039387702942,6153.07421875
154,,0.384765625,0.3628476858139038,6011.447265625
155,,0.3935546875,0.3717465400695801,6119.5341796875
156,,0.3505859375,0.32706668972969055,6273.0791015625
157,,0.3701171875,0.3465220034122467,6459.2578125
158,,0.37890625,0.36270567774772644,6260.9306640625
159,,0.373046875,0.35639917850494385,6229.2119140625
160,0.30377270191878625,0.3671875,0.35100483894348145,6130.6806640625
161,,0.3642578125,0.3492489457130432,5820.076171875
162,,0.3466796875,0.3241496682167053,6005.2373046875
163,,0.431640625,0.4197123348712921,5684.453125
164,,0.40625,0.3838207721710205,6026.021484375
165,,0.4267578125,0.3991442024707794,5226.248046875
166,,0.4208984375,0.4060991108417511,5795.6640625
167,,0.396484375,0.37575146555900574,5983.548828125
168,,0.3271484375,0.30651798844337463,6234.107421875
169,,0.408203125,0.3903212249279022,5643.7138671875
170,0.29743708165997323,0.3369140625,0.3200356364250183,6755.275390625
171,,0.3486328125,0.326273113489151,6284.71875
172,,0.3564453125,0.3393096327781677,6391.9677734375
173,,0.3994140625,0.37119752168655396,5880.3896484375
174,,0.3447265625,0.3259696364402771,6389.9208984375
175,,0.373046875,0.3597642779350281,6030.7392578125
176,,0.365234375,0.3418341875076294,5988.1044921875
177,,0.3828125,0.3662872314453125,5836.99609375
178,,0.4345703125,0.4173189401626587,6022.4423828125
179,,0.384765625,0.37091490626335144,6152.9580078125
180,0.3140928714859438,0.3642578125,0.3431825041770935,6356.51171875
181,,0.412109375,0.3986412584781647,6034.1298828125
182,,0.3740234375,0.3491668701171875,6264.8154296875
183,,0.361328125,0.3429807424545288,6004.146484375
184,,0.3359375,0.31615909934043884,6009.9404296875
185,,0.3369140625,0.3202968239784241,6265.3291015625
186,,0.33984375,0.317981094121933,6466.8203125
187,,0.3935546875,0.376645565032959,5888.1904296875
188,,0.423828125,0.3990335464477539,6105.451171875
189,,0.380859375,0.3643801510334015,6245.05859375
190,0.31515512048192773,0.3701171875,0.35070040822029114,6310.560546875
191,,0.3916015625,0.37006327509880066,5974.79296875
192,,0.4111328125,0.3979143798351288,6050.1298828125
193,,0.3828125,0.3667895197868347,6283.525390625
194,,0.3486328125,0.33965224027633667,6872.51171875
195,,0.365234375,0.35283154249191284,6448.0908203125
196,,0.31640625,0.30638086795806885,6939.482421875
197,,0.310546875,0.29737526178359985,6794.3037109375
198,,0.3466796875,0.328427255153656,6790.4453125
199,,0.4169921875,0.40062418580055237,6306.7333984375
200,0.3173438197233378,0.365234375,0.3514280915260315,6421.0107421875
201,,0.3603515625,0.3420485258102417,6553.921875
202,,0.3818359375,0.3697321116924286,6321.65234375
203,,0.3896484375,0.37671852111816406,6487.8720703125
204,,0.3564453125,0.3395766019821167,6701.9072265625
205,,0.3740234375,0.35615605115890503,6556.974609375
206,,0.3125,0.29862919449806213,7132.1376953125
207,,0.40234375,0.3893023729324341,6291.0380859375
208,,0.365234375,0.350946307182312,6401.4990234375
209,,0.4658203125,0.45214739441871643,6235.966796875
210,0.33236027443105753,0.3349609375,0.3177216053009033,6860.9951171875
211,,0.337890625,0.3266259431838989,6663.4580078125
212,,0.3681640625,0.34883740544319153,6546.546875
213,,0.466796875,0.4530941843986511,6376.0390625
214,,0.4140625,0.4029686748981476,6528.4990234375
215,,0.4453125,0.4323766529560089,6285.9111328125
216,,0.376953125,0.36063337326049805,6429.6103515625
217,,0.353515625,0.3382474482059479,6620.947265625
218,,0.4052734375,0.38803964853286743,6601.65625
219,,0.2705078125,0.25198236107826233,7035.482421875
220,0.32707134091923246,0.404296875,0.39460188150405884,7000.9599609375
221,,0.365234375,0.3470897972583771,7358.5390625
222,,0.3837890625,0.37378114461898804,7384.6318359375
223,,0.482421875,0.47362491488456726,7065.88671875
224,,0.4228515625,0.41150400042533875,6959.0712890625
225,,0.4150390625,0.396926611661911,6945.806640625
226,,0.43359375,0.4204850196838379,7028.7373046875
227,,0.400390625,0.3864644467830658,7285.4736328125
228,,0.4052734375,0.3853619694709778,7014.6123046875
