,np.tanh(std_delta_len),instruction_difficulty,not_gamed_baseline.astype(float)
mistral-7b-it-gmsimpo,-0.9157011790553026,0.6867126433340708,-0.9256756290370960
qwen-2.5-7b-it-simpo,-0.8419657101874606,0.2817396792880652,-2.6635299397099166
qwen-2.5-7b-it,-0.8490802141324586,-0.0264149852602740,-0.5495886252536496
train,-1.4825701465830805,0.3934102469529793,-4.1655362730164827
sft,-1.7408627843613165,0.5295898176434977,-2.4100625168802754
qwen-2.5-7b-it-gammapo1-enhanced,-0.9695298782582276,-0.0494130350313227,-0.4932722881398943
qwen-2.5-7b-it-gammapo1,-1.8319501686976889,0.7416419604238915,-8.4180365523937475
qwen-2.5-7b-it-gammapo2,-1.8308039739016648,0.7403632460648955,-9.1842181662821201
wspo,-1.0694078708465753,0.3390851113478359,-2.5043651312650668
qwen-2.5-7b-it-gammapo-v3,-0.8062226587262048,-0.0193199026776699,-0.3696545975885229
qwen-2.5-7b-it-gammapo-v2,-0.9390657915598052,-0.0767050124041960,-0.4043476451887216
qwen-2.5-7b-it-gammapo,-0.8507110666749360,-0.0641026993069699,-0.4693181435167142
