rm_key,run_dir,model_name,task,RSI_IQR_med,RSI_IQR_iqr,nGMD_med,nGMD_iqr,nGap_med,nGap_iqr,SEI_med,SEI_iqr,RSI_IQR_tasknorm_med,RSI_IQR_tasknorm_iqr,nGMD_tasknorm_med,nGMD_tasknorm_iqr,nGap_tasknorm_med,nGap_tasknorm_iqr,SEI_tasknorm_med,SEI_tasknorm_iqr,DCI_tasknorm,is_outlier
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,core_eval_runs,Skywork-Reward-Llama-3.1-8B,Data Management,0.6283099762241222,0.5719191558580073,0.9674315593636444,0.5670802963883368,0.5503995391723311,1.4956918984015228,0.5900219727062197,0.3582792949130586,0.7673387479824747,0.6984700953510475,1.1814991797234045,0.6925605212433212,0.6721887432326479,1.8266498895722807,0.3862062458508183,0.4349007248174093,0.4625489175548086,True
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,core_eval_runs,Skywork-Reward-Llama-3.1-8B,Development and Implementation,0.6019209572227091,0.2010591923917191,0.7864343808698768,0.447906474300771,0.8879590738987407,0.6823053648058827,0.526523220567805,0.5171505155820791,0.8326831798693017,0.2781405193717918,1.087934675082233,0.6196231960848939,1.2283815281318275,0.9438850632978288,0.3104146529966876,0.3289554230468805,0.4766877022148222,True
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,core_eval_runs,Skywork-Reward-Llama-3.1-8B-v0.2,Development and Implementation,0.7787778315607011,0.3007183358077256,0.7578836801744094,0.1883688109594902,0.7270771072042289,0.8002886392202461,0.4289348103909596,0.3930957493724418,0.8225552836306814,0.3176226209566806,0.8004866089416101,0.1989575902737812,0.7679483055381944,0.8452752787030408,0.1227983292923589,0.1633384289041723,0.6578166083771012,True
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,core_eval_runs,Skywork-Reward-Llama-3.1-8B,Discussion,0.9602147211645148,0.6887062726886658,1.1049747217283183,0.6031017654679566,0.9112065430190333,1.1243017983927956,0.5551386530888586,0.575259243953011,1.274941871577126,0.9144417856911352,1.4671494913733452,0.8007789056646276,1.2098704068409798,1.4928113550707065,0.3637743931749633,0.432187360550369,0.4733208503368919,True
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,core_eval_runs,Skywork-Reward-Llama-3.1-8B-v0.2,Discussion,0.9279758488223744,0.6719519724792657,1.0052984226600794,0.4596378462814495,0.6829927438397456,1.2090136652709558,0.4568374793143709,0.5435534215330377,1.2767397948952397,0.9244936973428098,1.3831227435329685,0.6323849164093875,0.939683954912716,1.6634008966746765,0.3174015313915916,0.3412187219644959,0.5264657319909711,True
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,core_eval_runs,beaver-7b-v2.0-reward,General Explanation,2.5860857827459887,0.7026752218343093,1.5424957028257062,0.3013955026156207,0.0851988327759909,0.0860594270464555,0.068217709174954,0.0162890354595945,2.6564151143213217,0.7217846724936305,1.5844443081122508,0.3095920382372121,0.0875158390408688,0.088399837415019,0.1584472106231425,0.0379754652711997,0.8063157521064399,True
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,core_eval_runs,QRM-Gemma-2-27B,GeneralExcerpt Language Translation,0.6889859276120346,0.467502418166398,0.9535116641509792,0.0776289394409428,0.4585260525779377,0.5407761580070138,0.3197199692051468,0.1537439502471538,0.920099163993744,0.6243212914533888,1.273357335590776,0.1036687679948558,0.6123338964389943,0.7221739529782976,0.2051242107580634,0.0533243971727118,0.8833843256285546,True
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,core_eval_runs,GRM-Llama3-8B-rewardmodel-ft,Hypothetical Scenarios,1.240404957671592,0.2165362102831876,1.253611450184379,0.334263637051587,1.6047944777302894,0.7640458587912469,0.4733239978587868,0.0617697698322274,1.070665331884953,0.1869049394023302,1.0820646201591482,0.2885222972445278,1.3851910228829865,0.6594922149566552,0.2423996608004599,0.1081701876347486,0.7161841759186249,True
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,core_eval_runs,beaver-7b-v2.0-reward,Literary and Cultural Translation,0.1949246022602217,0.2027102910508307,0.4506047694756009,0.2225730608079894,0.0445357534965407,0.3501744639734799,0.3812444367628443,0.2478265906548619,0.9638621894097716,1.0023608240447233,2.228148189757112,1.1005781476861942,0.2202201691035571,1.7315409220139744,0.5761699294796355,0.3650142345690364,0.5740202173280435,True
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,core_eval_runs,Skywork-Reward-Gemma-2-27B,Logical Deduction,0.9855449447916624,0.8636006336432966,1.033892149617591,0.1163998841931758,0.1036847284383557,0.2581317718413231,0.325103524761086,0.3420068480329955,1.6926031434098128,1.4831724873433862,1.7756360190751066,0.1999084982568257,0.1780711252244844,0.4433229055067892,0.3495157979013339,0.1129237702189632,0.8462156326361144,True
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,core_eval_runs,GRM-Llama3-8B-rewardmodel-ft,Paraphrasing,1.5488610976761326,0.0,1.0010429930280689,0.0,0.6345970987026747,0.0,0.091017746204582,0.0,2.5001301182961195,0.0,1.615856799769717,0.0,1.0243496475121914,0.0,0.2430755542375029,0.0,0.999999999998924,True
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,core_eval_runs,QRM-Gemma-2-27B,Paraphrasing,0.7532168102672387,0.0,0.7906626612837907,0.0,0.9407231136706298,0.0,0.3328342871431665,0.0,1.3513674077380389,0.0,1.418550059438057,0.0,1.6877777264016394,0.0,0.3338474556706822,0.0,0.9999999999988588,True
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,core_eval_runs,Skywork-Reward-Gemma-2-27B,Paraphrasing,1.358296943773824,0.0,0.977737088722194,0.0,0.7212028897365835,0.0,0.1155035319620029,0.0,2.3085734074141944,0.0,1.661770537593496,0.0,1.225762761396146,0.0,0.27451749298311,0.0,0.999999999998967,True
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,core_eval_runs,QRM-Gemma-2-27B,Problem Solving,0.7532168102672387,0.1097111349700692,0.8548750740509528,0.3610124316708708,0.3525384470371557,0.251604202864692,0.3334230414530251,0.2240849169585413,0.9725854535833592,0.1416636651141015,1.1038509102816785,0.4661545451717557,0.4552125772333131,0.3248820053283396,0.1946911628541097,0.1097848959371974,0.6169733884969596,True
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,core_eval_runs,Skywork-Reward-Llama-3.1-8B,Problem Solving,0.6634953348926731,0.4550535002803205,1.1556060339790457,1.0305287160732073,0.5866059015522461,1.5500603460316666,0.5880821485046459,0.1165788271927823,0.6764123855719701,0.4639125665250941,1.1781035873471084,1.0505912409350315,0.5980260544314684,1.5802372093684656,0.3016989067680465,0.5393319540209633,0.3042419944065243,True
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,core_eval_runs,Skywork-Reward-Llama-3.1-8B-v0.2,Problem Solving,0.8791681608116965,0.778482623770314,0.8964957647452365,0.6605744455653071,0.3770787509212058,0.6966903853137101,0.5846477541683137,0.1690963838529458,1.6754254452479127,1.4835496264377337,1.708446555235139,1.2588527245559495,0.7185966943711947,1.3276786524394724,0.5278978624141419,0.3835771115869122,0.481095343650894,True
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,core_eval_runs,QRM-Gemma-2-27B,Quality and Compliance Assessment,0.7771537851697994,0.1566542024178685,1.0323115944937913,0.1352032744457745,0.4421691197278547,0.134512500633,0.406191948080057,0.1425693930854693,0.8666939310424852,0.1747031914387486,1.1512498696729352,0.1507807845182429,0.4931138468029197,0.1500104228695197,0.1799888869116168,0.0397500940503581,0.8483779291894867,True
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,core_eval_runs,QRM-Gemma-2-27B,Specialized Summaries,0.7151836168109481,0.5407096664100622,1.040246258396677,0.3768522076557751,1.2469834092295018,0.6057384482286853,0.7556054202262005,0.2242920373487511,1.1113392476915225,0.8402204129999498,1.6164610976066012,0.5855987736611798,1.937714415322314,0.9412700396494762,0.5070557937875476,0.2815328005127877,0.6450287422722156,True
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,core_eval_runs,Skywork-Reward-Gemma-2-27B,Specialized Summaries,1.213705349818773,0.5451548612422921,1.3887258314528177,0.2918758107044714,1.5254345659179442,0.0870789711494002,0.5534762053748473,0.2696070626178721,1.0739766078098816,0.4823934974790103,1.2288477247499603,0.2582733882889725,1.3498177632529311,0.0770539390475388,0.3048614169944025,0.0554263328706514,0.8228635146864711,True
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,core_eval_runs,Skywork-Reward-Gemma-2-27B,Technical and Scientific Translation,0.6900502190501896,0.3092653537371919,1.015425503893251,0.1817528422280454,0.824482599704477,0.4694164072658371,0.5935923408662002,0.0771161285451056,0.8812604535722769,0.3949615816132439,1.2967959656060024,0.2321158486122994,1.0529435245714638,0.5994898698108693,0.3327885016878896,0.143796367764564,0.7763690547140826,True
