rm_key,run_dir,model_name,RSI_IQR_med,RSI_IQR_iqr,nGMD_med,nGMD_iqr,nGap_med,nGap_iqr,SEI_med,SEI_iqr,RSI_IQR_tasknorm_med,RSI_IQR_tasknorm_iqr,nGMD_tasknorm_med,nGMD_tasknorm_iqr,nGap_tasknorm_med,nGap_tasknorm_iqr,SEI_tasknorm_med,SEI_tasknorm_iqr,DCI,DCI_tasknorm
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,core_eval_runs,BTRM_Qwen2_7b_0613,1.256543779326127,0.46460122155965,0.9148542774903743,0.31479374544357563,0.2371891450870256,0.3662115326949238,0.1104119514225656,0.06273480044976673,1.295916454705972,0.5005800085301764,0.9282624222883666,0.2641651540662212,0.2375795218862734,0.3841108961363846,0.10217360029664263,0.06496078625242607,0.6514111749789335,0.6749118518866868
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,core_eval_runs,RAMO-Llama3.1-8B,0.8736749761534606,0.6590897802767275,0.8192840071145525,0.4245131427673249,0.2758551799095106,0.4501947987444385,0.1545957165149609,0.1639222433502925,0.9345307005036285,0.6737042210757201,0.8538290633254819,0.44254353903515486,0.30119278862657,0.46053567441852444,0.09066445762673953,0.08496744847695667,0.4985136790677262,0.5130095660569579
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,core_eval_runs,URM-LLaMa-3.1-8B,1.3723898745657264,0.26443892272346603,1.0856406444280284,0.13475513209440115,0.6728401702987883,0.257229318005062,0.14065676192321203,0.02909113929385182,1.363452697856824,0.2577005172926492,1.0949925981845792,0.13474016285213875,0.6734684423506615,0.2584501277443273,0.13189710824476075,0.042116669597290296,0.8562941898981002,0.8372399655923157
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,core_eval_runs,beaver-7b-v2.0-reward,1.6775133817030343,1.3211870133742805,1.0937436015712443,0.6039100766003007,0.08046556428843592,0.14189048034284357,0.08216291322598496,0.036959986406626594,1.8650037659996586,1.368839051396011,1.1845196004577678,0.6742994685017263,0.10171334273630592,0.17505032374839036,0.122474225034236,0.08208066248687054,0.6091037213805206,0.5403102666262712
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,core_eval_runs,ArmoRM-Llama3-8B-v0.1,0.8775632462007313,0.6055911657666204,0.7371450683766014,0.3415767869452111,0.1613701010575725,0.23480256277478245,0.14322433271840546,0.09079011506670187,0.9773094130588433,0.6436337767550735,0.7874470288595203,0.3933556409610719,0.17937277027975027,0.2674813294300786,0.08740034319120193,0.0838519272478975,0.5854423522823237,0.5184112113200334
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,core_eval_runs,GRM-Llama3-8B-rewardmodel-ft,1.0401141040953106,0.850516396705853,0.9993064992028579,0.7725969484502615,0.4051057305392965,1.0202122568954342,0.22807230073315315,0.3922831436973402,1.0810042182385562,0.7916565356218194,1.0464769220539478,0.6532045274143107,0.46520793334823457,1.0046366303385692,0.16610426844384862,0.24861505373541198,0.34412232510032376,0.4143751768874241
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,core_eval_runs,Gemma-2B-rewardmodel-baseline,1.2567163850889425,0.6725045290192408,0.9591273049846489,0.3842617009959409,0.25234585048673874,0.392245046926468,0.11874916058842089,0.09024625113442375,1.262474599967386,0.5414060731609349,0.9479888222473781,0.3063588017317178,0.26079122442596997,0.4019433594735186,0.10865346908454926,0.07668756023890422,0.5917469383685986,0.6418897698882589
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,core_eval_runs,Skywork-Reward-Gemma-2-27B,1.0516304585554321,0.6863544255072208,1.0901567690807257,0.3782459995305625,0.40690855665781783,0.5944321084818753,0.19747259975939868,0.20921788942036001,1.1044331609400424,0.7351876345348479,1.1397591885300384,0.4200380215437465,0.4183991196308067,0.6226787734451482,0.15505892732781917,0.1001328778323404,0.5928947256653044,0.6254636407656756
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,core_eval_runs,Skywork-Reward-Llama-3.1-8B-v0.2,0.9238823007956725,0.8629514128597616,0.9845332880118087,0.6321202413396375,0.44399251674229656,0.9441876766444697,0.25205780359220575,0.4324724950539951,1.0304512261404866,0.8613643749761724,1.065623497370221,0.623451213552043,0.4881350709710619,1.0243426703794338,0.18737957817449113,0.2793833858080293,0.3928098471873272,0.431554482880265
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,core_eval_runs,Skywork-Reward-Llama-3.1-8B,0.9551882413547219,0.8568577300756467,1.0195071119069425,0.6991301097247729,0.5279276401789402,1.099130129970317,0.278552934042873,0.46217782909923777,1.01888069077961,0.8644946783673771,1.0893940750909015,0.7272153628947777,0.525069786747699,1.2261077823771527,0.19844463033105653,0.3465523247472317,0.37892165209120987,0.3806505640003525
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,core_eval_runs,Skywork-Reward-V2-Llama-3.2-3B,1.178346336782372,0.6972209727149633,0.9665588964579039,0.431337890352937,0.3018209842893992,0.5461896555165049,0.1360766180969265,0.13318870741432834,1.2054099210757834,0.5961944399015137,0.9818453753553761,0.38383944727071606,0.31345432540503515,0.5843371594947953,0.11831044581250155,0.09657224219770932,0.5417119850900275,0.589387594714774
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,core_eval_runs,Skywork-Reward-V2-Qwen3-1.7B,1.2430153234435626,0.5775738704681451,0.968867605206936,0.38311062226061554,0.3212557608789041,0.5772618254163802,0.12785683873972264,0.09986862715825315,1.257397177141158,0.5223469150209337,0.965098784307852,0.3247533669663206,0.3176917843000223,0.5796137558519495,0.11482152941040635,0.08327464206856502,0.5915300166835606,0.6314711235855679
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,core_eval_runs,Skywork-Reward-V2-Qwen3-4B,1.175739029361599,0.5809612448321766,0.9229343248886475,0.38603599894237384,0.25177908487311257,0.43830349866672574,0.1262015469417514,0.09342420540011526,1.2367868736495458,0.5817894286872587,0.9845169592011551,0.3674980937666713,0.2875504580999173,0.4612694799343037,0.11179245164085261,0.07651623453139672,0.585948915935658,0.6168743931304116
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,core_eval_runs,Skywork-Reward-V2-Qwen3-8B,1.1845232091590154,0.6343543775560792,0.9491374092823347,0.43173873035387,0.3353714087045594,0.5502187174059177,0.13594487295848912,0.12120261402075608,1.1763756159375263,0.5527710782733303,0.9778161908444314,0.3830280557651238,0.326504717932369,0.5636191758955209,0.11399109077696468,0.09636264042932013,0.5474958226244319,0.5854589539559795
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,core_eval_runs,Llama-3.1-8B-Base-RM-RB2,1.0937308526899827,0.6212784368617681,0.8977661915212383,0.36915191614895704,0.21172607520622513,0.324562407505564,0.11851584642957486,0.085628569053211,1.1966711160569683,0.6374947184862036,0.9532917652648703,0.3608121533103197,0.2291118968884765,0.33182182171762753,0.10257629176066979,0.06981911352419531,0.5920869636097748,0.6147935830710093
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,core_eval_runs,Llama-3.1-8B-Instruct-RM-RB2,1.1943553380365148,0.5794756094941942,0.9017236907310188,0.33963575075164354,0.1643331123052973,0.3100780551672381,0.1115002396783733,0.06752101566095445,1.227987079155781,0.553622472058382,0.9274525428069758,0.31925846034822936,0.17061671952350216,0.33909645938070215,0.09959187689754345,0.06227058197575036,0.6284917414840023,0.6414576077776203
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,core_eval_runs,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,1.145290684019621,0.574148502442409,0.9129996792660586,0.31419505365082845,0.18004335013917083,0.29730665897048525,0.11802941718912563,0.07851322538420427,1.2086909276394233,0.5635994406347424,0.9524853298635592,0.3300052118050838,0.1801151459379047,0.3196618631074473,0.10421147252229962,0.06222505933722913,0.635336048542747,0.6450056904726301
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,core_eval_runs,Llama-3.1-Tulu-3-8B-RL-RM-RB2,1.1495035882551459,0.5597360345473261,0.9215529172482713,0.33466028563659334,0.18880387074500052,0.3003495133411197,0.11992497501983651,0.0823399415012131,1.1909628373854875,0.556023881500237,0.9560332507237418,0.3416470615780688,0.19350416339744092,0.32162764207943023,0.10410605928751931,0.06373478721408926,0.6218603067237881,0.6368068662519691
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,core_eval_runs,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,1.1345536195059447,0.6770243224295541,0.926536986010096,0.3691254883507382,0.20847603727204086,0.2854883079849958,0.12614625637916382,0.09273375045633628,1.1585333035593957,0.6749717340267518,0.9545418876381999,0.3790192611266119,0.21306835543200717,0.309583685151291,0.10198648501597452,0.07016847265956272,0.596460859747239,0.6043905299092712
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,core_eval_runs,tulu-v2.5-13b-uf-rm,0.9796042264809208,0.5652422561810904,0.7720817427415623,0.2955238567561689,0.14473331127128264,0.19865904307699223,0.11311619139814683,0.059643140845306114,1.108932497974525,0.6662893471086864,0.86968820274767,0.332891429084454,0.16102757214356875,0.24110338615816546,0.085574288224813,0.054115801744158254,0.6417593448104293,0.6207127972700408
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,core_eval_runs,QRM-Gemma-2-27B,1.1271987973212716,0.5733570405132768,1.0813786990664318,0.37090374933821657,0.3976529955687872,0.5506169143558444,0.18435144326816905,0.17686060399340522,1.1265965127612354,0.5708324878692046,1.092482302270866,0.34313645092060563,0.4036160342622255,0.5758632591542145,0.14604926458486212,0.09733239983379052,0.6033123717256772,0.6524936472840387
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,core_eval_runs,QRM-Llama3.1-8B-v2,0.8675307042671709,0.6848321850903758,0.8434645471289277,0.3797601294838411,0.2619827822157814,0.42055130829375437,0.16590603769335605,0.15306010707933465,0.9244920448944112,0.7004439275880027,0.8748837136870602,0.3887923044662619,0.2869637964460998,0.4032048431482814,0.10115489618314744,0.09139748939562617,0.545994158061202,0.5511410691632721
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,core_eval_runs,RM-Mistral-7B,1.2784981549687975,0.5406421387172697,0.9555005356465156,0.34376639395509756,0.23751765453881335,0.36352077027165375,0.10911009383508324,0.06411666962045126,1.2829213264121093,0.532190720345773,0.9487169462584508,0.305834671251047,0.23067191612902976,0.3525913188624396,0.10641304043714273,0.06973676350157568,0.6399902325174538,0.6491106499857692
