rm_key,run_dir,model_name,nGMD_med,nGap_med,SEI_med,DCI,RSI_IQR_med,coverage,valid_prompts,total_prompts,Composite,nGMD_med_rank,SEI_med_rank,DCI_rank,Composite_rank
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,core_eval_runs,URM-LLaMa-3.1-8B,1.0856406444280284,0.6728401702987883,0.140656761923212,0.8562941898981002,1.3723898745657264,1.0,354,354,2.208028837119389,3.0,9.0,1.0,1.0
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,core_eval_runs,Skywork-Reward-Gemma-2-27B,1.0901567690807257,0.4069085566578178,0.1974725997593986,0.5928947256653044,1.051630458555432,1.0,354,354,1.69139216832608,2.0,4.0,11.0,2.0
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,core_eval_runs,QRM-Gemma-2-27B,1.0813786990664318,0.3976529955687872,0.184351443268169,0.6033123717256772,1.1271987973212716,1.0,354,354,1.511957924627135,4.0,5.0,9.0,3.0
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,core_eval_runs,Skywork-Reward-Llama-3.1-8B,1.0195071119069423,0.5279276401789402,0.278552934042873,0.3789216520912098,0.955188241354722,1.0,354,354,1.2255455124335772,5.0,1.0,22.0,4.0
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,core_eval_runs,Skywork-Reward-Llama-3.1-8B-v0.2,0.9845332880118088,0.4439925167422965,0.2520578035922057,0.3928098471873272,0.9238823007956724,1.0,354,354,0.9489924746818492,7.0,2.0,21.0,5.0
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,core_eval_runs,GRM-Llama3-8B-rewardmodel-ft,0.999306499202858,0.4051057305392965,0.2280723007331531,0.3441223251003237,1.0401141040953106,1.0,354,354,0.4150294797586789,6.0,3.0,23.0,6.0
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,core_eval_runs,beaver-7b-v2.0-reward,1.0937436015712445,0.0804655642884359,0.0821629132259849,0.6091037213805206,1.6775133817030343,1.0,354,354,0.10565357637242093,1.0,23.0,8.0,7.0
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,core_eval_runs,Skywork-Reward-V2-Qwen3-1.7B,0.968867605206936,0.3212557608789041,0.1278568387397226,0.5915300166835606,1.2430153234435626,1.0,354,354,0.09066297155056041,8.0,12.0,14.0,8.0
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,core_eval_runs,RM-Mistral-7B,0.9555005356465156,0.2375176545388133,0.1091100938350832,0.6399902325174538,1.2784981549687977,1.0,354,354,0.004928676452651451,11.0,22.0,4.0,9.0
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,core_eval_runs,Skywork-Reward-V2-Llama-3.2-3B,0.966558896457904,0.3018209842893992,0.1360766180969265,0.5417119850900275,1.178346336782372,1.0,354,354,-0.05900614433885812,9.0,10.0,19.0,10.0
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,core_eval_runs,Gemma-2B-rewardmodel-baseline,0.9591273049846488,0.2523458504867387,0.1187491605884208,0.5917469383685986,1.2567163850889425,1.0,354,354,-0.08764430792425387,10.0,16.0,13.0,11.0
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,core_eval_runs,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,0.9129996792660586,0.1800433501391708,0.1180294171891256,0.635336048542747,1.145290684019621,1.0,354,354,-0.09030821197100194,17.0,18.0,5.0,12.0
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,core_eval_runs,Llama-3.1-Tulu-3-8B-RL-RM-RB2,0.9215529172482712,0.1888038707450005,0.1199249750198365,0.6218603067237881,1.149503588255146,1.0,354,354,-0.09207089397767357,15.0,15.0,7.0,13.0
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,core_eval_runs,BTRM_Qwen2_7b_0613,0.9148542774903744,0.2371891450870256,0.1104119514225656,0.6514111749789335,1.256543779326127,1.0,354,354,-0.10939283999056348,16.0,21.0,2.0,14.0
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,core_eval_runs,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,0.926536986010096,0.2084760372720408,0.1261462563791638,0.596460859747239,1.134553619505945,1.0,354,354,-0.10945709501097867,13.0,14.0,10.0,15.0
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,core_eval_runs,Skywork-Reward-V2-Qwen3-8B,0.9491374092823348,0.3353714087045594,0.1359448729584891,0.5474958226244319,1.1845232091590154,1.0,354,354,-0.11347697883910235,12.0,11.0,17.0,16.0
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,core_eval_runs,Skywork-Reward-V2-Qwen3-4B,0.9229343248886476,0.2517790848731125,0.1262015469417514,0.585948915935658,1.175739029361599,1.0,354,354,-0.1803778858498553,14.0,13.0,15.0,17.0
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,core_eval_runs,QRM-Llama3.1-8B-v2,0.8434645471289277,0.2619827822157814,0.165906037693356,0.545994158061202,0.8675307042671709,1.0,354,354,-0.18403232859061786,20.0,6.0,18.0,18.0
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,core_eval_runs,Llama-3.1-8B-Instruct-RM-RB2,0.9017236907310188,0.1643331123052973,0.1115002396783733,0.6284917414840023,1.1943553380365148,1.0,354,354,-0.2748812182124741,18.0,20.0,6.0,19.0
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,core_eval_runs,Llama-3.1-8B-Base-RM-RB2,0.8977661915212383,0.2117260752062251,0.1185158464295748,0.5920869636097748,1.0937308526899827,1.0,354,354,-0.3802572841476011,19.0,17.0,12.0,20.0
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,core_eval_runs,RAMO-Llama3.1-8B,0.8192840071145525,0.2758551799095106,0.1545957165149609,0.4985136790677262,0.8736749761534606,1.0,354,354,-0.7109935447912055,21.0,7.0,20.0,21.0
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,core_eval_runs,tulu-v2.5-13b-uf-rm,0.7720817427415623,0.1447333112712826,0.1131161913981468,0.6417593448104293,0.9796042264809208,1.0,354,354,-0.7970146474746456,22.0,19.0,3.0,22.0
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,core_eval_runs,ArmoRM-Llama3-8B-v0.1,0.7371450683766014,0.1613701010575725,0.1432243327184054,0.5854423522823237,0.8775632462007313,1.0,354,354,-0.814954382546896,23.0,8.0,16.0,23.0
