rm_key,model_name,task,nGMD_med,SEI_med,DCI_tasknorm,TaskComposite
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,General Character,0.964924719788367,0.1233784783039887,0.8223580229085273,0.5542371215837004
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Interpretative Analysis,1.156857888182976,0.1678628672734197,0.5609328328148987,0.33011819188085223
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Professional Content Generation,0.9868067304967882,0.1118890814036498,0.7572873751527838,0.2756574418604917
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Literary and Cultural Translation,1.533010498760183,0.0897467992449835,0.9412117776671238,0.2734305048874355
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Problem Solving,0.9770296618823872,0.1100050674069973,0.9017771700146744,0.17271836448567687
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Human Decision Making,0.7730656200953823,0.1288164202204948,0.8321807936299938,0.1516554615449504
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Technical and Scientific Translation,0.9686140465454608,0.1571163962301067,0.9889785373617708,0.11903900037593185
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Data Management,0.9017123735736146,0.1076686671236296,0.6849111665138973,0.09343564904541876
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Documentation,0.8079978306137902,0.1424492861020777,0.8548568664411569,0.0843794830128144
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Factual,0.8433956158281221,0.0981548739988917,0.6913658524887039,0.08255744378833385
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,ContextBased,0.7609042447048864,0.0931965742098537,0.8501070500265829,0.023833654528726766
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Creative Writing,1.0028620033441018,0.1122080164176382,0.6674852479438877,-0.04947119230425473
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Critical Thinking,0.9093661394081204,0.1630071756845348,0.6495573484666443,-0.11598805805054424
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Technical and Practical Support,1.0751107311570127,0.109787257780792,0.7901624215947761,-0.1255375879807368
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Specific Character,1.0591401082544123,0.0733222738846817,0.8332354573621092,-0.1513783520824734
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Development and Implementation,0.7982489786476271,0.0935528404980325,0.8829894226533698,-0.18857075719191063
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Logical Deduction,0.9133164701614136,0.1097983272971918,0.8644035597759414,-0.21084992945762737
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Analytical Reasoning,0.9001675120825945,0.1091396927220676,0.719858582981359,-0.22081063405698786
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Idea Development,0.9759856458975884,0.1213719362627602,0.6625089093055815,-0.23018277232095882
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,General Explanation,1.001959070600492,0.1147097488268193,0.6046277505473042,-0.27447430583722215
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Paraphrasing,1.0603110991562816,0.088106560290715,0.9999999999979654,-0.27661751652057026
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Quality and Compliance Assessment,0.6423519971337304,0.1190819900584131,0.7627715166265069,-0.2850896878369275
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Specialized Summaries,0.9273542526597238,0.0879738364401824,0.6974325777191769,-0.2893493737519021
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Tone Adjustment,1.1662236109722852,0.1041463883416467,0.9764457442235888,-0.330075405140269
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Hypothetical Scenarios,0.8420130000644694,0.1108046235178091,0.738122724733404,-0.3322126789710478
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Quality and Optimization,0.7502806766433512,0.1353915720909695,0.6759848079203364,-0.38879598669244747
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,PostQuality Assessment Rewriting,1.1905468026384056,0.116109785891079,0.612356058584306,-0.41615512808038035
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Casual Conversation,0.5195249273545461,0.0844661156315872,0.6934555825457379,-0.4235546458577501
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Standard Summaries,0.8407996841902436,0.0772407914213352,0.7653014976675662,-0.4307516022902554
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,GeneralExcerpt Language Translation,0.9953069957784282,0.1010124705670241,0.4484751134111508,-0.6066516701862965
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Textual ExpansionReduction,0.9184801167889328,0.0976899689326093,0.6595747210686813,-0.6566577299685922
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Personal Opinion and Advice,0.9759856458975884,0.1221523662209645,0.9999999999979748,-0.6968676411822318
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Content Categorization,0.7262965256412133,0.0821457462177832,0.8040599753657803,-0.7130517259962789
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Supportive Conversation,0.8539204256208249,0.0905792161146156,0.9999999999978706,-0.7182262140825693
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,OptionBased,0.7000550427800505,0.0972430975329906,0.7221222222877376,-0.7321804007786376
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,BTRM_Qwen2_7b_0613,Discussion,0.7862286864983203,0.0854986161938301,0.6496693417468106,-0.8889423827278717
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Hypothetical Scenarios,1.0648329332646629,0.1771559302731115,0.7230541320788783,0.9059050361328046
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,OptionBased,1.1746834561958954,0.119387205187623,0.9099533954148636,0.7656635447711926
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Literary and Cultural Translation,1.5582816722424,0.1141322181626118,0.9578734837982924,0.7512948948006589
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Development and Implementation,0.98639880502388,0.1375248480989878,0.629129328034497,0.6693553715411377
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Human Decision Making,1.0775134735252072,0.1392132152880753,0.730899721530203,0.43858270967220286
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Problem Solving,0.889804670857533,0.1358571943712323,0.9012516517952508,0.3899652285021715
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Documentation,0.7611708008362432,0.6053073874611998,0.5979001494340407,0.33077705021395654
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Analytical Reasoning,1.029353689912785,0.1667741264209965,0.6219525287028395,0.3215568704640609
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Technical and Scientific Translation,1.4950689415802856,0.1646716426251629,0.8198165845754672,0.2353641190616637
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Specific Character,1.029925828053052,0.1017116717943391,0.8676781063683892,0.1643844458747159
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Discussion,0.8422216633404709,0.1362555602324122,0.7616618205238468,0.0788900396993846
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Interpretative Analysis,0.6979254189702744,0.3237359898669316,0.4869048662738121,0.06704217622338104
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Factual,0.7159518517891617,0.1595706711697719,0.6235618070895699,0.04255367172617057
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,General Explanation,0.8141904193132664,0.224573905354426,0.6211848543206611,0.03909077917760197
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,GeneralExcerpt Language Translation,0.6163834897808049,0.2104297616379694,0.5091168421078262,0.038040684499569655
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Quality and Optimization,0.8832684935041345,0.1353059993935025,0.7008237071065702,0.001570214025636285
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Paraphrasing,0.7009204248511006,0.0906396147785543,0.9999999999994632,-0.0515998896356854
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Specialized Summaries,0.7916974502216607,0.1033568221742468,0.8244219118920032,-0.07193760260714875
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Professional Content Generation,0.7757747938329376,0.1954949304290419,0.560023168545295,-0.09729560651638036
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Data Management,0.713244770160506,0.1933279758033098,0.5858171431723384,-0.11624911223717219
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Logical Deduction,1.0682049121354442,0.1238300304257291,0.6904644110634666,-0.1757156082097619
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,PostQuality Assessment Rewriting,1.1749209194966548,0.1438802998068932,0.5789542585983446,-0.19900068027956788
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Supportive Conversation,1.0121160804961076,0.0808036037345679,0.999999999998108,-0.3674107238854362
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,General Character,0.7615655835737556,0.1137641704178299,0.6887454191465051,-0.4034171772405191
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Casual Conversation,0.4610112520940226,0.1391085751728106,0.5231115262007732,-0.42760435782557016
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Content Categorization,0.5560203187278016,0.1293757837699272,0.8458794652601255,-0.46689894761872996
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Textual ExpansionReduction,1.04045658336891,0.1118024735654993,0.5144658494826672,-0.4835733415555007
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,ContextBased,0.6352855685212417,0.1453168563063011,0.6322985884283111,-0.4944419242205012
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Quality and Compliance Assessment,0.6690053572290563,0.1186973124673911,0.6838620224542501,-0.5294278319637379
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Critical Thinking,0.984807800908793,0.1029262497675886,0.5892739441923945,-0.5641317473558316
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Tone Adjustment,1.4097305678699277,0.1191801171793925,0.7579652575415208,-0.6241905051993726
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Personal Opinion and Advice,0.8570050524401607,0.1080164346175198,0.9999999999981378,-0.681518189759265
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Standard Summaries,0.7048860619737802,0.1112870166707089,0.6460091031598395,-0.7211574264205722
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Creative Writing,0.8602345533304865,0.1636984535415091,0.4561067914474058,-0.8440332638019384
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Technical and Practical Support,0.792593132109212,0.1399123242641188,0.6537875584969063,-0.9511664130034869
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,RAMO-Llama3.1-8B,Idea Development,0.783133187865215,0.1956315014314373,0.4603915278010059,-1.1086464742207136
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Development and Implementation,1.1692256884428232,0.1557196783142058,0.8929269370075773,2.464261564314051
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Content Categorization,1.1372726161269933,0.1383773549864952,0.8801583871645368,1.6420696761188578
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,ContextBased,1.120837582975243,0.1388073226993507,0.8104592686393652,1.5733694231192832
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Casual Conversation,1.100951838925571,0.1384326505691486,0.7971162509422115,1.5406416235104916
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Factual,1.0806940870877193,0.1306454932420138,0.8642843476922264,1.487914342894706
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,General Explanation,1.0571940320764164,0.1434591689013452,0.8807214116780079,1.4720927087671474
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Standard Summaries,1.0823488444200158,0.1495793578251066,0.8173236424376897,1.4504000805919657
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Human Decision Making,1.08432142353213,0.1518760919024165,0.9405724393411844,1.3883561384513943
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Creative Writing,1.0804888054791155,0.1459570803268946,0.877546428814037,1.3430543754687023
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Quality and Optimization,1.1089166611355257,0.1383262416534391,0.8925237303968159,1.267177172342338
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Professional Content Generation,1.1041529815734394,0.1493758678054138,0.8419522280920383,1.266266696854074
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Specific Character,1.1936260648238388,0.1368175125792201,0.9924298656013736,1.2595400945263253
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Data Management,1.0806375564924569,0.1430043473162015,0.8436650680897092,1.2452596614661835
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Quality and Compliance Assessment,1.210267421622815,0.1296998767323114,0.9409669665417044,1.2163064971077895
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Specialized Summaries,1.0297842481540451,0.166602472252876,0.9606838355195484,1.182488402000661
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Analytical Reasoning,1.1320853478649204,0.1418160769700004,0.8350442792838735,1.1315024600576804
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,General Character,1.0923253221904226,0.1430970909271002,0.8630657196593068,1.0828731105012532
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,GeneralExcerpt Language Translation,1.0546524998764977,0.1339019494229197,0.9291783199769044,1.0324938332556761
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Idea Development,1.058394460569367,0.1397261392800419,0.8463081987746333,1.0298338678995245
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Hypothetical Scenarios,1.0374995021139464,0.1193184716717682,0.9687551605056046,0.9367909184405314
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Literary and Cultural Translation,1.1829514211764556,0.1390477816001372,0.996411677930914,0.8619529219341084
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Discussion,1.109426218021856,0.1371648633640106,0.8873164213264727,0.8235189898519542
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,OptionBased,1.0845871433808298,0.1440983010691043,0.8423896771520818,0.7893129449537475
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Problem Solving,1.158238433208734,0.1345789151017872,0.928345683793208,0.7706794882189136
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Interpretative Analysis,1.097799932602609,0.1246438923225624,0.8668459232933557,0.7673221455484396
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Critical Thinking,1.1036449877450425,0.1463678360451037,0.8802025202853416,0.6693097338166614
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Paraphrasing,1.156558666949501,0.1408628292115435,0.9999999999981914,0.5861399666852254
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Textual ExpansionReduction,1.10470682510122,0.1387020698551744,0.8381451145150388,0.5314214118165628
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Documentation,1.0905184272192625,0.1316528621670838,0.8910027352246096,0.4903404885114793
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Logical Deduction,1.1356011861767392,0.1341226835576083,0.8892526274332332,0.46418259968564846
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Technical and Practical Support,1.0845308732952228,0.1435694474112332,0.8509727568372608,0.3806096541455722
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Supportive Conversation,1.0528883284888542,0.1137200318214413,0.9999999999987405,0.3479262345779941
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,PostQuality Assessment Rewriting,1.14681508481119,0.1398213500066986,0.8367029826224112,0.31554148770974016
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Technical and Scientific Translation,1.097608458005752,0.1585166564432297,0.9136960263159598,0.012511649775943563
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Personal Opinion and Advice,1.1408905742238025,0.1385557614484583,0.9999999999981376,-0.19484113507962578
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,URM-LLaMa-3.1-8B,Tone Adjustment,1.0905538565324226,0.1231092254469582,0.8836646662996519,-0.6102578304058589
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,General Explanation,1.5424957028257062,0.068217709174954,0.8063157521064399,2.0209436755390473
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Personal Opinion and Advice,1.2865167181553046,0.079260341470549,0.9999999999994256,1.3956082599593338
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,ContextBased,1.207721543440444,0.078696428332334,0.8011187669453252,1.181818883756601
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Factual,1.1862096748533804,0.0760126211315831,0.7931705900302908,1.0789445240642257
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Casual Conversation,0.9993471675296632,0.1003664339283348,0.8176867659634345,1.0421834616554717
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Standard Summaries,1.2695199813136295,0.0812233419386512,0.7994280113855726,0.9999722316869869
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Interpretative Analysis,1.3861006232160746,0.0655991505678219,0.8565670404638773,0.9907633256527268
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Supportive Conversation,1.4187852764464264,0.0632674792305942,0.9999999999993372,0.9862957417235083
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Human Decision Making,1.1861260059659742,0.0829994435918967,0.8753089211470776,0.6190941508943851
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Problem Solving,1.4598069366719035,0.0693728061583132,0.8432127137351851,0.24991059995323597
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Professional Content Generation,1.192592415692659,0.0710956697039519,0.6829136400230119,0.23239872619468793
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Discussion,1.1919170882443084,0.0697085029708768,0.8782269795223584,0.17068118150302114
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Technical and Practical Support,1.2713487441383668,0.06835390142326,0.8286453953061479,0.09027187272416604
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Idea Development,1.1680774316826203,0.078581709859671,0.6721638917745906,0.08404000835492775
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Paraphrasing,0.159466923047262,0.2127281269301166,0.9999999999989736,0.044121756769636394
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,General Character,1.1052361208914063,0.0846863795033366,0.6762237043981357,-0.06176386972732599
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Hypothetical Scenarios,0.9905440053047032,0.0632731739812,0.8635798908774003,-0.14898275545459494
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Quality and Compliance Assessment,0.3062729505252242,0.1266991700361479,0.8590320053528795,-0.2123615917393249
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Content Categorization,1.0016361092351351,0.098489682336132,0.4425731242430393,-0.43903949004075926
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Literary and Cultural Translation,0.4506047694756009,0.3812444367628443,0.5740202173280435,-0.6420416364460494
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,PostQuality Assessment Rewriting,0.2029448627530233,0.1180070783786817,0.7989841227079971,-0.7495129426488152
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Critical Thinking,1.0260913297541694,0.0792331625369275,0.5412966422751924,-0.7693084481189629
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Specialized Summaries,0.5601213667413161,0.0851986665124477,0.720080055912041,-0.7776900113042933
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Documentation,0.5643810095626606,0.108628140917095,0.557569466533257,-0.8265866222493188
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,GeneralExcerpt Language Translation,0.639728898051759,0.1317202015657921,0.5723869043910429,-0.8395723449471142
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,OptionBased,0.7696581425521338,0.1020470917249205,0.5569832059313401,-0.8964845937057359
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Technical and Scientific Translation,0.171851412470666,0.3409746859903674,0.4849414213775415,-1.1276834365801536
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Data Management,0.7773123516625331,0.0889977550729739,0.4144945727789505,-1.148752995231583
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Development and Implementation,0.8719612864536078,0.0865724051219913,0.3244026962323527,-1.2888121412807865
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Logical Deduction,0.8853004976458084,0.0912955730953595,0.4563336306905736,-1.3657997597795375
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Specific Character,0.8158433684003984,0.0744836538961071,0.6415120138269982,-1.3718208744719225
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Textual ExpansionReduction,0.2506579796009993,0.1242733582496646,0.3841075242373736,-1.8194178614800798
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Analytical Reasoning,0.7982938192669318,0.082199935652902,0.3892300620155825,-2.1075971927298816
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Tone Adjustment,0.164265931374922,0.1377111885128835,0.7187174510178558,-2.344323004502169
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Creative Writing,0.634788378315042,0.1035765228573504,0.2502032058429462,-3.023623906194188
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,beaver-7b-v2.0-reward,Quality and Optimization,0.3657525649474359,0.0975379493185241,0.2015956900821057,-3.4445296692065765
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Supportive Conversation,0.2762027572408637,0.4068176136860061,0.9999999999992446,1.327639739675361
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Quality and Compliance Assessment,0.6805345834862421,0.2044701973955237,0.6488784646648738,1.054870656438467
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Specialized Summaries,0.8145060157000819,0.1516518798204412,0.7619708156291778,0.35552633105687403
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Paraphrasing,0.7800562188450945,0.1461387301167795,0.9999999999987872,0.31187181642229606
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Tone Adjustment,0.9877623039766268,0.2191335968904987,0.9789529367092534,0.27696309246725287
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Hypothetical Scenarios,0.5920932921450757,0.1732429658723473,0.880422508407815,0.25600851839811234
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Specific Character,1.117301598583392,0.1267484635842688,0.7798873418988969,0.17391132508442136
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Textual ExpansionReduction,0.771091213230785,0.1561972583976111,0.7507305152266666,0.09210545179000518
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Human Decision Making,0.5431383176669357,0.2561239536164207,0.58429529292591,0.07745453743111075
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,GeneralExcerpt Language Translation,0.8145060157000819,0.1387342746523148,0.7409663712646641,0.07163880930163814
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Problem Solving,0.8799807758045901,0.1155380099205406,0.7853507841670337,-0.003958679879254551
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Literary and Cultural Translation,1.539865627257872,0.0814319703117221,0.8716909340018809,-0.17714115426340904
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Creative Writing,0.8891472422192213,0.1890586683253481,0.5340352810475771,-0.2002980041308655
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,OptionBased,0.6527329930418663,0.1618129243749826,0.6118910115198697,-0.20761095934606344
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Analytical Reasoning,0.8243775949158385,0.1466497655538246,0.6494599275627528,-0.24283331462518498
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Professional Content Generation,0.7669612668241929,0.1520762472661537,0.6562307286210896,-0.27079456540336166
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Development and Implementation,0.8188531620320207,0.1449039791678531,0.540198505909548,-0.299789970031867
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Standard Summaries,0.8118870252959016,0.1250593875657537,0.6156837905893673,-0.30376991827488464
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Documentation,0.6273489322013492,0.0981329782472907,0.8013666048844392,-0.31698018620635227
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Casual Conversation,0.4641656839408827,0.1639757646215027,0.4555612027539734,-0.34226277691208606
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Interpretative Analysis,0.6126045198778147,0.2045779325826722,0.6196743060834898,-0.3807553781627276
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,PostQuality Assessment Rewriting,0.7486283339949306,0.1365755213345605,0.6776499228914513,-0.39113756941291367
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,ContextBased,0.5737603593158134,0.1412858600121846,0.7409747943855564,-0.40616775338910405
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Content Categorization,0.5592551816926606,0.1344484318833391,0.8265553762934926,-0.42736398513275886
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Factual,0.7311012443669545,0.1298291268107545,0.501161026502656,-0.5550680257078495
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Idea Development,0.7754226204376985,0.1314536100498097,0.6962580499904499,-0.6797852716488227
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Personal Opinion and Advice,0.933569348690126,0.0990882966955588,0.9999999999980972,-0.7118367549304275
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Critical Thinking,0.6559563658470113,0.1747144303171233,0.5602289044947822,-0.7568729243222336
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Discussion,0.5765808105203152,0.1347800884733171,0.6073840355537278,-0.757481258442892
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Data Management,0.7041054971238649,0.1172522005831983,0.5363511460556354,-0.782199395166332
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Logical Deduction,0.7790489148434867,0.1432820478987317,0.3840583313341308,-0.791865468537133
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Quality and Optimization,0.5137250408199874,0.2442349791798276,0.4061647473210721,-0.8303168213585289
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Technical and Scientific Translation,1.1865663399939497,0.1286572395456435,0.6826340665908025,-1.0476212839363557
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,General Explanation,0.7514487851994324,0.127297888372356,0.5592074085512168,-1.5932836667128234
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,General Character,0.7238486555553783,0.1400769926604198,0.3388317982194835,-1.668970433591164
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ArmoRM-Llama3-8B-v0.1,Technical and Practical Support,0.6017634105605106,0.1413045234072351,0.5606273295902862,-1.672743664341965
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Textual ExpansionReduction,1.5076764908347855,0.2925505540176196,0.6689275486387795,2.7263973711232876
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Logical Deduction,2.0383067339037075,0.2587833673199485,0.6262809390646965,2.718671833638159
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Personal Opinion and Advice,1.2984518334998538,0.4819915100426483,0.9999999999987,2.654787287388178
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Hypothetical Scenarios,1.253611450184379,0.4733239978587868,0.7161841759186249,2.582198921557944
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Quality and Optimization,1.1914175529182784,0.3058286823426449,0.7360023425864376,2.570520080540997
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Specific Character,1.1484621793472758,0.2820565661093028,0.7931111099915843,1.5457691656181871
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Supportive Conversation,0.905581529847404,0.2836893426092853,0.9999999999990612,1.5445183525387693
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,PostQuality Assessment Rewriting,1.2510067094465631,0.3012551207555114,0.5114389727557277,1.4353511401176366
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Data Management,1.0444553386583373,0.2686323891528913,0.549757675265817,1.0912923944554775
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Human Decision Making,1.1900694853434437,0.2294983496169085,0.5889525449257549,1.0319190485030623
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,General Explanation,1.0065266577392604,0.3370868176569661,0.3719602291256373,1.023973881495577
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Technical and Practical Support,1.116062860345317,0.3081899975450906,0.5752804926489847,0.9428698394269226
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Documentation,1.3897091537352713,0.5868973540397947,0.5086728067761659,0.9319745857456747
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Analytical Reasoning,1.5248015187557784,0.1360020561964869,0.5836098637557827,0.8878159889178109
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Technical and Scientific Translation,1.3052835657858803,0.1725492135935628,0.9859514753574292,0.7009895687451833
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Interpretative Analysis,0.9731791217143252,0.3546664937233421,0.4351877387978547,0.6282287393568107
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Creative Writing,1.0158774485019908,0.2218122083852133,0.5015563662050933,0.46793489614174866
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Problem Solving,0.7440419068968782,0.2214078798402394,0.0484663506233681,0.46517881598668365
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Development and Implementation,0.8400060393427353,0.1448002259766153,0.7841644299338543,0.4092681539299866
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,OptionBased,1.277842393626958,0.1068551553499201,0.7024972626604205,0.3455688222993858
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Literary and Cultural Translation,1.8949146138567816,0.1040814835644704,0.808848003084366,0.2884835380489983
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Critical Thinking,1.1578301065622283,0.1867771546268162,0.4656100526088124,0.1979578470676776
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Paraphrasing,1.0010429930280689,0.091017746204582,0.999999999998924,0.18087174424966113
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Discussion,1.030186386107757,0.2583338205850458,0.2530113437711453,0.09438914700309109
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Idea Development,1.0978982209894943,0.278407082436573,0.3165428469966692,0.09321766033021528
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Factual,0.7521760095518127,0.2568484172277198,0.3162257181972067,-0.08084319615386515
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Quality and Compliance Assessment,0.7035541824459118,0.1401511790974691,0.6660484643384853,-0.1258502301737285
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Casual Conversation,0.4035977227437187,0.211908858081891,0.4288853795737695,-0.1619421018008957
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Tone Adjustment,1.3065116781951578,0.1690688818848512,0.8201353210711545,-0.18917951324589707
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Standard Summaries,0.6145360253008978,0.1767098793747046,0.5988990519715746,-0.2210830146877333
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,ContextBased,0.7333030635041276,0.1766931372184346,0.4858012010053433,-0.28449050565341005
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,GeneralExcerpt Language Translation,0.9142525746719764,0.1181303900459666,0.5650228390160965,-0.3311854279694161
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,General Character,0.9497935765813884,0.2260131448792033,0.3947222449528749,-0.425678486368821
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Content Categorization,0.5384958746390377,0.1785170520157882,0.5637894504610366,-0.47021618805310056
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Specialized Summaries,0.6868747022827033,0.0754013906498779,0.8070509600408529,-0.5451916134909833
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GRM-Llama3-8B-rewardmodel-ft,Professional Content Generation,0.635693831644913,0.1812303923275159,0.440973853312046,-1.02299780829875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Specific Character,0.9563331831706324,0.2696036509453354,0.8541549220103889,1.3351503594109295
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,General Character,1.256736089756178,0.0910081118398409,0.8484357900556239,0.9343947219397857
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Interpretative Analysis,1.392258222780626,0.1378354568049574,0.6592320975617785,0.8778174752308953
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,General Explanation,1.202868455700696,0.1463285371377412,0.5506837437800784,0.8578844066145523
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Problem Solving,1.5256772109887884,0.1025259055570234,0.6391143162150298,0.5729000215155285
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Critical Thinking,0.9280635539098742,0.1795694721830447,0.8322642268869485,0.4547851332506365
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Human Decision Making,1.0096586104660132,0.136954015709939,0.7603785611437889,0.40179980074459143
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Personal Opinion and Advice,1.0008604765452915,0.1806310470790965,0.9999999999984568,0.3599623276750953
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Supportive Conversation,1.397843839119694,0.087284788293704,0.9999999999980992,0.33351747896904627
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Technical and Practical Support,1.201758754524208,0.114219627563166,0.8309233550980916,0.3297573543284307
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Idea Development,1.0633971789511012,0.1279142562919714,0.6941686893177711,0.26946675579235824
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Data Management,0.8969406888333926,0.125823896605181,0.7076449164116432,0.26778774878778866
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Discussion,0.957863578992602,0.0986595152879966,0.8801778652996021,0.18467791673297515
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Documentation,0.7368875587450316,0.0924198941195134,0.9178315281258884,0.025905496854067234
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Professional Content Generation,0.9102873167743454,0.1202690871103321,0.705278469050208,-0.024036119617525525
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Creative Writing,1.0119397541096613,0.1019192221099271,0.6866628807815236,-0.046984128580658396
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Paraphrasing,1.1606942280050954,0.0675046778547773,0.9999999999983348,-0.1183776974217858
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Standard Summaries,0.8335021694904308,0.0845346250362243,0.8270245241497147,-0.1640216793597966
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Literary and Cultural Translation,1.112530766703216,0.0992204282275519,0.9147816771468796,-0.21443426880783473
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Quality and Optimization,0.7456535083759351,0.1578500621580043,0.6638225125766105,-0.21533901677393413
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,PostQuality Assessment Rewriting,1.072165099048728,0.1329300994216978,0.6516852530584841,-0.23580481639892747
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Technical and Scientific Translation,1.2111907219076763,0.1201132574477251,0.9027622457778752,-0.24612243898685446
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Specialized Summaries,0.900544344059674,0.0979134596949561,0.6780689328130857,-0.2566415093345929
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Factual,0.8717765151289742,0.1149471689333011,0.5133645444917659,-0.28843756352266314
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Quality and Compliance Assessment,0.610681792389603,0.106968622031372,0.824767909691832,-0.3506336599641764
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Content Categorization,0.6030884346647817,0.1063470565429766,0.9313506105675404,-0.4506241565284558
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Logical Deduction,0.8429446460297584,0.1147593314710437,0.7376456273237033,-0.46317134546440286
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Development and Implementation,0.8466596326259213,0.0926371649699158,0.6426611876081848,-0.5524321943054751
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Textual ExpansionReduction,0.8645875956991647,0.115229708632477,0.6113693097438591,-0.5694916431310282
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Hypothetical Scenarios,0.996916915809195,0.0820864665041878,0.5793037836279522,-0.800861630284157
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Casual Conversation,0.5560300767837022,0.1080885662527091,0.3027600780741064,-0.8104598122636875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Analytical Reasoning,0.8202698286194449,0.1292796215195469,0.5614472420105546,-0.8123479420745691
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,Tone Adjustment,1.1914526798918876,0.0967651981362617,0.8350271687388088,-0.8294352956334661
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,OptionBased,0.5830059304348792,0.1266041710831182,0.5551594078105536,-0.9031046160555609
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,GeneralExcerpt Language Translation,0.7482867087408602,0.1296309898424039,0.3600872098695898,-1.0452902453886066
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Gemma-2B-rewardmodel-baseline,ContextBased,0.7655099015493083,0.1051583412914912,0.4472556970902025,-1.067924157686317
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Specialized Summaries,1.3887258314528177,0.5534762053748473,0.8228635146864711,2.777906539090569
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Content Categorization,1.2107652157368989,0.1946632408952328,0.7717427417367259,2.456979992742883
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Standard Summaries,1.1895129965506943,0.2464848210453403,0.5745510287576094,2.3824498565917094
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,ContextBased,1.0875120949015755,0.2393186373459638,0.7385938062849047,2.3266635814466485
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Logical Deduction,1.033892149617591,0.325103524761086,0.8462156326361144,2.1044055194107436
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Professional Content Generation,1.1113294310673745,0.3083012934008589,0.5776765371976711,2.0992048700324157
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,GeneralExcerpt Language Translation,0.8761979938475971,0.2430146829695502,0.8924868721049174,1.981417258925675
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,OptionBased,1.1907168014490133,0.2312916408210517,0.7491374130544378,1.906895493210549
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Casual Conversation,1.0502728966451786,0.2073455068806564,0.6714605088854418,1.7396896244930034
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Hypothetical Scenarios,0.8246738584266396,0.2952913841778748,0.7545002780153599,1.7085393059520109
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Development and Implementation,1.0949899359210378,0.1594913294857517,0.6733836999447995,1.5948685653462096
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,General Character,1.087212081219752,0.3152683956075201,0.6649204868830397,1.538212339644971
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Data Management,1.0644091663156208,0.2787809079452075,0.6492438771876262,1.5052714751312761
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Critical Thinking,1.2855286252474258,0.2572758109557735,0.6172653998292951,1.3601514150938776
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Technical and Scientific Translation,1.015425503893251,0.5935923408662002,0.7763690547140826,1.2143302860190321
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Documentation,1.2143541294057174,0.4476405922195862,0.9063703833291856,1.2060423012267898
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Idea Development,1.141008284541808,0.1915188674684512,0.6896071319491491,1.094738600501913
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Creative Writing,1.1679063862028365,0.1909936142209121,0.5994570720813873,1.082446076395664
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Factual,1.0393974006790605,0.1860616076183004,0.618190532374585,1.035228731879457
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Personal Opinion and Advice,0.7416525723239082,0.1714571404634879,0.9999999999991313,0.9716280781899831
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Analytical Reasoning,1.0964225012517472,0.1817068102800864,0.6963453005928775,0.9441112023680387
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,General Explanation,1.0068796677403675,0.193327044728126,0.6637557305162177,0.8513383472743589
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Human Decision Making,1.012197410250697,0.2134043867911914,0.6673449079589581,0.8351459201829858
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Discussion,1.2040700889837666,0.1772332765006685,0.6841723184043611,0.7609465155345525
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Interpretative Analysis,0.8712622330428756,0.2660190459125679,0.7427532685596464,0.7505447735123454
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Supportive Conversation,0.921319515855201,0.145920960964538,0.9999999999992772,0.7022551753088189
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Quality and Optimization,1.00330575475564,0.1426858510701102,0.7780638536283403,0.6395866138867712
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Paraphrasing,0.977737088722194,0.1155035319620029,0.999999999998967,0.4278359542634429
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Problem Solving,1.2252858779371774,0.1227595863645657,0.5868909928587392,0.3712081252643029
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Technical and Practical Support,1.0053608484761338,0.2180598667112611,0.6723677854555946,0.24898156185453002
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,PostQuality Assessment Rewriting,0.986857504649642,0.1462485061344366,0.7589264587859886,0.08519034098572772
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Textual ExpansionReduction,1.0294341338142383,0.1162574853443047,0.8217358308455001,0.05583812603504473
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Quality and Compliance Assessment,0.9766195377573998,0.1213148047964318,0.7185751293847917,0.011453112134079596
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Specific Character,0.8000239842937797,0.1525012271457902,0.6764935096331334,-0.626248275290432
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Tone Adjustment,1.493807498597166,0.1284524828846163,0.6855500055031324,-0.6854788498996734
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Skywork-Reward-Gemma-2-27B,Literary and Cultural Translation,1.0771953744178548,0.1575100202497508,0.6088945233914825,-0.844965726696388
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Hypothetical Scenarios,1.3303331334982145,0.275482281855426,0.7179684189963755,2.6445842888995115
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Quality and Optimization,1.4758323064825554,0.2205908623278507,0.6816147945635264,2.1232659258889655
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Human Decision Making,1.2773313081396656,0.2613387139228523,0.6757706932146721,1.8568335841464372
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Problem Solving,0.8964957647452365,0.5846477541683137,0.481095343650894,1.8307439597928283
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Textual ExpansionReduction,1.1955784307217798,0.2499061810107422,0.5480066486277005,1.8247482082899644
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Logical Deduction,1.4520440253882827,0.2207644527877964,0.7085675749890398,1.8197035781806414
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,General Explanation,0.9838160424173866,0.3293994251656639,0.5609625042494693,1.7002255493821454
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Development and Implementation,0.7578836801744094,0.4289348103909596,0.6578166083771012,1.5946633345825536
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Data Management,0.9750691449244336,0.3713873805917358,0.5153757552165664,1.4836836775387527
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Content Categorization,0.6436666927114287,0.2353796293982367,0.8783169976135525,1.4689078335435926
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Discussion,1.0052984226600794,0.4568374793143709,0.5264657319909711,1.3253496673100336
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Documentation,1.6943877540524133,0.4110312125192368,0.6692820278414688,1.3172243876138932
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Supportive Conversation,1.0972807966959737,0.2246316991097096,0.99999999999869,1.2025967295470572
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Creative Writing,0.8982735716606791,0.3326913848341154,0.5041642261629734,1.1275361679562461
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,General Character,0.7948218282871502,0.3481974540114441,0.625419288056884,1.1077509040353961
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Idea Development,1.0782912822387725,0.3663868466914526,0.3901983196964045,1.0891391227439184
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,PostQuality Assessment Rewriting,1.2465466024132172,0.2189331738776563,0.7569070013175697,1.0819800774106783
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,GeneralExcerpt Language Translation,1.0825750752859462,0.1387025812979077,0.8615447926507902,1.0243510811716243
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Critical Thinking,1.04331353252664,0.2687768047605341,0.5706315157712629,0.8348105669479899
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,ContextBased,0.8932703462947099,0.2130551823200479,0.547088285664755,0.8196006942059916
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Technical and Practical Support,1.1751281843832555,0.2706695388317135,0.5721084559073893,0.7474755880106163
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Analytical Reasoning,1.209814006391561,0.1809056411454933,0.5881892528591023,0.7380701875194416
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Interpretative Analysis,0.9062048209624144,0.4125513179522785,0.3840785989029376,0.6703744998619899
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Factual,0.6912738690411988,0.3503123209773897,0.3380330509994907,0.5905724006651855
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Literary and Cultural Translation,1.7030870902902482,0.10862705551728,0.8931453203355437,0.5314041002436615
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Specialized Summaries,0.6317720054831991,0.1718682313387325,0.7973833501937967,0.3877504382793219
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Tone Adjustment,1.3889264130791337,0.1637787996354161,0.9020778828767496,0.1640759103539978
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Personal Opinion and Advice,1.188002524130696,0.1662172919224656,0.9999999999981788,0.08159164909131551
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Quality and Compliance Assessment,0.7714107571472614,0.1323122763064458,0.7035707343743487,-0.07055323754146008
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Casual Conversation,0.469144220483283,0.1942865898342853,0.4635543909022405,-0.08416151824200896
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Paraphrasing,0.9578202630683302,0.1114896687934163,0.9999999999980694,-0.1467746540614555
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Professional Content Generation,0.7950754883144459,0.2065428560440147,0.4463835022713847,-0.2785860840126026
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Specific Character,0.8551535537447938,0.14484877924277,0.733279935456371,-0.3555488335004133
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,OptionBased,1.1665299841476828,0.1139318771075693,0.4098305539637435,-0.3680760114716892
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Technical and Scientific Translation,1.0238374718963932,0.1475420844551672,0.7541988231755412,-0.8063543144634525
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Skywork-Reward-Llama-3.1-8B-v0.2,Standard Summaries,0.6936989463711201,0.1186569913790269,0.5825981160368892,-0.843706184346691
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Quality and Optimization,1.4660566111896185,0.3273896197406978,0.7671092932920991,3.5224302781898853
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Hypothetical Scenarios,1.653886387970841,0.3399089831869918,0.6365647274471572,3.2609635490547357
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,ContextBased,1.032481276589598,0.2824402797202154,0.5463514222888988,2.02121351762799
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Problem Solving,1.1556060339790457,0.5880821485046459,0.3042419944065243,2.0137654298651486
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,General Explanation,1.1358404493103282,0.3121566563366246,0.4616759103669589,1.9471273659229023
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Data Management,0.9674315593636444,0.5900219727062197,0.4625489175548086,1.9290698007934068
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Critical Thinking,1.0426105933243504,0.45653843139791,0.5555796965577807,1.829506413376196
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Interpretative Analysis,1.0138305058023065,0.5364221272323001,0.4601963659229678,1.7156804881681884
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Human Decision Making,1.4041855315864376,0.2662644951548044,0.5588444704015757,1.694976820007005
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Technical and Practical Support,1.425443352448687,0.313885814096714,0.5503076477230461,1.6040662452911842
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Supportive Conversation,1.4883070745490277,0.1947971797322731,0.9999999999986108,1.6018708822128902
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Tone Adjustment,1.7101026777880044,0.2819258897756057,0.9267497110511966,1.5608534212108516
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,PostQuality Assessment Rewriting,1.3682584180848445,0.2468224934387223,0.6992660153205266,1.35355432735149
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Personal Opinion and Advice,1.5023938714812468,0.3040625765607154,0.9999999999982446,1.3486484332231992
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Documentation,1.6998402815096774,0.4755803580963607,0.5956730941996524,1.2974238454731868
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Discussion,1.1049747217283183,0.5551386530888586,0.4733208503368919,1.2932008415779495
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Specialized Summaries,0.7479637573213036,0.2225568229095542,0.907009199757144,1.2920093117860976
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Textual ExpansionReduction,1.1763173560598916,0.2235035214994549,0.4727592477364087,1.2901569421168009
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Development and Implementation,0.7864343808698768,0.526523220567805,0.4766877022148222,1.2891728042019657
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Logical Deduction,1.716089075617029,0.1812319773606463,0.5553152586320529,1.1539044965922958
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Idea Development,1.1454805351000517,0.4291180277905357,0.3419902069604532,1.107917729205826
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Creative Writing,0.8904984933859799,0.3381012614465508,0.5037072315167013,1.0973353929455096
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Analytical Reasoning,1.1308096062368116,0.2360470885618116,0.5202867288096631,0.9220615431969574
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Literary and Cultural Translation,1.76734869784398,0.1017730614132051,0.9296373268233056,0.6800969590100753
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Content Categorization,0.8464033501934731,0.1721704095226055,0.6618157777925866,0.6732616407989733
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,GeneralExcerpt Language Translation,1.1408438661547242,0.1277069884258792,0.593372077031581,0.44653126466145204
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Paraphrasing,1.240111544149328,0.1325478585669474,0.9999999999978348,0.44587674134683186
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Professional Content Generation,0.9316615233561074,0.2002651803256547,0.5643936943253638,0.4231245180658752
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Factual,0.8760560904602726,0.2395276353547791,0.3845055325712607,0.30621430587971776
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,General Character,0.8546019227304531,0.2375397852772664,0.5520061908450733,0.11626666241175876
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Specific Character,0.8434363308613037,0.148155936879519,0.8456731467005328,0.10535670135621462
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Technical and Scientific Translation,1.0415482749596017,0.1372954436435389,0.8755765466218131,-0.41182254115771477
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,OptionBased,0.896545976907137,0.1521274173144279,0.3137303859930342,-0.5390007419661254
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Quality and Compliance Assessment,0.8899596303160932,0.1145063041972214,0.6209414239772396,-0.5599052641408946
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Casual Conversation,0.4557123531026111,0.1285179992402864,0.212115659388295,-1.042758504655538
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Skywork-Reward-Llama-3.1-8B,Standard Summaries,0.5540646807003051,0.1663351642910013,0.3243097703736289,-1.376142617034923
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,PostQuality Assessment Rewriting,1.5620128699873626,0.2870934892372553,0.6197734721382084,1.7682557606015274
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Specific Character,0.97610113898479,0.3109684297367679,0.8040032059194011,1.5041577925515508
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Tone Adjustment,1.6824150360579735,0.1679576255093037,0.9796465169221586,0.8641377223862947
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Personal Opinion and Advice,1.6304669754446903,0.158558996828517,0.9999999999983992,0.8106678761900923
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Casual Conversation,0.824988432687619,0.1478251588285007,0.5734440406578508,0.5849665623854657
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Specialized Summaries,0.9972413280782072,0.1113064343407308,0.9748611887507104,0.5764017422536983
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Standard Summaries,0.9015587933186174,0.1315327705477902,0.7755336174347093,0.5362822035970973
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Supportive Conversation,1.285920909645594,0.1286420017897673,0.9999999999982218,0.5231723159235337
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Quality and Compliance Assessment,1.1196589231705445,0.113828058498702,0.860490456097708,0.5201188459293653
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Problem Solving,1.2457264417618443,0.1323064872921724,0.5195647590948601,0.4577981245905639
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Technical and Practical Support,1.2161897504950658,0.1646110113191392,0.7224910669460046,0.43392543697768365
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Factual,0.8648388847933055,0.1808379669382999,0.569518425164434,0.4055565867274007
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Critical Thinking,1.219836317742535,0.1290863683392139,0.6933352395534342,0.3702790141972184
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Creative Writing,1.033289147241986,0.1287207417875354,0.704416996398379,0.36129664952488183
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Interpretative Analysis,1.0961966013544855,0.2209209277828913,0.4899609269134096,0.30895457298969303
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Idea Development,1.1066563853540674,0.1895666602523965,0.5403088846799481,0.3011639452170732
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Logical Deduction,1.1438239339222978,0.1257406120436928,0.8288073745750396,0.2157841007737803
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Discussion,1.2171815228113356,0.0961842176931372,0.7671054765939874,0.15126424573248107
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Content Categorization,0.7773816833809183,0.0993288558252962,0.9627755353379858,0.08822243393682433
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Documentation,0.5769522176412107,0.3633483603801882,0.7901905849003678,0.044105781496932325
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Technical and Scientific Translation,1.454891391075736,0.1166654325174583,0.8953215308787852,0.015168278766743995
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,General Character,0.997060510029006,0.1282263379135274,0.6400507337023104,-0.07871336461184647
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Literary and Cultural Translation,1.7622334090716123,0.0828765878944434,0.8310944463688744,-0.08355401183621125
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Human Decision Making,1.0392431373910986,0.1264057751388676,0.6478839159541525,-0.08421281429156668
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Data Management,0.6949336873222799,0.1942382950400489,0.5804992541084051,-0.17192022208881688
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Hypothetical Scenarios,0.979052123110273,0.1102164867456136,0.6873449893369715,-0.17984467754694805
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Paraphrasing,1.1082342430873544,0.0774523781072817,0.9999999999981868,-0.18499561513348373
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,General Explanation,0.9994887579844076,0.1376529149594244,0.5563575826049145,-0.23987911786630978
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Textual ExpansionReduction,1.1850655522719569,0.110950347086703,0.4767223066867557,-0.3171758634706675
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,GeneralExcerpt Language Translation,0.8726824445331456,0.1097850860413442,0.6345917355240357,-0.4025409223854871
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,ContextBased,0.7824361981298267,0.0987399522610995,0.6674639009556564,-0.40611887471500313
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Analytical Reasoning,0.8906954462976816,0.124878632105344,0.6322352647570885,-0.41962044085798594
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,OptionBased,0.8523939042561526,0.0950764813565742,0.6674184291985024,-0.6128587008660368
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Professional Content Generation,0.8959077048667751,0.1224499744862973,0.5066094426406982,-0.6579464324687774
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Quality and Optimization,0.6361439080399066,0.1572414502073591,0.5949634460651645,-0.728497592060422
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Skywork-Reward-V2-Llama-3.2-3B,Development and Implementation,0.7087192805652875,0.1196000030826651,0.6980452736387838,-0.7736923984607637
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Casual Conversation,0.8274012038656968,0.2047455285573021,0.7563266340526077,1.334595266287263
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,General Explanation,1.0377781519257727,0.1837808739541807,0.6819817502428593,0.9800515943164022
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,PostQuality Assessment Rewriting,1.3220380427594345,0.2204208469452953,0.6675023069188467,0.9559644849388467
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Problem Solving,1.1398515555252624,0.1587818581524355,0.4871591129994327,0.6259420467799628
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Textual ExpansionReduction,1.1734268857502026,0.1424312289086516,0.698074880824994,0.4742583304218687
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Logical Deduction,1.243187486230743,0.1274917411449545,0.8687578578316894,0.4299531154836297
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,ContextBased,0.9057842906041966,0.1168210694743059,0.7158923449294208,0.34153636401039
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Documentation,0.6304439863938716,0.3667541158318503,0.9012602737303612,0.32143275302798385
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,OptionBased,0.9831069023966608,0.1211574410596917,0.8428508757739727,0.31819954917720195
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Personal Opinion and Advice,1.468362064963969,0.2345027549419552,0.9999999999978308,0.3096075891798524
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Tone Adjustment,1.49864358630917,0.1416980867151631,0.9452810634483167,0.3001659520320702
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Interpretative Analysis,1.0860602491306288,0.1826351163324326,0.5706077880835592,0.29480742991611225
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Critical Thinking,1.125954192760077,0.1347388270077713,0.7086140811219368,0.2490990156373138
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Human Decision Making,1.1295807824439432,0.1298701314830354,0.677932523242687,0.23607963164613677
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Technical and Practical Support,1.2289266686926208,0.1228921718144896,0.7605932975182071,0.2340289376848395
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Data Management,0.8266049509749869,0.1527505878117936,0.6932071865208508,0.2243159838261695
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Specialized Summaries,0.8168646021752652,0.1350313729518707,0.7302865229994274,0.12228583210601998
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Paraphrasing,1.375178956402497,0.0675208395963211,0.9999999999980644,0.1086313147480315
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Supportive Conversation,1.114339182499945,0.142299266534396,0.9999999999978474,0.09015377396769747
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Standard Summaries,0.8160637164317751,0.1367841160376266,0.6762145252200552,0.05507576590589447
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Literary and Cultural Translation,1.4235853933980946,0.0826152151455,0.9418592753219313,0.03786498553692094
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Technical and Scientific Translation,1.5214647202098806,0.1304691200015317,0.8418010205634571,0.028501369017385314
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Content Categorization,0.918636720897189,0.1130405107477273,0.6584255362030264,0.02319535682413143
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Analytical Reasoning,0.9856020672358676,0.1222665808595133,0.6914977246038907,-0.00033560995145973105
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Idea Development,1.020298846727303,0.1456642006090426,0.6249620709082027,-0.022237222666271893
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Quality and Compliance Assessment,1.055329191618901,0.098864610265282,0.7944474333674219,-0.07729289208280857
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,General Character,0.9713711390705198,0.0945962008025786,0.7126440876035592,-0.07977616992500688
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Creative Writing,1.0064684361571323,0.1090495130402609,0.6591307231921963,-0.09822904876215668
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Development and Implementation,0.7247479464343837,0.1320048359796199,0.8309576410375219,-0.21083001054637174
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Factual,0.8193370510911635,0.1188206428939379,0.5519554188613996,-0.261713774588874
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Hypothetical Scenarios,1.0013083348413598,0.1006038905190372,0.6801988733381179,-0.2620077957354569
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Quality and Optimization,0.7137295280548285,0.1731066863131164,0.6288086449768454,-0.26376303548042596
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Professional Content Generation,0.942428661625612,0.10213826896855,0.63937594235439,-0.319077143642055
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Discussion,1.0240577342474118,0.1143029967189462,0.6044328328920846,-0.4028820679977034
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,GeneralExcerpt Language Translation,0.8782460158922301,0.0938744243685801,0.6956567829386074,-0.4736446465285309
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Skywork-Reward-V2-Qwen3-1.7B,Specific Character,0.8425168574651668,0.1125136134019009,0.7287058974231748,-0.6599854109713668
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,PostQuality Assessment Rewriting,1.4887104093430008,0.2360112763780417,0.556016863019178,1.0064623025702306
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Casual Conversation,0.7200832100360118,0.1858721172313448,0.6958441332731976,0.8342604592990063
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Tone Adjustment,1.5037768915975287,0.1977929251748547,0.9377005214715962,0.6939741552861647
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Quality and Compliance Assessment,1.3597690721245184,0.1076685029766231,0.8057975459868268,0.5020066294662806
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Technical and Scientific Translation,1.5413368257992612,0.1212473418084958,0.9577991509495076,0.4149047091579151
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Critical Thinking,1.0584795294621054,0.1570138536279348,0.7648764902680856,0.40551439480933255
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Problem Solving,1.0459675718804904,0.1413278636202606,0.613284686978858,0.3926002337673451
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Creative Writing,1.0440546860579167,0.1273870145832441,0.7045471968749829,0.3880973774917594
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Textual ExpansionReduction,1.2583540415855894,0.1185582990111268,0.7658180453413485,0.3781960637641709
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Personal Opinion and Advice,1.5178929463049915,0.1292401958160606,0.9999999999982916,0.36212823768540003
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Standard Summaries,0.8838675419493214,0.1233205362405703,0.771255610393404,0.3565220121215399
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Technical and Practical Support,1.1203655965773238,0.1586041377856525,0.7776857152726232,0.3485264317140918
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Specialized Summaries,0.9815229698495443,0.0940625048383673,0.949708342818264,0.3241364387760598
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Factual,0.770495972458602,0.1620732227651262,0.6570787477015051,0.3055310328854874
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,General Character,0.9649847284820507,0.1180001983783931,0.7605575897295788,0.26961622027202115
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Data Management,0.7715827482613641,0.1514283277752454,0.750079753400241,0.24901262864840526
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Supportive Conversation,1.142566300799061,0.1207264820819883,0.9999999999981796,0.19330599448271343
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Discussion,0.9704486843169708,0.1076343538859527,0.8296857847551963,0.1440995985514381
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Paraphrasing,1.3452660289854448,0.0729753143519993,0.9999999999980916,0.13000496720974364
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Interpretative Analysis,1.0870444890306448,0.1454456876554122,0.5557240199413166,0.06548554382582703
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Idea Development,0.9850199015838932,0.1525043110640215,0.6431283029774385,-0.0013767040927806535
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,OptionBased,1.0161514165537897,0.0874604255761672,0.8270691432620665,-0.0949494222723514
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Logical Deduction,1.2277438581815936,0.0968766392286518,0.8558107348540299,-0.10320564221828096
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Human Decision Making,0.9511774624713136,0.107227934194875,0.7315199932072509,-0.1245002260238316
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,GeneralExcerpt Language Translation,0.8689434233147124,0.1005453944501449,0.7630400915119168,-0.26130610160261447
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,ContextBased,0.8246743528841204,0.1010698930577984,0.6581151359031826,-0.2712520483195558
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Development and Implementation,0.752642371447913,0.1214839975270278,0.791745121560859,-0.2981007279774822
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Analytical Reasoning,0.8861394071638476,0.1118346572962192,0.6903243826436023,-0.34139903479386924
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Professional Content Generation,0.8850696122330037,0.1156306787177945,0.6121779325162785,-0.4334020294297374
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Content Categorization,0.7154914820132717,0.1187428513528571,0.6945885755588875,-0.46735862468988465
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Documentation,0.4318550282208165,0.2236949085528119,0.653327953693466,-0.6253828025613413
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Hypothetical Scenarios,0.8498506572743058,0.1111234309607691,0.6286925820489309,-0.6509480769590745
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Literary and Cultural Translation,1.460562515027182,0.0730297134415433,0.8181140019829172,-0.6558243006095283
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Quality and Optimization,0.7489208661159455,0.1381996735324387,0.5709553323096643,-0.7639118084374601
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,General Explanation,0.8679909906462402,0.1310422739818982,0.5674615745918864,-0.9294056322121163
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Skywork-Reward-V2-Qwen3-4B,Specific Character,0.848519256659852,0.0969168736414985,0.6621463092115911,-1.04534801918976
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Casual Conversation,0.8774348817969867,0.1057349440955345,0.9502557256902092,1.0171603400623461
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,PostQuality Assessment Rewriting,1.5468512670340766,0.2171669405081231,0.5524084618844555,0.8371703355303695
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Tone Adjustment,1.4822455372598298,0.2107230584691722,0.9268227523689058,0.7223319607610135
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Textual ExpansionReduction,1.3253332383978065,0.1339364844245143,0.7476065398427625,0.6822630234825692
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,General Character,1.025648253391287,0.1587779464004005,0.7505246063579265,0.6326387857012671
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Technical and Scientific Translation,1.6239867681542748,0.1289801721767071,0.9444498555943626,0.5467480689232395
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Data Management,0.7810911775809717,0.163860616010148,0.8096105252546093,0.5320385150588731
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Human Decision Making,1.1612952225186055,0.1328062561051046,0.7264295013588346,0.5122300233470384
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Quality and Compliance Assessment,1.406457675802783,0.102770811726905,0.8196676385902794,0.5091080829309939
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Specialized Summaries,0.9833238849718886,0.106888280256459,0.951266012585216,0.46598858286802464
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Critical Thinking,1.1714555387200467,0.1568287605928946,0.6824950714469327,0.4638264366101077
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Problem Solving,1.1454590105112747,0.1406869943627642,0.5398402723045107,0.4490193296152833
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Supportive Conversation,1.2444555790713472,0.1093009420616413,0.999999999998434,0.446783640905694
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Logical Deduction,1.356662486647031,0.1241061935363108,0.839618202015489,0.4363853514269057
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Paraphrasing,1.460756564891469,0.0716666706517163,0.9999999999981156,0.3317935533298043
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Personal Opinion and Advice,1.4808689782906026,0.0998862779845569,0.9999999999983966,0.2902319846282937
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Content Categorization,0.7893428195813827,0.1133244372592237,0.9112856094696205,0.2115608343433163
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Technical and Practical Support,1.251072316301583,0.1608538803855471,0.6229430540076174,0.1546773254554814
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Literary and Cultural Translation,1.623552978563132,0.0832253814611925,0.9058969823597288,0.1142498669621885
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Standard Summaries,0.8257796992720999,0.1291775014371237,0.7077440338259872,0.07345577468427959
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Idea Development,1.0301731608153442,0.1644507438468506,0.6000283240037276,0.07273202100191298
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Discussion,1.082012944910684,0.0969576125772004,0.7862643444509796,0.0412674162284314
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,ContextBased,0.8423538355728089,0.1108953775191163,0.7034011365852271,0.03147336887576819
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Hypothetical Scenarios,0.8780672506231865,0.1485633833965174,0.6793984880577721,0.006229296844960569
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,OptionBased,1.119215704223986,0.0867163103398042,0.7931712569140235,0.001661449408141863
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Specific Character,0.946617573746228,0.1514793625320651,0.7633867469543741,-0.009611047075267276
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Creative Writing,0.953997780656876,0.1515288253092291,0.6156113028549254,-0.03195739452254673
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Interpretative Analysis,1.051660205229966,0.1806275524774959,0.4700583290223086,-0.03714173479125438
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Development and Implementation,0.7740116109564075,0.1296895007475246,0.805985335126552,-0.05429219046707551
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Analytical Reasoning,0.8987349146989825,0.1243369773614857,0.6824742182325704,-0.20240898140311536
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Factual,0.8110954396234713,0.1179548158410955,0.5584021018612886,-0.26812749553612536
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Professional Content Generation,0.8820156998794579,0.1314251652183775,0.5948613132301117,-0.33403627720655804
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Documentation,0.4582591799910557,0.3695566805025546,0.6524445804993364,-0.3486022816648203
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,Quality and Optimization,0.8252702374967241,0.1595091094108073,0.5707101656373013,-0.37624484045901135
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,General Explanation,0.9436485249890708,0.1572579085477675,0.5331349765917943,-0.4062862848537718
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Skywork-Reward-V2-Qwen3-8B,GeneralExcerpt Language Translation,0.7726746955359263,0.0933130418280813,0.788055315589753,-0.5549821757958668
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Factual,0.9416079868858048,0.1603400852193817,0.6379293000922337,0.6513953968952314
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Content Categorization,0.8093675910823882,0.168763802369663,0.6468730484555733,0.46631102266761526
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Quality and Optimization,0.7317175366070724,0.1833804061548071,0.7541829824468377,0.36015256787408845
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Analytical Reasoning,1.1575929199266377,0.143069683364908,0.629593226551741,0.35912054770695157
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Standard Summaries,0.8380303803184799,0.1340513495793218,0.7479053174407019,0.2992247482528655
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Data Management,0.8350169393964343,0.2007707533803467,0.5842519283723815,0.23107303281592534
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,GeneralExcerpt Language Translation,1.0498366851856396,0.1037658040606502,0.739018037782758,0.19854245833545747
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Literary and Cultural Translation,1.4669191205269498,0.1020136884351753,0.8965238537485709,0.16484916329060406
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,General Explanation,1.0277107561476349,0.1256068014617931,0.6201989936351953,0.03907289453086753
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Tone Adjustment,1.3838778928403643,0.1057515436604392,0.9929190660446056,0.03877600480377974
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Specialized Summaries,0.8494497353914946,0.0997493135912917,0.8274415714186378,-0.025780785307837166
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Personal Opinion and Advice,1.2628475492676634,0.0775455462114711,0.9999999999983678,-0.11728026298265048
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Technical and Scientific Translation,1.508961691458336,0.1156905841733649,0.8373200514460938,-0.14658245211797713
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,OptionBased,0.9586850187891656,0.1351059103122827,0.5066352848462785,-0.24816273889579923
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,PostQuality Assessment Rewriting,0.7375260154852862,0.1101001302919599,0.8723964889311491,-0.2512766376321215
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Professional Content Generation,0.9169362518475904,0.1032060903330862,0.680336789233799,-0.2564423548004039
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Paraphrasing,0.9398035623820088,0.0940329571134118,0.999999999998257,-0.2629287787002866
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Interpretative Analysis,0.84311864941636,0.153095796490643,0.5962253029032601,-0.2629629701650161
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Specific Character,0.8699874142898881,0.0878782945788704,0.8619821477306013,-0.2682836269298803
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Hypothetical Scenarios,0.7849181590630132,0.1023072030800982,0.8171399862697155,-0.3169677640015636
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Logical Deduction,1.2526866112837514,0.1006441132071599,0.6961354480699827,-0.3408290865905343
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Discussion,0.6837247848731827,0.1191338579191458,0.7407863310207311,-0.37871241963852
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Technical and Practical Support,0.9257919721068544,0.1240268031455029,0.7763735325274445,-0.38539988444898676
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Human Decision Making,0.8926181216112726,0.128528196092158,0.6340858655353303,-0.3898189423734171
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Textual ExpansionReduction,0.9396488602811788,0.1055662182498866,0.7283945996382275,-0.39722575371988594
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Documentation,0.8124947835491615,0.1026943745507258,0.6067723387498607,-0.4320605957519672
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Supportive Conversation,0.8172326978892802,0.1115547713198793,0.999999999998134,-0.4588523843589903
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,ContextBased,0.7860920750046039,0.1204191313935927,0.5685225739873797,-0.4640523235792527
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Quality and Compliance Assessment,0.7807594025877655,0.10892963542249,0.7001729391254358,-0.5314629120004735
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Critical Thinking,0.907089918135954,0.1122361464154241,0.6389858423284748,-0.5466388282184459
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Creative Writing,0.905865301505856,0.1052209175686363,0.6435891701817216,-0.562298309934171
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,General Character,0.7630635622810857,0.1105743707631532,0.6530038031583636,-0.5660346474912002
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Development and Implementation,0.8024462970923232,0.1052329434492104,0.6080014846783945,-0.7043179111177054
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Idea Development,0.893663335805114,0.1154553506179936,0.6250625901577923,-0.732124599585063
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Casual Conversation,0.4431266175885666,0.1018908284910241,0.5264767321956837,-0.750283255583743
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Llama-3.1-8B-Base-RM-RB2,Problem Solving,0.7425804841241616,0.0918727430347383,0.4469639436733402,-0.8376811481141251
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Analytical Reasoning,1.0016756589190785,0.134522264804998,0.7732545061988612,0.5107327215381168
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Data Management,0.8544068755141104,0.1596767253911778,0.671351150439361,0.2731724015231372
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Textual ExpansionReduction,1.0036145837803605,0.1257008755776176,0.859270650652888,0.20996466374431316
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Quality and Optimization,0.8734671893841611,0.1555132984912299,0.7033536087648773,0.19649555195131702
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Literary and Cultural Translation,1.3706419836470258,0.0953091523323904,0.9399283429792744,0.15654269042774385
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Interpretative Analysis,0.9171864207052518,0.1148956969262454,0.7228728707881794,0.0026230159672193665
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Specialized Summaries,0.9065038733448828,0.0857569342844554,0.8755322170093405,-0.0062526677695971256
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,ContextBased,0.8048395424908155,0.0983104419251214,0.7710375864323746,-0.019607797118826098
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Paraphrasing,1.0020829114577434,0.0707787010954267,0.9999999999988568,-0.07268951505880289
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Casual Conversation,0.5171950605454831,0.1349924464313945,0.6629701887863859,-0.09189007084420155
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Professional Content Generation,0.9388916384253144,0.101803532815925,0.7151889171229802,-0.09859137007003249
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Documentation,0.8358164684194427,0.1473437176040796,0.7292598570017057,-0.1027126350704814
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Specific Character,0.9883840101296794,0.1015589520610724,0.8122444992910851,-0.13890745837590934
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Tone Adjustment,1.3749136600003866,0.1225572442321201,0.9066703964665944,-0.1428039781624113
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Creative Writing,0.9724027044640416,0.1059388590867329,0.6781535281236764,-0.18370283499938678
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Factual,0.7482805280920107,0.1255899495226486,0.6118718322113034,-0.1886156752619581
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Supportive Conversation,1.0511501294129526,0.1089447902644328,0.9999999999979146,-0.21873878396170587
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,PostQuality Assessment Rewriting,0.8883990365847773,0.1053237166260172,0.8449500349253455,-0.24718231092712795
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,GeneralExcerpt Language Translation,0.868292620726672,0.0878911134116379,0.8452120844573394,-0.2585035574651281
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Human Decision Making,0.9648271359494772,0.1015793507818272,0.704981741945537,-0.25918618175931385
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Logical Deduction,1.200917250507039,0.0974662761125138,0.7739775299794758,-0.28831830956300025
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Standard Summaries,0.7340794138977561,0.1039638600129565,0.7874120771602204,-0.3084410473326288
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Hypothetical Scenarios,0.8997349781280594,0.0939920789354243,0.7516110498831992,-0.3515307332991369
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Technical and Scientific Translation,1.1587207409033196,0.1078332060146691,0.9132981980847206,-0.3922835916001267
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Technical and Practical Support,0.9383680280184592,0.1020316397447393,0.8202101857684091,-0.39991433258578324
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Personal Opinion and Advice,1.2284884711397963,0.1161130803615698,0.9999999999980068,-0.4219920229105748
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,General Character,0.8216208085002594,0.0929572782882023,0.6847984506236575,-0.4650429618781321
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Content Categorization,0.7417187554746937,0.1064603982486341,0.7357120653297239,-0.46570276032004887
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Development and Implementation,0.8175930361398356,0.106263008209083,0.6638228221736965,-0.47738107460242424
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Discussion,0.6888452926388839,0.0928342178492414,0.7824318607828084,-0.5303001812754017
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,OptionBased,0.828033917158579,0.1083349970649227,0.614929297358813,-0.592962380064749
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Quality and Compliance Assessment,0.962639831792636,0.101250509533028,0.6524354888229986,-0.6260387055271734
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,General Explanation,0.8933778107525235,0.1169436406748593,0.6403862212330277,-0.649044934444107
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Problem Solving,0.8240262836599571,0.1070764362295308,0.3048972567919356,-0.6667069820780016
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Critical Thinking,0.8900392349959944,0.1139510003114948,0.5285768975652348,-0.8107898253678334
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Llama-3.1-8B-Instruct-RM-RB2,Idea Development,0.9012386798092844,0.1136037137004536,0.5940441002055789,-0.8595376907917396
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Technical and Scientific Translation,1.2310799967538455,0.170673788538888,0.9783210293403416,0.5552462852910693
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Documentation,0.7850099071615273,0.602339338475378,0.686707954356948,0.5189889213960842
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Hypothetical Scenarios,0.8585500019198653,0.1102398202512369,0.96883372158985,0.4175500215107063
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Quality and Compliance Assessment,1.0531341665727636,0.1261725684480266,0.7689671572209321,0.3711576538568533
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Textual ExpansionReduction,1.053324510910597,0.1255035837151133,0.8818142454261116,0.32555882934229663
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Logical Deduction,1.2845630234431968,0.1171893037254627,0.868085640222463,0.3072182366606935
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Specific Character,1.0964878886958276,0.0962723649105056,0.8824596912028406,0.3039298678016314
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,ContextBased,0.8583559253401133,0.1124760861812506,0.7518738124496033,0.2496420050965895
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Creative Writing,0.9959487559004506,0.1312468122594252,0.6948023941721986,0.21381802469019057
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Paraphrasing,1.0017299986319097,0.0820710805263577,0.9999999999991184,0.19310698120307282
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Tone Adjustment,1.5655191970998152,0.1071323046816682,0.9527624697393192,0.16394124600653032
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Personal Opinion and Advice,1.451722674480369,0.0846917685564285,0.9999999999983906,0.15815603197849978
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Idea Development,0.9843937349213694,0.1170008909929329,0.741641296274126,0.10756775807544944
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Development and Implementation,0.8047460024255393,0.1357028999208103,0.7773362165386198,0.10315157315333003
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Analytical Reasoning,0.9940900994251332,0.1449319154463744,0.6477576608080429,0.09946935373600346
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,OptionBased,1.0128856697257316,0.1022899053392908,0.8141620400924837,0.06463653409996159
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Content Categorization,0.7855942362868624,0.1193615183398311,0.8014825382797218,0.026061540634132654
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Data Management,0.7639600627422711,0.1889060098382023,0.5963459649017492,0.012986695254474379
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,PostQuality Assessment Rewriting,0.8383511086579832,0.1193376587431609,0.906992839799366,0.007205834771352282
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Critical Thinking,0.9010079859825356,0.1376886497249154,0.8057392671699706,0.0071075088956908394
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Professional Content Generation,0.8113669995886175,0.1276066465622217,0.7754708718774589,-0.022554012216135566
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Factual,0.8802492564329072,0.131569468174111,0.5396275796109394,-0.04903039473999603
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Casual Conversation,0.5776577429060981,0.1208172744614164,0.6637683583000447,-0.056417206577038435
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Specialized Summaries,0.6466258413145087,0.0924610499654926,0.989411539616328,-0.07546790986483831
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Discussion,0.768735466276746,0.1027118427626506,0.8405366154708328,-0.14040253982107548
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,General Explanation,0.9576410248509388,0.1325100092981395,0.6384963938519601,-0.16334802327643289
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,GeneralExcerpt Language Translation,0.9092005902291068,0.0921967622073666,0.7926005447268231,-0.20743109260076503
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Supportive Conversation,1.075520153598774,0.0965320561126314,0.9999999999978714,-0.2955175251762421
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Quality and Optimization,0.8175102697861522,0.1272313587826137,0.6761813549482668,-0.3218248417148691
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Technical and Practical Support,1.037380373435586,0.108722642352983,0.7268222508836089,-0.4265510941160173
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Standard Summaries,0.7334601817858593,0.1054620261922067,0.7251565685212054,-0.477878761264794
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Interpretative Analysis,0.8614891424305328,0.1032432071307337,0.5694232600984291,-0.5548246463147749
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Problem Solving,0.8835224325952632,0.1069678951453039,0.3274753494935611,-0.5636608095436703
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,General Character,0.855381328531222,0.1275313172179304,0.5802781988882708,-0.5698508306251822
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Literary and Cultural Translation,1.2191890740021163,0.089960212935178,0.8174315934678016,-0.6931151681170771
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Human Decision Making,0.8920389421226018,0.1016148444731034,0.6173379781226053,-0.7322410038231748
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Technical and Scientific Translation,1.250783218206179,0.1686933953627591,0.9707578330505569,0.5332163158562668
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Quality and Compliance Assessment,1.117134377507937,0.1233507615662574,0.7829789536044612,0.4405142658176751
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Hypothetical Scenarios,0.8657009652806777,0.1133852341691455,0.9590125152155508,0.4400027393055248
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Development and Implementation,0.8376029039251836,0.1439914781835645,0.7888156538783686,0.3987462375859784
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Documentation,0.7778220665461488,0.5409734272413046,0.6596395815844985,0.3571413513678481
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Textual ExpansionReduction,1.0614187591035162,0.1248579525965986,0.8744157475350123,0.3177385770407227
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Paraphrasing,1.0284203165262782,0.0813572650461909,0.999999999999122,0.23426638042053738
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,ContextBased,0.8415155818500936,0.1153715959573607,0.753776547832543,0.2314805857692253
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Creative Writing,1.0006349643226773,0.1267968923071408,0.696031605186873,0.1916230094063722
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Analytical Reasoning,1.009775986726195,0.1454194009166862,0.6594367913851913,0.1870640986315402
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Logical Deduction,1.296793412323177,0.1106885444639831,0.8483321780111346,0.17585201636050102
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Personal Opinion and Advice,1.4032308365173305,0.0838685816726898,0.9999999999984436,0.1757849595200226
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Tone Adjustment,1.5514282798311008,0.1067738099416542,0.9619176698968224,0.1728839818301662
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Professional Content Generation,0.8156522468727643,0.1431207732812137,0.7718295358600066,0.13776770451664208
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Specific Character,1.0588484424160023,0.0879815314516287,0.8710459527686891,0.12039052772632235
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Idea Development,0.992897319576437,0.1177128192672471,0.7300191791439403,0.09209933086532851
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,OptionBased,1.036852023048672,0.101272106572248,0.8051917564939626,0.07386763236037397
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Factual,0.860423044960916,0.1469520354429101,0.5512243263631779,0.0632184772204595
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Critical Thinking,0.920255364944226,0.1383957422808922,0.8065479220754159,0.055372548398068766
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Content Categorization,0.7883016366608202,0.121501577613362,0.7885078470070372,0.03557050957738889
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,PostQuality Assessment Rewriting,0.8389147569526387,0.1168055002376277,0.9131478331514888,-0.006259817159886494
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Casual Conversation,0.5754382288512414,0.1224796083079383,0.6775349048011915,-0.025821802917854905
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,GeneralExcerpt Language Translation,0.8945818890948318,0.1012311641732117,0.7933650156660107,-0.120254230492537
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Specialized Summaries,0.656079054777311,0.0918199861075436,0.9587531104876368,-0.12707487830194936
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,General Explanation,0.9553645942967446,0.1285050773873832,0.654683806129094,-0.1505825706136839
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Supportive Conversation,1.0899134271882414,0.0945554091439027,0.999999999997948,-0.23633052885630423
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Discussion,0.7652069217356203,0.1019359875987423,0.8037011708819273,-0.2662382715507845
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,General Character,0.8879223826963218,0.1268347033996985,0.6360302957213102,-0.296251864150087
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Quality and Optimization,0.8034642170014071,0.1294981108149282,0.6785634783799427,-0.320809382903049
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Data Management,0.7370249628027887,0.1583635594003856,0.5588425351549394,-0.3652694044462368
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Standard Summaries,0.7159361626811976,0.1079463967789196,0.7413013779418821,-0.44641378102060103
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Technical and Practical Support,1.0176165966809856,0.1067139575236926,0.7367182894670262,-0.45555043851208055
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Problem Solving,0.8903515989128594,0.1055937631926157,0.3123262697965801,-0.5871782690047872
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Literary and Cultural Translation,1.2279080193853191,0.0898953999535062,0.8316577449770487,-0.6154141407494431
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Interpretative Analysis,0.8567401319921228,0.1062075864663736,0.534086813590945,-0.638218112339318
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Llama-3.1-Tulu-3-8B-RL-RM-RB2,Human Decision Making,0.8750174098913553,0.1024870295265431,0.6326473221989733,-0.6962113470076667
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Technical and Scientific Translation,1.301980742465744,0.1782837769347535,0.929187718379731,0.5320878941321712
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,ContextBased,0.969541368553887,0.1237035136830104,0.6785789123175894,0.5102040259831165
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Specific Character,0.9997877312145188,0.1132129060190112,0.8976747050170937,0.32335278896594355
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Factual,0.8661299974422005,0.1591337760529748,0.5828595549740958,0.27750954901163394
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Analytical Reasoning,1.0102146609336184,0.1688760434662561,0.6114918548826928,0.26122808109281853
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,OptionBased,0.9478117095763936,0.1165628032630186,0.8445322761295179,0.20141499894492454
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Data Management,0.8441450831142878,0.2088277720191371,0.5413614008602433,0.17767790007050333
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Tone Adjustment,1.4478776072360144,0.1096892908973529,0.9980439502327854,0.17366990300940244
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Development and Implementation,0.8403721074204524,0.1537089781269418,0.6427501244732013,0.17091662405205688
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Paraphrasing,1.0309232985094343,0.0901350883587422,0.9999999999988172,0.16471909874596125
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,General Explanation,0.9906622128006212,0.1328722458423096,0.6735644059126314,0.15414137443712955
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Logical Deduction,1.5505896792257643,0.1043967014631201,0.7122826538984696,0.0667849107055184
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Specialized Summaries,0.6503627055872839,0.1111217124768834,0.9438960433613603,0.042525044367725584
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Personal Opinion and Advice,1.4867233383322185,0.0863746918560859,0.9999999999982582,0.020489361114539553
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Hypothetical Scenarios,0.8754778170146695,0.1064122672906183,0.8350798942889981,-0.003914721176942193
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Creative Writing,0.9346456721145008,0.1213422684248117,0.7097142543169116,-0.05375925730325068
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Critical Thinking,0.9275163957627446,0.16767758184327,0.6353476466389828,-0.0714729719920013
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,GeneralExcerpt Language Translation,0.901464551158019,0.1095434116408297,0.738593182871846,-0.10712068236809312
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Technical and Practical Support,1.1183870749489693,0.1240971347236921,0.7264755942175529,-0.11563073082206071
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,General Character,0.7954114188266752,0.1579096051244231,0.6571923255492076,-0.1484804974332264
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Quality and Optimization,0.8145918755737224,0.1884719004557629,0.5464962413910428,-0.19560731993191727
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Documentation,0.9620099393524548,0.2294768777170096,0.5155934534778222,-0.19717220949556544
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Textual ExpansionReduction,0.938848169393615,0.1093157532772469,0.7888981091606756,-0.24719507800129326
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Professional Content Generation,0.8069286296048963,0.1301367477972839,0.697272398622081,-0.2513483070947734
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Standard Summaries,0.796593301368223,0.1172632289111668,0.6796552471612891,-0.2640238407180655
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Literary and Cultural Translation,1.2108732936444575,0.0856762374495967,0.9221037991588013,-0.2656647451886849
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Discussion,0.7287143687079097,0.1028738126644859,0.7989440244969934,-0.31841484911644413
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Interpretative Analysis,0.8961082582030486,0.1357532646612932,0.5557633268358803,-0.35351170663572945
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Idea Development,0.9609330633883224,0.1175295159569088,0.6330568247676382,-0.4466395583958323
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Quality and Compliance Assessment,0.8734313058463182,0.1114393943479868,0.6450945454530432,-0.5579455377829613
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,PostQuality Assessment Rewriting,0.6980837266575873,0.1071377389796077,0.75720631805173,-0.5744472790615237
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Problem Solving,0.9841484537271188,0.103973374621578,0.1853792080605507,-0.6043085151584986
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Supportive Conversation,1.038680380143185,0.0688686194370233,0.9999999999977752,-0.6223261911282867
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Casual Conversation,0.4935856513235918,0.1000860284311143,0.451748969178059,-0.7700024235117099
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Human Decision Making,0.9333791699108336,0.1290805236934695,0.4817540594728534,-0.8939914227109864
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Content Categorization,0.6350216914633935,0.1169497085591918,0.5868790158722165,-1.013925016790794
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Quality and Compliance Assessment,0.7231271438117245,0.2015476234340492,0.7165324765421148,1.2792239151657796
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Hypothetical Scenarios,0.7065916268842541,0.2569566317788826,0.6658823337555217,0.8238633033242329
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Specialized Summaries,0.623234925733805,0.1787203254495397,0.9958163807417396,0.8224151800718799
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,GeneralExcerpt Language Translation,1.1549437944502214,0.1183469188685384,0.8005596803252051,0.802064846316648
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Literary and Cultural Translation,0.9668445973662708,0.2074743612924753,0.8174318347624127,0.7762539356778695
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,OptionBased,0.5075600398915058,0.2259959675984566,0.7475563481031156,0.6593094400617387
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Development and Implementation,0.8864527248787148,0.1463262652887187,0.7411735398963498,0.5545187004339152
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Paraphrasing,0.8514743437884181,0.119522213044767,0.9999999999993326,0.45180617236458365
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,General Character,0.8997866925301554,0.1121965799139517,0.8194748548202886,0.3472169462662902
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Interpretative Analysis,0.9264856731593496,0.0834278461547648,0.889848680511837,0.2804787976543055
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Casual Conversation,0.577970839969427,0.1301336082724983,0.7691670125663468,0.19256459711675247
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Standard Summaries,0.8617251224703972,0.1025332478648058,0.8054050806187636,0.1048251244332879
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Problem Solving,0.8264265864524977,0.12237190038606,0.8678141776408599,0.0929865843975366
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Professional Content Generation,0.9125152763184922,0.1109403790418658,0.7530829510907463,0.034657103475035256
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Content Categorization,0.7273609501876621,0.126641096681227,0.8085211701876448,-0.03877847918314567
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Factual,0.7331043355555702,0.1057736803129173,0.7071226593783912,-0.07708644891964606
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Technical and Scientific Translation,1.0765665418880492,0.1183770788141669,0.959935491284097,-0.22023355879829976
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Documentation,0.7320759591214515,0.0964057190922983,0.772143206929284,-0.24090943965882072
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,PostQuality Assessment Rewriting,0.6270127538795202,0.1372949276045968,0.7427118081833658,-0.3319814899440203
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Analytical Reasoning,0.7456893347097092,0.1386409652838371,0.6870774040140861,-0.35182937679404613
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Discussion,0.6601478187199816,0.0964262102667947,0.8398599637695155,-0.3530244396355792
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Specific Character,0.76120035586885,0.0937842918758289,0.8666001279891852,-0.4038525798511738
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Data Management,0.6524718617890882,0.1391039257852874,0.6331867771331751,-0.47851242861596177
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Critical Thinking,0.7547739733220556,0.0935560405751412,0.8621814090129075,-0.5301049658750254
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Supportive Conversation,0.6983335700849918,0.0776079835087196,0.9999999999985218,-0.6619932382372555
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Idea Development,0.8663819214173498,0.1028509045283619,0.6857989804820186,-0.6730497855845413
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Creative Writing,0.8471973650055765,0.1256459049654575,0.6117736282312864,-0.6932382445856011
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Quality and Optimization,0.570629590462846,0.1223663510796529,0.7342087228965546,-0.7005327433472863
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Textual ExpansionReduction,0.8493855754274205,0.0991690703859453,0.6812571358008815,-0.714800001686197
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Logical Deduction,0.6510204927839548,0.1127298900411968,0.7265126993203697,-0.7210402032107059
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,ContextBased,0.5735934489592918,0.1312444921968604,0.6471084084462119,-0.8028391133210299
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Technical and Practical Support,0.9415116110948496,0.1079271096872694,0.673999184418282,-0.8214167381831079
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Personal Opinion and Advice,0.9523153846517792,0.0873818163218236,0.9999999999979656,-0.9472946803951064
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Human Decision Making,0.6198509851656862,0.1068857476193437,0.5787078884198175,-1.3359423134669415
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,Tone Adjustment,1.0743312783935122,0.1082598002183219,0.5956320004767612,-1.7195083886266207
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,tulu-v2.5-13b-uf-rm,General Explanation,0.7084233011367221,0.0980306728340775,0.6559740779652701,-1.7233901439657668
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Supportive Conversation,1.0580142906931764,0.4613067101537768,0.9999999999992588,2.6727301795154927
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Development and Implementation,1.0965868048802965,0.2039441395387274,0.8517514800058785,2.593971790692093
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Textual ExpansionReduction,1.2014403592952936,0.2565788776929599,0.9180873182900132,2.5250005075607755
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Quality and Compliance Assessment,1.0323115944937913,0.406191948080057,0.8483779291894867,2.476984346766599
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,GeneralExcerpt Language Translation,0.9535116641509792,0.3197199692051468,0.8833843256285546,2.4417248039327037
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,OptionBased,1.077784458853439,0.2673539360655371,0.7420636256284479,2.1652999309325005
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Specialized Summaries,1.040246258396677,0.7556054202262005,0.6450287422722156,1.9674252493019038
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Problem Solving,0.8548750740509528,0.3334230414530251,0.6169733884969596,1.9075478628827978
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Data Management,1.0832330758258586,0.321056393541419,0.6351570758080431,1.7849877212795946
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Paraphrasing,0.7906626612837907,0.3328342871431665,0.9999999999988588,1.7716239292581861
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,ContextBased,1.2301434888071177,0.1671313443556633,0.6398640547178633,1.7135134537471386
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Personal Opinion and Advice,1.2933354402599535,0.248034836416902,0.9999999999987615,1.5069772333030471
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Discussion,1.2542069824427666,0.1958534718333121,0.8308479572576368,1.4780619602406444
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Interpretative Analysis,1.2630106545780333,0.1875245452819269,0.837417418188675,1.33878070582334
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Factual,1.1524619101852551,0.1595512630042885,0.6771734437848621,1.2917146047539325
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Casual Conversation,1.0586413433922055,0.1546538890475419,0.6334337392089532,1.2908469797836144
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Logical Deduction,0.879469576967812,0.1931567078791001,0.8847295001571721,1.1303358782584327
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Content Categorization,1.012196039427141,0.1622996717056171,0.6499396234128137,1.0384417719840693
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Standard Summaries,0.9224896401954694,0.2092333828326396,0.5588991095198625,1.024260167812934
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Critical Thinking,1.118580747560828,0.2222723992176989,0.752331993724826,1.023343078733887
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,General Character,1.0674228516618218,0.1937292603370136,0.7360669833522878,0.9025796381413788
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Quality and Optimization,0.9829161876612024,0.1798688122441294,0.7565028357501773,0.8929561471701047
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Analytical Reasoning,1.000089027815545,0.2303350303266219,0.5670669788147666,0.763899360448059
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,General Explanation,1.024831289836741,0.1756177200802295,0.6473224362962301,0.6809823355393295
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Idea Development,1.0191979739839008,0.1809455249939814,0.6987854232056832,0.6199903115049058
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Professional Content Generation,1.083713292914953,0.1550859863385842,0.6144959211939558,0.5617468811767343
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,PostQuality Assessment Rewriting,1.4886434243723274,0.1349830919773319,0.8433413373452088,0.545647118876645
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Hypothetical Scenarios,1.0452183522576102,0.1326135505218497,0.7866265590288443,0.541962160045719
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Documentation,1.3708055386637348,0.09914699006429,0.7395488752868898,0.5126673489922581
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Specific Character,1.1239517910034704,0.155517086318612,0.7442141598581469,0.27504105787907146
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Human Decision Making,1.2244519928069448,0.1095237098613475,0.6933564021064245,0.26360659140223786
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Creative Writing,1.090299654990766,0.1486677056117323,0.5527187302027916,0.20773111372834022
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Technical and Practical Support,1.1639982022563502,0.2065003997598009,0.563147867834581,0.14991466993694827
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Literary and Cultural Translation,1.2214506360001038,0.141061432506798,0.7993903682475855,-0.00645950719192413
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Tone Adjustment,1.4049392110432517,0.1896835151342177,0.7762958144281812,-0.05022901686903
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,QRM-Gemma-2-27B,Technical and Scientific Translation,1.070309695162802,0.1419086015347627,0.912566564267084,-0.18671221429637855
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Technical and Scientific Translation,1.4972086194173824,0.2805924697625375,0.937244980483901,1.7994584659291037
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Problem Solving,0.7957790845764939,0.2784937951669742,0.5215344614394039,1.7316328899787057
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Casual Conversation,0.5559473806815889,0.3529969601241031,0.652881265339337,1.658158143668556
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Quality and Compliance Assessment,1.160410091403137,0.1996275570561303,0.6036481374645412,1.3841360578067956
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Paraphrasing,1.229097584986027,0.1734495272039443,0.9999999999985174,1.2450619566869987
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Specific Character,0.6028668117070759,0.2872689998394734,0.9074621095638736,1.0376466356316982
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Documentation,0.8937034481045174,0.3956828532925313,0.9319434501428936,0.7596934455842171
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Quality and Optimization,1.216585736712564,0.1402276906578186,0.6853560294085569,0.7359709787801745
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Discussion,0.9383886205097434,0.2108057423257811,0.6703576850945029,0.7354646398503549
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Supportive Conversation,0.8490182756992916,0.2459962148354052,0.999999999998302,0.6861619208810372
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Specialized Summaries,0.6028668117070759,0.2209538674445278,0.6718968218288517,0.634391993921543
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Analytical Reasoning,0.9129180722387646,0.223817301578971,0.583330183088245,0.5660622108318465
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Development and Implementation,0.7803946180769805,0.2213621417678801,0.5581359477482809,0.4965926891947442
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,General Character,0.688406998882794,0.2574953426716289,0.6819884112567102,0.48643973098590393
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Interpretative Analysis,0.6714266333688083,0.1925727569674231,0.851233482668209,0.25781147834028284
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Professional Content Generation,0.8702756505720634,0.1537917823596056,0.7190108813245959,0.24437451598793616
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,GeneralExcerpt Language Translation,0.9743282663156608,0.1170158085605225,0.7584361972640569,0.22606602810084986
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Textual ExpansionReduction,0.5645013993991606,0.2069561077611138,0.5069116814928706,0.09313601428894532
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,ContextBased,0.7955237407341784,0.1482644577098932,0.6070726178601967,-0.014866089744981378
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Content Categorization,0.7965451161034407,0.1359665242822473,0.6524983584514079,-0.04856610121834698
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Critical Thinking,1.1899023051905862,0.1250278385141574,0.5252948764607511,-0.09310656958441647
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Hypothetical Scenarios,0.9726046953800306,0.1594874448608962,0.5147461308912371,-0.16072832564262707
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Human Decision Making,1.024886347094145,0.1464129333629323,0.5801126310297076,-0.1659945296061425
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,General Explanation,0.7936086619168116,0.1953090281315987,0.6590542554523027,-0.22410469783347015
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Logical Deduction,1.1420253347564155,0.1247951477289331,0.587221375422849,-0.2919826535114895
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,PostQuality Assessment Rewriting,1.2506741396616934,0.1411786094154609,0.5019254938113517,-0.3428772030645446
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Tone Adjustment,0.9442615288830016,0.1691098006765496,0.9102052089040656,-0.38436099789671296
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Standard Summaries,0.7713937476353563,0.1201144294638033,0.6404048755293354,-0.4179140489203649
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Creative Writing,0.778862555023087,0.200322672202339,0.5456997424360315,-0.4505850412540673
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Literary and Cultural Translation,0.8829790067272634,0.1044087202363293,0.906515965576966,-0.45096777161903295
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Personal Opinion and Advice,1.2253950992724512,0.1202160781937188,0.9999999999979192,-0.5225259602136931
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Idea Development,0.8348466924507771,0.1673369493479209,0.5996705736207854,-0.5745268016476403
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Factual,0.7044936609486754,0.1259588564267701,0.5217984608557004,-0.5849790293234804
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,OptionBased,0.8431453673260333,0.1257981855777651,0.4265346917730988,-0.73567743512746
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Data Management,0.7448379880345364,0.1362285123023459,0.4675995602059656,-0.7632869337207593
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,QRM-Llama3.1-8B-v2,Technical and Practical Support,0.8031840560036457,0.1664696650973898,0.5805016272026592,-0.9423808351002064
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,General Character,1.1003632129210004,0.0966089756296846,0.8640297590251651,0.7627981092017669
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Problem Solving,1.1480481809259802,0.1241422451913361,0.8033649120742735,0.49656455447057385
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Idea Development,1.0845550934689172,0.128028644998554,0.7222320339795153,0.46690830921913556
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Data Management,0.8982765122553442,0.123630983465517,0.7632513840550295,0.4235348311579831
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Human Decision Making,0.8472122783146949,0.1182678813365691,0.8937734984048297,0.41885263714991866
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Interpretative Analysis,1.1715261413058795,0.1445086840301396,0.5956506124818646,0.32502489287033237
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Quality and Optimization,0.8027997757722499,0.1625383247382933,0.7405216677037895,0.252835721703147
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Hypothetical Scenarios,1.0680278233405915,0.0964808146312572,0.7953377920582713,0.2029764178801896
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Creative Writing,1.015971870720835,0.1066886819016667,0.7133137803849705,0.11316864035888594
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Specific Character,1.0774592535395702,0.1214903695978094,0.7923719817900312,0.10781535947859203
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Critical Thinking,1.0215370413209317,0.1267902901326623,0.7642203283619519,0.08588364355138259
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Factual,0.8358642125978406,0.1010956421490847,0.6848455115158728,0.06652878868905726
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,ContextBased,0.8032055350987537,0.1093937311811226,0.7542996074484931,0.04311104769983056
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Documentation,0.8763659209811777,0.128991597681001,0.7922092611611505,0.033358185640867025
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Professional Content Generation,0.9603696475645612,0.1091100938350832,0.7080428946665804,0.01730570705798992
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Development and Implementation,0.867686630021572,0.091301978936427,0.823998124701742,-0.014348307694975155
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Casual Conversation,0.5226757425223805,0.0988289054971309,0.8603256267965749,-0.026051956427241618
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Technical and Practical Support,1.1639684614927068,0.1159927161114604,0.7164121949393317,-0.11482276961204185
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,GeneralExcerpt Language Translation,1.0374573218876584,0.094301980713526,0.6632008174544161,-0.12616821691510427
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,PostQuality Assessment Rewriting,1.197009806324106,0.121352326611557,0.6901433166744277,-0.17817366476566726
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Standard Summaries,0.8754240975038051,0.0871541157055839,0.7600538358719012,-0.20590700837688022
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Tone Adjustment,1.2175517845854005,0.1381173228357428,0.9054911782551872,-0.2501202395112587
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Literary and Cultural Translation,1.3706879934929874,0.0822609713195348,0.8949805684568981,-0.2557036416601737
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Specialized Summaries,0.8259247585266538,0.0900615291295563,0.7743143589161962,-0.2609537930062427
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Discussion,0.8605627498135642,0.1135229992024099,0.7017778968110922,-0.3274765009647645
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Paraphrasing,1.0927725445446976,0.0643214908035162,0.999999999998223,-0.3337025809251705
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Personal Opinion and Advice,1.050329459221137,0.1217736402137903,0.9999999999981491,-0.37709460391886884
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Quality and Compliance Assessment,0.8414772166144765,0.102353197282073,0.7587927953542235,-0.3880109505941261
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,General Explanation,0.9932683168604216,0.1257506170711326,0.5592909435796468,-0.38824139630868537
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Logical Deduction,0.9191537367894512,0.1179054508971483,0.7039699465149075,-0.40066134489736
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Content Categorization,0.7597661951686552,0.0920975642624658,0.8185121487003912,-0.4217877768360771
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,OptionBased,0.6965601082108266,0.1354130610081415,0.6172003084613639,-0.46335349729453656
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Supportive Conversation,0.9993291406215528,0.0919950588728362,0.9999999999973812,-0.7727184094700027
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Textual ExpansionReduction,0.9233201923128196,0.0872313432588755,0.54514820071845,-0.9836093018889592
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Analytical Reasoning,0.8460840694556356,0.110078419493263,0.5571066120668084,-0.9948347048187756
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,RM-Mistral-7B,Technical and Scientific Translation,1.0456351866876823,0.0977160530876218,0.8016020604315622,-1.0703755032182467
