rm_key,task,scale_task
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Analytical Reasoning,3.6398988281249998
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Casual Conversation,2.093593359375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Content Categorization,3.7180828124999996
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,ContextBased,2.9738871093749997
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Creative Writing,4.08294140625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Critical Thinking,3.7470398437499997
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Data Management,3.8295673828125
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Development and Implementation,3.4053468749999998
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Discussion,3.136046484375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Documentation,3.205543359375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Factual,3.83970234375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,General Character,4.0192359374999995
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,General Explanation,4.3551375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,GeneralExcerpt Language Translation,3.6804386718749997
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Human Decision Making,3.451678125
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Hypothetical Scenarios,3.379285546875
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Idea Development,4.080045703125
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Interpretative Analysis,4.511505468749999
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Literary and Cultural Translation,6.873313330078124
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Logical Deduction,3.42272109375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,OptionBased,2.8609546875
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Paraphrasing,4.5419103515625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Personal Opinion and Advice,4.207456640625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,PostQuality Assessment Rewriting,4.132168359375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Problem Solving,4.85898984375
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Professional Content Generation,4.329076171875
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Quality and Compliance Assessment,2.50767890625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Quality and Optimization,2.3744765625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Specialized Summaries,1.682403515625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Specific Character,2.3397281249999997
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Standard Summaries,3.99027890625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Supportive Conversation,3.848389453125
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Technical and Practical Support,4.219039453125
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Technical and Scientific Translation,3.419825390625
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Textual ExpansionReduction,4.24510078125
rb_CIR-AMS_BTRM_Qwen2_7b_0613/BTRM_Qwen2_7b_0613,Tone Adjustment,4.1423033203125
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Analytical Reasoning,2.0602927734375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Casual Conversation,1.213299609375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Content Categorization,1.33781484375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,ContextBased,1.9958633789062499
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Creative Writing,2.5351880859375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Critical Thinking,2.1934951171874997
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Data Management,2.0668081054687497
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Development and Implementation,2.800144921875
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Discussion,2.368323193359375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Documentation,1.421790234375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Factual,2.278918359375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,General Character,1.726924951171875
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,General Explanation,2.0168572265624998
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,GeneralExcerpt Language Translation,1.9850044921874999
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Human Decision Making,2.5482187499999998
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Hypothetical Scenarios,1.7012255859374998
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Idea Development,2.234396923828125
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Interpretative Analysis,1.297275
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Literary and Cultural Translation,3.9533586914062497
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Logical Deduction,1.97486953125
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,OptionBased,2.2514091796874998
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Paraphrasing,0.45462539062499996
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Personal Opinion and Advice,2.024096484375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,PostQuality Assessment Rewriting,2.481617578125
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Problem Solving,2.5359120117187497
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Professional Content Generation,2.17177734375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Quality and Compliance Assessment,1.0048089843749999
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Quality and Optimization,2.2557527343749997
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Specialized Summaries,2.46134765625
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Specific Character,3.0318011718749998
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Standard Summaries,2.128341796875
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Supportive Conversation,2.3969182617187497
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Technical and Practical Support,1.7721703125
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Technical and Scientific Translation,2.556905859375
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Textual ExpansionReduction,2.3107710937499997
rb_HFXM_RAMO-Llama3.1-8B/RAMO-Llama3.1-8B,Tone Adjustment,3.4647087890624997
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Analytical Reasoning,3.2880708984375
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Casual Conversation,3.286623046875
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Content Categorization,3.520089111328125
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,ContextBased,3.3011015625
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Creative Writing,3.2583899414062496
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Critical Thinking,3.4002793945312497
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Data Management,3.28589912109375
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Development and Implementation,3.482263989257812
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Discussion,3.2191169677734375
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Documentation,2.8294639160156247
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Factual,3.2345003906249996
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,General Character,3.078132421875
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,General Explanation,3.2113347656249998
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,GeneralExcerpt Language Translation,3.024742895507812
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Human Decision Making,3.0538809082031246
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Hypothetical Scenarios,3.29313837890625
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Idea Development,3.2678009765625
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Interpretative Analysis,3.0969544921875
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Literary and Cultural Translation,3.5548375488281247
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Logical Deduction,3.1740525878906247
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,OptionBased,3.0600342773437497
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Paraphrasing,3.77672080078125
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Personal Opinion and Advice,3.856714599609375
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,PostQuality Assessment Rewriting,3.2424635742187498
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Problem Solving,3.2540463867187497
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Professional Content Generation,3.23667216796875
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Quality and Compliance Assessment,3.709033740234375
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Quality and Optimization,3.308702783203125
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Specialized Summaries,2.7393351562499997
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Specific Character,3.5881381347656247
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Standard Summaries,2.818605029296875
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Supportive Conversation,2.42297958984375
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Technical and Practical Support,3.3401935546874997
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Technical and Scientific Translation,2.84140869140625
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Textual ExpansionReduction,3.2033715820312496
rb_LxzGordon_URM-LLaMa-3.1-8B/URM-LLaMa-3.1-8B,Tone Adjustment,2.7105591064453125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Analytical Reasoning,1.659599853515625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Casual Conversation,2.62133525390625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Content Categorization,1.18723828125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,ContextBased,2.3585501953124997
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Creative Writing,1.572728759765625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Critical Thinking,2.85588720703125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Data Management,1.3660479492187498
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Development and Implementation,1.7176948974609374
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Discussion,2.54170341796875
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Documentation,0.7514349609375
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Factual,2.5931021484374996
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,General Character,1.4775325195312499
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,General Explanation,2.209421484375
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,GeneralExcerpt Language Translation,1.1321294311523438
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Human Decision Making,2.46786298828125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Hypothetical Scenarios,1.3855939453125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Idea Development,2.1905994140625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Interpretative Analysis,1.6259373046874999
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Literary and Cultural Translation,0.4589689453125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Logical Deduction,1.49490673828125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,OptionBased,1.3530172851562499
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Paraphrasing,0.20631884765625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Personal Opinion and Advice,0.9092507812499999
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,PostQuality Assessment Rewriting,0.6442939453125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Problem Solving,3.55013203125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Professional Content Generation,2.21593681640625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Quality and Compliance Assessment,0.6965975830078125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Quality and Optimization,0.7774962890625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Specialized Summaries,1.172759765625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Specific Character,1.22560634765625
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Standard Summaries,1.746108984375
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Supportive Conversation,1.1466984375
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Technical and Practical Support,1.5897410156249998
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Technical and Scientific Translation,0.9716893798828125
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Textual ExpansionReduction,0.7735146972656249
rb_PKU-Alignment_beaver-7b-v2.0-reward/beaver-7b-v2.0-reward,Tone Adjustment,0.3359015625
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Analytical Reasoning,0.7065515625
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Casual Conversation,0.440146875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Content Categorization,0.38223281249999996
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,ContextBased,0.385128515625
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Creative Writing,0.5328093749999999
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Critical Thinking,0.5212265625
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Data Management,0.51543515625
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Development and Implementation,0.59651484375
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Discussion,0.43725117187499996
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Documentation,0.36485859374999996
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Factual,0.393815625
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,General Character,0.379337109375
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,General Explanation,0.42856406249999995
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,GeneralExcerpt Language Translation,0.35327578125
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Human Decision Making,0.46331249999999996
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Hypothetical Scenarios,0.46910390625
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Idea Development,0.52701796875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Interpretative Analysis,0.35979111328124996
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Literary and Cultural Translation,0.9382078125
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Logical Deduction,0.671803125
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,OptionBased,0.32431875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Paraphrasing,0.30115312499999997
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Personal Opinion and Advice,0.5328093749999999
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,PostQuality Assessment Rewriting,0.34748437499999996
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Problem Solving,0.5168830078125
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Professional Content Generation,0.5443921875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Quality and Compliance Assessment,0.37065
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Quality and Optimization,0.3590671875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Specialized Summaries,0.57334921875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Specific Character,0.4517296875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Standard Summaries,0.5675578124999999
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Supportive Conversation,0.06370546875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Technical and Practical Support,0.36485859374999996
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Technical and Scientific Translation,1.2062413330078123
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Textual ExpansionReduction,0.344588671875
rb_RLHFlow_ArmoRM-Llama3-8B-v0.1/ArmoRM-Llama3-8B-v0.1,Tone Adjustment,0.6833859375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Analytical Reasoning,5.9246085937499995
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Casual Conversation,3.0578624999999997
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Content Categorization,3.0462796874999998
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,ContextBased,4.2045609375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Creative Writing,4.4883398437499995
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Critical Thinking,5.3628421875
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Data Management,5.090646093749999
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Development and Implementation,3.7759968749999997
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Discussion,4.329076171875
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Documentation,4.071358593749999
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Factual,3.521175
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,General Character,3.8339109375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,General Explanation,4.8184499999999995
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,GeneralExcerpt Language Translation,3.9149906249999997
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Human Decision Making,4.9921921875
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Hypothetical Scenarios,5.5018359375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Idea Development,4.80107578125
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Interpretative Analysis,4.870572656249999
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Literary and Cultural Translation,10.459279687499999
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Logical Deduction,8.3280421875
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,OptionBased,6.83965078125
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Paraphrasing,2.942034375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Personal Opinion and Advice,4.8068671875
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,PostQuality Assessment Rewriting,5.073271875
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Problem Solving,4.656290625
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Professional Content Generation,3.6659601562499997
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Quality and Compliance Assessment,3.1852734375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Quality and Optimization,4.6678734375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Specialized Summaries,3.3590156249999996
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Specific Character,3.683334375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Standard Summaries,3.9381562499999996
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Supportive Conversation,2.45555625
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Technical and Practical Support,5.8724859375
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Technical and Scientific Translation,5.478670312499999
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Textual ExpansionReduction,5.26438828125
rb_Ray2333_GRM-Llama3-8B-rewardmodel-ft/GRM-Llama3-8B-rewardmodel-ft,Tone Adjustment,7.25663203125
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Analytical Reasoning,3.99027890625
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Casual Conversation,2.69517568359375
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Content Categorization,3.13821826171875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,ContextBased,4.614936364746094
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Creative Writing,5.713222265624999
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Critical Thinking,4.934278125
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Data Management,4.908216796875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Development and Implementation,4.30953017578125
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Discussion,5.4721549804687495
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Documentation,3.6219816650390624
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Factual,4.8170021484375
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,General Character,6.741196875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,General Explanation,5.707430859375
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,GeneralExcerpt Language Translation,5.029836328125
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Human Decision Making,6.0636023437499995
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Hypothetical Scenarios,3.6920214843749997
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Idea Development,5.8073326171875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Interpretative Analysis,6.5775896484375
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Literary and Cultural Translation,7.8951345703125
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Logical Deduction,4.26175107421875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,OptionBased,2.8833963867187498
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Paraphrasing,5.478670312499999
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Personal Opinion and Advice,4.5589226074218745
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,PostQuality Assessment Rewriting,4.53684287109375
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Problem Solving,7.328300683593749
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Professional Content Generation,5.4424740234375
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Quality and Compliance Assessment,3.6703037109374996
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Quality and Optimization,3.3459849609375
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Specialized Summaries,5.5090751953125
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Specific Character,4.559284570312499
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Standard Summaries,5.29841279296875
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Supportive Conversation,7.566472265624999
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Technical and Practical Support,6.483841259765625
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Technical and Scientific Translation,7.0307671874999995
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Textual ExpansionReduction,4.728683203125
rb_Ray2333_Gemma-2B-rewardmodel-baseline/Gemma-2B-rewardmodel-baseline,Tone Adjustment,4.8314806640625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Analytical Reasoning,15.448576171874999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Casual Conversation,14.55380390625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Content Categorization,16.725581249999998
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,ContextBased,12.251719921874999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Creative Writing,13.664823046875
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Critical Thinking,14.692797656249999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Data Management,12.069290624999999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Development and Implementation,15.5788828125
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Discussion,15.468846093749999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Documentation,10.772015625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Factual,14.962098046875
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,General Character,12.9611671875
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,General Explanation,14.915766796875
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,GeneralExcerpt Language Translation,10.452402392578124
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Human Decision Making,13.114639453125
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Hypothetical Scenarios,11.058690234375
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Idea Development,14.42639296875
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Interpretative Analysis,8.80076572265625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Literary and Cultural Translation,13.34991533203125
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Logical Deduction,8.423600390625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,OptionBased,13.563473437499999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Paraphrasing,8.511919335937499
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Personal Opinion and Advice,5.559749999999999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,PostQuality Assessment Rewriting,11.81084912109375
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Problem Solving,16.7719125
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Professional Content Generation,14.38006171875
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Quality and Compliance Assessment,15.2893125
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Quality and Optimization,14.779668749999999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Specialized Summaries,16.34913984375
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Specific Character,12.917731640625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Standard Summaries,14.04416015625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Supportive Conversation,5.186204296874999
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Technical and Practical Support,13.2507375
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Technical and Scientific Translation,11.327990625
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Textual ExpansionReduction,14.981282080078124
rb_Skywork_Skywork-Reward-Gemma-2-27B/Skywork-Reward-Gemma-2-27B,Tone Adjustment,18.1965984375
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Analytical Reasoning,13.389731249999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Casual Conversation,8.131134375
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Content Categorization,11.188996874999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,ContextBased,11.090542968749999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Creative Writing,8.802937499999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Critical Thinking,11.605978125
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Data Management,10.181292187499999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Development and Implementation,8.9882625
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Discussion,9.36470390625
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Documentation,12.787424999999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Factual,8.8956
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,General Character,10.346347265624999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,General Explanation,11.779720312499999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,GeneralExcerpt Language Translation,10.447696875
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Human Decision Making,17.162108496093747
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Hypothetical Scenarios,22.569110156249998
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Idea Development,13.187032031249998
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Interpretative Analysis,11.2237453125
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Literary and Cultural Translation,28.285228125
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Logical Deduction,21.50349140625
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,OptionBased,14.802834375
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Paraphrasing,18.509334374999998
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Personal Opinion and Advice,18.567248437499998
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,PostQuality Assessment Rewriting,7.737318749999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Problem Solving,12.1967015625
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Professional Content Generation,12.9611671875
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Quality and Compliance Assessment,9.023010937499999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Quality and Optimization,8.223796875
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Specialized Summaries,24.0763236328125
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Specific Character,9.17937890625
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Standard Summaries,10.772015625
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Supportive Conversation,15.011325
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Technical and Practical Support,14.9534109375
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Technical and Scientific Translation,17.660893359375
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Textual ExpansionReduction,12.185118749999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B/Skywork-Reward-Llama-3.1-8B,Tone Adjustment,22.108693359375
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Analytical Reasoning,13.8067125
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Casual Conversation,5.8478724609374995
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Content Categorization,7.986349218749999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,ContextBased,10.470862499999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Creative Writing,9.474740624999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Critical Thinking,11.7102234375
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Data Management,9.6253171875
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Development and Implementation,11.744971875
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Discussion,9.01649560546875
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Documentation,13.9804546875
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Factual,8.710275
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,General Character,9.483427734374999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,General Explanation,10.74885
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,GeneralExcerpt Language Translation,11.7102234375
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Human Decision Making,15.298361572265625
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Hypothetical Scenarios,17.397384374999998
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Idea Development,12.3588609375
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Interpretative Analysis,10.01334140625
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Literary and Cultural Translation,26.0034140625
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Logical Deduction,14.883914062499999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,OptionBased,16.8529921875
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Paraphrasing,12.51522890625
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Personal Opinion and Advice,15.14452734375
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,PostQuality Assessment Rewriting,10.3202859375
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Problem Solving,6.509540625
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Professional Content Generation,11.235328124999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Quality and Compliance Assessment,11.188996874999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Quality and Optimization,12.509437499999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Specialized Summaries,16.652826708984374
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Specific Character,9.85118203125
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Standard Summaries,11.015254687499999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Supportive Conversation,10.401365624999999
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Technical and Practical Support,13.7024671875
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Technical and Scientific Translation,16.068256640625
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Textual ExpansionReduction,11.5943953125
rb_Skywork_Skywork-Reward-Llama-3.1-8B-v0.2/Skywork-Reward-Llama-3.1-8B-v0.2,Tone Adjustment,18.49196015625
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Analytical Reasoning,7.05972421875
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Casual Conversation,6.018899926757812
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Content Categorization,7.7199445312499995
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,ContextBased,7.690987499999999
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Creative Writing,7.572987597656249
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Critical Thinking,10.690935937499999
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Data Management,5.53006904296875
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Development and Implementation,4.0728064453125
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Discussion,9.9974150390625
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Documentation,3.698355834960937
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Factual,7.4148098144531245
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,General Character,10.193598925781249
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,General Explanation,9.21991875
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,GeneralExcerpt Language Translation,9.9713537109375
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Human Decision Making,9.51528046875
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Hypothetical Scenarios,8.026889062499999
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Idea Development,8.431744555664062
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Interpretative Analysis,7.93060693359375
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Literary and Cultural Translation,16.7139984375
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Logical Deduction,8.991158203125
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,OptionBased,5.866694531249999
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Paraphrasing,8.9535140625
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Personal Opinion and Advice,11.791303124999999
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,PostQuality Assessment Rewriting,9.3690474609375
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Problem Solving,11.745695800781249
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Professional Content Generation,7.662030468749999
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Quality and Compliance Assessment,7.7489015624999995
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Quality and Optimization,4.192978125
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Specialized Summaries,8.886550927734374
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Specific Character,5.640829687499999
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Standard Summaries,9.59636015625
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Supportive Conversation,10.33186875
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Technical and Practical Support,10.04591806640625
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Technical and Scientific Translation,10.30363564453125
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Textual ExpansionReduction,11.79057919921875
rb_Skywork_Skywork-Reward-V2-Llama-3.2-3B/Skywork-Reward-V2-Llama-3.2-3B,Tone Adjustment,12.480480468749999
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Analytical Reasoning,5.9086822265625
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Casual Conversation,4.485806103515625
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Content Categorization,5.3816642578125
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,ContextBased,5.91809326171875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Creative Writing,5.4511611328125
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Critical Thinking,7.35436201171875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Data Management,5.02114921875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Development and Implementation,3.960688439941406
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Discussion,5.368633593749999
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Documentation,4.1017634765625
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Factual,5.0030510742187495
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,General Character,6.18739365234375
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,General Explanation,6.1461298828125
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,GeneralExcerpt Language Translation,6.256981018066406
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Human Decision Making,6.389006982421875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Hypothetical Scenarios,5.14276875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Idea Development,5.789958398437499
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Interpretative Analysis,5.81529580078125
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Literary and Cultural Translation,9.593464453125
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Logical Deduction,6.0925593749999996
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,OptionBased,4.24510078125
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Paraphrasing,8.284606640625
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Personal Opinion and Advice,9.956422741699217
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,PostQuality Assessment Rewriting,6.194451928710937
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Problem Solving,6.8034544921875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Professional Content Generation,5.607529101562499
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Quality and Compliance Assessment,5.4265476562499995
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Quality and Optimization,3.2779359374999997
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Specialized Summaries,4.6258857421875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Specific Character,2.554824572753906
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Standard Summaries,5.610062841796875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Supportive Conversation,7.523036718749999
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Technical and Practical Support,6.599307421874999
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Technical and Scientific Translation,8.255649609375
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Textual ExpansionReduction,6.2083875
rb_Skywork_Skywork-Reward-V2-Qwen3-1.7B/Skywork-Reward-V2-Qwen3-1.7B,Tone Adjustment,8.9173177734375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Analytical Reasoning,6.124412109374999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Casual Conversation,5.090646093749999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Content Categorization,5.2354312499999995
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,ContextBased,6.926521875
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Creative Writing,6.15626484375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Critical Thinking,7.8415640625
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Data Management,4.748953125
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Development and Implementation,4.83582421875
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Discussion,6.112829296875
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Documentation,1.4015203125
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Factual,5.736387890625
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,General Character,7.3087546875
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,General Explanation,7.074202734375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,GeneralExcerpt Language Translation,5.852216015624999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Human Decision Making,6.996018749999999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Hypothetical Scenarios,4.9574437499999995
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Idea Development,6.741196875
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Interpretative Analysis,6.495062109375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Literary and Cultural Translation,12.347278124999999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Logical Deduction,7.4477484375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,OptionBased,6.636951562499999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Paraphrasing,9.5094890625
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Personal Opinion and Advice,9.7643109375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,PostQuality Assessment Rewriting,8.409121875
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Problem Solving,7.575159374999999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Professional Content Generation,6.520399511718749
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Quality and Compliance Assessment,7.3203375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Quality and Optimization,3.75283125
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Specialized Summaries,8.4366310546875
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Specific Character,3.9265734374999997
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Standard Summaries,6.71223984375
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Supportive Conversation,7.795232812499999
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Technical and Practical Support,8.01530625
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Technical and Scientific Translation,9.956875195312499
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Textual ExpansionReduction,7.790889257812499
rb_Skywork_Skywork-Reward-V2-Qwen3-4B/Skywork-Reward-V2-Qwen3-4B,Tone Adjustment,8.9173177734375
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Analytical Reasoning,6.9945708984375
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Casual Conversation,5.0689283203125
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Content Categorization,6.5254669921875
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,ContextBased,7.056828515625
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Creative Writing,6.238430419921874
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Critical Thinking,8.2817109375
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Data Management,5.443921875
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Development and Implementation,5.287553906249999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Discussion,8.0616375
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Documentation,2.5564534057617188
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Factual,5.608434008789062
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,General Character,8.21800546875
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,General Explanation,7.560680859374999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,GeneralExcerpt Language Translation,7.80030029296875
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Human Decision Making,8.70448359375
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Hypothetical Scenarios,6.53849765625
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Idea Development,7.355085937499999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Interpretative Analysis,7.1480431640625
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Literary and Cultural Translation,13.9181970703125
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Logical Deduction,8.026889062499999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,OptionBased,6.474792187499999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Paraphrasing,10.650396093749999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Personal Opinion and Advice,9.23729296875
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,PostQuality Assessment Rewriting,9.0215630859375
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Problem Solving,9.31837265625
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Professional Content Generation,6.474792187499999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Quality and Compliance Assessment,8.674440673828125
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Quality and Optimization,3.5935675781249996
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Specialized Summaries,8.71679033203125
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Specific Character,5.01318603515625
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Standard Summaries,6.8686078125
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Supportive Conversation,7.6794046875
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Technical and Practical Support,9.894617578124999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Technical and Scientific Translation,11.284555078124999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Textual ExpansionReduction,8.495992968749999
rb_Skywork_Skywork-Reward-V2-Qwen3-8B/Skywork-Reward-V2-Qwen3-8B,Tone Adjustment,9.80123115234375
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Analytical Reasoning,6.0158232421874995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Casual Conversation,2.9416724121093747
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Content Categorization,4.300119140625
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,ContextBased,5.72697685546875
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Creative Writing,4.957081787109375
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Critical Thinking,4.9994314453125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Data Management,4.236413671875
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Development and Implementation,3.87807041015625
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Discussion,4.466984033203125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Documentation,3.1635556640624998
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Factual,5.3324373046875
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,General Character,3.3908683593749998
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,General Explanation,5.5206580078125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,GeneralExcerpt Language Translation,6.79331953125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Human Decision Making,4.616746179199218
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Hypothetical Scenarios,4.683799804687499
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Idea Development,4.8618855468749995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Interpretative Analysis,4.5983765624999995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Literary and Cultural Translation,8.759501953125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Logical Deduction,5.10874423828125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,OptionBased,5.43451083984375
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Paraphrasing,4.68524765625
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Personal Opinion and Advice,5.9651484375
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,PostQuality Assessment Rewriting,4.727235351562499
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Problem Solving,6.6427429687499995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Professional Content Generation,5.2832103515624995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Quality and Compliance Assessment,2.5047832031249997
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Quality and Optimization,3.27431630859375
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Specialized Summaries,6.2753506347656245
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Specific Character,3.547236328125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Standard Summaries,4.5013705078125
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Supportive Conversation,4.3783031249999995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Technical and Practical Support,4.8618855468749995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Technical and Scientific Translation,7.9544964843749995
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Textual ExpansionReduction,5.0949896484375
rb_allenai_Llama-3.1-8B-Base-RM-RB2/Llama-3.1-8B-Base-RM-RB2,Tone Adjustment,5.626351171875
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Analytical Reasoning,3.4647087890624997
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Casual Conversation,1.4572625976562499
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Content Categorization,2.36072197265625
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,ContextBased,2.9536171875
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Creative Writing,2.909819677734375
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Critical Thinking,2.5453230468749997
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Data Management,2.6365376953124997
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Development and Implementation,2.69227998046875
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Discussion,2.27095517578125
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Documentation,2.6625990234374997
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Factual,2.6186205322265623
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,General Character,2.468948876953125
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,General Explanation,3.10419375
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,GeneralExcerpt Language Translation,3.1874452148437498
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Human Decision Making,2.8877399414062497
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Hypothetical Scenarios,2.1340427124023438
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Idea Development,2.9706294433593747
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Interpretative Analysis,2.6697477905273437
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Literary and Cultural Translation,4.8474070312499995
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Logical Deduction,3.62324853515625
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,OptionBased,2.2292389526367185
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Paraphrasing,1.9082683593749998
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Personal Opinion and Advice,4.0583279296875
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,PostQuality Assessment Rewriting,3.2113347656249998
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Problem Solving,3.822328125
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Professional Content Generation,3.0853716796875
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Quality and Compliance Assessment,2.43601025390625
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Quality and Optimization,2.2702312499999997
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Specialized Summaries,3.0849192260742186
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Specific Character,2.008260607910156
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Standard Summaries,2.6249548828125
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Supportive Conversation,3.6239724609375
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Technical and Practical Support,3.1128808593749997
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Technical and Scientific Translation,3.618362036132812
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Textual ExpansionReduction,3.1389421874999996
rb_allenai_Llama-3.1-8B-Instruct-RM-RB2/Llama-3.1-8B-Instruct-RM-RB2,Tone Adjustment,3.8744507812499998
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Analytical Reasoning,3.9975181640625
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Casual Conversation,2.5062310546875
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Content Categorization,3.24391142578125
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,ContextBased,3.7412484375
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Creative Writing,3.3025494140625
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Critical Thinking,3.3242671875
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Data Management,2.875433203125
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Development and Implementation,2.580071484375
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Discussion,3.1056416015624997
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Documentation,1.358084765625
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Factual,3.72387421875
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,General Character,3.4603652343749998
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,General Explanation,3.7513833984374996
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,GeneralExcerpt Language Translation,3.162469775390625
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Human Decision Making,3.2701537353515624
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Hypothetical Scenarios,3.65510126953125
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Idea Development,3.7620613037109374
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Interpretative Analysis,3.1143287109375
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Literary and Cultural Translation,6.41108671875
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Logical Deduction,4.5129533203125
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,OptionBased,2.86674609375
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Paraphrasing,1.73525009765625
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Personal Opinion and Advice,4.717100390624999
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,PostQuality Assessment Rewriting,3.408242578125
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Problem Solving,4.0380580078124995
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Professional Content Generation,3.188169140625
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Quality and Compliance Assessment,2.8566111328124997
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Quality and Optimization,2.6582554687499997
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Specialized Summaries,2.6314702148437497
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Specific Character,3.28879482421875
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Standard Summaries,3.1295311523437497
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Supportive Conversation,4.5520453125
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Technical and Practical Support,3.70070859375
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Technical and Scientific Translation,3.9533586914062497
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Textual ExpansionReduction,3.35322421875
rb_allenai_Llama-3.1-Tulu-3-8B-DPO-RM-RB2/Llama-3.1-Tulu-3-8B-DPO-RM-RB2,Tone Adjustment,4.754382568359374
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Analytical Reasoning,3.8353587890624996
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Casual Conversation,2.221004296875
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Content Categorization,3.1403900390625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,ContextBased,3.77237724609375
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Creative Writing,3.2229175781249997
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Critical Thinking,3.36480703125
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Data Management,2.661151171875
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Development and Implementation,2.5612494140625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Discussion,3.028543505859375
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Documentation,1.3638761718749999
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Factual,3.7036042968749996
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,General Character,3.3952119140624997
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,General Explanation,3.7875796874999996
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,GeneralExcerpt Language Translation,3.40317509765625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Human Decision Making,3.0882673828125
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Hypothetical Scenarios,3.4878744140625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Idea Development,3.7021564453124998
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Interpretative Analysis,3.37277021484375
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Literary and Cultural Translation,6.222866015625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Logical Deduction,4.578106640625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,OptionBased,2.748022265625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Paraphrasing,1.7381458007812498
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Personal Opinion and Advice,4.3182172851562495
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,PostQuality Assessment Rewriting,3.2895187499999996
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Problem Solving,3.89254892578125
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Professional Content Generation,3.199751953125
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Quality and Compliance Assessment,2.551114453125
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Quality and Optimization,2.591654296875
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Specialized Summaries,2.7263044921875
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Specific Character,3.3358499999999998
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Standard Summaries,3.1128808593749997
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Supportive Conversation,4.3667203125
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Technical and Practical Support,3.7825122070312496
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Technical and Scientific Translation,3.8006103515625
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Textual ExpansionReduction,3.269248828125
rb_allenai_Llama-3.1-Tulu-3-8B-RL-RM-RB2/Llama-3.1-Tulu-3-8B-RL-RM-RB2,Tone Adjustment,4.560732421875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Analytical Reasoning,5.98179873046875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Casual Conversation,3.6876779296874997
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Content Categorization,5.24773798828125
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,ContextBased,6.77015390625
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Creative Writing,5.139873046875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Critical Thinking,5.6683388671875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Data Management,4.873468359375
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Development and Implementation,5.089560205078125
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Discussion,5.3165109374999995
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Documentation,3.8049539062499997
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Factual,6.385025390625
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,General Character,4.68524765625
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,General Explanation,7.21247255859375
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,GeneralExcerpt Language Translation,5.593050585937499
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Human Decision Making,6.738301171874999
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Hypothetical Scenarios,5.79140625
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Idea Development,6.10197041015625
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Interpretative Analysis,5.10178776397705
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Literary and Cultural Translation,8.24117109375
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Logical Deduction,8.41491328125
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,OptionBased,4.7084132812499995
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Paraphrasing,4.042401562499999
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Personal Opinion and Advice,8.68131796875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,PostQuality Assessment Rewriting,5.04431484375
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Problem Solving,7.282693359374999
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Professional Content Generation,5.9115779296875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Quality and Compliance Assessment,4.702621875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Quality and Optimization,4.52960361328125
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Specialized Summaries,4.535756982421875
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Specific Character,4.439112890625
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Standard Summaries,6.119797082519531
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Supportive Conversation,7.656239062499999
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Technical and Practical Support,6.873313330078124
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Technical and Scientific Translation,6.040029510498046
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Textual ExpansionReduction,5.5510628906249995
rb_allenai_Llama-3.1-Tulu-3-8B-SFT-RM-RB2/Llama-3.1-Tulu-3-8B-SFT-RM-RB2,Tone Adjustment,5.278866796875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Analytical Reasoning,11.0847515625
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Casual Conversation,8.200631249999999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Content Categorization,17.0383171875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,ContextBased,9.5442375
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Creative Writing,16.100109375
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Critical Thinking,12.567351562499999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Data Management,12.35306953125
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Development and Implementation,14.825999999999999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Discussion,8.5133671875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Documentation,18.9567205078125
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Factual,9.660065625
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,General Character,16.4823421875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,General Explanation,8.779771875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,GeneralExcerpt Language Translation,11.142665625
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Human Decision Making,8.8145203125
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Hypothetical Scenarios,11.25849375
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Idea Development,14.015203125
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Interpretative Analysis,9.428409375
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Literary and Cultural Translation,20.43208125
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Logical Deduction,10.470862499999999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,OptionBased,11.015254687499999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Paraphrasing,4.7142046875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Personal Opinion and Advice,14.872331249999998
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,PostQuality Assessment Rewriting,16.095403857421875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Problem Solving,11.953462499999999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Professional Content Generation,15.49201171875
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Quality and Compliance Assessment,9.845390625
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Quality and Optimization,9.138839062499999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Specialized Summaries,8.1543
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Specific Character,17.976525
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Standard Summaries,15.381974999999999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Supportive Conversation,7.9689749999999995
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Technical and Practical Support,13.945706249999999
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Technical and Scientific Translation,16.34913984375
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Textual ExpansionReduction,11.47277578125
rb_allenai_tulu-v2.5-13b-uf-rm/tulu-v2.5-13b-uf-rm,Tone Adjustment,13.04803828125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Analytical Reasoning,13.685092968749998
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Casual Conversation,13.6793015625
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Content Categorization,16.35493125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,ContextBased,16.595274609375
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Creative Writing,15.439889062499999
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Critical Thinking,16.007446875
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Data Management,13.5866390625
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Development and Implementation,14.944723828125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Discussion,15.590465624999998
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Documentation,19.5126955078125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Factual,15.92057578125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,General Character,15.4558154296875
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,General Explanation,14.536429687499998
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,GeneralExcerpt Language Translation,10.99788046875
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Human Decision Making,17.631936328125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Hypothetical Scenarios,12.34148671875
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Idea Development,13.5866390625
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Interpretative Analysis,14.891515283203125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Literary and Cultural Translation,14.385853124999999
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Logical Deduction,14.1773625
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,OptionBased,14.811521484375
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Paraphrasing,8.186152734375
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Personal Opinion and Advice,13.815399609375
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,PostQuality Assessment Rewriting,16.54025625
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Problem Solving,11.374321875
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Professional Content Generation,14.620767041015624
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Quality and Compliance Assessment,13.169657812499999
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Quality and Optimization,13.9109578125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Specialized Summaries,9.451575
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Specific Character,19.583640234375
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Standard Summaries,13.5403078125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Supportive Conversation,6.787528125
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Technical and Practical Support,14.38006171875
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Technical and Scientific Translation,16.575004687499998
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Textual ExpansionReduction,15.7164287109375
rb_nicolinho_QRM-Gemma-2-27B/QRM-Gemma-2-27B,Tone Adjustment,12.953927929687499
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Analytical Reasoning,1.1626248046874998
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Casual Conversation,0.7022080078124999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Content Categorization,0.7644656249999999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,ContextBased,0.7760484375
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Creative Writing,0.8397539062499999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Critical Thinking,1.0540359375
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Data Management,0.8368582031249999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Development and Implementation,0.63126328125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Discussion,0.7340607421875
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Documentation,0.628367578125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Factual,0.6544289062499999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,General Character,0.8093490234374999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,General Explanation,0.79342265625
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,GeneralExcerpt Language Translation,0.9237292968749999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Human Decision Making,0.93241640625
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Hypothetical Scenarios,1.201716796875
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Idea Development,0.7485392578125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Interpretative Analysis,0.6906251953125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Literary and Cultural Translation,0.8918765624999999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Logical Deduction,0.8223796874999999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,OptionBased,0.6602203125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Paraphrasing,0.88608515625
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Personal Opinion and Advice,1.1930296875
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,PostQuality Assessment Rewriting,1.027974609375
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Problem Solving,0.599410546875
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Professional Content Generation,0.8759501953125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Quality and Compliance Assessment,0.790526953125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Quality and Optimization,1.062723046875
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Specialized Summaries,0.38078496093749997
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Specific Character,0.6602203125
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Standard Summaries,0.7094472656249999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Supportive Conversation,0.680490234375
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Technical and Practical Support,0.9411035156249999
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Technical and Scientific Translation,0.949790625
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Textual ExpansionReduction,0.79342265625
rb_nicolinho_QRM-Llama3.1-8B-v2/QRM-Llama3.1-8B-v2,Tone Adjustment,0.8412017578125
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Analytical Reasoning,3.6286779785156247
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Casual Conversation,2.4309427734374998
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Content Categorization,3.5720307861328124
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,ContextBased,3.4531259765624998
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Creative Writing,4.40726015625
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Critical Thinking,4.0150733642578125
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Data Management,3.5986350585937497
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Development and Implementation,3.4885983398437497
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Discussion,3.338745703125
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Documentation,3.7676717285156247
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Factual,4.016340234375
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,General Character,5.388903515625
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,General Explanation,4.4839962890625
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,GeneralExcerpt Language Translation,4.68524765625
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Human Decision Making,3.766223876953125
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Hypothetical Scenarios,4.633305981445313
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Idea Development,4.5339471679687495
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Interpretative Analysis,4.9675787109375
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Literary and Cultural Translation,7.23346640625
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Logical Deduction,4.2537878906249995
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,OptionBased,2.86674609375
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Paraphrasing,4.38409453125
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Personal Opinion and Advice,4.41884296875
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,PostQuality Assessment Rewriting,3.975800390625
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Problem Solving,6.8468900390625
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Professional Content Generation,4.2031130859375
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Quality and Compliance Assessment,3.338745703125
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Quality and Optimization,2.997052734375
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Specialized Summaries,2.4164642578124997
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Specific Character,2.1153111328125
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Standard Summaries,4.276953515624999
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Supportive Conversation,5.858007421875
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Technical and Practical Support,4.524445642089844
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Technical and Scientific Translation,4.6751126953124995
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Textual ExpansionReduction,5.6741302734375
rb_weqweasdas_RM-Mistral-7B/RM-Mistral-7B,Tone Adjustment,3.3474328124999997
