{
  "models": {
    "Skywork-Reward-V2-Qwen3-8B": {
      "model_name": "Skywork-Reward-V2-Qwen3-8B",
      "model_path": "Skywork/Skywork-Reward-V2-Qwen3-8B",
      "files": {
        "semantic_benchmark_chat_hard_filtered.json": {
          "file_name": "semantic_benchmark_chat_hard_filtered.json",
          "processed_samples": 200,
          "global_mean": 3.6301410418725295,
          "global_std": 1.700467977522258,
          "mean_variance": 0.6744472649426814,
          "mean_rvariance": 0.18701824293761476,
          "sample_results": [
            {
              "id": "chat_hard_sample_121",
              "variance": 2.4074255170420673,
              "mean_reward": 0.9709732732122905,
              "min_reward": -0.09490978013457887,
              "max_reward": 2.3125157369074882,
              "num_responses": 6,
              "rvariance": 1.2120947939256335
            },
            {
              "id": "chat_hard_sample_77",
              "variance": 2.1097133538658723,
              "mean_reward": 1.5354187324969628,
              "min_reward": 0.19912692423697131,
              "max_reward": 2.33089303093071,
              "num_responses": 7,
              "rvariance": 0.7775117252441578
            },
            {
              "id": "chat_hard_sample_265",
              "variance": 1.3893234281555746,
              "mean_reward": -0.00893029738307646,
              "min_reward": -1.2848395681381959,
              "max_reward": 0.5482955106781872,
              "num_responses": 7,
              "rvariance": 0.4614089393964594
            },
            {
              "id": "chat_hard_sample_334",
              "variance": 0.9694022597249545,
              "mean_reward": 0.48550642276551237,
              "min_reward": -0.10409842714618982,
              "max_reward": 1.0077278612587344,
              "num_responses": 6,
              "rvariance": 0.18446582889584076
            },
            {
              "id": "chat_hard_sample_61",
              "variance": 0.8012500194124743,
              "mean_reward": 1.438281606945647,
              "min_reward": 1.044482449305178,
              "max_reward": 1.9449698564430504,
              "num_responses": 7,
              "rvariance": 0.1118214154170025
            },
            {
              "id": "chat_hard_sample_133",
              "variance": 0.6615825848359878,
              "mean_reward": 0.5973016280734454,
              "min_reward": 0.2910133943530807,
              "max_reward": 1.3936510357463938,
              "num_responses": 6,
              "rvariance": 0.13606562406038034
            },
            {
              "id": "chat_hard_sample_62",
              "variance": 5.3620349636255655,
              "mean_reward": 1.1168430445216142,
              "min_reward": -2.9479846772397766,
              "max_reward": 3.5621717304865763,
              "num_responses": 4,
              "rvariance": 6.853329429317643
            },
            {
              "id": "chat_hard_sample_84",
              "variance": 2.1846008270105015,
              "mean_reward": -0.9662998050690167,
              "min_reward": -1.728191786448424,
              "max_reward": 0.6034273927478528,
              "num_responses": 6,
              "rvariance": 1.020957724895351
            },
            {
              "id": "chat_hard_sample_343",
              "variance": 0.8389234721600792,
              "mean_reward": 0.35763108518726006,
              "min_reward": 0.015353984004752448,
              "max_reward": 1.209878095514175,
              "num_responses": 4,
              "rvariance": 0.2433888625845987
            },
            {
              "id": "chat_hard_sample_87",
              "variance": 0.7571445137567417,
              "mean_reward": 0.026380360418685585,
              "min_reward": -0.24192813232035396,
              "max_reward": 0.9342186851658467,
              "num_responses": 5,
              "rvariance": 0.20855865674027182
            },
            {
              "id": "chat_hard_sample_110",
              "variance": 1.1127451531060861,
              "mean_reward": -0.584533399467562,
              "min_reward": -0.9770198932492294,
              "max_reward": 1.6325558580482784,
              "num_responses": 7,
              "rvariance": 0.8208456252737772
            },
            {
              "id": "chat_hard_sample_150",
              "variance": 1.1559317940606566,
              "mean_reward": 0.34351994870514335,
              "min_reward": -0.3797578374945181,
              "max_reward": 1.0077278612587344,
              "num_responses": 7,
              "rvariance": 0.24544676621810804
            },
            {
              "id": "chat_hard_sample_65",
              "variance": 0.7240653845149423,
              "mean_reward": -0.02271326790049289,
              "min_reward": -0.8483788350866762,
              "max_reward": 0.27263610032985885,
              "num_responses": 7,
              "rvariance": 0.13589637868448215
            },
            {
              "id": "chat_hard_sample_46",
              "variance": 1.0034002536679147,
              "mean_reward": 2.682686945089529,
              "min_reward": 1.9082152683966067,
              "max_reward": 3.451907966347245,
              "num_responses": 7,
              "rvariance": 0.20113243002335837
            },
            {
              "id": "chat_hard_sample_114",
              "variance": 1.080584888565447,
              "mean_reward": -0.11328707415780073,
              "min_reward": -0.5910967187615698,
              "max_reward": 0.6769365688407404,
              "num_responses": 4,
              "rvariance": 0.27710330967288443
            },
            {
              "id": "chat_hard_sample_371",
              "variance": 1.6594696502969364,
              "mean_reward": 0.6631535983233239,
              "min_reward": -0.27868272036679775,
              "max_reward": 1.7611969162108316,
              "num_responses": 4,
              "rvariance": 0.6007071214183894
            },
            {
              "id": "chat_hard_sample_223",
              "variance": 1.0842603473700914,
              "mean_reward": 1.2025271779048863,
              "min_reward": 0.4931636286085215,
              "max_reward": 1.7611969162108316,
              "num_responses": 5,
              "rvariance": 0.2328410796110584
            },
            {
              "id": "chat_hard_sample_52",
              "variance": 0.6450430202150882,
              "mean_reward": -0.2760573926491946,
              "min_reward": -0.7748696589937887,
              "max_reward": 0.024542631016363392,
              "num_responses": 7,
              "rvariance": 0.0786830176528908
            },
            {
              "id": "chat_hard_sample_372",
              "variance": 1.9123871592915276,
              "mean_reward": -0.49633879645433193,
              "min_reward": -1.6914371984019803,
              "max_reward": 0.8239549210265155,
              "num_responses": 4,
              "rvariance": 0.8267437163881818
            },
            {
              "id": "chat_hard_sample_346",
              "variance": 0.5329415266734348,
              "mean_reward": 1.805302421866564,
              "min_reward": 1.3936510357463938,
              "max_reward": 2.03685632655916,
              "num_responses": 5,
              "rvariance": 0.0614119022924038
            },
            {
              "id": "chat_hard_sample_108",
              "variance": 2.98079709056659,
              "mean_reward": 1.43224335319516,
              "min_reward": -0.6278513068080136,
              "max_reward": 2.6065524412790384,
              "num_responses": 5,
              "rvariance": 2.0659444851472943
            },
            {
              "id": "chat_hard_sample_64",
              "variance": 1.0383171123120367,
              "mean_reward": 1.4518458001532633,
              "min_reward": 0.566672804701409,
              "max_reward": 1.9817244444894944,
              "num_responses": 6,
              "rvariance": 0.2339777116443703
            },
            {
              "id": "chat_hard_sample_233",
              "variance": 0.7112012786986871,
              "mean_reward": -0.22092551057952892,
              "min_reward": -0.8391901880750653,
              "max_reward": 0.18074963021374943,
              "num_responses": 7,
              "rvariance": 0.0995737172417058
            },
            {
              "id": "chat_hard_sample_47",
              "variance": 0.5981809204558725,
              "mean_reward": -0.5336676749390015,
              "min_reward": -0.8300015410634544,
              "max_reward": -0.030589251053302267,
              "num_responses": 4,
              "rvariance": 0.09263689444903045
            },
            {
              "id": "chat_hard_sample_26",
              "variance": 0.8912987601262616,
              "mean_reward": -0.16710629236866484,
              "min_reward": -0.7840583060053996,
              "max_reward": 0.43803174653885585,
              "num_responses": 7,
              "rvariance": 0.14894703512506982
            },
            {
              "id": "chat_hard_sample_379",
              "variance": 0.87292146610304,
              "mean_reward": -0.1408530151926336,
              "min_reward": -0.6002853657731807,
              "max_reward": 1.2650099775838406,
              "num_responses": 7,
              "rvariance": 0.3428873024889335
            },
            {
              "id": "chat_hard_sample_247",
              "variance": 0.6432052908127661,
              "mean_reward": -0.06274951559394057,
              "min_reward": -0.3338146024364634,
              "max_reward": 0.5299182166549653,
              "num_responses": 4,
              "rvariance": 0.12117992846069696
            },
            {
              "id": "chat_hard_sample_215",
              "variance": 0.5917488675477447,
              "mean_reward": 0.6506832916647091,
              "min_reward": 0.25425880630663694,
              "max_reward": 0.9893505672355124,
              "num_responses": 7,
              "rvariance": 0.0651188814632544
            },
            {
              "id": "chat_hard_sample_49",
              "variance": 0.6181662277061264,
              "mean_reward": -1.6286754576530307,
              "min_reward": -2.0239513621346514,
              "max_reward": -1.0091801577898678,
              "num_responses": 7,
              "rvariance": 0.0907992631816517
            },
            {
              "id": "chat_hard_sample_415",
              "variance": 0.9372419951843163,
              "mean_reward": 2.0946135363464284,
              "min_reward": 1.209878095514175,
              "max_reward": 2.643307029325482,
              "num_responses": 7,
              "rvariance": 0.19727271647346184
            },
            {
              "id": "chat_hard_sample_297",
              "variance": 0.46494553878751377,
              "mean_reward": -0.9783325571080309,
              "min_reward": -1.2343020095743358,
              "max_reward": -0.6094740127847916,
              "num_responses": 7,
              "rvariance": 0.0430943910191573
            },
            {
              "id": "chat_hard_sample_80",
              "variance": 2.703299950815939,
              "mean_reward": 2.6782238879696036,
              "min_reward": -0.3430032494480743,
              "max_reward": 3.7459446707187953,
              "num_responses": 5,
              "rvariance": 2.360035359081661
            },
            {
              "id": "chat_hard_sample_413",
              "variance": 0.32619696891218847,
              "mean_reward": -0.4808329546222385,
              "min_reward": -0.6737945418660682,
              "max_reward": -0.24192813232035396,
              "num_responses": 6,
              "rvariance": 0.020291639881591477
            },
            {
              "id": "chat_hard_sample_306",
              "variance": 0.3969495509015928,
              "mean_reward": -0.12772637660461794,
              "min_reward": -0.3338146024364634,
              "max_reward": 0.10724045412086188,
              "num_responses": 7,
              "rvariance": 0.027014548676463336
            },
            {
              "id": "chat_hard_sample_43",
              "variance": 0.7718463489753192,
              "mean_reward": -1.0104928216486693,
              "min_reward": -1.3078111856672234,
              "max_reward": -0.3430032494480743,
              "num_responses": 7,
              "rvariance": 0.12077112621082506
            },
            {
              "id": "chat_hard_sample_102",
              "variance": 1.1623638469687845,
              "mean_reward": 1.6218357698680654,
              "min_reward": -0.06734383909974603,
              "max_reward": 2.1103655026520474,
              "num_responses": 6,
              "rvariance": 0.5773525039957256
            },
            {
              "id": "chat_hard_sample_187",
              "variance": 0.7259031139172645,
              "mean_reward": -0.41038666086655456,
              "min_reward": -0.8575674820982871,
              "max_reward": -0.048966545076524155,
              "num_responses": 6,
              "rvariance": 0.10136438692584183
            },
            {
              "id": "chat_hard_sample_360",
              "variance": 1.6332820063138451,
              "mean_reward": -0.5168218220843813,
              "min_reward": -1.7994038007884088,
              "max_reward": 1.2466326835606187,
              "num_responses": 6,
              "rvariance": 0.8102050521827228
            },
            {
              "id": "chat_hard_sample_236",
              "variance": 0.516401962052535,
              "mean_reward": 0.05867189134520403,
              "min_reward": -0.26949407335518677,
              "max_reward": 0.3461452764227464,
              "num_responses": 7,
              "rvariance": 0.052629951191089194
            },
            {
              "id": "chat_hard_sample_259",
              "variance": 1.187173193900134,
              "mean_reward": 0.9289680297306405,
              "min_reward": 0.16237233619052754,
              "max_reward": 1.9449698564430504,
              "num_responses": 7,
              "rvariance": 0.3062407008017907
            },
            {
              "id": "chat_hard_sample_127",
              "variance": 0.25360665752046196,
              "mean_reward": 0.9930260260401568,
              "min_reward": 0.8055776270032935,
              "max_reward": 1.1179916253980655,
              "num_responses": 5,
              "rvariance": 0.013427943440090045
            },
            {
              "id": "chat_hard_sample_280",
              "variance": 0.905081730643678,
              "mean_reward": -1.0708753591535412,
              "min_reward": -1.459423861358804,
              "max_reward": -0.0581551920881351,
              "num_responses": 7,
              "rvariance": 0.20596913356557242
            },
            {
              "id": "chat_hard_sample_421",
              "variance": 0.2646330339343952,
              "mean_reward": -0.638352617678426,
              "min_reward": -0.875944776121509,
              "max_reward": -0.4624556605990166,
              "num_responses": 7,
              "rvariance": 0.014773742846791466
            },
            {
              "id": "chat_hard_sample_270",
              "variance": 0.9464306421959271,
              "mean_reward": 1.0261051552819562,
              "min_reward": 0.25425880630663694,
              "max_reward": 1.522292093908947,
              "num_responses": 6,
              "rvariance": 0.1895762638590848
            },
            {
              "id": "chat_hard_sample_276",
              "variance": 0.408894792016687,
              "mean_reward": -0.5420906013663114,
              "min_reward": -0.7932469530170105,
              "max_reward": -0.08572113312296792,
              "num_responses": 6,
              "rvariance": 0.04856672199121547
            },
            {
              "id": "chat_hard_sample_349",
              "variance": 1.2184145937396111,
              "mean_reward": 0.4445950658328636,
              "min_reward": -0.06734383909974603,
              "max_reward": 1.3385191536767282,
              "num_responses": 7,
              "rvariance": 0.275045944503877
            },
            {
              "id": "chat_hard_sample_267",
              "variance": 0.16539564620899705,
              "mean_reward": 0.04554525275718841,
              "min_reward": -0.06734383909974603,
              "max_reward": 0.20831557124858224,
              "num_responses": 7,
              "rvariance": 0.006361635011704467
            },
            {
              "id": "chat_hard_sample_14",
              "variance": 0.8545441720798178,
              "mean_reward": 0.6493706278059076,
              "min_reward": 0.2910133943530807,
              "max_reward": 1.7611969162108316,
              "num_responses": 6,
              "rvariance": 0.25698053225910095
            },
            {
              "id": "chat_hard_sample_13",
              "variance": 0.4208400331317813,
              "mean_reward": -0.19467223340349765,
              "min_reward": -0.39813513151774,
              "max_reward": 0.19912692423697131,
              "num_responses": 7,
              "rvariance": 0.03824562587209978
            },
            {
              "id": "chat_hard_sample_147",
              "variance": 0.44656824476429186,
              "mean_reward": 0.4141412643086674,
              "min_reward": 0.1348063951556947,
              "max_reward": 0.6953138628639622,
              "num_responses": 5,
              "rvariance": 0.039567853456764335
            },
            {
              "id": "chat_hard_sample_173",
              "variance": 0.35835723345282666,
              "mean_reward": 1.6019270346762422,
              "min_reward": 1.1547462134445095,
              "max_reward": 1.7428196221876098,
              "num_responses": 6,
              "rvariance": 0.04330384174453362
            },
            {
              "id": "chat_hard_sample_345",
              "variance": 0.8453555250682067,
              "mean_reward": -0.01221195703008035,
              "min_reward": -0.3430032494480743,
              "max_reward": 0.8423322150497373,
              "num_responses": 6,
              "rvariance": 0.1805421218313583
            },
            {
              "id": "chat_hard_sample_412",
              "variance": 0.22052752827866265,
              "mean_reward": -0.26949407335518677,
              "min_reward": -0.3797578374945181,
              "max_reward": -0.10409842714618982,
              "num_responses": 7,
              "rvariance": 0.008684355487267204
            },
            {
              "id": "chat_hard_sample_21",
              "variance": 0.28484805735993923,
              "mean_reward": 0.7473828625964244,
              "min_reward": 0.6218046867710747,
              "max_reward": 0.9709732732122905,
              "num_responses": 6,
              "rvariance": 0.015919978215007274
            },
            {
              "id": "chat_hard_sample_357",
              "variance": 0.46770213289099716,
              "mean_reward": -1.3173279986435347,
              "min_reward": -1.6133336988032871,
              "max_reward": -0.6921718358892902,
              "num_responses": 7,
              "rvariance": 0.07404080748877695
            },
            {
              "id": "chat_hard_sample_184",
              "variance": 0.5559131442024621,
              "mean_reward": -0.1163499564950044,
              "min_reward": -0.42570107255257283,
              "max_reward": 0.41965445251563394,
              "num_responses": 6,
              "rvariance": 0.07296734858946764
            },
            {
              "id": "chat_hard_sample_348",
              "variance": 1.286410581625532,
              "mean_reward": 0.9280929204914394,
              "min_reward": -0.09490978013457887,
              "max_reward": 2.3860249130003757,
              "num_responses": 6,
              "rvariance": 0.5376205725077411
            },
            {
              "id": "chat_hard_sample_283",
              "variance": 0.9579164509604408,
              "mean_reward": -0.5872681158400652,
              "min_reward": -1.1470098629640317,
              "max_reward": 0.27263610032985885,
              "num_responses": 6,
              "rvariance": 0.20362292407184743
            },
            {
              "id": "chat_hard_sample_0",
              "variance": 1.0217775476911373,
              "mean_reward": -0.3771325097769149,
              "min_reward": -1.059717716353728,
              "max_reward": 0.9158413911426249,
              "num_responses": 7,
              "rvariance": 0.330818805299882
            },
            {
              "id": "chat_hard_sample_258",
              "variance": 0.6027752439616779,
              "mean_reward": -0.19861022497990238,
              "min_reward": -0.7105491299125121,
              "max_reward": 0.18993827722536036,
              "num_responses": 7,
              "rvariance": 0.07513690582892338
            },
            {
              "id": "chat_hard_sample_161",
              "variance": 1.285491716924371,
              "mean_reward": -0.368731461080585,
              "min_reward": -1.0643120398595334,
              "max_reward": 0.23588151228341508,
              "num_responses": 5,
              "rvariance": 0.3387262900500482
            },
            {
              "id": "chat_hard_sample_4",
              "variance": 0.496186938626991,
              "mean_reward": -0.07259449453495229,
              "min_reward": -0.42570107255257283,
              "max_reward": 0.3461452764227464,
              "num_responses": 7,
              "rvariance": 0.0503658156533374
            },
            {
              "id": "chat_hard_sample_238",
              "variance": 0.4824039681095745,
              "mean_reward": 0.3538024822657555,
              "min_reward": -0.06734383909974603,
              "max_reward": 0.6585592748175184,
              "num_responses": 6,
              "rvariance": 0.05044531694557917
            },
            {
              "id": "chat_hard_sample_8",
              "variance": 0.5200774208571793,
              "mean_reward": -0.1500416622042445,
              "min_reward": -0.7289264239357339,
              "max_reward": 0.17156098320213847,
              "num_responses": 7,
              "rvariance": 0.07053626512435918
            },
            {
              "id": "chat_hard_sample_32",
              "variance": 0.5807224911338116,
              "mean_reward": 0.8055776270032936,
              "min_reward": 0.3461452764227464,
              "max_reward": 1.1363689194212876,
              "num_responses": 7,
              "rvariance": 0.06619408738072559
            },
            {
              "id": "chat_hard_sample_109",
              "variance": 0.6689335024452765,
              "mean_reward": 0.7845750052624686,
              "min_reward": 0.38289986446919017,
              "max_reward": 1.1179916253980655,
              "num_responses": 7,
              "rvariance": 0.09074462249631746
            },
            {
              "id": "chat_hard_sample_188",
              "variance": 0.6009375145593555,
              "mean_reward": -0.29968534210762277,
              "min_reward": -1.0045858342840621,
              "max_reward": 0.0704858660744181,
              "num_responses": 7,
              "rvariance": 0.10140708117790663
            },
            {
              "id": "chat_hard_sample_53",
              "variance": 0.40430046851088147,
              "mean_reward": -0.472956971469429,
              "min_reward": -0.6829831888776792,
              "max_reward": -0.2511167793319649,
              "num_responses": 7,
              "rvariance": 0.031249895062912304
            },
            {
              "id": "chat_hard_sample_356",
              "variance": 0.48056623870725246,
              "mean_reward": -0.8733194484039059,
              "min_reward": -1.2251133625627249,
              "max_reward": -0.38894648450612906,
              "num_responses": 7,
              "rvariance": 0.057870718495558075
            },
            {
              "id": "chat_hard_sample_63",
              "variance": 0.8159518546310518,
              "mean_reward": 0.753071072651231,
              "min_reward": 0.2175042182601932,
              "max_reward": 1.2650099775838406,
              "num_responses": 7,
              "rvariance": 0.12364868050918541
            },
            {
              "id": "chat_hard_sample_159",
              "variance": 0.5761281676280062,
              "mean_reward": 0.045217086792488016,
              "min_reward": -0.23273948530874303,
              "max_reward": 0.5115409226317433,
              "num_responses": 4,
              "rvariance": 0.08233628391274403
            },
            {
              "id": "chat_hard_sample_152",
              "variance": 0.2664707633367174,
              "mean_reward": 0.18687539488815674,
              "min_reward": 0.04291992503958528,
              "max_reward": 0.3461452764227464,
              "num_responses": 6,
              "rvariance": 0.012176860178597195
            },
            {
              "id": "chat_hard_sample_318",
              "variance": 1.4575491322167857,
              "mean_reward": -0.9084879009542979,
              "min_reward": -1.5857677577684544,
              "max_reward": 0.07967451308602905,
              "num_responses": 6,
              "rvariance": 0.4031422849611564
            },
            {
              "id": "chat_hard_sample_176",
              "variance": 0.15436926979506405,
              "mean_reward": -1.2585862909621646,
              "min_reward": -1.3445657737136671,
              "max_reward": -1.1240382454350044,
              "num_responses": 7,
              "rvariance": 0.004979719713929018
            },
            {
              "id": "chat_hard_sample_274",
              "variance": 0.39970614500507595,
              "mean_reward": -0.5574050130523297,
              "min_reward": -0.8667561291098981,
              "max_reward": -0.38894648450612906,
              "num_responses": 6,
              "rvariance": 0.03294694371897792
            },
            {
              "id": "chat_hard_sample_389",
              "variance": 0.3473308570388937,
              "mean_reward": -0.5622181138679354,
              "min_reward": -0.8116242470402324,
              "max_reward": -0.26030542634357584,
              "num_responses": 7,
              "rvariance": 0.02615989781898625
            },
            {
              "id": "chat_hard_sample_42",
              "variance": 0.6064507027663223,
              "mean_reward": -0.12510104888701482,
              "min_reward": -0.43488971956418376,
              "max_reward": 0.3093906883763026,
              "num_responses": 7,
              "rvariance": 0.06773108045505939
            },
            {
              "id": "chat_hard_sample_5",
              "variance": 0.9611324774145046,
              "mean_reward": 0.41177846936282453,
              "min_reward": -0.5451534837035151,
              "max_reward": 0.8790868030961811,
              "num_responses": 7,
              "rvariance": 0.20848311663222385
            },
            {
              "id": "chat_hard_sample_17",
              "variance": 2.1179831361763224,
              "mean_reward": 1.4349999472986432,
              "min_reward": -0.012211957030080382,
              "max_reward": 2.459534089093263,
              "num_responses": 6,
              "rvariance": 0.8731244976096025
            },
            {
              "id": "chat_hard_sample_182",
              "variance": 1.1063131001979576,
              "mean_reward": 0.9342186851658469,
              "min_reward": 0.033731278027974335,
              "max_reward": 1.559046681955391,
              "num_responses": 4,
              "rvariance": 0.3118889780413269
            },
            {
              "id": "chat_hard_sample_140",
              "variance": 0.593586596950067,
              "mean_reward": 0.28550020614611415,
              "min_reward": -0.2878713673784087,
              "max_reward": 0.566672804701409,
              "num_responses": 5,
              "rvariance": 0.09225294341285203
            },
            {
              "id": "chat_hard_sample_251",
              "variance": 0.2618764398309119,
              "mean_reward": -0.6554172478428464,
              "min_reward": -0.7932469530170105,
              "max_reward": -0.47164430761062753,
              "num_responses": 6,
              "rvariance": 0.013959297338792469
            },
            {
              "id": "chat_hard_sample_98",
              "variance": 0.6579071260313435,
              "mean_reward": -0.3101866529780352,
              "min_reward": -0.6737945418660682,
              "max_reward": 0.033731278027974335,
              "num_responses": 7,
              "rvariance": 0.07533333767923057
            },
            {
              "id": "chat_hard_sample_409",
              "variance": 0.4755124828508661,
              "mean_reward": -0.9931000255195487,
              "min_reward": -1.266462274114974,
              "max_reward": -0.41651242554096185,
              "num_responses": 6,
              "rvariance": 0.0726091021733889
            },
            {
              "id": "chat_hard_sample_248",
              "variance": 0.11393922294397574,
              "mean_reward": 0.28445007505907294,
              "min_reward": 0.18993827722536036,
              "max_reward": 0.3645225704459683,
              "num_responses": 7,
              "rvariance": 0.0028155231877370276
            },
            {
              "id": "chat_hard_sample_382",
              "variance": 0.35651950405050464,
              "mean_reward": -0.19729756112110078,
              "min_reward": -0.4992102486454604,
              "max_reward": 0.033731278027974335,
              "num_responses": 7,
              "rvariance": 0.026449376335228498
            },
            {
              "id": "chat_hard_sample_254",
              "variance": 0.27565941034832825,
              "mean_reward": -0.47317574877922935,
              "min_reward": -0.6094740127847916,
              "max_reward": -0.27868272036679775,
              "num_responses": 6,
              "rvariance": 0.013661442708075624
            },
            {
              "id": "chat_hard_sample_2",
              "variance": 0.3822477156830152,
              "mean_reward": -0.6186626597964027,
              "min_reward": -0.8391901880750653,
              "max_reward": -0.43488971956418376,
              "num_responses": 7,
              "rvariance": 0.0237854847512374
            },
            {
              "id": "chat_hard_sample_203",
              "variance": 0.6321789143988328,
              "mean_reward": -0.07653248611135698,
              "min_reward": -0.8575674820982871,
              "max_reward": 0.25425880630663694,
              "num_responses": 7,
              "rvariance": 0.11632211711000684
            },
            {
              "id": "chat_hard_sample_68",
              "variance": 0.2425802811065289,
              "mean_reward": -0.5396402954965486,
              "min_reward": -0.6646058948544573,
              "max_reward": -0.3338146024364634,
              "num_responses": 5,
              "rvariance": 0.013292853465843668
            },
            {
              "id": "chat_hard_sample_220",
              "variance": 0.1580447285997083,
              "mean_reward": -0.7722443312761855,
              "min_reward": -0.875944776121509,
              "max_reward": -0.6737945418660682,
              "num_responses": 7,
              "rvariance": 0.004576517494877322
            },
            {
              "id": "chat_hard_sample_11",
              "variance": 0.2912801102680669,
              "mean_reward": -0.4004322932706427,
              "min_reward": -0.6002853657731807,
              "max_reward": -0.19598489726229923,
              "num_responses": 4,
              "rvariance": 0.020532620695025768
            },
            {
              "id": "chat_hard_sample_6",
              "variance": 0.8637328190914286,
              "mean_reward": -0.4532670135874057,
              "min_reward": -1.206736068539503,
              "max_reward": 0.25425880630663694,
              "num_responses": 6,
              "rvariance": 0.18650859569390663
            },
            {
              "id": "chat_hard_sample_286",
              "variance": 0.35146574819411835,
              "mean_reward": -1.2159247155511137,
              "min_reward": -1.3859146852659163,
              "max_reward": -1.0045858342840621,
              "num_responses": 6,
              "rvariance": 0.022071731729733857
            },
            {
              "id": "chat_hard_sample_400",
              "variance": 0.5035378562362798,
              "mean_reward": 0.4669103514324902,
              "min_reward": 0.18074963021374943,
              "max_reward": 0.8055776270032935,
              "num_responses": 7,
              "rvariance": 0.04543434235878207
            },
            {
              "id": "chat_hard_sample_209",
              "variance": 1.1449054176467235,
              "mean_reward": -0.20386088041510858,
              "min_reward": -0.9402653052027856,
              "max_reward": 0.3093906883763026,
              "num_responses": 7,
              "rvariance": 0.23272694236751143
            },
            {
              "id": "chat_hard_sample_337",
              "variance": 0.690986255273143,
              "mean_reward": 1.6430571689186908,
              "min_reward": 1.2282553895373969,
              "max_reward": 2.183874678744935,
              "num_responses": 7,
              "rvariance": 0.09445270444246805
            },
            {
              "id": "chat_hard_sample_268",
              "variance": 0.4640266740863526,
              "mean_reward": 0.6815308923465458,
              "min_reward": 0.006165336993141505,
              "max_reward": 0.897464097119403,
              "num_responses": 6,
              "rvariance": 0.09275474637968807
            },
            {
              "id": "chat_hard_sample_249",
              "variance": 0.47964737400609136,
              "mean_reward": 0.2739487641886604,
              "min_reward": 0.033731278027974335,
              "max_reward": 0.6953138628639622,
              "num_responses": 7,
              "rvariance": 0.04844974356963875
            },
            {
              "id": "chat_hard_sample_295",
              "variance": 0.5182396914548573,
              "mean_reward": 0.6218046867710747,
              "min_reward": 0.18074963021374943,
              "max_reward": 0.9525959791890687,
              "num_responses": 7,
              "rvariance": 0.054614946731035986
            },
            {
              "id": "chat_hard_sample_153",
              "variance": 0.9096760541494833,
              "mean_reward": 2.9771612040806796,
              "min_reward": 1.7795742102340535,
              "max_reward": 3.59892631853302,
              "num_responses": 6,
              "rvariance": 0.31728319576297054
            },
            {
              "id": "chat_hard_sample_192",
              "variance": 0.29954989257851683,
              "mean_reward": -0.7528169061659223,
              "min_reward": -0.9126993641679528,
              "max_reward": -0.5910967187615698,
              "num_responses": 5,
              "rvariance": 0.017514415161043
            },
            {
              "id": "chat_hard_sample_363",
              "variance": 0.1699899697148025,
              "mean_reward": -0.6737945418660681,
              "min_reward": -0.7656810119821778,
              "max_reward": -0.5083988956570713,
              "num_responses": 6,
              "rvariance": 0.0069515049247615745
            },
            {
              "id": "chat_hard_sample_129",
              "variance": 0.3197649160040609,
              "mean_reward": -0.2309017559064208,
              "min_reward": -0.4992102486454604,
              "max_reward": -0.03977789806491321,
              "num_responses": 5,
              "rvariance": 0.023721799477664108
            },
            {
              "id": "chat_hard_sample_359",
              "variance": 0.5402924442827235,
              "mean_reward": 0.7163164846047871,
              "min_reward": 0.4012771584924121,
              "max_reward": 1.2282553895373969,
              "num_responses": 7,
              "rvariance": 0.06536700590574775
            },
            {
              "id": "chat_hard_sample_7",
              "variance": 0.8981902453849697,
              "mean_reward": -0.49231876338675207,
              "min_reward": -0.9632369227318129,
              "max_reward": 0.17156098320213847,
              "num_responses": 6,
              "rvariance": 0.15726548603445292
            },
            {
              "id": "chat_hard_sample_316",
              "variance": 0.42635322133874765,
              "mean_reward": -0.936327313626381,
              "min_reward": -1.1837644510104757,
              "max_reward": -0.6278513068080136,
              "num_responses": 7,
              "rvariance": 0.03552401089350085
            },
            {
              "id": "chat_hard_sample_41",
              "variance": 0.2756594103483283,
              "mean_reward": 0.6463077454687038,
              "min_reward": 0.4747863345852996,
              "max_reward": 0.7872003329800716,
              "num_responses": 6,
              "rvariance": 0.014146922303023548
            },
            {
              "id": "chat_hard_sample_143",
              "variance": 0.33538561592379945,
              "mean_reward": -0.510696057409974,
              "min_reward": -0.7656810119821778,
              "max_reward": -0.3613805434712962,
              "num_responses": 4,
              "rvariance": 0.02703382570563275
            },
            {
              "id": "chat_hard_sample_370",
              "variance": 0.24074255170420672,
              "mean_reward": -0.3666311989065024,
              "min_reward": -0.5083988956570713,
              "max_reward": -0.22355083829713207,
              "num_responses": 7,
              "rvariance": 0.010321287573160825
            },
            {
              "id": "chat_hard_sample_91",
              "variance": 0.6762844200545655,
              "mean_reward": 0.045545252757188404,
              "min_reward": -0.30624866140163054,
              "max_reward": 0.41965445251563394,
              "num_responses": 7,
              "rvariance": 0.07318292584428823
            },
            {
              "id": "chat_hard_sample_245",
              "variance": 0.20766342246240738,
              "mean_reward": -0.4033857869529463,
              "min_reward": -0.5543421307151261,
              "max_reward": -0.21436219128552111,
              "num_responses": 7,
              "rvariance": 0.009838823379423758
            },
            {
              "id": "chat_hard_sample_131",
              "variance": 0.6340166438011551,
              "mean_reward": -0.1423844563612354,
              "min_reward": -0.5083988956570713,
              "max_reward": 0.16237233619052754,
              "num_responses": 6,
              "rvariance": 0.06978006950959212
            },
            {
              "id": "chat_hard_sample_171",
              "variance": 0.3712213392690822,
              "mean_reward": -0.5648434415855386,
              "min_reward": -0.8667561291098981,
              "max_reward": -0.32462595542485245,
              "num_responses": 7,
              "rvariance": 0.02845160273923732
            },
            {
              "id": "chat_hard_sample_329",
              "variance": 0.39694955090159273,
              "mean_reward": 0.09805180710925093,
              "min_reward": -0.2970600143900196,
              "max_reward": 0.32776798239952454,
              "num_responses": 5,
              "rvariance": 0.04400555911075788
            },
            {
              "id": "chat_hard_sample_384",
              "variance": 0.38224771568301524,
              "mean_reward": -0.27343206493159145,
              "min_reward": -0.5267761896802932,
              "max_reward": -0.09490978013457887,
              "num_responses": 7,
              "rvariance": 0.025253554369323045
            },
            {
              "id": "chat_hard_sample_29",
              "variance": 0.475971915201447,
              "mean_reward": -1.0899089851061639,
              "min_reward": -1.468612508370415,
              "max_reward": -0.6370399538196245,
              "num_responses": 7,
              "rvariance": 0.05509654938156995
            },
            {
              "id": "chat_hard_sample_106",
              "variance": 0.41716457432713694,
              "mean_reward": 0.010103328569546195,
              "min_reward": -0.20517354427391019,
              "max_reward": 0.38289986446919017,
              "num_responses": 7,
              "rvariance": 0.03801473229366847
            },
            {
              "id": "chat_hard_sample_320",
              "variance": 0.861435657338526,
              "mean_reward": -0.8629275261883934,
              "min_reward": -1.4456408908413876,
              "max_reward": -0.5359648366919041,
              "num_responses": 6,
              "rvariance": 0.15110669658356768
            },
            {
              "id": "chat_hard_sample_23",
              "variance": 0.7093635492963652,
              "mean_reward": -0.6842958527364809,
              "min_reward": -1.0137744812956733,
              "max_reward": 0.3461452764227464,
              "num_responses": 7,
              "rvariance": 0.18906393283430695
            },
            {
              "id": "chat_hard_sample_235",
              "variance": 0.5476433618920122,
              "mean_reward": -0.8470661712278746,
              "min_reward": -1.2251133625627249,
              "max_reward": -0.4073237785293509,
              "num_responses": 7,
              "rvariance": 0.06063196446149969
            },
            {
              "id": "chat_hard_sample_202",
              "variance": 0.7975745606078298,
              "mean_reward": 1.2203794063845876,
              "min_reward": 0.6034273927478528,
              "max_reward": 1.7979515042572753,
              "num_responses": 7,
              "rvariance": 0.13208491155395927
            },
            {
              "id": "chat_hard_sample_304",
              "variance": 0.5384547148804013,
              "mean_reward": 0.22275487369539945,
              "min_reward": -0.11328707415780076,
              "max_reward": 0.5299182166549653,
              "num_responses": 7,
              "rvariance": 0.05070354058895333
            },
            {
              "id": "chat_hard_sample_277",
              "variance": 0.24120198405478732,
              "mean_reward": -0.9670655256533175,
              "min_reward": -1.0918779808943662,
              "max_reward": -0.7656810119821778,
              "num_responses": 6,
              "rvariance": 0.01249875425785614
            },
            {
              "id": "chat_hard_sample_196",
              "variance": 0.7571445137567417,
              "mean_reward": 1.1232422808332718,
              "min_reward": 0.7872003329800716,
              "max_reward": 1.8530833863269411,
              "num_responses": 7,
              "rvariance": 0.12164990027798903
            },
            {
              "id": "chat_hard_sample_185",
              "variance": 0.6072547093798382,
              "mean_reward": -1.8989201295838023,
              "min_reward": -2.258836151368956,
              "max_reward": -1.468612508370415,
              "num_responses": 7,
              "rvariance": 0.06448461066480829
            },
            {
              "id": "chat_hard_sample_160",
              "variance": 0.6579071260313438,
              "mean_reward": 0.12036709270887751,
              "min_reward": -0.26949407335518677,
              "max_reward": 0.9342186851658467,
              "num_responses": 7,
              "rvariance": 0.12852501503874222
            },
            {
              "id": "chat_hard_sample_163",
              "variance": 0.4548380270747417,
              "mean_reward": 0.016885425173354267,
              "min_reward": -0.20517354427391019,
              "max_reward": 0.5850500987246309,
              "num_responses": 6,
              "rvariance": 0.06879503844737896
            },
            {
              "id": "chat_hard_sample_266",
              "variance": 0.4998623974316353,
              "mean_reward": 0.22800552913060573,
              "min_reward": -0.06734383909974603,
              "max_reward": 0.4931636286085215,
              "num_responses": 7,
              "rvariance": 0.040151359437361187
            },
            {
              "id": "chat_hard_sample_289",
              "variance": 0.7994122900101521,
              "mean_reward": -0.031901914912103826,
              "min_reward": -0.8575674820982871,
              "max_reward": 0.38289986446919017,
              "num_responses": 7,
              "rvariance": 0.15249314694903723
            },
            {
              "id": "chat_hard_sample_69",
              "variance": 0.3546817746481825,
              "mean_reward": 0.2503208147302323,
              "min_reward": -0.003023310018469438,
              "max_reward": 0.6218046867710747,
              "num_responses": 7,
              "rvariance": 0.031501465678218064
            },
            {
              "id": "chat_hard_sample_40",
              "variance": 0.42451549193642557,
              "mean_reward": -0.337752594012868,
              "min_reward": -0.6462286008312355,
              "max_reward": -0.1224757211694117,
              "num_responses": 7,
              "rvariance": 0.032587010114126465
            },
            {
              "id": "chat_hard_sample_313",
              "variance": 0.2738216809460061,
              "mean_reward": -0.6541045839840448,
              "min_reward": -0.9126993641679528,
              "max_reward": -0.43488971956418376,
              "num_responses": 7,
              "rvariance": 0.01885056528386969
            },
            {
              "id": "chat_hard_sample_226",
              "variance": 0.4226777625341035,
              "mean_reward": 0.30676536065869947,
              "min_reward": -0.021400604041691324,
              "max_reward": 0.7872003329800716,
              "num_responses": 7,
              "rvariance": 0.052822936868584004
            },
            {
              "id": "chat_hard_sample_28",
              "variance": 0.4116513861201704,
              "mean_reward": 0.2568841340242401,
              "min_reward": -0.16841895622746642,
              "max_reward": 0.6953138628639622,
              "num_responses": 7,
              "rvariance": 0.054270329449795214
            },
            {
              "id": "chat_hard_sample_323",
              "variance": 0.21501434007169618,
              "mean_reward": 0.8285492445323208,
              "min_reward": 0.6769365688407404,
              "max_reward": 0.9525959791890687,
              "num_responses": 4,
              "rvariance": 0.010363933961714372
            },
            {
              "id": "chat_hard_sample_116",
              "variance": 1.1504186058536898,
              "mean_reward": 1.6076152447310488,
              "min_reward": -0.15923030921585546,
              "max_reward": 2.1103655026520474,
              "num_responses": 7,
              "rvariance": 0.5421622453664041
            },
            {
              "id": "chat_hard_sample_36",
              "variance": 0.3408988041307659,
              "mean_reward": -1.3255321477610447,
              "min_reward": -1.5145557434284695,
              "max_reward": -1.1516041864698374,
              "num_responses": 7,
              "rvariance": 0.020513343665856353
            },
            {
              "id": "chat_hard_sample_351",
              "variance": 0.48883602101770224,
              "mean_reward": -0.1579176453570539,
              "min_reward": -0.4900216016338494,
              "max_reward": 0.2910133943530807,
              "num_responses": 7,
              "rvariance": 0.05392571216855444
            },
            {
              "id": "chat_hard_sample_296",
              "variance": 0.9556192892075381,
              "mean_reward": 0.8055776270032936,
              "min_reward": 0.10724045412086188,
              "max_reward": 1.1915008014909532,
              "num_responses": 6,
              "rvariance": 0.1854109896531548
            },
            {
              "id": "chat_hard_sample_338",
              "variance": 0.5265094737653069,
              "mean_reward": -1.4548295378529983,
              "min_reward": -1.8177810948116306,
              "max_reward": -1.059717716353728,
              "num_responses": 7,
              "rvariance": 0.051503052681431884
            },
            {
              "id": "chat_hard_sample_354",
              "variance": 1.238629617165155,
              "mean_reward": 0.36583523430476983,
              "min_reward": -0.47164430761062753,
              "max_reward": 1.0812370373516218,
              "num_responses": 7,
              "rvariance": 0.2703832726886895
            },
            {
              "id": "chat_hard_sample_186",
              "variance": 0.3399799394296049,
              "mean_reward": 0.8882754501077921,
              "min_reward": 0.6218046867710747,
              "max_reward": 1.044482449305178,
              "num_responses": 6,
              "rvariance": 0.02259942694163378
            },
            {
              "id": "chat_hard_sample_54",
              "variance": 1.41872709859273,
              "mean_reward": 2.41490351789401,
              "min_reward": 1.687687740117944,
              "max_reward": 4.2972634914154515,
              "num_responses": 7,
              "rvariance": 0.6571989400173827
            },
            {
              "id": "chat_hard_sample_241",
              "variance": 0.17550315792176907,
              "mean_reward": -0.9691439100964201,
              "min_reward": -1.1378212159524208,
              "max_reward": -0.8575674820982871,
              "num_responses": 7,
              "rvariance": 0.007193024202697801
            },
            {
              "id": "chat_hard_sample_67",
              "variance": 0.5843979499384562,
              "mean_reward": 0.9000894248370062,
              "min_reward": 0.6401819807942966,
              "max_reward": 1.4671602118392815,
              "num_responses": 7,
              "rvariance": 0.08099884578282875
            },
            {
              "id": "chat_hard_sample_262",
              "variance": 0.9868606890470153,
              "mean_reward": 0.04817058047479154,
              "min_reward": -0.6921718358892902,
              "max_reward": 0.5482955106781872,
              "num_responses": 7,
              "rvariance": 0.17245337987850218
            },
            {
              "id": "chat_hard_sample_244",
              "variance": 0.6119638909732887,
              "mean_reward": -0.4558923413050087,
              "min_reward": -0.7932469530170105,
              "max_reward": -0.048966545076524155,
              "num_responses": 7,
              "rvariance": 0.0627616992595676
            },
            {
              "id": "chat_hard_sample_85",
              "variance": 0.21685206947401825,
              "mean_reward": 0.524667561219759,
              "min_reward": 0.41965445251563394,
              "max_reward": 0.6585592748175184,
              "num_responses": 7,
              "rvariance": 0.008753278943515358
            },
            {
              "id": "chat_hard_sample_398",
              "variance": 0.41716457432713683,
              "mean_reward": -0.43751504728178686,
              "min_reward": -0.5910967187615698,
              "max_reward": -0.16841895622746642,
              "num_responses": 7,
              "rvariance": 0.034513420716262316
            },
            {
              "id": "chat_hard_sample_319",
              "variance": 0.8633882448284931,
              "mean_reward": -1.739431470739412,
              "min_reward": -2.2886992541566915,
              "max_reward": -1.0459347458363115,
              "num_responses": 7,
              "rvariance": 0.146836644664234
            },
            {
              "id": "chat_hard_sample_190",
              "variance": 0.34227710118250765,
              "mean_reward": -0.7067205269910075,
              "min_reward": -1.0091801577898678,
              "max_reward": -0.4624556605990166,
              "num_responses": 6,
              "rvariance": 0.027386795169592474
            },
            {
              "id": "chat_hard_sample_20",
              "variance": 0.49067375042002437,
              "mean_reward": -0.09884777171098356,
              "min_reward": -0.39813513151774,
              "max_reward": 0.20831557124858224,
              "num_responses": 7,
              "rvariance": 0.039558617713627085
            },
            {
              "id": "chat_hard_sample_264",
              "variance": 0.3583572334528268,
              "mean_reward": -0.2156748551443227,
              "min_reward": -0.4624556605990166,
              "max_reward": 0.033731278027974335,
              "num_responses": 7,
              "rvariance": 0.02345809833405868
            },
            {
              "id": "chat_hard_sample_27",
              "variance": 1.0272907358981034,
              "mean_reward": 0.5089155949141402,
              "min_reward": -0.41651242554096185,
              "max_reward": 1.1179916253980655,
              "num_responses": 7,
              "rvariance": 0.22769208388858386
            },
            {
              "id": "chat_hard_sample_115",
              "variance": 0.24625573991117328,
              "mean_reward": -0.6120993405023948,
              "min_reward": -0.719737776924123,
              "max_reward": -0.43488971956418376,
              "num_responses": 7,
              "rvariance": 0.00998011646473247
            },
            {
              "id": "chat_hard_sample_260",
              "variance": 0.22052752827866262,
              "mean_reward": 0.5587968215485997,
              "min_reward": 0.3093906883763026,
              "max_reward": 0.6953138628639622,
              "num_responses": 7,
              "rvariance": 0.013302227055893415
            },
            {
              "id": "chat_hard_sample_175",
              "variance": 0.17917861672641333,
              "mean_reward": -1.210017728186507,
              "min_reward": -1.3353771267020562,
              "max_reward": -1.0045858342840621,
              "num_responses": 7,
              "rvariance": 0.009261589433345485
            },
            {
              "id": "chat_hard_sample_200",
              "variance": 0.3436553982342494,
              "mean_reward": 1.4993204763799197,
              "min_reward": 1.35689644769995,
              "max_reward": 1.8163287982804972,
              "num_responses": 4,
              "rvariance": 0.034680129326062535
            },
            {
              "id": "chat_hard_sample_240",
              "variance": 0.27428111329658666,
              "mean_reward": -0.8977678127740851,
              "min_reward": -1.0551233928479224,
              "max_reward": -0.7105491299125121,
              "num_responses": 4,
              "rvariance": 0.01658414127198464
            },
            {
              "id": "chat_hard_sample_130",
              "variance": 0.5145642326502129,
              "mean_reward": 1.3595217754175533,
              "min_reward": 0.9893505672355124,
              "max_reward": 1.724442328164388,
              "num_responses": 7,
              "rvariance": 0.04963867318991939
            },
            {
              "id": "chat_hard_sample_374",
              "variance": 0.8306536898496293,
              "mean_reward": 0.6638099302527246,
              "min_reward": -0.20517354427391019,
              "max_reward": 1.0996143313748437,
              "num_responses": 7,
              "rvariance": 0.16806295571549487
            },
            {
              "id": "chat_hard_sample_257",
              "variance": 0.4667832681898359,
              "mean_reward": -0.097535107852182,
              "min_reward": -0.37056919048290715,
              "max_reward": 0.10724045412086188,
              "num_responses": 7,
              "rvariance": 0.04172626041263148
            },
            {
              "id": "chat_hard_sample_237",
              "variance": 0.3454931276365715,
              "mean_reward": 0.5771741155718215,
              "min_reward": 0.4012771584924121,
              "max_reward": 0.9342186851658467,
              "num_responses": 7,
              "rvariance": 0.02748667435176318
            },
            {
              "id": "chat_hard_sample_22",
              "variance": 0.3601949628551489,
              "mean_reward": -0.33749006124110775,
              "min_reward": -0.6462286008312355,
              "max_reward": -0.15004166220424453,
              "num_responses": 5,
              "rvariance": 0.029740057830340283
            },
            {
              "id": "chat_hard_sample_292",
              "variance": 0.19296158724382984,
              "mean_reward": -0.05946785594693666,
              "min_reward": -0.1867962502506883,
              "max_reward": 0.06129721906280716,
              "num_responses": 7,
              "rvariance": 0.0063306194563928
            },
            {
              "id": "chat_hard_sample_92",
              "variance": 0.5696961147198785,
              "mean_reward": 0.7749488036312572,
              "min_reward": 0.38289986446919017,
              "max_reward": 1.0996143313748437,
              "num_responses": 6,
              "rvariance": 0.061428413289256106
            },
            {
              "id": "chat_hard_sample_407",
              "variance": 0.20754856437476232,
              "mean_reward": -1.8656769173596526,
              "min_reward": -2.057260207551741,
              "max_reward": -1.778729345012284,
              "num_responses": 5,
              "rvariance": 0.010944873620494993
            },
            {
              "id": "chat_hard_sample_44",
              "variance": 0.5788847617314894,
              "mean_reward": 0.1164291011324728,
              "min_reward": -0.23273948530874303,
              "max_reward": 0.38289986446919017,
              "num_responses": 6,
              "rvariance": 0.05915815122206003
            },
            {
              "id": "chat_hard_sample_119",
              "variance": 0.5421301736850457,
              "mean_reward": -0.3944596727130956,
              "min_reward": -0.8024356000286215,
              "max_reward": 0.05210857205119622,
              "num_responses": 5,
              "rvariance": 0.07391447940890612
            },
            {
              "id": "chat_hard_sample_303",
              "variance": 0.40246273910855923,
              "mean_reward": -1.0833456658121559,
              "min_reward": -1.257273627103363,
              "max_reward": -0.8300015410634544,
              "num_responses": 7,
              "rvariance": 0.026296883188279448
            },
            {
              "id": "chat_hard_sample_365",
              "variance": 0.5673989529669758,
              "mean_reward": -1.3679968235932751,
              "min_reward": -1.8430498740935608,
              "max_reward": -0.9586425992260075,
              "num_responses": 5,
              "rvariance": 0.07991922876415768
            },
            {
              "id": "chat_hard_sample_45",
              "variance": 0.38592317448765967,
              "mean_reward": 0.6953138628639622,
              "min_reward": 0.5299182166549653,
              "max_reward": 1.0812370373516218,
              "num_responses": 7,
              "rvariance": 0.0331935365291102
            },
            {
              "id": "chat_hard_sample_78",
              "variance": 0.5389141472309819,
              "mean_reward": -0.8242586366811975,
              "min_reward": -1.041340422330506,
              "max_reward": -0.32462595542485245,
              "num_responses": 4,
              "rvariance": 0.08609215458344173
            },
            {
              "id": "chat_hard_sample_189",
              "variance": 0.5017001268339576,
              "mean_reward": 2.261978178343628,
              "min_reward": 2.055233620582382,
              "max_reward": 2.7168162054183695,
              "num_responses": 4,
              "rvariance": 0.07216759717943261
            },
            {
              "id": "chat_hard_sample_39",
              "variance": 0.6468807496174104,
              "mean_reward": 1.044482449305178,
              "min_reward": 0.5115409226317433,
              "max_reward": 1.3936510357463938,
              "num_responses": 5,
              "rvariance": 0.09496825189520425
            },
            {
              "id": "chat_hard_sample_387",
              "variance": 0.24533687521001202,
              "mean_reward": -1.191640434163285,
              "min_reward": -1.362943067736889,
              "max_reward": -1.0183688048014787,
              "num_responses": 7,
              "rvariance": 0.010877844482364648
            },
            {
              "id": "chat_hard_sample_118",
              "variance": 0.2526877928193009,
              "mean_reward": -0.09337833896597704,
              "min_reward": -0.2511167793319649,
              "max_reward": 0.05210857205119622,
              "num_responses": 6,
              "rvariance": 0.011691380583649274
            },
            {
              "id": "chat_hard_sample_154",
              "variance": 0.1984747754507964,
              "mean_reward": 0.059984555204005585,
              "min_reward": -0.03977789806491321,
              "max_reward": 0.23588151228341508,
              "num_responses": 7,
              "rvariance": 0.007970997715098828
            },
            {
              "id": "chat_hard_sample_103",
              "variance": 0.6119638909732888,
              "mean_reward": 0.2581967978830417,
              "min_reward": -0.15004166220424453,
              "max_reward": 0.7872003329800716,
              "num_responses": 7,
              "rvariance": 0.07719082482511828
            },
            {
              "id": "chat_hard_sample_162",
              "variance": 0.5586697383059454,
              "mean_reward": 1.0720483903400109,
              "min_reward": 0.566672804701409,
              "max_reward": 1.35689644769995,
              "num_responses": 4,
              "rvariance": 0.0902569890433618
            },
            {
              "id": "chat_hard_sample_321",
              "variance": 0.3528440452458603,
              "mean_reward": 1.5144161107561378,
              "min_reward": 1.2466326835606187,
              "max_reward": 1.7979515042572753,
              "num_responses": 7,
              "rvariance": 0.02603928177055199
            },
            {
              "id": "chat_hard_sample_273",
              "variance": 0.004594323505805464,
              "mean_reward": -1.0426530861893075,
              "min_reward": -1.0459347458363115,
              "max_reward": -1.041340422330506,
              "num_responses": 7,
              "rvariance": 4.307716015509513e-06
            },
            {
              "id": "chat_hard_sample_291",
              "variance": 0.35100631584353814,
              "mean_reward": -0.9107303683797505,
              "min_reward": -1.0826893338827552,
              "max_reward": -0.5083988956570713,
              "num_responses": 7,
              "rvariance": 0.032330270239602096
            },
            {
              "id": "chat_hard_sample_312",
              "variance": 0.5788847617314894,
              "mean_reward": -0.10672375486379293,
              "min_reward": -0.6462286008312355,
              "max_reward": 0.23588151228341508,
              "num_responses": 7,
              "rvariance": 0.07270046165055118
            },
            {
              "id": "chat_hard_sample_314",
              "variance": 1.6792252413718995,
              "mean_reward": -1.9688673376015042,
              "min_reward": -2.9663619712629985,
              "max_reward": -1.1240382454350044,
              "num_responses": 6,
              "rvariance": 0.493202920540059
            },
            {
              "id": "chat_hard_sample_399",
              "variance": 0.2646330339343952,
              "mean_reward": -0.9179500196031591,
              "min_reward": -1.1791701275046702,
              "max_reward": -0.7932469530170105,
              "num_responses": 7,
              "rvariance": 0.015314791978339477
            },
            {
              "id": "chat_hard_sample_219",
              "variance": 0.2517689281181399,
              "mean_reward": 0.2280055291306057,
              "min_reward": 0.04291992503958528,
              "max_reward": 0.4931636286085215,
              "num_responses": 7,
              "rvariance": 0.016534737153931764
            },
            {
              "id": "chat_hard_sample_406",
              "variance": 0.32619696891218863,
              "mean_reward": -0.8188438982636411,
              "min_reward": -0.9953971872724513,
              "max_reward": -0.43488971956418376,
              "num_responses": 7,
              "rvariance": 0.0292123453875763
            },
            {
              "id": "chat_hard_sample_253",
              "variance": 0.7038503610893982,
              "mean_reward": 0.4957889563261246,
              "min_reward": 0.09805180710925093,
              "max_reward": 0.9342186851658467,
              "num_responses": 7,
              "rvariance": 0.07735624112011386
            },
            {
              "id": "chat_hard_sample_24",
              "variance": 0.2389048223018846,
              "mean_reward": 0.6095531574222601,
              "min_reward": 0.4747863345852996,
              "max_reward": 0.8607095090729592,
              "num_responses": 6,
              "rvariance": 0.014934947152794092
            },
            {
              "id": "chat_hard_sample_60",
              "variance": 0.40062500970623716,
              "mean_reward": 0.7346937786280091,
              "min_reward": 0.4747863345852996,
              "max_reward": 0.9525959791890687,
              "num_responses": 7,
              "rvariance": 0.029664655569204803
            },
            {
              "id": "chat_hard_sample_386",
              "variance": 1.0406142740649393,
              "mean_reward": -1.8365412491270028,
              "min_reward": -3.1685122055184394,
              "max_reward": -1.2710565976207795,
              "num_responses": 6,
              "rvariance": 0.37678566811079534
            },
            {
              "id": "chat_hard_sample_216",
              "variance": 0.3606543952057297,
              "mean_reward": -1.5046013758325574,
              "min_reward": -1.7603520509890622,
              "max_reward": -1.2894338916440014,
              "num_responses": 6,
              "rvariance": 0.02472897028565656
            },
            {
              "id": "chat_hard_sample_300",
              "variance": 0.29771216317619464,
              "mean_reward": 1.0129785166939407,
              "min_reward": 0.7688230389568498,
              "max_reward": 1.375273741723172,
              "num_responses": 7,
              "rvariance": 0.02843781804798768
            },
            {
              "id": "chat_hard_sample_352",
              "variance": 0.381328850981854,
              "mean_reward": -0.8575674820982871,
              "min_reward": -1.1056609514117826,
              "max_reward": -0.6002853657731807,
              "num_responses": 6,
              "rvariance": 0.029522788121760683
            },
            {
              "id": "chat_hard_sample_311",
              "variance": 1.4522656601851094,
              "mean_reward": -2.1344612507475076,
              "min_reward": -2.791777678042391,
              "max_reward": -1.1424155394582263,
              "num_responses": 7,
              "rvariance": 0.3947362647664108
            },
            {
              "id": "chat_hard_sample_419",
              "variance": 0.4116513861201702,
              "mean_reward": -0.24061546846155238,
              "min_reward": -0.5910967187615698,
              "max_reward": -0.030589251053302267,
              "num_responses": 7,
              "rvariance": 0.0316117432082151
            },
            {
              "id": "chat_hard_sample_287",
              "variance": 0.18561066963454104,
              "mean_reward": -0.5714067608795463,
              "min_reward": -0.6829831888776792,
              "max_reward": -0.4532670135874056,
              "num_responses": 7,
              "rvariance": 0.006210003407958532
            },
            {
              "id": "chat_hard_sample_55",
              "variance": 0.3013876219808389,
              "mean_reward": -0.39288447608253374,
              "min_reward": -0.5359648366919041,
              "max_reward": -0.19598489726229923,
              "num_responses": 7,
              "rvariance": 0.019126259108862294
            }
          ]
        }
      },
      "overall_stats": {
        "mean_variance_across_files": 0.6744472649426814,
        "mean_rvariance_across_files": 0.18701824293761476,
        "total_processed_files": 1
      }
    }
  },
  "summary": {
    "model_comparison": {
      "Skywork-Reward-V2-Qwen3-8B": {
        "mean_variance": 0.6744472649426814,
        "mean_rvariance": 0.18701824293761476
      }
    }
  }
}