,EI,Markovian,baseline,PPO,Qwen3,Unnorm,EMA,NonMarkovian,baseline_qwen,NoReward
arc,"0.656 [0.602, 0.711]","0.799 [0.754, 0.845]","0.361 [0.306, 0.415]","0.000 [0.000, 0.000]","0.850 [0.810, 0.891]","0.748 [0.699, 0.798]","0.265 [0.215, 0.316]","0.786 [0.739, 0.833]","0.398 [0.342, 0.454]","0.793 [0.746, 0.839]"
wiki_continuation,-2.279,-2.564,-3.200,-4.228,-3.012,-2.703,-3.331,-2.900,-3.031,-2.647
svamp,"0.400 [0.345, 0.455]","0.423 [0.367, 0.479]","0.180 [0.137, 0.223]","0.000 [0.000, 0.000]","0.317 [0.264, 0.369]","0.433 [0.377, 0.489]","0.000 [0.000, 0.000]","0.433 [0.377, 0.489]","0.283 [0.232, 0.334]","0.407 [0.351, 0.462]"
mmlu,"0.532 [0.507, 0.557]","0.555 [0.530, 0.579]","0.214 [0.193, 0.234]","0.000 [0.000, 0.000]","0.605 [0.580, 0.629]","0.628 [0.604, 0.653]","0.238 [0.216, 0.259]","0.687 [0.664, 0.710]","0.318 [0.295, 0.341]","0.466 [0.441, 0.491]"
gsm8k,"0.616 [0.590, 0.643]","0.571 [0.544, 0.598]","0.196 [0.175, 0.218]","0.000 [0.000, 0.000]","0.716 [0.691, 0.740]","0.562 [0.535, 0.589]","0.000 [0.000, 0.000]","0.633 [0.607, 0.659]","0.130 [0.112, 0.149]","0.622 [0.596, 0.648]"
arithmetic,"0.760 [0.701, 0.819]","0.980 [0.961, 0.999]","0.010 [0.000, 0.024]","1.000 [1.000, 1.000]","0.005 [0.000, 0.015]","0.990 [0.976, 1.000]","0.970 [0.946, 0.994]","0.970 [0.946, 0.994]","0.000 [0.000, 0.000]","0.810 [0.756, 0.864]"
