{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "target15 (fixed to all +1s): [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
            "Train size: 67584 | base reps/anchor: 2 | target extra reps: 2048\n",
            "[ep     1] loss=4.469550e+02 | all-ones pred≈1.197 (true=30.0) | max|err|≈28.803 | mean|err|≈1.898 | group means≈[1.17,1.18,1.19,1.20] (true=[0,10,20,30])\n",
            "[ep    50] loss=9.399955e+00 | all-ones pred≈26.459 (true=30.0) | max|err|≈16.522 | mean|err|≈2.175 | group means≈[-0.28,4.44,13.96,26.46] (true=[0,10,20,30])\n",
            "[ep   100] loss=2.687315e+00 | all-ones pred≈24.402 (true=30.0) | max|err|≈17.169 | mean|err|≈1.289 | group means≈[0.44,3.57,11.81,24.40] (true=[0,10,20,30])\n",
            "[ep   150] loss=1.377977e+00 | all-ones pred≈25.402 (true=30.0) | max|err|≈16.315 | mean|err|≈1.394 | group means≈[0.75,4.29,12.84,25.40] (true=[0,10,20,30])\n",
            "[ep   200] loss=8.412905e-01 | all-ones pred≈24.865 (true=30.0) | max|err|≈16.547 | mean|err|≈1.312 | group means≈[0.79,4.10,12.46,24.86] (true=[0,10,20,30])\n",
            "[ep   250] loss=4.841992e-01 | all-ones pred≈24.215 (true=30.0) | max|err|≈17.228 | mean|err|≈1.193 | group means≈[0.64,3.58,11.79,24.21] (true=[0,10,20,30])\n",
            "[ep   300] loss=4.502012e-01 | all-ones pred≈24.757 (true=30.0) | max|err|≈16.839 | mean|err|≈1.215 | group means≈[0.69,3.81,12.24,24.76] (true=[0,10,20,30])\n",
            "[ep   350] loss=3.004342e-01 | all-ones pred≈24.644 (true=30.0) | max|err|≈17.163 | mean|err|≈1.153 | group means≈[0.60,3.68,12.19,24.64] (true=[0,10,20,30])\n",
            "[ep   400] loss=2.985106e-01 | all-ones pred≈24.560 (true=30.0) | max|err|≈17.093 | mean|err|≈1.180 | group means≈[0.64,3.66,12.03,24.56] (true=[0,10,20,30])\n",
            "[ep   450] loss=2.260316e-01 | all-ones pred≈24.781 (true=30.0) | max|err|≈17.135 | mean|err|≈1.130 | group means≈[0.58,3.64,12.26,24.78] (true=[0,10,20,30])\n",
            "[ep   500] loss=2.485876e-01 | all-ones pred≈25.916 (true=30.0) | max|err|≈16.069 | mean|err|≈1.414 | group means≈[0.96,4.57,13.38,25.92] (true=[0,10,20,30])\n",
            "[ep   550] loss=1.960432e-01 | all-ones pred≈24.328 (true=30.0) | max|err|≈17.293 | mean|err|≈1.129 | group means≈[0.57,3.50,11.95,24.33] (true=[0,10,20,30])\n",
            "[ep   600] loss=2.156465e-01 | all-ones pred≈25.353 (true=30.0) | max|err|≈16.776 | mean|err|≈1.227 | group means≈[0.70,3.96,12.82,25.35] (true=[0,10,20,30])\n",
            "[ep   650] loss=1.706835e-01 | all-ones pred≈24.424 (true=30.0) | max|err|≈17.271 | mean|err|≈1.112 | group means≈[0.55,3.50,11.97,24.42] (true=[0,10,20,30])\n",
            "[ep   700] loss=1.900096e-01 | all-ones pred≈24.682 (true=30.0) | max|err|≈17.471 | mean|err|≈1.072 | group means≈[0.49,3.36,12.00,24.68] (true=[0,10,20,30])\n",
            "[ep   750] loss=1.676284e-01 | all-ones pred≈25.238 (true=30.0) | max|err|≈16.803 | mean|err|≈1.142 | group means≈[0.59,3.86,12.77,25.24] (true=[0,10,20,30])\n",
            "[ep   800] loss=1.378440e-01 | all-ones pred≈25.355 (true=30.0) | max|err|≈16.984 | mean|err|≈1.147 | group means≈[0.60,3.85,12.83,25.35] (true=[0,10,20,30])\n",
            "[ep   850] loss=1.443314e-01 | all-ones pred≈25.916 (true=30.0) | max|err|≈16.225 | mean|err|≈1.262 | group means≈[0.76,4.34,13.47,25.92] (true=[0,10,20,30])\n",
            "[ep   900] loss=1.091142e-01 | all-ones pred≈25.770 (true=30.0) | max|err|≈16.632 | mean|err|≈1.194 | group means≈[0.67,4.06,13.15,25.77] (true=[0,10,20,30])\n",
            "[ep   950] loss=1.326728e-01 | all-ones pred≈24.728 (true=30.0) | max|err|≈17.069 | mean|err|≈1.095 | group means≈[0.53,3.59,12.29,24.73] (true=[0,10,20,30])\n",
            "[ep  1000] loss=1.009664e-01 | all-ones pred≈25.665 (true=30.0) | max|err|≈16.807 | mean|err|≈1.181 | group means≈[0.65,3.99,13.05,25.67] (true=[0,10,20,30])\n",
            "[ep  1050] loss=1.303489e-01 | all-ones pred≈25.698 (true=30.0) | max|err|≈16.676 | mean|err|≈1.133 | group means≈[0.59,3.98,13.15,25.70] (true=[0,10,20,30])\n",
            "[ep  1100] loss=1.139576e-01 | all-ones pred≈25.720 (true=30.0) | max|err|≈16.600 | mean|err|≈1.173 | group means≈[0.64,4.01,13.14,25.72] (true=[0,10,20,30])\n",
            "[ep  1150] loss=3.189088e-01 | all-ones pred≈24.999 (true=30.0) | max|err|≈17.154 | mean|err|≈1.066 | group means≈[0.50,3.59,12.53,25.00] (true=[0,10,20,30])\n",
            "[ep  1200] loss=1.371647e-01 | all-ones pred≈25.526 (true=30.0) | max|err|≈16.726 | mean|err|≈1.127 | group means≈[0.59,3.96,13.10,25.53] (true=[0,10,20,30])\n",
            "[ep  1250] loss=1.010339e-01 | all-ones pred≈25.019 (true=30.0) | max|err|≈17.062 | mean|err|≈1.124 | group means≈[0.57,3.71,12.60,25.02] (true=[0,10,20,30])\n",
            "[ep  1300] loss=1.473917e-01 | all-ones pred≈25.289 (true=30.0) | max|err|≈17.076 | mean|err|≈1.069 | group means≈[0.50,3.65,12.73,25.29] (true=[0,10,20,30])\n",
            "[ep  1350] loss=6.791632e-02 | all-ones pred≈25.135 (true=30.0) | max|err|≈16.629 | mean|err|≈1.162 | group means≈[0.62,3.95,12.87,25.14] (true=[0,10,20,30])\n",
            "[ep  1400] loss=1.463089e-01 | all-ones pred≈26.086 (true=30.0) | max|err|≈16.257 | mean|err|≈1.224 | group means≈[0.72,4.32,13.60,26.09] (true=[0,10,20,30])\n",
            "[ep  1450] loss=6.266370e-02 | all-ones pred≈25.359 (true=30.0) | max|err|≈16.673 | mean|err|≈1.159 | group means≈[0.63,4.06,13.12,25.36] (true=[0,10,20,30])\n",
            "[ep  1500] loss=1.820056e-01 | all-ones pred≈26.133 (true=30.0) | max|err|≈16.149 | mean|err|≈1.216 | group means≈[0.71,4.41,13.77,26.13] (true=[0,10,20,30])\n",
            "[ep  1550] loss=6.255674e-02 | all-ones pred≈25.481 (true=30.0) | max|err|≈16.319 | mean|err|≈1.179 | group means≈[0.66,4.19,13.29,25.48] (true=[0,10,20,30])\n",
            "[ep  1600] loss=1.728144e-01 | all-ones pred≈25.662 (true=30.0) | max|err|≈16.612 | mean|err|≈1.154 | group means≈[0.62,4.05,13.27,25.66] (true=[0,10,20,30])\n",
            "[ep  1650] loss=6.423838e-02 | all-ones pred≈24.886 (true=30.0) | max|err|≈17.081 | mean|err|≈1.058 | group means≈[0.48,3.64,12.60,24.89] (true=[0,10,20,30])\n",
            "[ep  1700] loss=1.603239e-01 | all-ones pred≈25.921 (true=30.0) | max|err|≈16.267 | mean|err|≈1.208 | group means≈[0.70,4.27,13.56,25.92] (true=[0,10,20,30])\n",
            "[ep  1750] loss=6.382118e-02 | all-ones pred≈25.828 (true=30.0) | max|err|≈16.136 | mean|err|≈1.163 | group means≈[0.64,4.27,13.57,25.83] (true=[0,10,20,30])\n",
            "[ep  1800] loss=1.474128e-01 | all-ones pred≈25.792 (true=30.0) | max|err|≈16.344 | mean|err|≈1.195 | group means≈[0.68,4.18,13.40,25.79] (true=[0,10,20,30])\n",
            "[ep  1850] loss=6.200750e-02 | all-ones pred≈25.855 (true=30.0) | max|err|≈16.053 | mean|err|≈1.234 | group means≈[0.73,4.43,13.70,25.86] (true=[0,10,20,30])\n",
            "[ep  1900] loss=1.484338e-01 | all-ones pred≈25.702 (true=30.0) | max|err|≈16.479 | mean|err|≈1.138 | group means≈[0.60,4.04,13.28,25.70] (true=[0,10,20,30])\n",
            "[ep  1950] loss=5.976270e-02 | all-ones pred≈25.682 (true=30.0) | max|err|≈16.350 | mean|err|≈1.165 | group means≈[0.63,4.19,13.42,25.68] (true=[0,10,20,30])\n",
            "[ep  2000] loss=1.401877e-01 | all-ones pred≈26.116 (true=30.0) | max|err|≈16.083 | mean|err|≈1.211 | group means≈[0.71,4.39,13.75,26.12] (true=[0,10,20,30])\n",
            "[ep  2050] loss=5.892612e-02 | all-ones pred≈25.680 (true=30.0) | max|err|≈16.131 | mean|err|≈1.175 | group means≈[0.65,4.25,13.47,25.68] (true=[0,10,20,30])\n",
            "[ep  2100] loss=1.362162e-01 | all-ones pred≈25.248 (true=30.0) | max|err|≈17.000 | mean|err|≈1.088 | group means≈[0.53,3.70,12.83,25.25] (true=[0,10,20,30])\n",
            "[ep  2150] loss=6.591174e-02 | all-ones pred≈24.967 (true=30.0) | max|err|≈16.654 | mean|err|≈1.083 | group means≈[0.52,3.81,12.81,24.97] (true=[0,10,20,30])\n",
            "[ep  2200] loss=1.599039e-01 | all-ones pred≈25.389 (true=30.0) | max|err|≈16.341 | mean|err|≈1.188 | group means≈[0.67,4.09,13.15,25.39] (true=[0,10,20,30])\n",
            "[ep  2250] loss=2.172969e-01 | all-ones pred≈25.463 (true=30.0) | max|err|≈16.566 | mean|err|≈1.104 | group means≈[0.55,3.94,13.12,25.46] (true=[0,10,20,30])\n",
            "[ep  2300] loss=1.312965e-01 | all-ones pred≈25.199 (true=30.0) | max|err|≈16.474 | mean|err|≈1.182 | group means≈[0.66,4.06,13.05,25.20] (true=[0,10,20,30])\n",
            "[ep  2350] loss=3.254247e-01 | all-ones pred≈25.649 (true=30.0) | max|err|≈16.246 | mean|err|≈1.210 | group means≈[0.70,4.28,13.51,25.65] (true=[0,10,20,30])\n",
            "[ep  2400] loss=3.445958e-02 | all-ones pred≈25.209 (true=30.0) | max|err|≈16.269 | mean|err|≈1.168 | group means≈[0.62,4.16,13.20,25.21] (true=[0,10,20,30])\n",
            "[ep  2450] loss=1.342044e-01 | all-ones pred≈26.296 (true=30.0) | max|err|≈15.677 | mean|err|≈1.210 | group means≈[0.72,4.58,14.12,26.30] (true=[0,10,20,30])\n",
            "[ep  2500] loss=1.038020e-01 | all-ones pred≈26.114 (true=30.0) | max|err|≈16.027 | mean|err|≈1.229 | group means≈[0.73,4.44,13.85,26.11] (true=[0,10,20,30])\n",
            "[ep  2550] loss=6.218511e-02 | all-ones pred≈25.317 (true=30.0) | max|err|≈16.130 | mean|err|≈1.170 | group means≈[0.64,4.21,13.32,25.32] (true=[0,10,20,30])\n",
            "[ep  2600] loss=5.718439e-02 | all-ones pred≈25.822 (true=30.0) | max|err|≈16.115 | mean|err|≈1.188 | group means≈[0.68,4.31,13.63,25.82] (true=[0,10,20,30])\n",
            "[ep  2650] loss=6.593836e-02 | all-ones pred≈25.672 (true=30.0) | max|err|≈16.042 | mean|err|≈1.206 | group means≈[0.69,4.37,13.63,25.67] (true=[0,10,20,30])\n",
            "[ep  2700] loss=9.329356e-02 | all-ones pred≈25.940 (true=30.0) | max|err|≈16.114 | mean|err|≈1.209 | group means≈[0.70,4.31,13.69,25.94] (true=[0,10,20,30])\n",
            "[ep  2750] loss=7.458054e-02 | all-ones pred≈25.370 (true=30.0) | max|err|≈16.169 | mean|err|≈1.149 | group means≈[0.62,4.20,13.36,25.37] (true=[0,10,20,30])\n",
            "[ep  2800] loss=1.285164e-01 | all-ones pred≈25.824 (true=30.0) | max|err|≈15.878 | mean|err|≈1.234 | group means≈[0.74,4.45,13.72,25.82] (true=[0,10,20,30])\n",
            "[ep  2850] loss=4.072174e-02 | all-ones pred≈25.343 (true=30.0) | max|err|≈16.256 | mean|err|≈1.099 | group means≈[0.54,4.04,13.26,25.34] (true=[0,10,20,30])\n",
            "[ep  2900] loss=1.041767e-01 | all-ones pred≈25.545 (true=30.0) | max|err|≈16.153 | mean|err|≈1.163 | group means≈[0.65,4.28,13.48,25.54] (true=[0,10,20,30])\n",
            "[ep  2950] loss=3.733371e-02 | all-ones pred≈24.995 (true=30.0) | max|err|≈16.751 | mean|err|≈1.072 | group means≈[0.49,3.77,12.83,24.99] (true=[0,10,20,30])\n",
            "[ep  3000] loss=2.021756e-01 | all-ones pred≈25.671 (true=30.0) | max|err|≈16.023 | mean|err|≈1.240 | group means≈[0.74,4.36,13.58,25.67] (true=[0,10,20,30])\n",
            "[ep  3050] loss=8.773904e-02 | all-ones pred≈25.386 (true=30.0) | max|err|≈15.837 | mean|err|≈1.199 | group means≈[0.68,4.40,13.61,25.39] (true=[0,10,20,30])\n",
            "[ep  3100] loss=3.200819e-02 | all-ones pred≈25.710 (true=30.0) | max|err|≈15.939 | mean|err|≈1.198 | group means≈[0.70,4.42,13.69,25.71] (true=[0,10,20,30])\n",
            "[ep  3150] loss=9.299678e-02 | all-ones pred≈25.022 (true=30.0) | max|err|≈16.340 | mean|err|≈1.129 | group means≈[0.57,4.04,13.10,25.02] (true=[0,10,20,30])\n",
            "[ep  3200] loss=3.120853e-02 | all-ones pred≈26.396 (true=30.0) | max|err|≈15.396 | mean|err|≈1.255 | group means≈[0.78,4.80,14.36,26.40] (true=[0,10,20,30])\n",
            "[ep  3250] loss=7.306746e-02 | all-ones pred≈25.287 (true=30.0) | max|err|≈16.541 | mean|err|≈1.113 | group means≈[0.57,3.94,13.10,25.29] (true=[0,10,20,30])\n",
            "[ep  3300] loss=6.552628e-02 | all-ones pred≈25.147 (true=30.0) | max|err|≈16.034 | mean|err|≈1.158 | group means≈[0.63,4.18,13.21,25.15] (true=[0,10,20,30])\n",
            "[ep  3350] loss=2.589414e-02 | all-ones pred≈25.445 (true=30.0) | max|err|≈16.376 | mean|err|≈1.084 | group means≈[0.54,3.97,13.27,25.45] (true=[0,10,20,30])\n",
            "[ep  3400] loss=1.254463e-01 | all-ones pred≈26.320 (true=30.0) | max|err|≈15.370 | mean|err|≈1.261 | group means≈[0.79,4.83,14.35,26.32] (true=[0,10,20,30])\n",
            "[ep  3450] loss=3.149058e-02 | all-ones pred≈26.171 (true=30.0) | max|err|≈15.514 | mean|err|≈1.261 | group means≈[0.79,4.76,14.21,26.17] (true=[0,10,20,30])\n",
            "[ep  3500] loss=5.020431e-02 | all-ones pred≈26.131 (true=30.0) | max|err|≈15.275 | mean|err|≈1.291 | group means≈[0.83,4.85,14.26,26.13] (true=[0,10,20,30])\n",
            "[ep  3550] loss=1.675393e-01 | all-ones pred≈25.609 (true=30.0) | max|err|≈16.016 | mean|err|≈1.183 | group means≈[0.67,4.27,13.52,25.61] (true=[0,10,20,30])\n",
            "[ep  3600] loss=3.084067e-02 | all-ones pred≈25.504 (true=30.0) | max|err|≈15.946 | mean|err|≈1.166 | group means≈[0.64,4.28,13.58,25.50] (true=[0,10,20,30])\n",
            "[ep  3650] loss=5.981971e-02 | all-ones pred≈26.254 (true=30.0) | max|err|≈15.426 | mean|err|≈1.263 | group means≈[0.79,4.73,14.21,26.25] (true=[0,10,20,30])\n",
            "[ep  3700] loss=4.147128e-02 | all-ones pred≈25.212 (true=30.0) | max|err|≈16.133 | mean|err|≈1.134 | group means≈[0.58,4.13,13.31,25.21] (true=[0,10,20,30])\n",
            "[ep  3750] loss=9.842241e-02 | all-ones pred≈25.572 (true=30.0) | max|err|≈15.876 | mean|err|≈1.165 | group means≈[0.66,4.31,13.53,25.57] (true=[0,10,20,30])\n",
            "[ep  3800] loss=7.244518e-02 | all-ones pred≈26.077 (true=30.0) | max|err|≈15.415 | mean|err|≈1.258 | group means≈[0.78,4.71,14.12,26.08] (true=[0,10,20,30])\n",
            "[ep  3850] loss=4.465359e-02 | all-ones pred≈25.406 (true=30.0) | max|err|≈15.925 | mean|err|≈1.172 | group means≈[0.66,4.30,13.51,25.41] (true=[0,10,20,30])\n",
            "[ep  3900] loss=4.558182e-02 | all-ones pred≈25.916 (true=30.0) | max|err|≈15.818 | mean|err|≈1.201 | group means≈[0.70,4.42,13.83,25.92] (true=[0,10,20,30])\n",
            "[ep  3950] loss=8.435164e-02 | all-ones pred≈25.044 (true=30.0) | max|err|≈16.392 | mean|err|≈1.148 | group means≈[0.61,4.02,13.08,25.04] (true=[0,10,20,30])\n",
            "[ep  4000] loss=6.623366e-02 | all-ones pred≈25.508 (true=30.0) | max|err|≈15.999 | mean|err|≈1.139 | group means≈[0.62,4.16,13.44,25.51] (true=[0,10,20,30])\n",
            "[ep  4050] loss=1.319477e-01 | all-ones pred≈25.273 (true=30.0) | max|err|≈15.961 | mean|err|≈1.200 | group means≈[0.69,4.32,13.43,25.27] (true=[0,10,20,30])\n",
            "[ep  4100] loss=5.855081e-02 | all-ones pred≈25.635 (true=30.0) | max|err|≈15.920 | mean|err|≈1.140 | group means≈[0.62,4.26,13.62,25.64] (true=[0,10,20,30])\n",
            "[ep  4150] loss=3.194791e-02 | all-ones pred≈24.782 (true=30.0) | max|err|≈16.507 | mean|err|≈1.080 | group means≈[0.51,3.84,12.82,24.78] (true=[0,10,20,30])\n",
            "[ep  4200] loss=8.456241e-02 | all-ones pred≈25.764 (true=30.0) | max|err|≈15.579 | mean|err|≈1.195 | group means≈[0.70,4.49,13.88,25.76] (true=[0,10,20,30])\n",
            "[ep  4250] loss=4.264717e-02 | all-ones pred≈24.453 (true=30.0) | max|err|≈16.542 | mean|err|≈1.072 | group means≈[0.52,3.74,12.62,24.45] (true=[0,10,20,30])\n",
            "[ep  4300] loss=7.483175e-02 | all-ones pred≈26.166 (true=30.0) | max|err|≈15.292 | mean|err|≈1.217 | group means≈[0.74,4.72,14.24,26.17] (true=[0,10,20,30])\n",
            "[ep  4350] loss=4.924791e-02 | all-ones pred≈24.786 (true=30.0) | max|err|≈16.523 | mean|err|≈1.060 | group means≈[0.49,3.81,12.89,24.79] (true=[0,10,20,30])\n",
            "[ep  4400] loss=5.516567e-02 | all-ones pred≈26.178 (true=30.0) | max|err|≈15.187 | mean|err|≈1.289 | group means≈[0.83,4.88,14.31,26.18] (true=[0,10,20,30])\n",
            "[ep  4450] loss=1.400364e-02 | all-ones pred≈25.661 (true=30.0) | max|err|≈15.714 | mean|err|≈1.150 | group means≈[0.61,4.39,13.80,25.66] (true=[0,10,20,30])\n",
            "[ep  4500] loss=6.883112e-02 | all-ones pred≈25.445 (true=30.0) | max|err|≈15.792 | mean|err|≈1.143 | group means≈[0.61,4.28,13.62,25.44] (true=[0,10,20,30])\n",
            "[ep  4550] loss=5.379681e-02 | all-ones pred≈26.157 (true=30.0) | max|err|≈15.254 | mean|err|≈1.260 | group means≈[0.78,4.73,14.26,26.16] (true=[0,10,20,30])\n",
            "[ep  4600] loss=3.500197e-02 | all-ones pred≈25.331 (true=30.0) | max|err|≈15.860 | mean|err|≈1.163 | group means≈[0.64,4.27,13.52,25.33] (true=[0,10,20,30])\n",
            "[ep  4650] loss=2.962315e-02 | all-ones pred≈25.654 (true=30.0) | max|err|≈15.672 | mean|err|≈1.208 | group means≈[0.71,4.45,13.76,25.65] (true=[0,10,20,30])\n",
            "[ep  4700] loss=7.125612e-02 | all-ones pred≈25.132 (true=30.0) | max|err|≈15.926 | mean|err|≈1.135 | group means≈[0.60,4.19,13.31,25.13] (true=[0,10,20,30])\n",
            "[ep  4750] loss=3.985100e-02 | all-ones pred≈25.420 (true=30.0) | max|err|≈15.958 | mean|err|≈1.141 | group means≈[0.62,4.15,13.44,25.42] (true=[0,10,20,30])\n",
            "[ep  4800] loss=3.384031e-02 | all-ones pred≈25.588 (true=30.0) | max|err|≈15.518 | mean|err|≈1.189 | group means≈[0.69,4.45,13.76,25.59] (true=[0,10,20,30])\n",
            "[ep  4850] loss=4.341621e-02 | all-ones pred≈25.766 (true=30.0) | max|err|≈15.582 | mean|err|≈1.165 | group means≈[0.66,4.42,13.84,25.77] (true=[0,10,20,30])\n",
            "[ep  4900] loss=4.767116e-02 | all-ones pred≈24.717 (true=30.0) | max|err|≈16.221 | mean|err|≈1.072 | group means≈[0.51,3.89,12.95,24.72] (true=[0,10,20,30])\n",
            "[ep  4950] loss=5.536953e-02 | all-ones pred≈26.497 (true=30.0) | max|err|≈14.750 | mean|err|≈1.326 | group means≈[0.88,5.10,14.68,26.50] (true=[0,10,20,30])\n",
            "[ep  5000] loss=6.049442e-02 | all-ones pred≈25.828 (true=30.0) | max|err|≈15.212 | mean|err|≈1.215 | group means≈[0.72,4.68,14.07,25.83] (true=[0,10,20,30])\n",
            "[ep  5050] loss=4.174878e-02 | all-ones pred≈25.361 (true=30.0) | max|err|≈15.729 | mean|err|≈1.182 | group means≈[0.68,4.35,13.55,25.36] (true=[0,10,20,30])\n",
            "[ep  5100] loss=4.821116e-02 | all-ones pred≈25.251 (true=30.0) | max|err|≈15.783 | mean|err|≈1.121 | group means≈[0.58,4.20,13.45,25.25] (true=[0,10,20,30])\n",
            "[ep  5150] loss=2.173073e-02 | all-ones pred≈26.046 (true=30.0) | max|err|≈15.228 | mean|err|≈1.149 | group means≈[0.65,4.55,14.12,26.05] (true=[0,10,20,30])\n",
            "[ep  5200] loss=4.750831e-02 | all-ones pred≈25.134 (true=30.0) | max|err|≈15.896 | mean|err|≈1.115 | group means≈[0.58,4.19,13.40,25.13] (true=[0,10,20,30])\n",
            "[ep  5250] loss=3.429082e-02 | all-ones pred≈25.811 (true=30.0) | max|err|≈15.289 | mean|err|≈1.186 | group means≈[0.69,4.54,13.95,25.81] (true=[0,10,20,30])\n",
            "[ep  5300] loss=5.733874e-02 | all-ones pred≈26.027 (true=30.0) | max|err|≈14.946 | mean|err|≈1.233 | group means≈[0.75,4.82,14.26,26.03] (true=[0,10,20,30])\n",
            "[ep  5350] loss=4.742574e-02 | all-ones pred≈25.634 (true=30.0) | max|err|≈15.659 | mean|err|≈1.162 | group means≈[0.65,4.35,13.73,25.63] (true=[0,10,20,30])\n",
            "[ep  5400] loss=7.979119e-02 | all-ones pred≈25.412 (true=30.0) | max|err|≈15.681 | mean|err|≈1.137 | group means≈[0.62,4.31,13.60,25.41] (true=[0,10,20,30])\n",
            "[ep  5450] loss=1.454284e-01 | all-ones pred≈25.766 (true=30.0) | max|err|≈15.260 | mean|err|≈1.197 | group means≈[0.71,4.58,13.94,25.77] (true=[0,10,20,30])\n",
            "[ep  5500] loss=1.112375e-01 | all-ones pred≈25.834 (true=30.0) | max|err|≈15.280 | mean|err|≈1.180 | group means≈[0.68,4.54,14.02,25.83] (true=[0,10,20,30])\n",
            "[ep  5550] loss=1.874647e-01 | all-ones pred≈25.549 (true=30.0) | max|err|≈15.816 | mean|err|≈1.084 | group means≈[0.52,4.16,13.64,25.55] (true=[0,10,20,30])\n",
            "[ep  5600] loss=2.176388e-02 | all-ones pred≈26.059 (true=30.0) | max|err|≈15.087 | mean|err|≈1.270 | group means≈[0.80,4.81,14.31,26.06] (true=[0,10,20,30])\n",
            "[ep  5650] loss=7.178513e-02 | all-ones pred≈25.204 (true=30.0) | max|err|≈15.763 | mean|err|≈1.117 | group means≈[0.56,4.22,13.48,25.20] (true=[0,10,20,30])\n",
            "[ep  5700] loss=4.552867e-02 | all-ones pred≈25.683 (true=30.0) | max|err|≈15.742 | mean|err|≈1.177 | group means≈[0.67,4.35,13.71,25.68] (true=[0,10,20,30])\n",
            "[ep  5750] loss=5.919026e-02 | all-ones pred≈25.522 (true=30.0) | max|err|≈15.408 | mean|err|≈1.152 | group means≈[0.62,4.42,13.75,25.52] (true=[0,10,20,30])\n",
            "[ep  5800] loss=6.059463e-02 | all-ones pred≈25.645 (true=30.0) | max|err|≈15.369 | mean|err|≈1.203 | group means≈[0.71,4.47,13.80,25.64] (true=[0,10,20,30])\n",
            "[ep  5850] loss=6.158035e-02 | all-ones pred≈26.081 (true=30.0) | max|err|≈14.714 | mean|err|≈1.269 | group means≈[0.80,4.96,14.43,26.08] (true=[0,10,20,30])\n",
            "[ep  5900] loss=6.515275e-02 | all-ones pred≈25.937 (true=30.0) | max|err|≈15.234 | mean|err|≈1.182 | group means≈[0.69,4.58,14.10,25.94] (true=[0,10,20,30])\n",
            "[ep  5950] loss=6.252366e-02 | all-ones pred≈25.051 (true=30.0) | max|err|≈15.611 | mean|err|≈1.128 | group means≈[0.58,4.21,13.38,25.05] (true=[0,10,20,30])\n",
            "[ep  6000] loss=6.589220e-02 | all-ones pred≈25.478 (true=30.0) | max|err|≈15.662 | mean|err|≈1.148 | group means≈[0.64,4.29,13.60,25.48] (true=[0,10,20,30])\n",
            "[ep  6050] loss=6.390359e-02 | all-ones pred≈25.627 (true=30.0) | max|err|≈15.278 | mean|err|≈1.194 | group means≈[0.70,4.53,13.89,25.63] (true=[0,10,20,30])\n",
            "[ep  6100] loss=6.671822e-02 | all-ones pred≈25.656 (true=30.0) | max|err|≈15.333 | mean|err|≈1.165 | group means≈[0.67,4.48,13.88,25.66] (true=[0,10,20,30])\n",
            "[ep  6150] loss=6.186271e-02 | all-ones pred≈25.597 (true=30.0) | max|err|≈15.081 | mean|err|≈1.154 | group means≈[0.63,4.56,13.96,25.60] (true=[0,10,20,30])\n",
            "[ep  6200] loss=6.557923e-02 | all-ones pred≈25.794 (true=30.0) | max|err|≈15.465 | mean|err|≈1.137 | group means≈[0.63,4.39,13.89,25.79] (true=[0,10,20,30])\n",
            "[ep  6250] loss=5.935053e-02 | all-ones pred≈25.447 (true=30.0) | max|err|≈15.287 | mean|err|≈1.127 | group means≈[0.60,4.43,13.79,25.45] (true=[0,10,20,30])\n",
            "[ep  6300] loss=6.305289e-02 | all-ones pred≈25.939 (true=30.0) | max|err|≈14.840 | mean|err|≈1.198 | group means≈[0.72,4.71,14.18,25.94] (true=[0,10,20,30])\n",
            "[ep  6350] loss=6.731614e-02 | all-ones pred≈25.861 (true=30.0) | max|err|≈14.899 | mean|err|≈1.202 | group means≈[0.71,4.75,14.20,25.86] (true=[0,10,20,30])\n",
            "[ep  6400] loss=4.579985e-02 | all-ones pred≈25.931 (true=30.0) | max|err|≈14.833 | mean|err|≈1.192 | group means≈[0.71,4.72,14.19,25.93] (true=[0,10,20,30])\n",
            "[ep  6450] loss=5.730282e-02 | all-ones pred≈25.429 (true=30.0) | max|err|≈15.110 | mean|err|≈1.184 | group means≈[0.69,4.54,13.77,25.43] (true=[0,10,20,30])\n",
            "[ep  6500] loss=4.353127e-02 | all-ones pred≈25.916 (true=30.0) | max|err|≈14.810 | mean|err|≈1.197 | group means≈[0.72,4.71,14.16,25.92] (true=[0,10,20,30])\n",
            "[ep  6550] loss=5.684801e-02 | all-ones pred≈25.534 (true=30.0) | max|err|≈15.032 | mean|err|≈1.159 | group means≈[0.65,4.53,13.88,25.53] (true=[0,10,20,30])\n",
            "[ep  6600] loss=4.355591e-02 | all-ones pred≈26.153 (true=30.0) | max|err|≈14.599 | mean|err|≈1.207 | group means≈[0.74,4.81,14.33,26.15] (true=[0,10,20,30])\n",
            "[ep  6650] loss=5.887780e-02 | all-ones pred≈25.214 (true=30.0) | max|err|≈15.253 | mean|err|≈1.082 | group means≈[0.55,4.28,13.62,25.21] (true=[0,10,20,30])\n",
            "[ep  6700] loss=4.673882e-02 | all-ones pred≈25.944 (true=30.0) | max|err|≈14.597 | mean|err|≈1.214 | group means≈[0.74,4.77,14.24,25.94] (true=[0,10,20,30])\n",
            "[ep  6750] loss=7.538120e-02 | all-ones pred≈25.433 (true=30.0) | max|err|≈15.205 | mean|err|≈1.058 | group means≈[0.53,4.22,13.65,25.43] (true=[0,10,20,30])\n",
            "[ep  6800] loss=1.539902e-01 | all-ones pred≈26.360 (true=30.0) | max|err|≈14.141 | mean|err|≈1.236 | group means≈[0.78,5.05,14.62,26.36] (true=[0,10,20,30])\n",
            "[ep  6850] loss=1.120670e-01 | all-ones pred≈25.660 (true=30.0) | max|err|≈14.828 | mean|err|≈1.144 | group means≈[0.65,4.54,13.89,25.66] (true=[0,10,20,30])\n",
            "[ep  6900] loss=3.381808e-01 | all-ones pred≈27.191 (true=30.0) | max|err|≈14.049 | mean|err|≈1.336 | group means≈[0.90,5.21,15.13,27.19] (true=[0,10,20,30])\n",
            "[ep  6950] loss=9.049276e-02 | all-ones pred≈26.483 (true=30.0) | max|err|≈14.360 | mean|err|≈1.155 | group means≈[0.68,4.86,14.59,26.48] (true=[0,10,20,30])\n",
            "[ep  7000] loss=1.272999e-02 | all-ones pred≈25.952 (true=30.0) | max|err|≈14.256 | mean|err|≈1.231 | group means≈[0.75,4.99,14.40,25.95] (true=[0,10,20,30])\n",
            "[ep  7050] loss=1.039278e-01 | all-ones pred≈26.265 (true=30.0) | max|err|≈14.355 | mean|err|≈1.195 | group means≈[0.73,4.86,14.44,26.26] (true=[0,10,20,30])\n",
            "[ep  7100] loss=1.644148e-02 | all-ones pred≈25.369 (true=30.0) | max|err|≈14.947 | mean|err|≈1.094 | group means≈[0.56,4.37,13.69,25.37] (true=[0,10,20,30])\n",
            "[ep  7150] loss=5.672638e-02 | all-ones pred≈25.948 (true=30.0) | max|err|≈14.456 | mean|err|≈1.188 | group means≈[0.71,4.76,14.20,25.95] (true=[0,10,20,30])\n",
            "[ep  7200] loss=2.963138e-02 | all-ones pred≈25.406 (true=30.0) | max|err|≈14.708 | mean|err|≈1.124 | group means≈[0.61,4.52,13.80,25.41] (true=[0,10,20,30])\n",
            "[ep  7250] loss=5.917196e-02 | all-ones pred≈26.463 (true=30.0) | max|err|≈14.032 | mean|err|≈1.177 | group means≈[0.72,5.04,14.69,26.46] (true=[0,10,20,30])\n",
            "[ep  7300] loss=2.842125e-02 | all-ones pred≈25.507 (true=30.0) | max|err|≈14.426 | mean|err|≈1.121 | group means≈[0.61,4.65,13.99,25.51] (true=[0,10,20,30])\n",
            "[ep  7350] loss=6.650458e-02 | all-ones pred≈26.271 (true=30.0) | max|err|≈14.187 | mean|err|≈1.199 | group means≈[0.74,4.94,14.49,26.27] (true=[0,10,20,30])\n",
            "[ep  7400] loss=2.738798e-02 | all-ones pred≈25.357 (true=30.0) | max|err|≈14.607 | mean|err|≈1.103 | group means≈[0.60,4.53,13.78,25.36] (true=[0,10,20,30])\n",
            "[ep  7450] loss=7.448045e-02 | all-ones pred≈25.771 (true=30.0) | max|err|≈14.338 | mean|err|≈1.157 | group means≈[0.68,4.69,14.08,25.77] (true=[0,10,20,30])\n",
            "[ep  7500] loss=2.783737e-02 | all-ones pred≈25.663 (true=30.0) | max|err|≈14.315 | mean|err|≈1.148 | group means≈[0.65,4.73,14.05,25.66] (true=[0,10,20,30])\n",
            "[ep  7550] loss=7.986905e-02 | all-ones pred≈26.161 (true=30.0) | max|err|≈14.034 | mean|err|≈1.166 | group means≈[0.70,4.89,14.38,26.16] (true=[0,10,20,30])\n",
            "[ep  7600] loss=2.826679e-02 | all-ones pred≈25.434 (true=30.0) | max|err|≈14.641 | mean|err|≈1.081 | group means≈[0.57,4.47,13.76,25.43] (true=[0,10,20,30])\n",
            "[ep  7650] loss=7.628595e-02 | all-ones pred≈26.233 (true=30.0) | max|err|≈13.880 | mean|err|≈1.175 | group means≈[0.72,5.03,14.61,26.23] (true=[0,10,20,30])\n",
            "[ep  7700] loss=5.079177e-02 | all-ones pred≈25.892 (true=30.0) | max|err|≈13.925 | mean|err|≈1.160 | group means≈[0.70,4.98,14.40,25.89] (true=[0,10,20,30])\n",
            "[ep  7750] loss=5.245259e-02 | all-ones pred≈25.987 (true=30.0) | max|err|≈13.919 | mean|err|≈1.140 | group means≈[0.67,4.90,14.38,25.99] (true=[0,10,20,30])\n",
            "[ep  7800] loss=5.115198e-02 | all-ones pred≈25.677 (true=30.0) | max|err|≈14.184 | mean|err|≈1.120 | group means≈[0.63,4.81,14.19,25.68] (true=[0,10,20,30])\n",
            "[ep  7850] loss=8.783215e-02 | all-ones pred≈25.823 (true=30.0) | max|err|≈13.999 | mean|err|≈1.177 | group means≈[0.72,4.91,14.22,25.82] (true=[0,10,20,30])\n",
            "[ep  7900] loss=3.156351e-02 | all-ones pred≈25.785 (true=30.0) | max|err|≈14.076 | mean|err|≈1.110 | group means≈[0.62,4.83,14.24,25.78] (true=[0,10,20,30])\n",
            "[ep  7950] loss=5.070178e-02 | all-ones pred≈26.309 (true=30.0) | max|err|≈13.640 | mean|err|≈1.173 | group means≈[0.72,5.06,14.57,26.31] (true=[0,10,20,30])\n",
            "[ep  8000] loss=3.823899e-02 | all-ones pred≈25.484 (true=30.0) | max|err|≈14.457 | mean|err|≈1.071 | group means≈[0.56,4.53,13.81,25.48] (true=[0,10,20,30])\n",
            "[ep  8050] loss=6.336702e-02 | all-ones pred≈26.028 (true=30.0) | max|err|≈13.630 | mean|err|≈1.165 | group means≈[0.71,5.02,14.42,26.03] (true=[0,10,20,30])\n",
            "[ep  8100] loss=9.021462e-02 | all-ones pred≈25.453 (true=30.0) | max|err|≈14.087 | mean|err|≈1.094 | group means≈[0.59,4.66,13.91,25.45] (true=[0,10,20,30])\n",
            "[ep  8150] loss=1.343431e-01 | all-ones pred≈26.147 (true=30.0) | max|err|≈13.364 | mean|err|≈1.185 | group means≈[0.75,5.23,14.63,26.15] (true=[0,10,20,30])\n",
            "[ep  8200] loss=8.236957e-02 | all-ones pred≈25.769 (true=30.0) | max|err|≈13.939 | mean|err|≈1.094 | group means≈[0.62,4.82,14.17,25.77] (true=[0,10,20,30])\n",
            "[ep  8250] loss=7.571246e-02 | all-ones pred≈26.084 (true=30.0) | max|err|≈14.011 | mean|err|≈1.087 | group means≈[0.55,4.75,14.24,26.08] (true=[0,10,20,30])\n",
            "[ep  8300] loss=3.773884e-02 | all-ones pred≈26.736 (true=30.0) | max|err|≈13.138 | mean|err|≈1.234 | group means≈[0.82,5.50,15.07,26.74] (true=[0,10,20,30])\n",
            "[ep  8350] loss=6.205436e-02 | all-ones pred≈26.155 (true=30.0) | max|err|≈13.553 | mean|err|≈1.074 | group means≈[0.58,4.94,14.42,26.15] (true=[0,10,20,30])\n",
            "[ep  8400] loss=4.425777e-02 | all-ones pred≈26.089 (true=30.0) | max|err|≈13.665 | mean|err|≈1.070 | group means≈[0.60,4.87,14.30,26.09] (true=[0,10,20,30])\n",
            "[ep  8450] loss=4.990415e-02 | all-ones pred≈26.141 (true=30.0) | max|err|≈13.614 | mean|err|≈1.058 | group means≈[0.57,4.96,14.45,26.14] (true=[0,10,20,30])\n",
            "[ep  8500] loss=4.614501e-02 | all-ones pred≈26.209 (true=30.0) | max|err|≈13.568 | mean|err|≈1.082 | group means≈[0.62,4.97,14.43,26.21] (true=[0,10,20,30])\n",
            "[ep  8550] loss=5.118010e-02 | all-ones pred≈25.581 (true=30.0) | max|err|≈13.804 | mean|err|≈1.004 | group means≈[0.48,4.61,13.90,25.58] (true=[0,10,20,30])\n",
            "[ep  8600] loss=5.464407e-02 | all-ones pred≈26.204 (true=30.0) | max|err|≈13.395 | mean|err|≈1.102 | group means≈[0.64,4.99,14.40,26.20] (true=[0,10,20,30])\n",
            "[ep  8650] loss=5.356503e-02 | all-ones pred≈25.679 (true=30.0) | max|err|≈13.568 | mean|err|≈1.083 | group means≈[0.62,4.92,14.11,25.68] (true=[0,10,20,30])\n",
            "[ep  8700] loss=5.943023e-02 | all-ones pred≈25.965 (true=30.0) | max|err|≈13.684 | mean|err|≈1.059 | group means≈[0.58,4.75,14.14,25.96] (true=[0,10,20,30])\n",
            "[ep  8750] loss=5.140506e-02 | all-ones pred≈25.863 (true=30.0) | max|err|≈13.336 | mean|err|≈1.056 | group means≈[0.58,4.93,14.22,25.86] (true=[0,10,20,30])\n",
            "[ep  8800] loss=5.363727e-02 | all-ones pred≈26.379 (true=30.0) | max|err|≈13.104 | mean|err|≈1.085 | group means≈[0.64,5.14,14.62,26.38] (true=[0,10,20,30])\n",
            "[ep  8850] loss=4.991614e-02 | all-ones pred≈25.560 (true=30.0) | max|err|≈13.477 | mean|err|≈1.042 | group means≈[0.55,4.78,13.95,25.56] (true=[0,10,20,30])\n",
            "[ep  8900] loss=5.281399e-02 | all-ones pred≈26.488 (true=30.0) | max|err|≈12.969 | mean|err|≈1.106 | group means≈[0.67,5.23,14.66,26.49] (true=[0,10,20,30])\n",
            "[ep  8950] loss=5.104935e-02 | all-ones pred≈25.741 (true=30.0) | max|err|≈13.460 | mean|err|≈1.017 | group means≈[0.54,4.77,14.04,25.74] (true=[0,10,20,30])\n",
            "[ep  9000] loss=5.106133e-02 | all-ones pred≈26.594 (true=30.0) | max|err|≈12.581 | mean|err|≈1.118 | group means≈[0.69,5.37,14.82,26.59] (true=[0,10,20,30])\n",
            "[ep  9050] loss=4.816992e-02 | all-ones pred≈26.054 (true=30.0) | max|err|≈12.762 | mean|err|≈1.028 | group means≈[0.57,5.07,14.42,26.05] (true=[0,10,20,30])\n",
            "[ep  9100] loss=3.178997e-02 | all-ones pred≈26.011 (true=30.0) | max|err|≈12.881 | mean|err|≈1.067 | group means≈[0.62,5.06,14.29,26.01] (true=[0,10,20,30])\n",
            "[ep  9150] loss=4.559808e-02 | all-ones pred≈25.542 (true=30.0) | max|err|≈13.358 | mean|err|≈0.996 | group means≈[0.50,4.67,13.80,25.54] (true=[0,10,20,30])\n",
            "[ep  9200] loss=3.130135e-02 | all-ones pred≈26.024 (true=30.0) | max|err|≈12.851 | mean|err|≈1.019 | group means≈[0.56,4.99,14.29,26.02] (true=[0,10,20,30])\n",
            "[ep  9250] loss=4.430889e-02 | all-ones pred≈25.564 (true=30.0) | max|err|≈13.204 | mean|err|≈1.010 | group means≈[0.54,4.77,13.91,25.56] (true=[0,10,20,30])\n",
            "[ep  9300] loss=3.600327e-02 | all-ones pred≈26.251 (true=30.0) | max|err|≈12.334 | mean|err|≈1.098 | group means≈[0.68,5.39,14.64,26.25] (true=[0,10,20,30])\n",
            "[ep  9350] loss=7.872450e-02 | all-ones pred≈26.030 (true=30.0) | max|err|≈12.686 | mean|err|≈1.000 | group means≈[0.53,5.03,14.31,26.03] (true=[0,10,20,30])\n",
            "[ep  9400] loss=1.116844e-01 | all-ones pred≈26.325 (true=30.0) | max|err|≈12.337 | mean|err|≈1.021 | group means≈[0.58,5.26,14.57,26.33] (true=[0,10,20,30])\n",
            "[ep  9450] loss=2.227272e-02 | all-ones pred≈26.056 (true=30.0) | max|err|≈12.630 | mean|err|≈0.992 | group means≈[0.52,5.00,14.27,26.06] (true=[0,10,20,30])\n",
            "[ep  9500] loss=4.568028e-02 | all-ones pred≈26.210 (true=30.0) | max|err|≈12.493 | mean|err|≈1.005 | group means≈[0.55,5.08,14.39,26.21] (true=[0,10,20,30])\n",
            "[ep  9550] loss=8.614242e-03 | all-ones pred≈26.047 (true=30.0) | max|err|≈12.218 | mean|err|≈1.026 | group means≈[0.57,5.32,14.47,26.05] (true=[0,10,20,30])\n",
            "[ep  9600] loss=6.244222e-02 | all-ones pred≈26.335 (true=30.0) | max|err|≈12.218 | mean|err|≈0.982 | group means≈[0.54,5.19,14.47,26.33] (true=[0,10,20,30])\n",
            "[ep  9650] loss=3.455533e-02 | all-ones pred≈26.255 (true=30.0) | max|err|≈12.059 | mean|err|≈1.015 | group means≈[0.57,5.37,14.56,26.25] (true=[0,10,20,30])\n",
            "[ep  9700] loss=1.975887e-02 | all-ones pred≈26.659 (true=30.0) | max|err|≈11.956 | mean|err|≈1.072 | group means≈[0.66,5.56,14.75,26.66] (true=[0,10,20,30])\n",
            "[ep  9750] loss=1.666041e-02 | all-ones pred≈26.400 (true=30.0) | max|err|≈11.805 | mean|err|≈1.030 | group means≈[0.60,5.52,14.74,26.40] (true=[0,10,20,30])\n",
            "[ep  9800] loss=6.180331e-02 | all-ones pred≈26.973 (true=30.0) | max|err|≈11.677 | mean|err|≈1.053 | group means≈[0.64,5.56,14.96,26.97] (true=[0,10,20,30])\n",
            "[ep  9850] loss=2.083475e-02 | all-ones pred≈26.316 (true=30.0) | max|err|≈11.854 | mean|err|≈1.009 | group means≈[0.56,5.49,14.63,26.32] (true=[0,10,20,30])\n",
            "[ep  9900] loss=3.345891e-02 | all-ones pred≈26.452 (true=30.0) | max|err|≈11.700 | mean|err|≈1.027 | group means≈[0.61,5.47,14.68,26.45] (true=[0,10,20,30])\n",
            "[ep  9950] loss=1.857775e-02 | all-ones pred≈25.857 (true=30.0) | max|err|≈12.179 | mean|err|≈0.944 | group means≈[0.46,5.07,14.11,25.86] (true=[0,10,20,30])\n",
            "[ep 10000] loss=4.156368e-02 | all-ones pred≈26.439 (true=30.0) | max|err|≈11.780 | mean|err|≈0.983 | group means≈[0.55,5.32,14.53,26.44] (true=[0,10,20,30])\n",
            "[ep 10050] loss=2.952267e-02 | all-ones pred≈25.693 (true=30.0) | max|err|≈12.465 | mean|err|≈0.912 | group means≈[0.42,4.79,13.82,25.69] (true=[0,10,20,30])\n",
            "[ep 10100] loss=4.220055e-02 | all-ones pred≈26.553 (true=30.0) | max|err|≈11.687 | mean|err|≈0.951 | group means≈[0.52,5.35,14.60,26.55] (true=[0,10,20,30])\n",
            "[ep 10150] loss=4.890373e-02 | all-ones pred≈25.954 (true=30.0) | max|err|≈11.849 | mean|err|≈0.945 | group means≈[0.51,5.24,14.19,25.95] (true=[0,10,20,30])\n",
            "[ep 10200] loss=1.340390e-01 | all-ones pred≈26.755 (true=30.0) | max|err|≈11.412 | mean|err|≈1.026 | group means≈[0.62,5.64,14.85,26.76] (true=[0,10,20,30])\n",
            "[ep 10250] loss=8.299455e-02 | all-ones pred≈27.055 (true=30.0) | max|err|≈11.430 | mean|err|≈0.988 | group means≈[0.59,5.89,15.09,27.05] (true=[0,10,20,30])\n",
            "[ep 10300] loss=9.678811e-02 | all-ones pred≈26.483 (true=30.0) | max|err|≈11.457 | mean|err|≈0.905 | group means≈[0.47,5.39,14.53,26.48] (true=[0,10,20,30])\n",
            "[ep 10350] loss=1.586907e-01 | all-ones pred≈26.730 (true=30.0) | max|err|≈11.273 | mean|err|≈0.941 | group means≈[0.52,5.48,14.61,26.73] (true=[0,10,20,30])\n",
            "[ep 10400] loss=3.579865e-02 | all-ones pred≈26.690 (true=30.0) | max|err|≈11.290 | mean|err|≈0.929 | group means≈[0.50,5.45,14.57,26.69] (true=[0,10,20,30])\n",
            "[ep 10450] loss=2.219427e-02 | all-ones pred≈26.208 (true=30.0) | max|err|≈11.612 | mean|err|≈0.891 | group means≈[0.41,5.13,14.15,26.21] (true=[0,10,20,30])\n",
            "[ep 10500] loss=4.943563e-02 | all-ones pred≈26.746 (true=30.0) | max|err|≈11.274 | mean|err|≈0.919 | group means≈[0.50,5.50,14.61,26.75] (true=[0,10,20,30])\n",
            "[ep 10550] loss=1.866355e-02 | all-ones pred≈26.503 (true=30.0) | max|err|≈10.936 | mean|err|≈0.911 | group means≈[0.49,5.67,14.58,26.50] (true=[0,10,20,30])\n",
            "[ep 10600] loss=6.161939e-02 | all-ones pred≈26.993 (true=30.0) | max|err|≈11.123 | mean|err|≈0.959 | group means≈[0.56,5.79,14.90,26.99] (true=[0,10,20,30])\n",
            "[ep 10650] loss=1.821470e-02 | all-ones pred≈26.590 (true=30.0) | max|err|≈10.848 | mean|err|≈0.889 | group means≈[0.46,5.57,14.58,26.59] (true=[0,10,20,30])\n",
            "[ep 10700] loss=5.985218e-02 | all-ones pred≈26.712 (true=30.0) | max|err|≈11.010 | mean|err|≈0.936 | group means≈[0.51,5.44,14.53,26.71] (true=[0,10,20,30])\n",
            "[ep 10750] loss=1.847426e-02 | all-ones pred≈26.204 (true=30.0) | max|err|≈10.789 | mean|err|≈0.895 | group means≈[0.47,5.56,14.36,26.20] (true=[0,10,20,30])\n",
            "[ep 10800] loss=5.722302e-02 | all-ones pred≈26.959 (true=30.0) | max|err|≈10.895 | mean|err|≈0.916 | group means≈[0.51,5.72,14.77,26.96] (true=[0,10,20,30])\n",
            "[ep 10850] loss=1.812075e-02 | all-ones pred≈26.291 (true=30.0) | max|err|≈10.972 | mean|err|≈0.848 | group means≈[0.39,5.33,14.28,26.29] (true=[0,10,20,30])\n",
            "[ep 10900] loss=5.598961e-02 | all-ones pred≈26.756 (true=30.0) | max|err|≈10.734 | mean|err|≈0.864 | group means≈[0.44,5.51,14.58,26.76] (true=[0,10,20,30])\n",
            "[ep 10950] loss=1.858512e-02 | all-ones pred≈26.513 (true=30.0) | max|err|≈10.745 | mean|err|≈0.900 | group means≈[0.48,5.70,14.58,26.51] (true=[0,10,20,30])\n",
            "[ep 11000] loss=5.234649e-02 | all-ones pred≈27.491 (true=30.0) | max|err|≈11.596 | mean|err|≈1.013 | group means≈[0.68,6.49,15.47,27.49] (true=[0,10,20,30])\n",
            "[ep 11050] loss=1.824912e-02 | all-ones pred≈25.968 (true=30.0) | max|err|≈11.140 | mean|err|≈0.830 | group means≈[0.35,5.14,13.98,25.97] (true=[0,10,20,30])\n",
            "[ep 11100] loss=5.277286e-02 | all-ones pred≈27.001 (true=30.0) | max|err|≈10.909 | mean|err|≈0.892 | group means≈[0.50,5.85,14.86,27.00] (true=[0,10,20,30])\n",
            "[ep 11150] loss=1.848198e-02 | all-ones pred≈26.632 (true=30.0) | max|err|≈10.740 | mean|err|≈0.886 | group means≈[0.48,5.79,14.64,26.63] (true=[0,10,20,30])\n",
            "[ep 11200] loss=5.274432e-02 | all-ones pred≈26.664 (true=30.0) | max|err|≈10.602 | mean|err|≈0.870 | group means≈[0.47,5.69,14.59,26.66] (true=[0,10,20,30])\n",
            "[ep 11250] loss=2.489218e-02 | all-ones pred≈26.679 (true=30.0) | max|err|≈10.759 | mean|err|≈0.872 | group means≈[0.46,5.89,14.70,26.68] (true=[0,10,20,30])\n",
            "[ep 11300] loss=5.645060e-02 | all-ones pred≈26.737 (true=30.0) | max|err|≈10.523 | mean|err|≈0.837 | group means≈[0.42,5.54,14.55,26.74] (true=[0,10,20,30])\n",
            "[ep 11350] loss=2.716828e-02 | all-ones pred≈26.267 (true=30.0) | max|err|≈10.538 | mean|err|≈0.851 | group means≈[0.40,5.46,14.23,26.27] (true=[0,10,20,30])\n",
            "[ep 11400] loss=3.503885e-02 | all-ones pred≈26.982 (true=30.0) | max|err|≈10.779 | mean|err|≈0.853 | group means≈[0.47,5.94,14.83,26.98] (true=[0,10,20,30])\n",
            "[ep 11450] loss=3.241323e-02 | all-ones pred≈26.409 (true=30.0) | max|err|≈10.265 | mean|err|≈0.847 | group means≈[0.43,5.59,14.35,26.41] (true=[0,10,20,30])\n",
            "[ep 11500] loss=6.984517e-02 | all-ones pred≈27.005 (true=30.0) | max|err|≈10.731 | mean|err|≈0.822 | group means≈[0.42,5.82,14.82,27.01] (true=[0,10,20,30])\n",
            "[ep 11550] loss=9.690493e-02 | all-ones pred≈26.776 (true=30.0) | max|err|≈10.494 | mean|err|≈0.830 | group means≈[0.41,5.75,14.57,26.78] (true=[0,10,20,30])\n",
            "[ep 11600] loss=6.290320e-02 | all-ones pred≈26.961 (true=30.0) | max|err|≈10.758 | mean|err|≈0.853 | group means≈[0.48,6.06,14.84,26.96] (true=[0,10,20,30])\n",
            "[ep 11650] loss=5.020437e-02 | all-ones pred≈27.066 (true=30.0) | max|err|≈10.793 | mean|err|≈0.842 | group means≈[0.47,6.06,14.88,27.07] (true=[0,10,20,30])\n",
            "[ep 11700] loss=6.564843e-03 | all-ones pred≈26.098 (true=30.0) | max|err|≈10.143 | mean|err|≈0.773 | group means≈[0.31,5.41,14.02,26.10] (true=[0,10,20,30])\n",
            "[ep 11750] loss=3.487048e-02 | all-ones pred≈26.884 (true=30.0) | max|err|≈10.536 | mean|err|≈0.804 | group means≈[0.40,5.78,14.63,26.88] (true=[0,10,20,30])\n",
            "[ep 11800] loss=8.027461e-03 | all-ones pred≈26.333 (true=30.0) | max|err|≈10.157 | mean|err|≈0.788 | group means≈[0.36,5.66,14.31,26.33] (true=[0,10,20,30])\n",
            "[ep 11850] loss=4.202153e-02 | all-ones pred≈27.673 (true=30.0) | max|err|≈11.335 | mean|err|≈0.880 | group means≈[0.55,6.65,15.53,27.67] (true=[0,10,20,30])\n",
            "[ep 11900] loss=4.460724e-02 | all-ones pred≈26.826 (true=30.0) | max|err|≈10.717 | mean|err|≈0.865 | group means≈[0.49,6.25,14.85,26.83] (true=[0,10,20,30])\n",
            "[ep 11950] loss=3.910773e-02 | all-ones pred≈27.099 (true=30.0) | max|err|≈10.701 | mean|err|≈0.842 | group means≈[0.48,6.17,14.92,27.10] (true=[0,10,20,30])\n",
            "[ep 12000] loss=9.034011e-03 | all-ones pred≈26.680 (true=30.0) | max|err|≈10.336 | mean|err|≈0.811 | group means≈[0.38,5.83,14.55,26.68] (true=[0,10,20,30])\n",
            "[ep 12050] loss=2.200597e-02 | all-ones pred≈27.483 (true=30.0) | max|err|≈11.040 | mean|err|≈0.852 | group means≈[0.52,6.53,15.32,27.48] (true=[0,10,20,30])\n",
            "[ep 12100] loss=1.951776e-02 | all-ones pred≈26.960 (true=30.0) | max|err|≈10.599 | mean|err|≈0.814 | group means≈[0.44,6.15,14.85,26.96] (true=[0,10,20,30])\n",
            "[ep 12150] loss=4.815792e-02 | all-ones pred≈27.105 (true=30.0) | max|err|≈10.599 | mean|err|≈0.811 | group means≈[0.44,6.06,14.87,27.10] (true=[0,10,20,30])\n",
            "[ep 12200] loss=1.131922e-02 | all-ones pred≈26.908 (true=30.0) | max|err|≈10.555 | mean|err|≈0.813 | group means≈[0.42,6.18,14.86,26.91] (true=[0,10,20,30])\n",
            "[ep 12250] loss=3.785794e-02 | all-ones pred≈27.110 (true=30.0) | max|err|≈10.558 | mean|err|≈0.813 | group means≈[0.44,6.11,14.88,27.11] (true=[0,10,20,30])\n",
            "[ep 12300] loss=2.093774e-02 | all-ones pred≈26.687 (true=30.0) | max|err|≈10.343 | mean|err|≈0.775 | group means≈[0.38,6.08,14.66,26.69] (true=[0,10,20,30])\n",
            "[ep 12350] loss=3.979942e-02 | all-ones pred≈27.529 (true=30.0) | max|err|≈10.902 | mean|err|≈0.838 | group means≈[0.50,6.54,15.32,27.53] (true=[0,10,20,30])\n",
            "[ep 12400] loss=2.453188e-02 | all-ones pred≈26.689 (true=30.0) | max|err|≈10.160 | mean|err|≈0.771 | group means≈[0.35,5.88,14.56,26.69] (true=[0,10,20,30])\n",
            "[ep 12450] loss=6.267551e-02 | all-ones pred≈26.948 (true=30.0) | max|err|≈10.187 | mean|err|≈0.756 | group means≈[0.36,5.83,14.63,26.95] (true=[0,10,20,30])\n",
            "[ep 12500] loss=1.197805e-01 | all-ones pred≈27.117 (true=30.0) | max|err|≈10.509 | mean|err|≈0.781 | group means≈[0.41,6.17,14.88,27.12] (true=[0,10,20,30])\n",
            "[ep 12550] loss=6.718703e-02 | all-ones pred≈26.754 (true=30.0) | max|err|≈10.177 | mean|err|≈0.760 | group means≈[0.38,5.96,14.58,26.75] (true=[0,10,20,30])\n",
            "[ep 12600] loss=3.628923e-02 | all-ones pred≈26.844 (true=30.0) | max|err|≈10.201 | mean|err|≈0.765 | group means≈[0.39,6.04,14.62,26.84] (true=[0,10,20,30])\n",
            "[ep 12650] loss=4.058673e-02 | all-ones pred≈27.121 (true=30.0) | max|err|≈10.547 | mean|err|≈0.762 | group means≈[0.41,6.45,14.94,27.12] (true=[0,10,20,30])\n",
            "[ep 12700] loss=4.852299e-02 | all-ones pred≈26.968 (true=30.0) | max|err|≈10.408 | mean|err|≈0.792 | group means≈[0.43,6.18,14.76,26.97] (true=[0,10,20,30])\n",
            "[ep 12750] loss=3.257729e-02 | all-ones pred≈26.546 (true=30.0) | max|err|≈9.994 | mean|err|≈0.734 | group means≈[0.33,5.92,14.43,26.55] (true=[0,10,20,30])\n",
            "[ep 12800] loss=1.310185e-02 | all-ones pred≈27.339 (true=30.0) | max|err|≈10.688 | mean|err|≈0.790 | group means≈[0.45,6.51,15.14,27.34] (true=[0,10,20,30])\n",
            "[ep 12850] loss=3.409720e-02 | all-ones pred≈27.079 (true=30.0) | max|err|≈10.497 | mean|err|≈0.779 | group means≈[0.42,6.40,15.01,27.08] (true=[0,10,20,30])\n",
            "[ep 12900] loss=1.402295e-02 | all-ones pred≈27.887 (true=30.0) | max|err|≈11.143 | mean|err|≈0.830 | group means≈[0.53,6.94,15.69,27.89] (true=[0,10,20,30])\n",
            "[ep 12950] loss=3.559729e-02 | all-ones pred≈26.751 (true=30.0) | max|err|≈10.103 | mean|err|≈0.753 | group means≈[0.37,6.18,14.67,26.75] (true=[0,10,20,30])\n",
            "[ep 13000] loss=1.742095e-02 | all-ones pred≈27.469 (true=30.0) | max|err|≈10.753 | mean|err|≈0.796 | group means≈[0.47,6.65,15.31,27.47] (true=[0,10,20,30])\n",
            "[ep 13050] loss=1.375204e-02 | all-ones pred≈26.692 (true=30.0) | max|err|≈9.992 | mean|err|≈0.730 | group means≈[0.33,6.02,14.55,26.69] (true=[0,10,20,30])\n",
            "[ep 13100] loss=6.033728e-02 | all-ones pred≈27.275 (true=30.0) | max|err|≈10.499 | mean|err|≈0.779 | group means≈[0.44,6.51,15.11,27.28] (true=[0,10,20,30])\n",
            "[ep 13150] loss=1.300655e-02 | all-ones pred≈26.929 (true=30.0) | max|err|≈10.188 | mean|err|≈0.733 | group means≈[0.34,6.16,14.75,26.93] (true=[0,10,20,30])\n",
            "[ep 13200] loss=2.758827e-02 | all-ones pred≈27.295 (true=30.0) | max|err|≈10.562 | mean|err|≈0.782 | group means≈[0.45,6.62,15.17,27.29] (true=[0,10,20,30])\n",
            "[ep 13250] loss=1.121941e-02 | all-ones pred≈26.692 (true=30.0) | max|err|≈9.996 | mean|err|≈0.718 | group means≈[0.33,6.16,14.65,26.69] (true=[0,10,20,30])\n",
            "[ep 13300] loss=4.909761e-02 | all-ones pred≈27.080 (true=30.0) | max|err|≈10.176 | mean|err|≈0.731 | group means≈[0.37,6.29,14.84,27.08] (true=[0,10,20,30])\n",
            "[ep 13350] loss=2.350420e-02 | all-ones pred≈26.879 (true=30.0) | max|err|≈10.181 | mean|err|≈0.724 | group means≈[0.35,6.32,14.85,26.88] (true=[0,10,20,30])\n",
            "[ep 13400] loss=3.247566e-02 | all-ones pred≈27.337 (true=30.0) | max|err|≈10.456 | mean|err|≈0.751 | group means≈[0.42,6.58,15.14,27.34] (true=[0,10,20,30])\n",
            "[ep 13450] loss=3.175574e-02 | all-ones pred≈26.850 (true=30.0) | max|err|≈10.009 | mean|err|≈0.708 | group means≈[0.33,6.19,14.75,26.85] (true=[0,10,20,30])\n",
            "[ep 13500] loss=6.395406e-02 | all-ones pred≈27.342 (true=30.0) | max|err|≈10.407 | mean|err|≈0.743 | group means≈[0.40,6.52,15.14,27.34] (true=[0,10,20,30])\n",
            "[ep 13550] loss=7.395882e-02 | all-ones pred≈27.495 (true=30.0) | max|err|≈10.611 | mean|err|≈0.743 | group means≈[0.42,6.71,15.34,27.49] (true=[0,10,20,30])\n",
            "[ep 13600] loss=3.742675e-01 | all-ones pred≈27.373 (true=30.0) | max|err|≈10.320 | mean|err|≈0.796 | group means≈[0.46,6.51,15.05,27.37] (true=[0,10,20,30])\n",
            "[ep 13650] loss=2.091212e-02 | all-ones pred≈27.687 (true=30.0) | max|err|≈10.760 | mean|err|≈0.787 | group means≈[0.49,7.00,15.46,27.69] (true=[0,10,20,30])\n",
            "[ep 13700] loss=1.290805e-02 | all-ones pred≈27.009 (true=30.0) | max|err|≈10.240 | mean|err|≈0.697 | group means≈[0.35,6.51,14.89,27.01] (true=[0,10,20,30])\n",
            "[ep 13750] loss=3.915904e-02 | all-ones pred≈27.542 (true=30.0) | max|err|≈10.678 | mean|err|≈0.715 | group means≈[0.37,6.82,15.37,27.54] (true=[0,10,20,30])\n",
            "[ep 13800] loss=1.459099e-02 | all-ones pred≈26.944 (true=30.0) | max|err|≈10.082 | mean|err|≈0.717 | group means≈[0.38,6.45,14.87,26.94] (true=[0,10,20,30])\n",
            "[ep 13850] loss=3.472990e-02 | all-ones pred≈27.538 (true=30.0) | max|err|≈10.488 | mean|err|≈0.756 | group means≈[0.44,6.78,15.33,27.54] (true=[0,10,20,30])\n",
            "[ep 13900] loss=8.835526e-03 | all-ones pred≈27.082 (true=30.0) | max|err|≈10.167 | mean|err|≈0.703 | group means≈[0.36,6.54,14.98,27.08] (true=[0,10,20,30])\n",
            "[ep 13950] loss=3.359013e-02 | all-ones pred≈27.374 (true=30.0) | max|err|≈10.389 | mean|err|≈0.717 | group means≈[0.39,6.65,15.24,27.37] (true=[0,10,20,30])\n",
            "[ep 14000] loss=1.218671e-02 | all-ones pred≈27.057 (true=30.0) | max|err|≈10.153 | mean|err|≈0.723 | group means≈[0.37,6.45,14.94,27.06] (true=[0,10,20,30])\n",
            "[ep 14050] loss=4.741871e-02 | all-ones pred≈27.259 (true=30.0) | max|err|≈10.206 | mean|err|≈0.694 | group means≈[0.35,6.49,15.06,27.26] (true=[0,10,20,30])\n",
            "[ep 14100] loss=1.347480e-02 | all-ones pred≈26.857 (true=30.0) | max|err|≈9.976 | mean|err|≈0.688 | group means≈[0.33,6.33,14.78,26.86] (true=[0,10,20,30])\n",
            "[ep 14150] loss=5.340075e-02 | all-ones pred≈27.108 (true=30.0) | max|err|≈9.957 | mean|err|≈0.684 | group means≈[0.33,6.36,14.86,27.11] (true=[0,10,20,30])\n",
            "[ep 14200] loss=1.511110e-02 | all-ones pred≈26.961 (true=30.0) | max|err|≈10.129 | mean|err|≈0.665 | group means≈[0.30,6.54,14.98,26.96] (true=[0,10,20,30])\n",
            "[ep 14250] loss=5.098680e-02 | all-ones pred≈27.431 (true=30.0) | max|err|≈10.397 | mean|err|≈0.710 | group means≈[0.39,6.79,15.30,27.43] (true=[0,10,20,30])\n",
            "[ep 14300] loss=1.639989e-02 | all-ones pred≈26.990 (true=30.0) | max|err|≈10.053 | mean|err|≈0.667 | group means≈[0.32,6.56,14.96,26.99] (true=[0,10,20,30])\n",
            "[ep 14350] loss=4.784900e-02 | all-ones pred≈27.530 (true=30.0) | max|err|≈10.385 | mean|err|≈0.721 | group means≈[0.40,6.79,15.32,27.53] (true=[0,10,20,30])\n",
            "[ep 14400] loss=1.734704e-02 | all-ones pred≈27.283 (true=30.0) | max|err|≈10.270 | mean|err|≈0.685 | group means≈[0.36,6.75,15.20,27.28] (true=[0,10,20,30])\n",
            "[ep 14450] loss=5.040117e-02 | all-ones pred≈27.320 (true=30.0) | max|err|≈10.084 | mean|err|≈0.699 | group means≈[0.37,6.67,15.16,27.32] (true=[0,10,20,30])\n",
            "[ep 14500] loss=2.848226e-02 | all-ones pred≈27.130 (true=30.0) | max|err|≈10.225 | mean|err|≈0.669 | group means≈[0.35,6.78,15.11,27.13] (true=[0,10,20,30])\n",
            "[ep 14550] loss=3.398495e-02 | all-ones pred≈27.516 (true=30.0) | max|err|≈10.428 | mean|err|≈0.699 | group means≈[0.39,6.96,15.43,27.52] (true=[0,10,20,30])\n",
            "[ep 14600] loss=3.167059e-02 | all-ones pred≈27.391 (true=30.0) | max|err|≈10.439 | mean|err|≈0.709 | group means≈[0.40,6.99,15.43,27.39] (true=[0,10,20,30])\n",
            "[ep 14650] loss=3.914264e-02 | all-ones pred≈27.681 (true=30.0) | max|err|≈10.492 | mean|err|≈0.688 | group means≈[0.39,7.03,15.51,27.68] (true=[0,10,20,30])\n",
            "[ep 14700] loss=2.891165e-02 | all-ones pred≈27.119 (true=30.0) | max|err|≈10.125 | mean|err|≈0.720 | group means≈[0.38,6.71,15.12,27.12] (true=[0,10,20,30])\n",
            "[ep 14750] loss=3.979365e-02 | all-ones pred≈27.642 (true=30.0) | max|err|≈10.415 | mean|err|≈0.712 | group means≈[0.41,7.04,15.54,27.64] (true=[0,10,20,30])\n",
            "[ep 14800] loss=2.435279e-02 | all-ones pred≈26.992 (true=30.0) | max|err|≈9.809 | mean|err|≈0.652 | group means≈[0.30,6.47,14.94,26.99] (true=[0,10,20,30])\n",
            "[ep 14850] loss=3.781449e-02 | all-ones pred≈27.612 (true=30.0) | max|err|≈10.450 | mean|err|≈0.749 | group means≈[0.46,7.12,15.57,27.61] (true=[0,10,20,30])\n",
            "[ep 14900] loss=3.640835e-02 | all-ones pred≈26.983 (true=30.0) | max|err|≈10.011 | mean|err|≈0.692 | group means≈[0.37,6.74,15.08,26.98] (true=[0,10,20,30])\n",
            "[ep 14950] loss=9.313097e-02 | all-ones pred≈27.309 (true=30.0) | max|err|≈9.932 | mean|err|≈0.643 | group means≈[0.31,6.60,15.13,27.31] (true=[0,10,20,30])\n",
            "[ep 15000] loss=6.419477e-02 | all-ones pred≈27.481 (true=30.0) | max|err|≈10.268 | mean|err|≈0.668 | group means≈[0.36,6.92,15.38,27.48] (true=[0,10,20,30])\n",
            "[ep 15050] loss=2.170962e-02 | all-ones pred≈27.069 (true=30.0) | max|err|≈9.806 | mean|err|≈0.643 | group means≈[0.30,6.58,14.93,27.07] (true=[0,10,20,30])\n",
            "[ep 15100] loss=2.523576e-02 | all-ones pred≈26.943 (true=30.0) | max|err|≈9.795 | mean|err|≈0.644 | group means≈[0.30,6.59,14.90,26.94] (true=[0,10,20,30])\n",
            "[ep 15150] loss=2.592463e-02 | all-ones pred≈27.521 (true=30.0) | max|err|≈10.311 | mean|err|≈0.685 | group means≈[0.38,7.00,15.46,27.52] (true=[0,10,20,30])\n",
            "[ep 15200] loss=1.950807e-02 | all-ones pred≈27.239 (true=30.0) | max|err|≈9.972 | mean|err|≈0.645 | group means≈[0.30,6.61,15.17,27.24] (true=[0,10,20,30])\n",
            "[ep 15250] loss=1.354972e-02 | all-ones pred≈27.619 (true=30.0) | max|err|≈10.351 | mean|err|≈0.664 | group means≈[0.36,7.07,15.60,27.62] (true=[0,10,20,30])\n",
            "[ep 15300] loss=3.827466e-02 | all-ones pred≈26.962 (true=30.0) | max|err|≈9.734 | mean|err|≈0.642 | group means≈[0.29,6.57,14.96,26.96] (true=[0,10,20,30])\n",
            "[ep 15350] loss=9.256925e-03 | all-ones pred≈27.471 (true=30.0) | max|err|≈10.064 | mean|err|≈0.652 | group means≈[0.34,6.88,15.36,27.47] (true=[0,10,20,30])\n",
            "[ep 15400] loss=3.899724e-02 | all-ones pred≈27.194 (true=30.0) | max|err|≈9.925 | mean|err|≈0.668 | group means≈[0.35,6.81,15.18,27.19] (true=[0,10,20,30])\n",
            "[ep 15450] loss=1.742542e-02 | all-ones pred≈27.282 (true=30.0) | max|err|≈10.009 | mean|err|≈0.638 | group means≈[0.32,6.82,15.23,27.28] (true=[0,10,20,30])\n",
            "[ep 15500] loss=1.928655e-02 | all-ones pred≈27.162 (true=30.0) | max|err|≈9.899 | mean|err|≈0.651 | group means≈[0.32,6.77,15.12,27.16] (true=[0,10,20,30])\n",
            "[ep 15550] loss=3.117117e-02 | all-ones pred≈27.265 (true=30.0) | max|err|≈9.951 | mean|err|≈0.649 | group means≈[0.33,6.77,15.20,27.26] (true=[0,10,20,30])\n",
            "[ep 15600] loss=1.459396e-02 | all-ones pred≈27.244 (true=30.0) | max|err|≈10.096 | mean|err|≈0.641 | group means≈[0.34,7.09,15.40,27.24] (true=[0,10,20,30])\n",
            "[ep 15650] loss=3.295321e-02 | all-ones pred≈27.569 (true=30.0) | max|err|≈10.210 | mean|err|≈0.645 | group means≈[0.34,7.05,15.51,27.57] (true=[0,10,20,30])\n",
            "[ep 15700] loss=2.132310e-02 | all-ones pred≈27.251 (true=30.0) | max|err|≈10.015 | mean|err|≈0.646 | group means≈[0.33,6.99,15.34,27.25] (true=[0,10,20,30])\n",
            "[ep 15750] loss=2.618795e-02 | all-ones pred≈27.884 (true=30.0) | max|err|≈10.555 | mean|err|≈0.682 | group means≈[0.42,7.47,15.92,27.88] (true=[0,10,20,30])\n",
            "[ep 15800] loss=2.093886e-02 | all-ones pred≈26.913 (true=30.0) | max|err|≈9.647 | mean|err|≈0.627 | group means≈[0.28,6.61,15.01,26.91] (true=[0,10,20,30])\n",
            "[ep 15850] loss=1.773809e-02 | all-ones pred≈27.986 (true=30.0) | max|err|≈10.663 | mean|err|≈0.687 | group means≈[0.43,7.54,16.04,27.99] (true=[0,10,20,30])\n",
            "[ep 15900] loss=2.385018e-02 | all-ones pred≈26.885 (true=30.0) | max|err|≈9.524 | mean|err|≈0.634 | group means≈[0.29,6.48,14.91,26.89] (true=[0,10,20,30])\n",
            "[ep 15950] loss=2.092099e-02 | all-ones pred≈27.503 (true=30.0) | max|err|≈10.032 | mean|err|≈0.642 | group means≈[0.34,7.01,15.44,27.50] (true=[0,10,20,30])\n",
            "[ep 16000] loss=2.466133e-02 | all-ones pred≈27.051 (true=30.0) | max|err|≈9.675 | mean|err|≈0.622 | group means≈[0.27,6.67,15.07,27.05] (true=[0,10,20,30])\n",
            "[ep 16050] loss=1.763136e-02 | all-ones pred≈27.598 (true=30.0) | max|err|≈10.253 | mean|err|≈0.656 | group means≈[0.36,7.11,15.62,27.60] (true=[0,10,20,30])\n",
            "[ep 16100] loss=2.797155e-02 | all-ones pred≈27.123 (true=30.0) | max|err|≈9.858 | mean|err|≈0.641 | group means≈[0.32,6.95,15.26,27.12] (true=[0,10,20,30])\n",
            "[ep 16150] loss=1.997274e-02 | all-ones pred≈27.386 (true=30.0) | max|err|≈9.866 | mean|err|≈0.642 | group means≈[0.32,6.79,15.35,27.39] (true=[0,10,20,30])\n",
            "[ep 16200] loss=3.235998e-02 | all-ones pred≈27.116 (true=30.0) | max|err|≈9.922 | mean|err|≈0.637 | group means≈[0.33,7.09,15.32,27.12] (true=[0,10,20,30])\n",
            "[ep 16250] loss=2.354855e-02 | all-ones pred≈27.264 (true=30.0) | max|err|≈9.709 | mean|err|≈0.610 | group means≈[0.28,6.67,15.23,27.26] (true=[0,10,20,30])\n",
            "[ep 16300] loss=2.810714e-02 | all-ones pred≈27.069 (true=30.0) | max|err|≈9.715 | mean|err|≈0.597 | group means≈[0.26,6.86,15.17,27.07] (true=[0,10,20,30])\n",
            "[ep 16350] loss=2.014850e-02 | all-ones pred≈27.657 (true=30.0) | max|err|≈10.025 | mean|err|≈0.620 | group means≈[0.32,7.05,15.57,27.66] (true=[0,10,20,30])\n",
            "[ep 16400] loss=2.711790e-02 | all-ones pred≈27.156 (true=30.0) | max|err|≈9.699 | mean|err|≈0.620 | group means≈[0.28,6.75,15.23,27.16] (true=[0,10,20,30])\n",
            "[ep 16450] loss=2.113454e-02 | all-ones pred≈27.517 (true=30.0) | max|err|≈9.973 | mean|err|≈0.650 | group means≈[0.35,7.05,15.53,27.52] (true=[0,10,20,30])\n",
            "[ep 16500] loss=2.840534e-02 | all-ones pred≈27.467 (true=30.0) | max|err|≈9.969 | mean|err|≈0.612 | group means≈[0.30,7.04,15.50,27.47] (true=[0,10,20,30])\n",
            "[ep 16550] loss=2.148229e-02 | all-ones pred≈27.651 (true=30.0) | max|err|≈10.143 | mean|err|≈0.649 | group means≈[0.36,7.23,15.69,27.65] (true=[0,10,20,30])\n",
            "[ep 16600] loss=2.979936e-02 | all-ones pred≈27.564 (true=30.0) | max|err|≈10.144 | mean|err|≈0.634 | group means≈[0.34,7.32,15.66,27.56] (true=[0,10,20,30])\n",
            "[ep 16650] loss=2.004493e-02 | all-ones pred≈27.415 (true=30.0) | max|err|≈9.948 | mean|err|≈0.623 | group means≈[0.33,7.13,15.55,27.42] (true=[0,10,20,30])\n",
            "[ep 16700] loss=3.067605e-02 | all-ones pred≈27.062 (true=30.0) | max|err|≈9.744 | mean|err|≈0.628 | group means≈[0.31,7.00,15.29,27.06] (true=[0,10,20,30])\n",
            "[ep 16750] loss=2.697520e-02 | all-ones pred≈27.748 (true=30.0) | max|err|≈10.155 | mean|err|≈0.618 | group means≈[0.33,7.27,15.77,27.75] (true=[0,10,20,30])\n",
            "[ep 16800] loss=2.441598e-02 | all-ones pred≈27.371 (true=30.0) | max|err|≈9.949 | mean|err|≈0.599 | group means≈[0.30,7.13,15.51,27.37] (true=[0,10,20,30])\n",
            "[ep 16850] loss=1.869473e-02 | all-ones pred≈27.485 (true=30.0) | max|err|≈9.977 | mean|err|≈0.639 | group means≈[0.35,7.16,15.59,27.49] (true=[0,10,20,30])\n",
            "[ep 16900] loss=2.615694e-02 | all-ones pred≈26.975 (true=30.0) | max|err|≈9.517 | mean|err|≈0.603 | group means≈[0.27,6.76,15.12,26.97] (true=[0,10,20,30])\n",
            "[ep 16950] loss=2.076594e-02 | all-ones pred≈27.457 (true=30.0) | max|err|≈9.911 | mean|err|≈0.630 | group means≈[0.34,7.17,15.57,27.46] (true=[0,10,20,30])\n",
            "[ep 17000] loss=2.595666e-02 | all-ones pred≈27.339 (true=30.0) | max|err|≈9.935 | mean|err|≈0.618 | group means≈[0.32,7.22,15.53,27.34] (true=[0,10,20,30])\n",
            "[ep 17050] loss=2.049030e-02 | all-ones pred≈27.355 (true=30.0) | max|err|≈9.771 | mean|err|≈0.607 | group means≈[0.30,6.95,15.43,27.36] (true=[0,10,20,30])\n",
            "[ep 17100] loss=3.626475e-02 | all-ones pred≈27.322 (true=30.0) | max|err|≈9.862 | mean|err|≈0.588 | group means≈[0.28,7.10,15.49,27.32] (true=[0,10,20,30])\n",
            "[ep 17150] loss=8.297449e-02 | all-ones pred≈27.682 (true=30.0) | max|err|≈10.092 | mean|err|≈0.618 | group means≈[0.35,7.40,15.77,27.68] (true=[0,10,20,30])\n",
            "[ep 17200] loss=4.634465e-02 | all-ones pred≈27.398 (true=30.0) | max|err|≈9.817 | mean|err|≈0.586 | group means≈[0.28,6.96,15.35,27.40] (true=[0,10,20,30])\n",
            "[ep 17250] loss=2.614790e-02 | all-ones pred≈27.408 (true=30.0) | max|err|≈9.804 | mean|err|≈0.581 | group means≈[0.28,7.12,15.38,27.41] (true=[0,10,20,30])\n",
            "[ep 17300] loss=3.843304e-02 | all-ones pred≈27.697 (true=30.0) | max|err|≈10.225 | mean|err|≈0.597 | group means≈[0.30,7.40,15.76,27.70] (true=[0,10,20,30])\n",
            "[ep 17350] loss=2.524059e-02 | all-ones pred≈27.693 (true=30.0) | max|err|≈10.128 | mean|err|≈0.597 | group means≈[0.33,7.48,15.77,27.69] (true=[0,10,20,30])\n",
            "[ep 17400] loss=3.326806e-02 | all-ones pred≈27.407 (true=30.0) | max|err|≈9.941 | mean|err|≈0.600 | group means≈[0.30,7.30,15.55,27.41] (true=[0,10,20,30])\n",
            "[ep 17450] loss=2.678505e-02 | all-ones pred≈27.680 (true=30.0) | max|err|≈10.074 | mean|err|≈0.593 | group means≈[0.31,7.32,15.74,27.68] (true=[0,10,20,30])\n",
            "[ep 17500] loss=2.942468e-02 | all-ones pred≈27.329 (true=30.0) | max|err|≈9.827 | mean|err|≈0.584 | group means≈[0.27,7.14,15.45,27.33] (true=[0,10,20,30])\n",
            "[ep 17550] loss=2.830466e-02 | all-ones pred≈27.410 (true=30.0) | max|err|≈9.817 | mean|err|≈0.600 | group means≈[0.30,7.06,15.45,27.41] (true=[0,10,20,30])\n",
            "[ep 17600] loss=2.779282e-02 | all-ones pred≈27.040 (true=30.0) | max|err|≈9.538 | mean|err|≈0.594 | group means≈[0.26,6.90,15.19,27.04] (true=[0,10,20,30])\n",
            "[ep 17650] loss=2.752801e-02 | all-ones pred≈27.701 (true=30.0) | max|err|≈10.024 | mean|err|≈0.614 | group means≈[0.33,7.30,15.75,27.70] (true=[0,10,20,30])\n",
            "[ep 17700] loss=2.677077e-02 | all-ones pred≈27.141 (true=30.0) | max|err|≈9.673 | mean|err|≈0.576 | group means≈[0.25,7.01,15.33,27.14] (true=[0,10,20,30])\n",
            "[ep 17750] loss=2.694537e-02 | all-ones pred≈27.633 (true=30.0) | max|err|≈10.122 | mean|err|≈0.616 | group means≈[0.35,7.44,15.80,27.63] (true=[0,10,20,30])\n",
            "[ep 17800] loss=2.549155e-02 | all-ones pred≈27.199 (true=30.0) | max|err|≈9.659 | mean|err|≈0.572 | group means≈[0.25,7.07,15.37,27.20] (true=[0,10,20,30])\n",
            "[ep 17850] loss=2.797052e-02 | all-ones pred≈27.653 (true=30.0) | max|err|≈10.070 | mean|err|≈0.598 | group means≈[0.32,7.38,15.75,27.65] (true=[0,10,20,30])\n",
            "[ep 17900] loss=2.637670e-02 | all-ones pred≈27.447 (true=30.0) | max|err|≈9.847 | mean|err|≈0.588 | group means≈[0.29,7.23,15.57,27.45] (true=[0,10,20,30])\n",
            "[ep 17950] loss=2.574852e-02 | all-ones pred≈27.733 (true=30.0) | max|err|≈9.995 | mean|err|≈0.601 | group means≈[0.32,7.34,15.75,27.73] (true=[0,10,20,30])\n",
            "[ep 18000] loss=2.673406e-02 | all-ones pred≈27.396 (true=30.0) | max|err|≈9.870 | mean|err|≈0.586 | group means≈[0.29,7.33,15.59,27.40] (true=[0,10,20,30])\n",
            "[ep 18050] loss=3.278165e-02 | all-ones pred≈27.748 (true=30.0) | max|err|≈10.167 | mean|err|≈0.610 | group means≈[0.35,7.48,15.85,27.75] (true=[0,10,20,30])\n",
            "[ep 18100] loss=3.534222e-02 | all-ones pred≈27.439 (true=30.0) | max|err|≈9.897 | mean|err|≈0.606 | group means≈[0.30,7.20,15.60,27.44] (true=[0,10,20,30])\n",
            "[ep 18150] loss=1.613121e-02 | all-ones pred≈27.569 (true=30.0) | max|err|≈9.910 | mean|err|≈0.595 | group means≈[0.32,7.29,15.65,27.57] (true=[0,10,20,30])\n",
            "[ep 18200] loss=1.808849e-02 | all-ones pred≈27.067 (true=30.0) | max|err|≈9.445 | mean|err|≈0.555 | group means≈[0.21,6.77,15.21,27.07] (true=[0,10,20,30])\n",
            "[ep 18250] loss=1.846676e-02 | all-ones pred≈27.586 (true=30.0) | max|err|≈9.803 | mean|err|≈0.561 | group means≈[0.26,7.12,15.60,27.59] (true=[0,10,20,30])\n",
            "[ep 18300] loss=2.296214e-02 | all-ones pred≈27.189 (true=30.0) | max|err|≈9.576 | mean|err|≈0.553 | group means≈[0.23,7.01,15.39,27.19] (true=[0,10,20,30])\n",
            "[ep 18350] loss=1.954320e-02 | all-ones pred≈27.414 (true=30.0) | max|err|≈9.675 | mean|err|≈0.572 | group means≈[0.27,7.07,15.51,27.41] (true=[0,10,20,30])\n",
            "[ep 18400] loss=2.656506e-02 | all-ones pred≈27.332 (true=30.0) | max|err|≈9.720 | mean|err|≈0.578 | group means≈[0.28,7.19,15.54,27.33] (true=[0,10,20,30])\n",
            "[ep 18450] loss=1.701116e-02 | all-ones pred≈27.862 (true=30.0) | max|err|≈10.187 | mean|err|≈0.589 | group means≈[0.33,7.62,16.02,27.86] (true=[0,10,20,30])\n",
            "[ep 18500] loss=2.315636e-02 | all-ones pred≈27.483 (true=30.0) | max|err|≈9.922 | mean|err|≈0.594 | group means≈[0.31,7.44,15.74,27.48] (true=[0,10,20,30])\n",
            "[ep 18550] loss=1.514323e-02 | all-ones pred≈27.343 (true=30.0) | max|err|≈9.617 | mean|err|≈0.550 | group means≈[0.25,7.13,15.50,27.34] (true=[0,10,20,30])\n",
            "[ep 18600] loss=2.184349e-02 | all-ones pred≈27.405 (true=30.0) | max|err|≈9.946 | mean|err|≈0.610 | group means≈[0.35,7.64,15.80,27.40] (true=[0,10,20,30])\n",
            "[ep 18650] loss=1.748536e-02 | all-ones pred≈27.696 (true=30.0) | max|err|≈9.973 | mean|err|≈0.586 | group means≈[0.31,7.41,15.85,27.70] (true=[0,10,20,30])\n",
            "[ep 18700] loss=7.948124e-02 | all-ones pred≈27.572 (true=30.0) | max|err|≈9.925 | mean|err|≈0.573 | group means≈[0.28,7.43,15.75,27.57] (true=[0,10,20,30])\n",
            "[ep 18750] loss=1.882012e-01 | all-ones pred≈27.617 (true=30.0) | max|err|≈9.937 | mean|err|≈0.586 | group means≈[0.32,7.48,15.78,27.62] (true=[0,10,20,30])\n",
            "[ep 18800] loss=6.773254e-02 | all-ones pred≈27.423 (true=30.0) | max|err|≈9.659 | mean|err|≈0.580 | group means≈[0.28,7.03,15.47,27.42] (true=[0,10,20,30])\n",
            "[ep 18850] loss=9.033362e-03 | all-ones pred≈27.260 (true=30.0) | max|err|≈9.724 | mean|err|≈0.561 | group means≈[0.25,7.21,15.47,27.26] (true=[0,10,20,30])\n",
            "[ep 18900] loss=1.223636e-02 | all-ones pred≈27.526 (true=30.0) | max|err|≈9.834 | mean|err|≈0.570 | group means≈[0.28,7.25,15.67,27.53] (true=[0,10,20,30])\n",
            "[ep 18950] loss=1.187749e-02 | all-ones pred≈27.185 (true=30.0) | max|err|≈9.707 | mean|err|≈0.559 | group means≈[0.25,7.26,15.54,27.18] (true=[0,10,20,30])\n",
            "[ep 19000] loss=2.467837e-02 | all-ones pred≈27.364 (true=30.0) | max|err|≈9.765 | mean|err|≈0.566 | group means≈[0.28,7.21,15.58,27.36] (true=[0,10,20,30])\n",
            "[ep 19050] loss=1.033946e-02 | all-ones pred≈27.397 (true=30.0) | max|err|≈9.860 | mean|err|≈0.588 | group means≈[0.30,7.45,15.73,27.40] (true=[0,10,20,30])\n",
            "[ep 19100] loss=2.815418e-02 | all-ones pred≈27.804 (true=30.0) | max|err|≈10.029 | mean|err|≈0.565 | group means≈[0.30,7.50,15.92,27.80] (true=[0,10,20,30])\n",
            "[ep 19150] loss=3.530281e-03 | all-ones pred≈27.271 (true=30.0) | max|err|≈9.684 | mean|err|≈0.564 | group means≈[0.26,7.31,15.54,27.27] (true=[0,10,20,30])\n",
            "[ep 19200] loss=1.830443e-02 | all-ones pred≈27.594 (true=30.0) | max|err|≈9.955 | mean|err|≈0.568 | group means≈[0.30,7.50,15.85,27.59] (true=[0,10,20,30])\n",
            "[ep 19250] loss=1.156519e-02 | all-ones pred≈27.515 (true=30.0) | max|err|≈10.057 | mean|err|≈0.593 | group means≈[0.32,7.63,15.89,27.51] (true=[0,10,20,30])\n",
            "[ep 19300] loss=4.710197e-02 | all-ones pred≈27.874 (true=30.0) | max|err|≈10.053 | mean|err|≈0.570 | group means≈[0.30,7.45,15.97,27.87] (true=[0,10,20,30])\n",
            "[ep 19350] loss=1.485129e-02 | all-ones pred≈27.346 (true=30.0) | max|err|≈9.815 | mean|err|≈0.587 | group means≈[0.30,7.40,15.68,27.35] (true=[0,10,20,30])\n",
            "[ep 19400] loss=2.739257e-02 | all-ones pred≈27.545 (true=30.0) | max|err|≈9.788 | mean|err|≈0.565 | group means≈[0.28,7.28,15.69,27.54] (true=[0,10,20,30])\n",
            "[ep 19450] loss=1.795408e-02 | all-ones pred≈27.493 (true=30.0) | max|err|≈9.835 | mean|err|≈0.579 | group means≈[0.29,7.46,15.79,27.49] (true=[0,10,20,30])\n",
            "[ep 19500] loss=2.654830e-02 | all-ones pred≈27.549 (true=30.0) | max|err|≈9.823 | mean|err|≈0.577 | group means≈[0.30,7.30,15.72,27.55] (true=[0,10,20,30])\n",
            "[ep 19550] loss=2.348648e-02 | all-ones pred≈27.370 (true=30.0) | max|err|≈9.677 | mean|err|≈0.570 | group means≈[0.27,7.24,15.62,27.37] (true=[0,10,20,30])\n",
            "[ep 19600] loss=2.111304e-02 | all-ones pred≈27.661 (true=30.0) | max|err|≈9.951 | mean|err|≈0.575 | group means≈[0.31,7.48,15.92,27.66] (true=[0,10,20,30])\n",
            "[ep 19650] loss=1.685310e-02 | all-ones pred≈27.300 (true=30.0) | max|err|≈9.560 | mean|err|≈0.555 | group means≈[0.24,7.11,15.57,27.30] (true=[0,10,20,30])\n",
            "[ep 19700] loss=3.182781e-02 | all-ones pred≈27.940 (true=30.0) | max|err|≈10.286 | mean|err|≈0.593 | group means≈[0.36,7.91,16.25,27.94] (true=[0,10,20,30])\n",
            "[ep 19750] loss=2.030933e-02 | all-ones pred≈27.322 (true=30.0) | max|err|≈9.655 | mean|err|≈0.571 | group means≈[0.26,7.17,15.60,27.32] (true=[0,10,20,30])\n",
            "[ep 19800] loss=3.425587e-02 | all-ones pred≈27.395 (true=30.0) | max|err|≈9.610 | mean|err|≈0.554 | group means≈[0.26,7.20,15.66,27.39] (true=[0,10,20,30])\n",
            "[ep 19850] loss=2.516803e-02 | all-ones pred≈27.239 (true=30.0) | max|err|≈9.627 | mean|err|≈0.547 | group means≈[0.24,7.26,15.58,27.24] (true=[0,10,20,30])\n",
            "[ep 19900] loss=2.678237e-02 | all-ones pred≈27.452 (true=30.0) | max|err|≈9.697 | mean|err|≈0.575 | group means≈[0.29,7.30,15.70,27.45] (true=[0,10,20,30])\n",
            "[ep 19950] loss=3.313535e-02 | all-ones pred≈27.475 (true=30.0) | max|err|≈9.769 | mean|err|≈0.561 | group means≈[0.27,7.40,15.75,27.48] (true=[0,10,20,30])\n",
            "[ep 20000] loss=2.408947e-02 | all-ones pred≈27.939 (true=30.0) | max|err|≈10.216 | mean|err|≈0.562 | group means≈[0.31,7.76,16.21,27.94] (true=[0,10,20,30])\n",
            "[ep 20050] loss=2.491980e-02 | all-ones pred≈27.307 (true=30.0) | max|err|≈9.674 | mean|err|≈0.568 | group means≈[0.28,7.35,15.67,27.31] (true=[0,10,20,30])\n",
            "[ep 20100] loss=3.097689e-02 | all-ones pred≈27.596 (true=30.0) | max|err|≈9.780 | mean|err|≈0.563 | group means≈[0.30,7.47,15.85,27.60] (true=[0,10,20,30])\n",
            "[ep 20150] loss=4.005270e-02 | all-ones pred≈27.181 (true=30.0) | max|err|≈9.409 | mean|err|≈0.547 | group means≈[0.23,7.08,15.43,27.18] (true=[0,10,20,30])\n",
            "[ep 20200] loss=4.412780e-02 | all-ones pred≈27.964 (true=30.0) | max|err|≈10.125 | mean|err|≈0.565 | group means≈[0.31,7.66,16.16,27.96] (true=[0,10,20,30])\n",
            "[ep 20250] loss=1.894188e-02 | all-ones pred≈27.380 (true=30.0) | max|err|≈9.776 | mean|err|≈0.556 | group means≈[0.28,7.59,15.82,27.38] (true=[0,10,20,30])\n",
            "[ep 20300] loss=2.282785e-02 | all-ones pred≈27.766 (true=30.0) | max|err|≈10.008 | mean|err|≈0.555 | group means≈[0.29,7.59,16.00,27.77] (true=[0,10,20,30])\n",
            "[ep 20350] loss=2.498343e-02 | all-ones pred≈27.162 (true=30.0) | max|err|≈9.498 | mean|err|≈0.551 | group means≈[0.25,7.24,15.57,27.16] (true=[0,10,20,30])\n",
            "[ep 20400] loss=4.922610e-02 | all-ones pred≈27.789 (true=30.0) | max|err|≈9.983 | mean|err|≈0.568 | group means≈[0.30,7.51,16.04,27.79] (true=[0,10,20,30])\n",
            "[ep 20450] loss=1.795493e-02 | all-ones pred≈27.491 (true=30.0) | max|err|≈9.833 | mean|err|≈0.543 | group means≈[0.27,7.51,15.88,27.49] (true=[0,10,20,30])\n",
            "[ep 20500] loss=4.017224e-02 | all-ones pred≈27.665 (true=30.0) | max|err|≈9.694 | mean|err|≈0.558 | group means≈[0.26,7.13,15.67,27.67] (true=[0,10,20,30])\n",
            "[ep 20550] loss=4.494083e-03 | all-ones pred≈27.022 (true=30.0) | max|err|≈9.313 | mean|err|≈0.532 | group means≈[0.19,6.95,15.27,27.02] (true=[0,10,20,30])\n",
            "[ep 20600] loss=2.574611e-02 | all-ones pred≈27.701 (true=30.0) | max|err|≈10.015 | mean|err|≈0.551 | group means≈[0.29,7.58,15.97,27.70] (true=[0,10,20,30])\n",
            "[ep 20650] loss=5.193407e-03 | all-ones pred≈27.304 (true=30.0) | max|err|≈9.772 | mean|err|≈0.544 | group means≈[0.26,7.47,15.74,27.30] (true=[0,10,20,30])\n",
            "[ep 20700] loss=2.194671e-02 | all-ones pred≈27.616 (true=30.0) | max|err|≈9.899 | mean|err|≈0.553 | group means≈[0.29,7.52,15.93,27.62] (true=[0,10,20,30])\n",
            "[ep 20750] loss=1.663463e-02 | all-ones pred≈26.920 (true=30.0) | max|err|≈9.263 | mean|err|≈0.535 | group means≈[0.21,6.94,15.23,26.92] (true=[0,10,20,30])\n",
            "[ep 20800] loss=3.601080e-02 | all-ones pred≈27.663 (true=30.0) | max|err|≈9.902 | mean|err|≈0.569 | group means≈[0.31,7.58,15.99,27.66] (true=[0,10,20,30])\n",
            "[ep 20850] loss=8.612958e-03 | all-ones pred≈27.055 (true=30.0) | max|err|≈9.401 | mean|err|≈0.537 | group means≈[0.23,7.19,15.43,27.06] (true=[0,10,20,30])\n",
            "[ep 20900] loss=1.931884e-02 | all-ones pred≈27.890 (true=30.0) | max|err|≈10.070 | mean|err|≈0.556 | group means≈[0.29,7.56,16.09,27.89] (true=[0,10,20,30])\n",
            "[ep 20950] loss=1.427693e-02 | all-ones pred≈27.384 (true=30.0) | max|err|≈9.742 | mean|err|≈0.538 | group means≈[0.25,7.39,15.75,27.38] (true=[0,10,20,30])\n",
            "[ep 21000] loss=2.399997e-02 | all-ones pred≈27.674 (true=30.0) | max|err|≈9.903 | mean|err|≈0.562 | group means≈[0.30,7.58,15.98,27.67] (true=[0,10,20,30])\n",
            "[ep 21050] loss=1.683859e-02 | all-ones pred≈27.225 (true=30.0) | max|err|≈9.499 | mean|err|≈0.538 | group means≈[0.23,7.11,15.53,27.22] (true=[0,10,20,30])\n",
            "[ep 21100] loss=2.270669e-02 | all-ones pred≈27.477 (true=30.0) | max|err|≈9.651 | mean|err|≈0.551 | group means≈[0.26,7.22,15.73,27.48] (true=[0,10,20,30])\n",
            "[ep 21150] loss=1.963917e-02 | all-ones pred≈27.215 (true=30.0) | max|err|≈9.413 | mean|err|≈0.533 | group means≈[0.22,7.13,15.53,27.21] (true=[0,10,20,30])\n",
            "[ep 21200] loss=2.768929e-02 | all-ones pred≈27.806 (true=30.0) | max|err|≈10.154 | mean|err|≈0.571 | group means≈[0.33,7.86,16.20,27.81] (true=[0,10,20,30])\n",
            "[ep 21250] loss=2.010110e-02 | all-ones pred≈27.221 (true=30.0) | max|err|≈9.534 | mean|err|≈0.546 | group means≈[0.24,7.23,15.63,27.22] (true=[0,10,20,30])\n",
            "[ep 21300] loss=2.774842e-02 | all-ones pred≈27.578 (true=30.0) | max|err|≈9.731 | mean|err|≈0.547 | group means≈[0.27,7.38,15.81,27.58] (true=[0,10,20,30])\n",
            "[ep 21350] loss=4.682155e-02 | all-ones pred≈27.222 (true=30.0) | max|err|≈9.589 | mean|err|≈0.552 | group means≈[0.25,7.34,15.61,27.22] (true=[0,10,20,30])\n",
            "[ep 21400] loss=1.121159e-01 | all-ones pred≈27.639 (true=30.0) | max|err|≈9.845 | mean|err|≈0.549 | group means≈[0.24,7.48,15.88,27.64] (true=[0,10,20,30])\n",
            "[ep 21450] loss=4.332842e-02 | all-ones pred≈27.499 (true=30.0) | max|err|≈9.739 | mean|err|≈0.557 | group means≈[0.22,7.40,15.72,27.50] (true=[0,10,20,30])\n",
            "[ep 21500] loss=7.735027e-03 | all-ones pred≈27.845 (true=30.0) | max|err|≈10.115 | mean|err|≈0.542 | group means≈[0.29,7.73,16.12,27.85] (true=[0,10,20,30])\n",
            "[ep 21550] loss=2.230807e-02 | all-ones pred≈27.879 (true=30.0) | max|err|≈10.255 | mean|err|≈0.554 | group means≈[0.31,7.86,16.21,27.88] (true=[0,10,20,30])\n",
            "[ep 21600] loss=1.352052e-02 | all-ones pred≈27.516 (true=30.0) | max|err|≈9.828 | mean|err|≈0.563 | group means≈[0.30,7.52,15.88,27.52] (true=[0,10,20,30])\n",
            "[ep 21650] loss=2.006383e-02 | all-ones pred≈27.196 (true=30.0) | max|err|≈9.623 | mean|err|≈0.531 | group means≈[0.25,7.38,15.69,27.20] (true=[0,10,20,30])\n",
            "[ep 21700] loss=1.430307e-02 | all-ones pred≈27.665 (true=30.0) | max|err|≈9.927 | mean|err|≈0.564 | group means≈[0.31,7.64,16.03,27.67] (true=[0,10,20,30])\n",
            "[ep 21750] loss=1.794206e-02 | all-ones pred≈27.350 (true=30.0) | max|err|≈9.685 | mean|err|≈0.550 | group means≈[0.27,7.45,15.74,27.35] (true=[0,10,20,30])\n",
            "[ep 21800] loss=1.439433e-02 | all-ones pred≈27.670 (true=30.0) | max|err|≈9.885 | mean|err|≈0.554 | group means≈[0.29,7.59,16.01,27.67] (true=[0,10,20,30])\n",
            "[ep 21850] loss=1.575977e-02 | all-ones pred≈27.185 (true=30.0) | max|err|≈9.525 | mean|err|≈0.542 | group means≈[0.25,7.32,15.61,27.19] (true=[0,10,20,30])\n",
            "[ep 21900] loss=2.424518e-02 | all-ones pred≈27.502 (true=30.0) | max|err|≈9.675 | mean|err|≈0.536 | group means≈[0.26,7.37,15.79,27.50] (true=[0,10,20,30])\n",
            "[ep 21950] loss=2.162636e-02 | all-ones pred≈27.426 (true=30.0) | max|err|≈9.779 | mean|err|≈0.574 | group means≈[0.30,7.48,15.80,27.43] (true=[0,10,20,30])\n",
            "[ep 22000] loss=1.502728e-02 | all-ones pred≈27.783 (true=30.0) | max|err|≈9.995 | mean|err|≈0.571 | group means≈[0.32,7.70,16.14,27.78] (true=[0,10,20,30])\n",
            "[ep 22050] loss=1.812568e-02 | all-ones pred≈27.098 (true=30.0) | max|err|≈9.433 | mean|err|≈0.531 | group means≈[0.24,7.30,15.53,27.10] (true=[0,10,20,30])\n",
            "[ep 22100] loss=1.470331e-02 | all-ones pred≈27.730 (true=30.0) | max|err|≈9.952 | mean|err|≈0.553 | group means≈[0.30,7.71,16.09,27.73] (true=[0,10,20,30])\n",
            "[ep 22150] loss=2.106113e-02 | all-ones pred≈27.297 (true=30.0) | max|err|≈9.568 | mean|err|≈0.542 | group means≈[0.25,7.21,15.66,27.30] (true=[0,10,20,30])\n",
            "[ep 22200] loss=1.958035e-02 | all-ones pred≈27.518 (true=30.0) | max|err|≈9.713 | mean|err|≈0.521 | group means≈[0.25,7.43,15.89,27.52] (true=[0,10,20,30])\n",
            "[ep 22250] loss=9.200689e-02 | all-ones pred≈27.640 (true=30.0) | max|err|≈9.868 | mean|err|≈0.541 | group means≈[0.28,7.61,15.95,27.64] (true=[0,10,20,30])\n",
            "[ep 22300] loss=8.750658e-02 | all-ones pred≈27.313 (true=30.0) | max|err|≈9.409 | mean|err|≈0.643 | group means≈[0.36,7.16,15.52,27.31] (true=[0,10,20,30])\n",
            "[ep 22350] loss=9.719525e-03 | all-ones pred≈27.934 (true=30.0) | max|err|≈10.228 | mean|err|≈0.551 | group means≈[0.32,7.91,16.22,27.93] (true=[0,10,20,30])\n",
            "[ep 22400] loss=1.473767e-02 | all-ones pred≈27.184 (true=30.0) | max|err|≈9.530 | mean|err|≈0.530 | group means≈[0.24,7.25,15.53,27.18] (true=[0,10,20,30])\n",
            "[ep 22450] loss=2.333258e-02 | all-ones pred≈27.904 (true=30.0) | max|err|≈10.198 | mean|err|≈0.554 | group means≈[0.31,7.86,16.25,27.90] (true=[0,10,20,30])\n",
            "[ep 22500] loss=2.546044e-02 | all-ones pred≈27.452 (true=30.0) | max|err|≈9.676 | mean|err|≈0.544 | group means≈[0.26,7.36,15.80,27.45] (true=[0,10,20,30])\n",
            "[ep 22550] loss=1.310558e-02 | all-ones pred≈27.474 (true=30.0) | max|err|≈9.689 | mean|err|≈0.555 | group means≈[0.27,7.28,15.73,27.47] (true=[0,10,20,30])\n",
            "[ep 22600] loss=2.356826e-02 | all-ones pred≈27.249 (true=30.0) | max|err|≈9.593 | mean|err|≈0.527 | group means≈[0.24,7.34,15.65,27.25] (true=[0,10,20,30])\n",
            "[ep 22650] loss=1.017169e-02 | all-ones pred≈27.868 (true=30.0) | max|err|≈10.152 | mean|err|≈0.565 | group means≈[0.33,7.85,16.18,27.87] (true=[0,10,20,30])\n",
            "[ep 22700] loss=1.741524e-02 | all-ones pred≈27.291 (true=30.0) | max|err|≈9.511 | mean|err|≈0.515 | group means≈[0.22,7.22,15.60,27.29] (true=[0,10,20,30])\n",
            "[ep 22750] loss=2.378010e-02 | all-ones pred≈27.767 (true=30.0) | max|err|≈9.919 | mean|err|≈0.555 | group means≈[0.28,7.49,16.03,27.77] (true=[0,10,20,30])\n",
            "[ep 22800] loss=1.750249e-02 | all-ones pred≈27.529 (true=30.0) | max|err|≈9.830 | mean|err|≈0.561 | group means≈[0.29,7.54,15.92,27.53] (true=[0,10,20,30])\n",
            "[ep 22850] loss=1.147992e-02 | all-ones pred≈27.863 (true=30.0) | max|err|≈10.103 | mean|err|≈0.571 | group means≈[0.33,7.82,16.21,27.86] (true=[0,10,20,30])\n",
            "[ep 22900] loss=1.881012e-02 | all-ones pred≈27.187 (true=30.0) | max|err|≈9.563 | mean|err|≈0.529 | group means≈[0.25,7.39,15.68,27.19] (true=[0,10,20,30])\n",
            "[ep 22950] loss=1.352261e-02 | all-ones pred≈27.430 (true=30.0) | max|err|≈9.656 | mean|err|≈0.515 | group means≈[0.24,7.37,15.79,27.43] (true=[0,10,20,30])\n",
            "[ep 23000] loss=2.040882e-02 | all-ones pred≈27.290 (true=30.0) | max|err|≈9.614 | mean|err|≈0.541 | group means≈[0.26,7.47,15.77,27.29] (true=[0,10,20,30])\n",
            "[ep 23050] loss=1.686936e-02 | all-ones pred≈27.724 (true=30.0) | max|err|≈9.837 | mean|err|≈0.538 | group means≈[0.27,7.53,16.00,27.72] (true=[0,10,20,30])\n",
            "[ep 23100] loss=2.015194e-02 | all-ones pred≈27.219 (true=30.0) | max|err|≈9.422 | mean|err|≈0.521 | group means≈[0.22,7.22,15.61,27.22] (true=[0,10,20,30])\n",
            "[ep 23150] loss=1.478627e-02 | all-ones pred≈27.652 (true=30.0) | max|err|≈9.902 | mean|err|≈0.535 | group means≈[0.28,7.67,16.10,27.65] (true=[0,10,20,30])\n",
            "[ep 23200] loss=1.834179e-02 | all-ones pred≈27.197 (true=30.0) | max|err|≈9.558 | mean|err|≈0.529 | group means≈[0.24,7.38,15.69,27.20] (true=[0,10,20,30])\n",
            "[ep 23250] loss=1.390096e-02 | all-ones pred≈27.793 (true=30.0) | max|err|≈9.982 | mean|err|≈0.564 | group means≈[0.31,7.70,16.15,27.79] (true=[0,10,20,30])\n",
            "[ep 23300] loss=1.986043e-02 | all-ones pred≈26.958 (true=30.0) | max|err|≈9.249 | mean|err|≈0.528 | group means≈[0.22,7.06,15.39,26.96] (true=[0,10,20,30])\n",
            "[ep 23350] loss=1.697087e-02 | all-ones pred≈27.792 (true=30.0) | max|err|≈9.941 | mean|err|≈0.546 | group means≈[0.29,7.71,16.16,27.79] (true=[0,10,20,30])\n",
            "[ep 23400] loss=2.151668e-02 | all-ones pred≈27.615 (true=30.0) | max|err|≈9.930 | mean|err|≈0.543 | group means≈[0.29,7.74,16.08,27.61] (true=[0,10,20,30])\n",
            "[ep 23450] loss=1.830284e-02 | all-ones pred≈27.409 (true=30.0) | max|err|≈9.486 | mean|err|≈0.527 | group means≈[0.23,7.16,15.65,27.41] (true=[0,10,20,30])\n",
            "[ep 23500] loss=2.104903e-02 | all-ones pred≈27.285 (true=30.0) | max|err|≈9.475 | mean|err|≈0.534 | group means≈[0.24,7.27,15.69,27.29] (true=[0,10,20,30])\n",
            "[ep 23550] loss=1.665043e-02 | all-ones pred≈27.470 (true=30.0) | max|err|≈9.575 | mean|err|≈0.531 | group means≈[0.25,7.30,15.81,27.47] (true=[0,10,20,30])\n",
            "[ep 23600] loss=2.292135e-02 | all-ones pred≈27.587 (true=30.0) | max|err|≈9.860 | mean|err|≈0.536 | group means≈[0.27,7.65,15.99,27.59] (true=[0,10,20,30])\n",
            "[ep 23650] loss=1.796260e-02 | all-ones pred≈27.614 (true=30.0) | max|err|≈9.712 | mean|err|≈0.549 | group means≈[0.28,7.50,16.00,27.61] (true=[0,10,20,30])\n",
            "[ep 23700] loss=2.404175e-02 | all-ones pred≈27.144 (true=30.0) | max|err|≈9.434 | mean|err|≈0.526 | group means≈[0.23,7.26,15.63,27.14] (true=[0,10,20,30])\n",
            "[ep 23750] loss=1.955628e-02 | all-ones pred≈27.657 (true=30.0) | max|err|≈9.889 | mean|err|≈0.531 | group means≈[0.28,7.68,16.06,27.66] (true=[0,10,20,30])\n",
            "[ep 23800] loss=3.405055e-02 | all-ones pred≈27.592 (true=30.0) | max|err|≈9.826 | mean|err|≈0.552 | group means≈[0.29,7.57,15.95,27.59] (true=[0,10,20,30])\n",
            "[ep 23850] loss=5.310247e-02 | all-ones pred≈27.958 (true=30.0) | max|err|≈10.153 | mean|err|≈0.528 | group means≈[0.30,8.02,16.35,27.96] (true=[0,10,20,30])\n",
            "[ep 23900] loss=7.627548e-02 | all-ones pred≈27.572 (true=30.0) | max|err|≈9.733 | mean|err|≈0.517 | group means≈[0.24,7.35,15.80,27.57] (true=[0,10,20,30])\n",
            "[ep 23950] loss=1.670091e-02 | all-ones pred≈27.560 (true=30.0) | max|err|≈9.958 | mean|err|≈0.540 | group means≈[0.28,7.69,15.99,27.56] (true=[0,10,20,30])\n",
            "[ep 24000] loss=2.717429e-02 | all-ones pred≈28.036 (true=30.0) | max|err|≈10.373 | mean|err|≈0.559 | group means≈[0.33,8.06,16.44,28.04] (true=[0,10,20,30])\n",
            "[ep 24050] loss=1.180422e-02 | all-ones pred≈27.205 (true=30.0) | max|err|≈9.599 | mean|err|≈0.541 | group means≈[0.25,7.31,15.66,27.20] (true=[0,10,20,30])\n",
            "[ep 24100] loss=1.125356e-02 | all-ones pred≈27.723 (true=30.0) | max|err|≈9.959 | mean|err|≈0.517 | group means≈[0.26,7.64,16.07,27.72] (true=[0,10,20,30])\n",
            "[ep 24150] loss=3.394415e-02 | all-ones pred≈27.220 (true=30.0) | max|err|≈9.540 | mean|err|≈0.527 | group means≈[0.23,7.33,15.67,27.22] (true=[0,10,20,30])\n",
            "[ep 24200] loss=6.214366e-03 | all-ones pred≈27.460 (true=30.0) | max|err|≈9.723 | mean|err|≈0.520 | group means≈[0.24,7.44,15.88,27.46] (true=[0,10,20,30])\n",
            "[ep 24250] loss=2.823335e-02 | all-ones pred≈26.886 (true=30.0) | max|err|≈9.235 | mean|err|≈0.515 | group means≈[0.19,6.98,15.33,26.89] (true=[0,10,20,30])\n",
            "[ep 24300] loss=1.609339e-02 | all-ones pred≈27.701 (true=30.0) | max|err|≈9.889 | mean|err|≈0.526 | group means≈[0.26,7.59,16.01,27.70] (true=[0,10,20,30])\n",
            "[ep 24350] loss=2.929362e-02 | all-ones pred≈27.389 (true=30.0) | max|err|≈9.764 | mean|err|≈0.527 | group means≈[0.25,7.56,15.92,27.39] (true=[0,10,20,30])\n",
            "[ep 24400] loss=3.230035e-02 | all-ones pred≈27.424 (true=30.0) | max|err|≈9.539 | mean|err|≈0.525 | group means≈[0.24,7.25,15.75,27.42] (true=[0,10,20,30])\n",
            "[ep 24450] loss=1.534966e-02 | all-ones pred≈27.440 (true=30.0) | max|err|≈9.751 | mean|err|≈0.519 | group means≈[0.25,7.64,15.91,27.44] (true=[0,10,20,30])\n",
            "[ep 24500] loss=1.615743e-02 | all-ones pred≈27.772 (true=30.0) | max|err|≈9.932 | mean|err|≈0.522 | group means≈[0.27,7.67,16.09,27.77] (true=[0,10,20,30])\n",
            "[ep 24550] loss=2.468039e-02 | all-ones pred≈27.180 (true=30.0) | max|err|≈9.427 | mean|err|≈0.517 | group means≈[0.22,7.24,15.54,27.18] (true=[0,10,20,30])\n",
            "[ep 24600] loss=1.609613e-02 | all-ones pred≈27.289 (true=30.0) | max|err|≈9.481 | mean|err|≈0.536 | group means≈[0.25,7.27,15.65,27.29] (true=[0,10,20,30])\n",
            "[ep 24650] loss=1.331334e-02 | all-ones pred≈27.356 (true=30.0) | max|err|≈9.645 | mean|err|≈0.529 | group means≈[0.26,7.55,15.84,27.36] (true=[0,10,20,30])\n",
            "[ep 24700] loss=9.178156e-03 | all-ones pred≈27.270 (true=30.0) | max|err|≈9.419 | mean|err|≈0.527 | group means≈[0.23,7.17,15.64,27.27] (true=[0,10,20,30])\n",
            "[ep 24750] loss=1.686979e-02 | all-ones pred≈27.343 (true=30.0) | max|err|≈9.604 | mean|err|≈0.524 | group means≈[0.24,7.43,15.81,27.34] (true=[0,10,20,30])\n",
            "[ep 24800] loss=1.229763e-02 | all-ones pred≈27.519 (true=30.0) | max|err|≈9.745 | mean|err|≈0.533 | group means≈[0.28,7.61,15.97,27.52] (true=[0,10,20,30])\n",
            "[ep 24850] loss=2.041658e-02 | all-ones pred≈27.136 (true=30.0) | max|err|≈9.462 | mean|err|≈0.523 | group means≈[0.24,7.40,15.65,27.14] (true=[0,10,20,30])\n",
            "[ep 24900] loss=1.680644e-02 | all-ones pred≈27.820 (true=30.0) | max|err|≈10.009 | mean|err|≈0.551 | group means≈[0.30,7.70,16.25,27.82] (true=[0,10,20,30])\n",
            "[ep 24950] loss=1.898764e-02 | all-ones pred≈27.632 (true=30.0) | max|err|≈9.906 | mean|err|≈0.522 | group means≈[0.27,7.84,16.11,27.63] (true=[0,10,20,30])\n",
            "[ep 25000] loss=1.190150e-02 | all-ones pred≈27.432 (true=30.0) | max|err|≈9.543 | mean|err|≈0.516 | group means≈[0.23,7.31,15.80,27.43] (true=[0,10,20,30])\n",
            "[ep 25050] loss=2.121150e-02 | all-ones pred≈27.385 (true=30.0) | max|err|≈9.618 | mean|err|≈0.531 | group means≈[0.25,7.42,15.82,27.38] (true=[0,10,20,30])\n",
            "[ep 25100] loss=1.577175e-02 | all-ones pred≈28.001 (true=30.0) | max|err|≈10.217 | mean|err|≈0.554 | group means≈[0.33,8.02,16.43,28.00] (true=[0,10,20,30])\n",
            "[ep 25150] loss=2.080169e-02 | all-ones pred≈27.194 (true=30.0) | max|err|≈9.448 | mean|err|≈0.520 | group means≈[0.22,7.26,15.65,27.19] (true=[0,10,20,30])\n",
            "[ep 25200] loss=1.257842e-02 | all-ones pred≈27.365 (true=30.0) | max|err|≈9.591 | mean|err|≈0.533 | group means≈[0.27,7.49,15.85,27.37] (true=[0,10,20,30])\n",
            "[ep 25250] loss=1.914604e-02 | all-ones pred≈27.307 (true=30.0) | max|err|≈9.565 | mean|err|≈0.529 | group means≈[0.25,7.48,15.82,27.31] (true=[0,10,20,30])\n",
            "[ep 25300] loss=1.612700e-02 | all-ones pred≈27.296 (true=30.0) | max|err|≈9.493 | mean|err|≈0.537 | group means≈[0.25,7.28,15.76,27.30] (true=[0,10,20,30])\n",
            "[ep 25350] loss=2.001353e-02 | all-ones pred≈27.341 (true=30.0) | max|err|≈9.626 | mean|err|≈0.529 | group means≈[0.26,7.57,15.91,27.34] (true=[0,10,20,30])\n",
            "[ep 25400] loss=1.641075e-02 | all-ones pred≈27.505 (true=30.0) | max|err|≈9.712 | mean|err|≈0.533 | group means≈[0.27,7.52,15.94,27.51] (true=[0,10,20,30])\n",
            "[ep 25450] loss=2.080035e-02 | all-ones pred≈27.163 (true=30.0) | max|err|≈9.465 | mean|err|≈0.516 | group means≈[0.23,7.41,15.71,27.16] (true=[0,10,20,30])\n",
            "[ep 25500] loss=1.561028e-02 | all-ones pred≈27.652 (true=30.0) | max|err|≈9.876 | mean|err|≈0.551 | group means≈[0.30,7.71,16.14,27.65] (true=[0,10,20,30])\n",
            "[ep 25550] loss=2.204750e-02 | all-ones pred≈27.246 (true=30.0) | max|err|≈9.497 | mean|err|≈0.517 | group means≈[0.23,7.42,15.76,27.25] (true=[0,10,20,30])\n",
            "[ep 25600] loss=1.984316e-02 | all-ones pred≈27.648 (true=30.0) | max|err|≈9.884 | mean|err|≈0.527 | group means≈[0.28,7.74,16.10,27.65] (true=[0,10,20,30])\n",
            "[ep 25650] loss=2.468818e-01 | all-ones pred≈27.726 (true=30.0) | max|err|≈9.935 | mean|err|≈0.582 | group means≈[0.34,7.77,16.15,27.73] (true=[0,10,20,30])\n",
            "[ep 25700] loss=2.653297e-02 | all-ones pred≈27.653 (true=30.0) | max|err|≈9.785 | mean|err|≈0.540 | group means≈[0.22,7.58,16.02,27.65] (true=[0,10,20,30])\n",
            "[ep 25750] loss=4.703528e-02 | all-ones pred≈27.235 (true=30.0) | max|err|≈9.454 | mean|err|≈0.529 | group means≈[0.23,7.22,15.59,27.24] (true=[0,10,20,30])\n",
            "[ep 25800] loss=5.576957e-03 | all-ones pred≈27.559 (true=30.0) | max|err|≈9.874 | mean|err|≈0.522 | group means≈[0.27,7.67,16.04,27.56] (true=[0,10,20,30])\n",
            "[ep 25850] loss=2.318895e-02 | all-ones pred≈27.655 (true=30.0) | max|err|≈9.987 | mean|err|≈0.536 | group means≈[0.29,7.90,16.17,27.66] (true=[0,10,20,30])\n",
            "[ep 25900] loss=1.376886e-02 | all-ones pred≈27.324 (true=30.0) | max|err|≈9.545 | mean|err|≈0.514 | group means≈[0.23,7.29,15.75,27.32] (true=[0,10,20,30])\n",
            "[ep 25950] loss=1.594233e-02 | all-ones pred≈27.169 (true=30.0) | max|err|≈9.371 | mean|err|≈0.511 | group means≈[0.22,7.27,15.57,27.17] (true=[0,10,20,30])\n",
            "[ep 26000] loss=1.259187e-02 | all-ones pred≈27.809 (true=30.0) | max|err|≈9.954 | mean|err|≈0.547 | group means≈[0.30,7.81,16.19,27.81] (true=[0,10,20,30])\n",
            "[ep 26050] loss=1.681495e-02 | all-ones pred≈27.566 (true=30.0) | max|err|≈9.942 | mean|err|≈0.520 | group means≈[0.27,7.81,16.11,27.57] (true=[0,10,20,30])\n",
            "[ep 26100] loss=1.451291e-02 | all-ones pred≈27.814 (true=30.0) | max|err|≈9.955 | mean|err|≈0.522 | group means≈[0.26,7.65,16.19,27.81] (true=[0,10,20,30])\n",
            "[ep 26150] loss=2.221474e-02 | all-ones pred≈27.171 (true=30.0) | max|err|≈9.470 | mean|err|≈0.520 | group means≈[0.23,7.33,15.66,27.17] (true=[0,10,20,30])\n",
            "[ep 26200] loss=1.666925e-02 | all-ones pred≈27.784 (true=30.0) | max|err|≈9.953 | mean|err|≈0.558 | group means≈[0.31,7.79,16.21,27.78] (true=[0,10,20,30])\n",
            "[ep 26250] loss=1.701988e-02 | all-ones pred≈27.300 (true=30.0) | max|err|≈9.526 | mean|err|≈0.505 | group means≈[0.22,7.37,15.77,27.30] (true=[0,10,20,30])\n",
            "[ep 26300] loss=1.402903e-02 | all-ones pred≈27.826 (true=30.0) | max|err|≈10.047 | mean|err|≈0.534 | group means≈[0.29,7.82,16.29,27.83] (true=[0,10,20,30])\n",
            "[ep 26350] loss=2.272333e-02 | all-ones pred≈26.999 (true=30.0) | max|err|≈9.203 | mean|err|≈0.514 | group means≈[0.20,7.12,15.51,27.00] (true=[0,10,20,30])\n",
            "[ep 26400] loss=1.282423e-02 | all-ones pred≈27.586 (true=30.0) | max|err|≈9.706 | mean|err|≈0.524 | group means≈[0.25,7.49,15.97,27.59] (true=[0,10,20,30])\n",
            "[ep 26450] loss=1.971677e-02 | all-ones pred≈27.261 (true=30.0) | max|err|≈9.616 | mean|err|≈0.506 | group means≈[0.23,7.50,15.83,27.26] (true=[0,10,20,30])\n",
            "[ep 26500] loss=1.351134e-02 | all-ones pred≈27.774 (true=30.0) | max|err|≈10.031 | mean|err|≈0.543 | group means≈[0.31,7.89,16.25,27.77] (true=[0,10,20,30])\n",
            "[ep 26550] loss=1.672814e-02 | all-ones pred≈27.456 (true=30.0) | max|err|≈9.812 | mean|err|≈0.546 | group means≈[0.30,7.84,16.08,27.46] (true=[0,10,20,30])\n",
            "[ep 26600] loss=1.249573e-02 | all-ones pred≈27.722 (true=30.0) | max|err|≈9.859 | mean|err|≈0.538 | group means≈[0.27,7.58,16.09,27.72] (true=[0,10,20,30])\n",
            "[ep 26650] loss=1.818984e-02 | all-ones pred≈27.369 (true=30.0) | max|err|≈9.650 | mean|err|≈0.527 | group means≈[0.26,7.59,15.94,27.37] (true=[0,10,20,30])\n",
            "[ep 26700] loss=1.790000e-02 | all-ones pred≈27.999 (true=30.0) | max|err|≈10.160 | mean|err|≈0.553 | group means≈[0.31,7.89,16.43,28.00] (true=[0,10,20,30])\n",
            "[ep 26750] loss=3.795981e-01 | all-ones pred≈27.479 (true=30.0) | max|err|≈9.744 | mean|err|≈0.556 | group means≈[0.28,7.41,15.84,27.48] (true=[0,10,20,30])\n",
            "[ep 26800] loss=3.665746e-02 | all-ones pred≈27.640 (true=30.0) | max|err|≈9.814 | mean|err|≈0.524 | group means≈[0.26,7.60,15.99,27.64] (true=[0,10,20,30])\n",
            "[ep 26850] loss=3.638458e-02 | all-ones pred≈27.425 (true=30.0) | max|err|≈9.818 | mean|err|≈0.519 | group means≈[0.25,7.57,15.87,27.43] (true=[0,10,20,30])\n",
            "[ep 26900] loss=1.331827e-02 | all-ones pred≈27.227 (true=30.0) | max|err|≈9.551 | mean|err|≈0.526 | group means≈[0.24,7.37,15.65,27.23] (true=[0,10,20,30])\n",
            "[ep 26950] loss=2.959599e-02 | all-ones pred≈27.511 (true=30.0) | max|err|≈9.769 | mean|err|≈0.523 | group means≈[0.26,7.60,15.93,27.51] (true=[0,10,20,30])\n",
            "[ep 27000] loss=3.932754e-03 | all-ones pred≈27.412 (true=30.0) | max|err|≈9.742 | mean|err|≈0.520 | group means≈[0.24,7.54,15.87,27.41] (true=[0,10,20,30])\n",
            "[ep 27050] loss=4.621555e-02 | all-ones pred≈27.623 (true=30.0) | max|err|≈9.822 | mean|err|≈0.536 | group means≈[0.27,7.57,15.97,27.62] (true=[0,10,20,30])\n",
            "[ep 27100] loss=3.791662e-02 | all-ones pred≈27.486 (true=30.0) | max|err|≈9.831 | mean|err|≈0.515 | group means≈[0.25,7.68,16.00,27.49] (true=[0,10,20,30])\n",
            "[ep 27150] loss=2.683457e-02 | all-ones pred≈27.393 (true=30.0) | max|err|≈9.559 | mean|err|≈0.528 | group means≈[0.25,7.31,15.75,27.39] (true=[0,10,20,30])\n",
            "[ep 27200] loss=4.727475e-03 | all-ones pred≈27.009 (true=30.0) | max|err|≈9.452 | mean|err|≈0.514 | group means≈[0.22,7.35,15.59,27.01] (true=[0,10,20,30])\n",
            "[ep 27250] loss=1.321645e-02 | all-ones pred≈27.639 (true=30.0) | max|err|≈9.932 | mean|err|≈0.524 | group means≈[0.27,7.72,16.13,27.64] (true=[0,10,20,30])\n",
            "[ep 27300] loss=3.158389e-02 | all-ones pred≈27.330 (true=30.0) | max|err|≈9.525 | mean|err|≈0.521 | group means≈[0.22,7.29,15.75,27.33] (true=[0,10,20,30])\n",
            "[ep 27350] loss=2.612106e-02 | all-ones pred≈27.824 (true=30.0) | max|err|≈10.013 | mean|err|≈0.535 | group means≈[0.29,7.77,16.18,27.82] (true=[0,10,20,30])\n",
            "[ep 27400] loss=5.290537e-03 | all-ones pred≈27.325 (true=30.0) | max|err|≈9.647 | mean|err|≈0.519 | group means≈[0.24,7.48,15.82,27.33] (true=[0,10,20,30])\n",
            "[ep 27450] loss=3.288019e-02 | all-ones pred≈27.888 (true=30.0) | max|err|≈10.145 | mean|err|≈0.548 | group means≈[0.31,7.92,16.33,27.89] (true=[0,10,20,30])\n",
            "[ep 27500] loss=2.473648e-02 | all-ones pred≈27.321 (true=30.0) | max|err|≈9.529 | mean|err|≈0.512 | group means≈[0.23,7.46,15.78,27.32] (true=[0,10,20,30])\n",
            "[ep 27550] loss=8.866368e-03 | all-ones pred≈27.680 (true=30.0) | max|err|≈9.755 | mean|err|≈0.531 | group means≈[0.27,7.62,16.03,27.68] (true=[0,10,20,30])\n",
            "[ep 27600] loss=1.043234e-02 | all-ones pred≈27.081 (true=30.0) | max|err|≈9.423 | mean|err|≈0.505 | group means≈[0.22,7.36,15.64,27.08] (true=[0,10,20,30])\n",
            "[ep 27650] loss=1.471500e-02 | all-ones pred≈27.384 (true=30.0) | max|err|≈9.534 | mean|err|≈0.515 | group means≈[0.24,7.36,15.77,27.38] (true=[0,10,20,30])\n",
            "[ep 27700] loss=8.903638e-03 | all-ones pred≈26.954 (true=30.0) | max|err|≈9.177 | mean|err|≈0.518 | group means≈[0.19,6.92,15.37,26.95] (true=[0,10,20,30])\n",
            "[ep 27750] loss=1.859987e-02 | all-ones pred≈27.585 (true=30.0) | max|err|≈9.718 | mean|err|≈0.515 | group means≈[0.24,7.48,15.99,27.58] (true=[0,10,20,30])\n",
            "[ep 27800] loss=1.258758e-02 | all-ones pred≈27.461 (true=30.0) | max|err|≈9.785 | mean|err|≈0.523 | group means≈[0.25,7.62,16.03,27.46] (true=[0,10,20,30])\n",
            "[ep 27850] loss=1.886066e-02 | all-ones pred≈27.752 (true=30.0) | max|err|≈9.962 | mean|err|≈0.547 | group means≈[0.30,7.75,16.19,27.75] (true=[0,10,20,30])\n",
            "[ep 27900] loss=1.730459e-02 | all-ones pred≈27.186 (true=30.0) | max|err|≈9.526 | mean|err|≈0.510 | group means≈[0.22,7.36,15.77,27.19] (true=[0,10,20,30])\n",
            "[ep 27950] loss=1.801150e-02 | all-ones pred≈27.699 (true=30.0) | max|err|≈9.909 | mean|err|≈0.535 | group means≈[0.28,7.73,16.20,27.70] (true=[0,10,20,30])\n",
            "[ep 28000] loss=1.154557e-02 | all-ones pred≈27.467 (true=30.0) | max|err|≈9.715 | mean|err|≈0.509 | group means≈[0.23,7.54,15.96,27.47] (true=[0,10,20,30])\n",
            "[ep 28050] loss=1.978912e-02 | all-ones pred≈27.742 (true=30.0) | max|err|≈9.855 | mean|err|≈0.548 | group means≈[0.29,7.68,16.19,27.74] (true=[0,10,20,30])\n",
            "[ep 28100] loss=3.197850e-02 | all-ones pred≈27.684 (true=30.0) | max|err|≈9.988 | mean|err|≈0.522 | group means≈[0.28,7.97,16.27,27.68] (true=[0,10,20,30])\n",
            "[ep 28150] loss=6.538776e-02 | all-ones pred≈27.902 (true=30.0) | max|err|≈10.004 | mean|err|≈0.529 | group means≈[0.27,7.73,16.20,27.90] (true=[0,10,20,30])\n",
            "[ep 28200] loss=1.189769e-02 | all-ones pred≈27.905 (true=30.0) | max|err|≈9.946 | mean|err|≈0.512 | group means≈[0.25,7.62,16.15,27.90] (true=[0,10,20,30])\n",
            "[ep 28250] loss=8.531276e-03 | all-ones pred≈27.263 (true=30.0) | max|err|≈9.519 | mean|err|≈0.534 | group means≈[0.26,7.41,15.73,27.26] (true=[0,10,20,30])\n",
            "[ep 28300] loss=1.831101e-02 | all-ones pred≈27.701 (true=30.0) | max|err|≈9.994 | mean|err|≈0.520 | group means≈[0.28,7.85,16.21,27.70] (true=[0,10,20,30])\n",
            "[ep 28350] loss=1.291893e-02 | all-ones pred≈27.537 (true=30.0) | max|err|≈9.815 | mean|err|≈0.522 | group means≈[0.27,7.79,16.07,27.54] (true=[0,10,20,30])\n",
            "[ep 28400] loss=1.577833e-02 | all-ones pred≈27.643 (true=30.0) | max|err|≈9.812 | mean|err|≈0.528 | group means≈[0.27,7.67,16.08,27.64] (true=[0,10,20,30])\n",
            "[ep 28450] loss=1.344934e-02 | all-ones pred≈27.612 (true=30.0) | max|err|≈9.908 | mean|err|≈0.550 | group means≈[0.30,7.80,16.21,27.61] (true=[0,10,20,30])\n",
            "[ep 28500] loss=1.761747e-02 | all-ones pred≈27.924 (true=30.0) | max|err|≈10.094 | mean|err|≈0.529 | group means≈[0.29,7.90,16.39,27.92] (true=[0,10,20,30])\n",
            "[ep 28550] loss=1.623587e-02 | all-ones pred≈27.438 (true=30.0) | max|err|≈9.821 | mean|err|≈0.511 | group means≈[0.24,7.58,16.03,27.44] (true=[0,10,20,30])\n",
            "[ep 28600] loss=1.166700e-02 | all-ones pred≈27.539 (true=30.0) | max|err|≈9.770 | mean|err|≈0.516 | group means≈[0.25,7.57,16.02,27.54] (true=[0,10,20,30])\n",
            "[ep 28650] loss=1.651623e-02 | all-ones pred≈27.373 (true=30.0) | max|err|≈9.792 | mean|err|≈0.543 | group means≈[0.28,7.65,15.98,27.37] (true=[0,10,20,30])\n",
            "[ep 28700] loss=1.440916e-02 | all-ones pred≈27.638 (true=30.0) | max|err|≈9.848 | mean|err|≈0.518 | group means≈[0.26,7.64,16.11,27.64] (true=[0,10,20,30])\n",
            "[ep 28750] loss=1.718117e-02 | all-ones pred≈27.464 (true=30.0) | max|err|≈9.750 | mean|err|≈0.514 | group means≈[0.25,7.66,16.04,27.46] (true=[0,10,20,30])\n",
            "[ep 28800] loss=1.052324e-02 | all-ones pred≈27.650 (true=30.0) | max|err|≈9.796 | mean|err|≈0.521 | group means≈[0.26,7.67,16.10,27.65] (true=[0,10,20,30])\n",
            "[ep 28850] loss=1.842464e-02 | all-ones pred≈27.212 (true=30.0) | max|err|≈9.422 | mean|err|≈0.516 | group means≈[0.22,7.29,15.71,27.21] (true=[0,10,20,30])\n",
            "[ep 28900] loss=1.313127e-02 | all-ones pred≈27.428 (true=30.0) | max|err|≈9.594 | mean|err|≈0.525 | group means≈[0.25,7.47,15.92,27.43] (true=[0,10,20,30])\n",
            "[ep 28950] loss=1.972867e-02 | all-ones pred≈27.559 (true=30.0) | max|err|≈9.859 | mean|err|≈0.533 | group means≈[0.28,7.79,16.15,27.56] (true=[0,10,20,30])\n",
            "[ep 29000] loss=1.775401e-02 | all-ones pred≈27.855 (true=30.0) | max|err|≈10.079 | mean|err|≈0.511 | group means≈[0.27,7.87,16.37,27.86] (true=[0,10,20,30])\n",
            "[ep 29050] loss=6.475107e-02 | all-ones pred≈27.581 (true=30.0) | max|err|≈9.721 | mean|err|≈0.538 | group means≈[0.28,7.63,16.05,27.58] (true=[0,10,20,30])\n",
            "[ep 29100] loss=9.226269e-02 | all-ones pred≈27.786 (true=30.0) | max|err|≈9.938 | mean|err|≈0.531 | group means≈[0.28,7.74,16.18,27.79] (true=[0,10,20,30])\n",
            "[ep 29150] loss=2.835274e-02 | all-ones pred≈27.463 (true=30.0) | max|err|≈9.757 | mean|err|≈0.540 | group means≈[0.28,7.65,15.99,27.46] (true=[0,10,20,30])\n",
            "[ep 29200] loss=2.707744e-02 | all-ones pred≈27.640 (true=30.0) | max|err|≈9.977 | mean|err|≈0.526 | group means≈[0.28,7.81,16.19,27.64] (true=[0,10,20,30])\n",
            "[ep 29250] loss=8.012320e-03 | all-ones pred≈27.484 (true=30.0) | max|err|≈9.778 | mean|err|≈0.503 | group means≈[0.23,7.62,16.00,27.48] (true=[0,10,20,30])\n",
            "[ep 29300] loss=2.133024e-02 | all-ones pred≈27.984 (true=30.0) | max|err|≈10.201 | mean|err|≈0.528 | group means≈[0.30,8.06,16.45,27.98] (true=[0,10,20,30])\n",
            "[ep 29350] loss=3.328301e-03 | all-ones pred≈27.270 (true=30.0) | max|err|≈9.597 | mean|err|≈0.509 | group means≈[0.23,7.54,15.88,27.27] (true=[0,10,20,30])\n",
            "[ep 29400] loss=1.788311e-02 | all-ones pred≈27.500 (true=30.0) | max|err|≈9.682 | mean|err|≈0.524 | group means≈[0.25,7.45,15.96,27.50] (true=[0,10,20,30])\n",
            "[ep 29450] loss=1.103187e-02 | all-ones pred≈27.382 (true=30.0) | max|err|≈9.695 | mean|err|≈0.512 | group means≈[0.25,7.72,15.97,27.38] (true=[0,10,20,30])\n",
            "[ep 29500] loss=2.097631e-02 | all-ones pred≈27.548 (true=30.0) | max|err|≈9.711 | mean|err|≈0.518 | group means≈[0.24,7.44,16.00,27.55] (true=[0,10,20,30])\n",
            "[ep 29550] loss=6.713954e-03 | all-ones pred≈27.452 (true=30.0) | max|err|≈9.753 | mean|err|≈0.512 | group means≈[0.25,7.71,15.99,27.45] (true=[0,10,20,30])\n",
            "[ep 29600] loss=1.551723e-02 | all-ones pred≈27.868 (true=30.0) | max|err|≈10.187 | mean|err|≈0.532 | group means≈[0.31,8.09,16.46,27.87] (true=[0,10,20,30])\n",
            "[ep 29650] loss=1.040115e-02 | all-ones pred≈27.410 (true=30.0) | max|err|≈9.768 | mean|err|≈0.514 | group means≈[0.26,7.74,16.03,27.41] (true=[0,10,20,30])\n",
            "[ep 29700] loss=4.008359e-02 | all-ones pred≈27.588 (true=30.0) | max|err|≈9.671 | mean|err|≈0.525 | group means≈[0.25,7.47,16.00,27.59] (true=[0,10,20,30])\n",
            "[ep 29750] loss=6.267479e-03 | all-ones pred≈27.242 (true=30.0) | max|err|≈9.489 | mean|err|≈0.524 | group means≈[0.23,7.26,15.73,27.24] (true=[0,10,20,30])\n",
            "[ep 29800] loss=2.453095e-02 | all-ones pred≈27.769 (true=30.0) | max|err|≈9.834 | mean|err|≈0.511 | group means≈[0.25,7.63,16.15,27.77] (true=[0,10,20,30])\n",
            "[ep 29850] loss=1.411229e-02 | all-ones pred≈27.153 (true=30.0) | max|err|≈9.499 | mean|err|≈0.502 | group means≈[0.22,7.39,15.74,27.15] (true=[0,10,20,30])\n",
            "[ep 29900] loss=1.882512e-02 | all-ones pred≈27.480 (true=30.0) | max|err|≈9.648 | mean|err|≈0.520 | group means≈[0.25,7.53,15.95,27.48] (true=[0,10,20,30])\n",
            "[ep 29950] loss=1.886955e-02 | all-ones pred≈26.741 (true=30.0) | max|err|≈9.064 | mean|err|≈0.498 | group means≈[0.18,7.02,15.32,26.74] (true=[0,10,20,30])\n",
            "[ep 30000] loss=1.641677e-02 | all-ones pred≈27.823 (true=30.0) | max|err|≈10.126 | mean|err|≈0.535 | group means≈[0.32,8.12,16.45,27.82] (true=[0,10,20,30])\n",
            "\n",
            "--- 3×(5-bit) INDICATORS + LINEAR CHECK (anchor-averaged) ---\n",
            "avg_pred(all-ones anchor) = 27.753  (true = 30.000)  -> within tol? False\n",
            "max |avg_pred - true|(all anchors) = 9.948 -> within tol? False\n",
            "mean |avg_pred - true|(all anchors) = 0.529\n",
            "\n",
            "--- Monte Carlo FULL-FUNCTION sanity check ---\n",
            "MC MSE over anchors×64: 2.388577\n",
            "(scalar=10.0, linear_coeff=0.0, d_noise=500)\n"
          ]
        }
      ],
      "source": [
        "import numpy as np\n",
        "import torch\n",
        "import torch.nn as nn\n",
        "import os, math\n",
        "\n",
        "# ---------------------------\n",
        "# Config (tune these)\n",
        "# ---------------------------\n",
        "seed = 0\n",
        "# Seed both the torch and the NumPy global RNGs with the same value.\n",
        "torch.manual_seed(seed); np.random.seed(seed)\n",
        "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
        "checkpoint_dir = \"\"         # checkpoint directory; \"\" resolves to the current working directory\n",
        "\n",
        "# Three groups of 5 indicator bits each -> total 15 indicator bits\n",
        "d_indicator = 15\n",
        "d_noise     = 500           # 500 additional variables with NO effect\n",
        "d_total     = d_indicator + d_noise\n",
        "N_indicator = 1 << d_indicator   # 32768\n",
        "\n",
        "# Train\n",
        "epochs       = 30000\n",
        "print_every  = 50\n",
        "lr           = 2e-3\n",
        "weight_decay = 5e-4\n",
        "hidden       = 128\n",
        "\n",
        "# Task\n",
        "scalar                = 10.0    # indicator spike value per matching 5-bit group (kept same)\n",
        "linear_coeff          = 0.0     # penalty per +1 among the first 15 bits (negative effect if >0)\n",
        "min_points            = 65536   # lower bound on base rows; divided by N_indicator to get reps per anchor\n",
        "target_extra_reps     = 2048    # extra rows for the all-ones anchor (oversampling)\n",
        "pos_weight            = 16.0    # upweight all-ones-anchor rows in MSE (pure MSE, just weighting)\n",
        "\n",
        "# Eval\n",
        "eval_R = 256  # noise samples per anchor for evaluation (anchor-averaged)\n",
        "\n",
        "# ---------------------------\n",
        "# Utilities\n",
        "# ---------------------------\n",
        "def full_cube_pm1(dim, device):\n",
        "    n = 1 << dim\n",
        "    idx = torch.arange(n, device=device, dtype=torch.long).unsqueeze(1)\n",
        "    shifts = torch.arange(dim, device=device, dtype=torch.long).unsqueeze(0)\n",
        "    bits = (idx >> shifts) & 1\n",
        "    return bits.float().mul_(2).sub_(1)\n",
        "\n",
        "def rand_pm1(shape, device):\n",
        "    return (torch.randint(0, 2, shape, device=device, dtype=torch.int8).to(torch.float32) * 2 - 1)\n",
        "\n",
        "def anchor_id_from_pm1(x):  # x: [...,d_indicator] in {-1,+1}\n",
        "    bits01 = ((x + 1.0) * 0.5).long()\n",
        "    shifts = (2 ** torch.arange(d_indicator, device=x.device, dtype=torch.long)).view(1, -1)\n",
        "    return (bits01 * shifts).sum(dim=-1)  # long\n",
        "\n",
        "def unitation_pm1(x):  # count of +1s in the first d_indicator bits\n",
        "    return (x == 1.0).sum(dim=-1)\n",
        "\n",
        "def count_group_matches_pm1(x, group_size=5, n_groups=3):  # x: [..., d_indicator]\n",
        "    assert x.size(-1) == group_size * n_groups\n",
        "    xg = x.reshape(-1, n_groups, group_size)          # [B, 3, 5]\n",
        "    # True if an entire 5-bit group equals +1\n",
        "    return xg.eq(1.0).all(dim=2).sum(dim=1).to(torch.float32)  # [B] in {0,1,2,3}\n",
        "\n",
        "# ---------------------------\n",
        "# Data\n",
        "# ---------------------------\n",
        "# Target pattern is fixed to all +1s across ALL 15 indicator bits\n",
        "target15 = torch.ones(d_indicator, device=device, dtype=torch.float32)\n",
        "print(\"target15 (fixed to all +1s):\", target15.cpu().numpy())\n",
        "\n",
        "anchors = full_cube_pm1(d_indicator, device)           # [2^15,15]\n",
        "reps = int(math.ceil(min_points / N_indicator))        # base repeats per anchor\n",
        "if reps % 2 == 1:\n",
        "    reps += 1  # need even to make ± pairs\n",
        "# Row index of the all-ones pattern inside `anchors`.\n",
        "target_idx = anchor_id_from_pm1(target15.unsqueeze(0)).item()\n",
        "\n",
        "# Per-anchor chunks of inputs, labels and loss weights; concatenated after the loops.\n",
        "rows, ys, weights = [], [], []\n",
        "\n",
        "# Precompute linear term per anchor (depends only on first 15 bits)\n",
        "u_per_anchor = unitation_pm1(anchors)                  # [2^15], in {0..15}\n",
        "linear_val_per_anchor = (-linear_coeff * u_per_anchor).to(torch.float32)  # -linear_coeff * (#+1 bits)\n",
        "\n",
        "# Precompute how many of the three 5-bit groups match all +1s\n",
        "matches_per_anchor = count_group_matches_pm1(anchors, group_size=5, n_groups=3)  # [2^15], in {0..3}\n",
        "\n",
        "# Base coverage with paired ± noise tails for every anchor\n",
        "for i in range(N_indicator):\n",
        "    a = anchors[i]\n",
        "    half = reps // 2\n",
        "    # Each noise tail is paired with its negation, so the tails of an anchor sum to zero.\n",
        "    X_noise_half = rand_pm1((half, d_noise), device)              # [half,500]\n",
        "    X_noise = torch.cat([X_noise_half, -X_noise_half], dim=0)     # [reps,500]\n",
        "    X_ind = a.expand(reps, -1)                                    # [reps,15]\n",
        "    Xb = torch.cat([X_ind, X_noise], dim=1)                       # [reps, 15+500]\n",
        "\n",
        "    # y = (scalar per matching 5-bit group) + linear penalty over the 15 bits\n",
        "    y_anchor = scalar * matches_per_anchor[i].item() + linear_val_per_anchor[i].item()\n",
        "    yb = torch.full((reps,), y_anchor, device=device, dtype=torch.float32)\n",
        "\n",
        "    rows.append(Xb); ys.append(yb)\n",
        "    # Keep the same weighting scheme: only the all-ones anchor is upweighted\n",
        "    w = torch.full((reps,), pos_weight if i == target_idx else 1.0, device=device)\n",
        "    weights.append(w)\n",
        "\n",
        "# Extra rows for the all-ones anchor (still pure MSE). Keep them paired as well.\n",
        "if target_extra_reps > 0:\n",
        "    # Round an odd count up to the next even number (this overwrites the config value).\n",
        "    if target_extra_reps % 2 == 1:\n",
        "        target_extra_reps += 1\n",
        "    a = anchors[target_idx]\n",
        "    half = target_extra_reps // 2\n",
        "    X_noise_half = rand_pm1((half, d_noise), device)\n",
        "    X_noise = torch.cat([X_noise_half, -X_noise_half], dim=0)     # [extra,500]\n",
        "    X_ind = a.expand(target_extra_reps, -1)\n",
        "    Xb = torch.cat([X_ind, X_noise], dim=1)\n",
        "\n",
        "    # At the all-ones anchor, all three groups match -> 3*scalar, plus linear penalty\n",
        "    y_anchor_target = 3.0 * scalar + linear_val_per_anchor[target_idx].item()\n",
        "    yb = torch.full((target_extra_reps,), y_anchor_target, device=device, dtype=torch.float32)\n",
        "\n",
        "    rows.append(Xb); ys.append(yb)\n",
        "    weights.append(torch.full((target_extra_reps,), pos_weight, device=device))\n",
        "\n",
        "X_train = torch.cat(rows, dim=0)        # [M, 15+500]\n",
        "y_train = torch.cat(ys, dim=0)          # [M]\n",
        "w_train = torch.cat(weights, dim=0)     # [M]\n",
        "M = X_train.size(0)\n",
        "print(f\"Train size: {M} | base reps/anchor: {reps} | target extra reps: {target_extra_reps}\")\n",
        "\n",
        "# For quick training-time logs (all-ones-anchor mask)\n",
        "# NOTE(review): this mask is not referenced by the training loop in this cell.\n",
        "is_target_row = (X_train[:, :d_indicator] == target15).all(dim=1)\n",
        "\n",
        "# ---------------------------\n",
        "# Model (single FCNN)\n",
        "# ---------------------------\n",
        "class FCNN(nn.Module):\n",
        "    def __init__(self, in_dim=15+500, hidden=128):\n",
        "        super().__init__()\n",
        "        self.net = nn.Sequential(\n",
        "            nn.Linear(in_dim, hidden), nn.ReLU(),\n",
        "            nn.Linear(hidden, hidden), nn.ReLU(),\n",
        "            nn.Linear(hidden, 1),\n",
        "        )\n",
        "        for m in self.modules():\n",
        "            if isinstance(m, nn.Linear):\n",
        "                nn.init.xavier_uniform_(m.weight); nn.init.zeros_(m.bias)\n",
        "    def forward(self, x): return self.net(x).squeeze(-1)\n",
        "\n",
        "# Build the network for d_total inputs on the selected device and optimise it with\n",
        "# Adam, passing weight_decay straight to the optimiser.\n",
        "model = FCNN(d_total, hidden).to(device)\n",
        "opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n",
        "\n",
        "def weighted_mse(pred, target, w): return (w * (pred - target).pow(2)).mean()\n",
        "\n",
        "# ---------------------------\n",
        "# Train (full-batch, pure MSE)\n",
        "# ---------------------------\n",
        "# Make sure the checkpoint directory exists before the first save. An empty\n",
        "# checkpoint_dir resolves to the current working directory and needs no creation.\n",
        "if checkpoint_dir:\n",
        "    os.makedirs(checkpoint_dir, exist_ok=True)\n",
        "\n",
        "for ep in range(1, epochs + 1):\n",
        "    # One full-batch gradient step on the weighted MSE.\n",
        "    model.train()\n",
        "    pred = model(X_train)\n",
        "    loss = weighted_mse(pred, y_train, w_train)\n",
        "\n",
        "    opt.zero_grad(set_to_none=True)\n",
        "    loss.backward()\n",
        "    # Clip the global gradient norm to 1.0 before the optimiser update.\n",
        "    nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
        "    opt.step()\n",
        "\n",
        "    if ep % print_every == 0 or ep == 1 or ep == epochs:\n",
        "        with torch.no_grad():\n",
        "            # Small eval during training: average out noise per anchor.\n",
        "            # The same Rlog noise tails are shared by all anchors.\n",
        "            Rlog = 64\n",
        "            x_noise_eval = rand_pm1((Rlog, d_noise), device)\n",
        "            X1 = anchors.repeat_interleave(Rlog, dim=0)         # [2^15*Rlog, 15]\n",
        "            X2 = x_noise_eval.repeat(N_indicator, 1)            # [2^15*Rlog, 500]\n",
        "            p = model(torch.cat([X1, X2], dim=1)).view(N_indicator, Rlog).mean(dim=1)\n",
        "\n",
        "            true = linear_val_per_anchor + scalar * matches_per_anchor\n",
        "            err = (p - true).abs()\n",
        "\n",
        "            # Group-wise means (how well we hit 0/10/20/30)\n",
        "            g_means = []\n",
        "            for k in range(4):\n",
        "                m = (matches_per_anchor == k)\n",
        "                g_means.append(p[m].mean().item())\n",
        "\n",
        "            print(f\"[ep {ep:5d}] loss={loss.item():.6e} | \"\n",
        "                f\"all-ones pred≈{p[target_idx].item():.3f} (true={true[target_idx].item():.1f}) | \"\n",
        "                f\"max|err|≈{err.max().item():.3f} | mean|err|≈{err.mean().item():.3f} | \"\n",
        "                f\"group means≈[{g_means[0]:.2f},{g_means[1]:.2f},{g_means[2]:.2f},{g_means[3]:.2f}] \"\n",
        "                f\"(true=[0,10,20,30])\")\n",
        "\n",
        "\n",
        "    # Checkpoints are written every 5 epochs regardless of checkpoint_dir; the\n",
        "    # sampling cells later load them by epoch number.\n",
        "    if ep % 5 == 0:\n",
        "        ckpt_path = os.path.join(checkpoint_dir, f\"ckpt_epoch_{ep}.pt\")\n",
        "        torch.save({\"epoch\": ep, \"model_state_dict\": model.state_dict()}, ckpt_path)\n",
        "\n",
        "# ---------------------------\n",
        "# Final evaluation (anchor check over all 2^15; average out noise tail)\n",
        "# ---------------------------\n",
        "model.eval()\n",
        "with torch.no_grad():\n",
        "    # Pair every anchor with the same eval_R random noise tails, then average the\n",
        "    # predictions over those tails to get one value per anchor.\n",
        "    x_noise_eval = rand_pm1((eval_R, d_noise), device)\n",
        "    X1 = anchors.repeat_interleave(eval_R, dim=0)    # [2^15*R,15]\n",
        "    X2 = x_noise_eval.repeat(N_indicator, 1)         # [2^15*R,500]\n",
        "    X_eval = torch.cat([X1, X2], dim=1)              # [2^15*R,515]\n",
        "    preds_avg = model(X_eval).view(N_indicator, eval_R).mean(dim=1)\n",
        "\n",
        "    # True function per anchor = (# of matching 5-bit groups)*scalar + linear penalty\n",
        "    true_vals = linear_val_per_anchor.clone()\n",
        "    true_vals = true_vals + scalar * matches_per_anchor\n",
        "\n",
        "    target_avg = preds_avg[target_idx].item()\n",
        "    # Averaged predictions of every anchor except the all-ones one.\n",
        "    # NOTE(review): not referenced again in this cell.\n",
        "    non_avg    = preds_avg[torch.arange(N_indicator, device=device) != target_idx]\n",
        "\n",
        "    # Deviations vs true values\n",
        "    errs = preds_avg - true_vals\n",
        "    max_abs_err = errs.abs().max().item()\n",
        "    mean_abs_err = errs.abs().mean().item()\n",
        "\n",
        "    # Full-function Monte Carlo MSE (depends only on first 15 bits)\n",
        "    # Unlike the anchor-averaged errors above, this is a per-row squared error.\n",
        "    mc_R = 64\n",
        "    X1_mc = anchors.repeat_interleave(mc_R, dim=0)\n",
        "    X2_mc = rand_pm1((mc_R, d_noise), device).repeat(N_indicator, 1)\n",
        "    X_mc  = torch.cat([X1_mc, X2_mc], dim=1)\n",
        "    pred_mc = model(X_mc)\n",
        "\n",
        "    # True y for MC rows\n",
        "    u_mc = unitation_pm1(X1_mc[:, :d_indicator])\n",
        "    # Count matches of 5-bit groups for each MC row\n",
        "    matches_mc = count_group_matches_pm1(X1_mc[:, :d_indicator], group_size=5, n_groups=3)\n",
        "    y_mc = (-linear_coeff * u_mc.to(torch.float32)) + scalar * matches_mc\n",
        "    mse_mc = (pred_mc - y_mc).pow(2).mean().item()\n",
        "\n",
        "# Absolute-error tolerance used for the pass/fail flags printed below.\n",
        "tol = 0.25\n",
        "print(\"\\n--- 3×(5-bit) INDICATORS + LINEAR CHECK (anchor-averaged) ---\")\n",
        "print(f\"avg_pred(all-ones anchor) = {target_avg:.3f}  (true = {true_vals[target_idx].item():.3f})  -> within tol? {abs(target_avg - true_vals[target_idx].item()) <= tol}\")\n",
        "print(f\"max |avg_pred - true|(all anchors) = {max_abs_err:.3f} -> within tol? {max_abs_err <= tol}\")\n",
        "print(f\"mean |avg_pred - true|(all anchors) = {mean_abs_err:.3f}\")\n",
        "\n",
        "# Per-row (not anchor-averaged) squared error plus the task settings used.\n",
        "print(\"\\n--- Monte Carlo FULL-FUNCTION sanity check ---\")\n",
        "print(f\"MC MSE over anchors×{mc_R}: {mse_mc:.6f}\")\n",
        "print(f\"(scalar={scalar}, linear_coeff={linear_coeff}, d_noise={d_noise})\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "SSd3FGCuOX4m"
      },
      "outputs": [],
      "source": [
        "# Alias for the total input width (indicator bits + noise bits).\n",
        "D = d_total"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "f0ing7qe4VH1"
      },
      "outputs": [],
      "source": [
        "import torch\n",
        "import torch.nn as nn\n",
        "import torch.nn.functional as F\n",
        "import numpy as np\n",
        "import os, glob\n",
        "import tqdm\n",
        "import math\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "class GWGSampler:\n",
        "    def __init__(self, model, beta=1.0):\n",
        "        self.model = model\n",
        "        self.beta = float(beta)\n",
        "\n",
        "    def _energy(self, x: torch.Tensor) -> torch.Tensor:\n",
        "        # NEGATIVE sign: lower energy = higher model output\n",
        "        y = self.model(x.view(1, -1)).view(())\n",
        "        return -y\n",
        "\n",
        "    @torch.no_grad()\n",
        "    def _deltas_exact(self, x: torch.Tensor) -> torch.Tensor:\n",
        "        device = x.device\n",
        "        D = x.numel()\n",
        "        y = self._energy(x)  # scalar E(x)\n",
        "\n",
        "        # vectorized single-bit flips\n",
        "        X = x.unsqueeze(0).repeat(D, 1)\n",
        "        idx = torch.arange(D, device=device)\n",
        "        X[idx, idx] = -X[idx, idx]\n",
        "        y_flips = torch.vmap(self._energy)(X)  # or: torch.stack([self._energy(X[i]) for i in range(D)])\n",
        "        return y_flips - y  # Δ_i = E(x^i) - E(x)\n",
        "\n",
        "\n",
        "    def _deltas_grad(self, x: torch.Tensor) -> torch.Tensor:\n",
        "        # GWG approx: Δ_i ≈ -2 x_i ∂_i E(x) = 2 x_i ∂_i model(x)\n",
        "        x = x.detach().clone().requires_grad_(True)\n",
        "        y = self.model(x.view(1, -1)).view(())\n",
        "        (g,) = torch.autograd.grad(y, x, create_graph=False, retain_graph=False)\n",
        "        return (2.0 * x * g).detach()\n",
        "\n",
        "    #@torch.no_grad()\n",
        "    def single_step(self, x: torch.Tensor) -> torch.Tensor:\n",
        "        x = x.detach().clone()\n",
        "        deltas = self._deltas_exact(x)                # Δ_i\n",
        "        #deltas = self._deltas_grad(x)\n",
        "\n",
        "        # coordinate proposal p(i) ∝ exp(-β Δ_i / 2)\n",
        "        logits = -self.beta * deltas / 2.0\n",
        "        probs  = torch.softmax(logits, dim=0)\n",
        "        i = torch.multinomial(probs, 1).item()\n",
        "\n",
        "        # candidate flip\n",
        "        x_new = x.clone(); x_new[i] = -x_new[i]\n",
        "\n",
        "        # MH correction (exact reverse proposal)\n",
        "        deltas_p = self._deltas_exact(x_new)\n",
        "        #deltas_p = self._deltas_grad(x_new)\n",
        "        q_fwd = probs[i]\n",
        "        q_rev = torch.softmax(-self.beta * deltas_p / 2.0, dim=0)[i]\n",
        "        delta_i = deltas[i]\n",
        "\n",
        "        accept = torch.exp(-self.beta * delta_i) * (q_rev / q_fwd)\n",
        "        if torch.rand((), device=x.device) < torch.clamp(accept, max=1.0):\n",
        "            return x_new.detach()\n",
        "        return x.detach()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "5-qyZqMa65dP"
      },
      "outputs": [],
      "source": [
        "def sampling_via_checkpoints(\n",
        "    checkpoint_dir: str,\n",
        "    epochs: list[int],\n",
        "    FCNNClass,\n",
        "    GWGSamplerClass,\n",
        "    num_particles: int = 200,\n",
        "    mcmc_steps: int = 15,\n",
        "    resample_thresh: float = 0.5,\n",
        "    device: str = \"cuda\",\n",
        "    beta: float = 1.0\n",
        "):\n",
        "    \"\"\"Evolve random ±1 particles through a sequence of saved model checkpoints.\n",
        "\n",
        "    Particles start uniformly at random in {-1,+1}^d_total. For each checkpoint\n",
        "    (in increasing epoch order) the model is loaded and every particle is moved by\n",
        "    `mcmc_steps` sampler steps under that model.\n",
        "\n",
        "    Args:\n",
        "        checkpoint_dir: directory holding files named ckpt_epoch_<e>.pt.\n",
        "        epochs: checkpoint epochs to visit; sorted ascending before use.\n",
        "        FCNNClass: model class, constructed as FCNNClass(d_total, hidden).\n",
        "        GWGSamplerClass: sampler class, constructed as GWGSamplerClass(model, beta=beta).\n",
        "        num_particles: number of independent particles.\n",
        "        mcmc_steps: sampler steps per particle and checkpoint.\n",
        "        resample_thresh: currently unused; no resampling is performed.\n",
        "        device: torch device; a CUDA request falls back to CPU when CUDA is unavailable.\n",
        "        beta: inverse temperature passed to the sampler.\n",
        "\n",
        "    Returns:\n",
        "        (particles, hit): the final particles as a NumPy array of shape\n",
        "        [num_particles, d_total], and a 0-dim bool tensor that is True only when the\n",
        "        first d_indicator bits of EVERY particle equal target15.\n",
        "\n",
        "    Reads the notebook globals d_total, d_indicator, hidden and target15.\n",
        "    \"\"\"\n",
        "    # The default device is \"cuda\"; mirror the notebook's own fallback so the\n",
        "    # function also runs on CPU-only machines.\n",
        "    if str(device).startswith(\"cuda\") and not torch.cuda.is_available():\n",
        "        device = \"cpu\"\n",
        "\n",
        "    epochs = sorted(epochs)\n",
        "    ckpts = [os.path.join(checkpoint_dir, f\"ckpt_epoch_{e}.pt\") for e in epochs]\n",
        "\n",
        "    D = d_total\n",
        "    particles = (torch.randint(0, 2, (num_particles, D), device=device) * 2 - 1).float()\n",
        "    # Compare against the target on the particles' device.\n",
        "    target = target15.to(particles.device).view(1, -1)\n",
        "\n",
        "    for ckpt in ckpts:\n",
        "        # load model\n",
        "        model = FCNNClass(D, hidden).to(device).eval()\n",
        "        sd = torch.load(ckpt, map_location=device)\n",
        "        model.load_state_dict(sd['model_state_dict'])\n",
        "\n",
        "        # GWG rejuvenation targeting current energy\n",
        "        sampler = GWGSamplerClass(model, beta=beta)\n",
        "        for i in range(num_particles):\n",
        "            x = particles[i]\n",
        "            for _ in range(mcmc_steps):\n",
        "                x = sampler.single_step(x)\n",
        "            particles[i] = x\n",
        "\n",
        "    # For ±1 entries this equals the former `(particles - target).min() == 0.0`:\n",
        "    # every difference is 0 or -2, so the minimum is 0 only if all bits match.\n",
        "    hit = (particles[:, :d_indicator] == target).all()\n",
        "    return particles.cpu().numpy(), hit\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 217
        },
        "id": "VfxJdoLS-JDF",
        "outputId": "7bf2af20-ef6a-4e90-9954-b7ec8adc9e12"
      },
      "outputs": [],
      "source": [
        "# Repeat the two-checkpoint sampling run (epochs 25 and 3000) 200 times with a\n",
        "# single particle each, counting the runs whose returned hit flag is True.\n",
        "hit_count = 0\n",
        "for i in range(200):\n",
        "  particles, hit_or_not = sampling_via_checkpoints(checkpoint_dir,[25, 3000], FCNN, GWGSampler,num_particles = 1, mcmc_steps=20, beta=10.0)\n",
        "  hit_count += hit_or_not.item()\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "200"
            ]
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Show the accumulated number of hits as the cell output.\n",
        "hit_count"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Hit count: 200/200\n",
            "Hit fraction: 1.0000\n",
            "2 SD CI: [1.0000, 1.0000]  (SE ≈ 0.0000)\n"
          ]
        }
      ],
      "source": [
        "import math\n",
        "\n",
        "n_trials = 200  # must equal the number of sampling runs that produced hit_count\n",
        "\n",
        "# Guard the division as well as the standard error: with n_trials == 0 both are\n",
        "# undefined, and dividing first would raise ZeroDivisionError.\n",
        "if n_trials > 0:\n",
        "    p = hit_count / float(n_trials)  # hit fraction\n",
        "    se = math.sqrt(p * (1.0 - p) / n_trials)  # binomial standard error\n",
        "else:\n",
        "    p = float('nan')\n",
        "    se = float('nan')\n",
        "\n",
        "# Normal-approximation interval clipped to [0, 1]; it collapses to a single point\n",
        "# when p is exactly 0 or 1 because the standard error is then 0.\n",
        "lo = max(0.0, p - 2 * se)\n",
        "hi = min(1.0, p + 2 * se)\n",
        "\n",
        "print(f\"Hit count: {hit_count}/{n_trials}\")\n",
        "print(f\"Hit fraction: {p:.4f}\")\n",
        "print(f\"2 SD CI: [{lo:.4f}, {hi:.4f}]  (SE ≈ {se:.4f})\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {
        "id": "F8C_eM1iU-KV"
      },
      "outputs": [],
      "source": [
        "@torch.no_grad()\n",
        "def first_hit_steps(sampler, target10, d_indicator, d_total, device, max_steps=10000):\n",
        "    \"\"\"\n",
        "    Measure how many sampler steps a random start needs to reach the target pattern.\n",
        "\n",
        "    A uniformly random ±1 state of length d_total is drawn and advanced with\n",
        "    sampler.single_step until its first d_indicator entries equal target10.\n",
        "\n",
        "    Returns 0 if the initial state already matches, otherwise the 1-based index of\n",
        "    the first matching step, or None when no match occurs within max_steps.\n",
        "    \"\"\"\n",
        "    def on_target(state):\n",
        "        # Only the leading indicator bits are compared; the remaining bits are ignored.\n",
        "        return bool((state[:d_indicator] == target10).all())\n",
        "\n",
        "    state = rand_pm1((d_total,), device).to(torch.float32)\n",
        "    if on_target(state):\n",
        "        return 0\n",
        "\n",
        "    steps_taken = 0\n",
        "    while steps_taken < max_steps:\n",
        "        state = sampler.single_step(state)\n",
        "        steps_taken += 1\n",
        "        if on_target(state):\n",
        "            return steps_taken\n",
        "    return None"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {
        "id": "PZpQUR6DU6XX"
      },
      "outputs": [],
      "source": [
        "@torch.no_grad()\n",
        "def run_gwg_trials(model, target10, d_indicator, d_total, device,\n",
        "                   n_trials=200, max_steps=10000, beta=1.0, verbose_every=50,\n",
        "                   bootstrap_B=2000, rng_seed=0):\n",
        "    \"\"\"\n",
        "    Run GWG first-hit experiments and print statistics with 2 SD confidence intervals.\n",
        "\n",
        "    Each trial calls first_hit_steps with a GWGSampler(model, beta=beta); a trial that\n",
        "    returns None is recorded as a miss.\n",
        "\n",
        "    - Unsuccessful trials are counted as max_steps for 'ALL trials' aggregates.\n",
        "    - Medians use bootstrap to estimate the standard error, then ± 2*SE for the CI.\n",
        "    - 'Hit probability within budgets' is (#hits with steps <= T) / (#trials). Misses\n",
        "      are never counted as hits, and budgets above max_steps are omitted because no\n",
        "      trial was run that long.\n",
        "\n",
        "    Args:\n",
        "        model: passed to GWGSampler; model.eval() is called first.\n",
        "        target10, d_indicator, d_total, device, max_steps: forwarded to first_hit_steps.\n",
        "        n_trials: number of trials to run.\n",
        "        beta: forwarded to GWGSampler.\n",
        "        verbose_every: print a progress line every this many trials (and after the\n",
        "            last one); a falsy value disables progress output.\n",
        "        bootstrap_B: number of bootstrap resamples used for the median CIs.\n",
        "        rng_seed: seed of the bootstrap RNG (the HITS-only median uses rng_seed + 1).\n",
        "\n",
        "    Returns:\n",
        "        None. All statistics are printed.\n",
        "    \"\"\"\n",
        "    import collections\n",
        "    import math\n",
        "    import numpy as np\n",
        "    model.eval()\n",
        "    sampler = GWGSampler(model, beta=beta)\n",
        "\n",
        "    # ---------------------------\n",
        "    # Helpers\n",
        "    # ---------------------------\n",
        "    def ci_mean_2sd(x):\n",
        "        \"\"\"Mean ± 2 SD/√n CI. Returns (mean, sd, (lo, hi)); all NaN for empty input.\"\"\"\n",
        "        x = np.asarray(x, dtype=np.float64)\n",
        "        n = len(x)\n",
        "        if n == 0:\n",
        "            # Avoid numpy's empty-mean RuntimeWarning and a misleading SD of 0.\n",
        "            nan = float(\"nan\")\n",
        "            return nan, nan, (nan, nan)\n",
        "        mu = float(x.mean())\n",
        "        sd = float(x.std(ddof=1)) if n > 1 else 0.0\n",
        "        se = sd / math.sqrt(n)\n",
        "        return mu, sd, (mu - 2*se, mu + 2*se)\n",
        "\n",
        "    def ci_prop_2sd(k, n):\n",
        "        \"\"\"Proportion ± 2 SD (binomial SE), clipped to [0, 1].\"\"\"\n",
        "        p = (k / n) if n > 0 else float(\"nan\")\n",
        "        se = math.sqrt(p * (1 - p) / n) if n > 0 else float(\"nan\")\n",
        "        lo, hi = max(0.0, p - 2*se), min(1.0, p + 2*se)\n",
        "        return p, se, (lo, hi)\n",
        "\n",
        "    def ci_median_bootstrap_2sd(x, B=2000, seed=0):\n",
        "        \"\"\"Median and ± 2 SD bootstrap CI. Returns (median, boot_sd, (lo, hi)).\"\"\"\n",
        "        x = np.asarray(x, dtype=np.float64)\n",
        "        n = len(x)\n",
        "        if n == 0:\n",
        "            return float(\"nan\"), float(\"nan\"), (float(\"nan\"), float(\"nan\"))\n",
        "        if n == 1:\n",
        "            med = float(x[0])\n",
        "            return med, 0.0, (med, med)\n",
        "        rng = np.random.default_rng(seed)\n",
        "        med = float(np.median(x))\n",
        "        meds = np.empty(B, dtype=np.float64)\n",
        "        idx = np.arange(n)\n",
        "        for b in range(B):\n",
        "            # Resample n observations with replacement and record the median.\n",
        "            resample = x[rng.choice(idx, size=n, replace=True)]\n",
        "            meds[b] = np.median(resample)\n",
        "        sd = float(meds.std(ddof=1))\n",
        "        return med, sd, (med - 2*sd, med + 2*sd)\n",
        "\n",
        "    def robust_mad(x):\n",
        "        \"\"\"Median Absolute Deviation (MAD); NaN for empty input.\"\"\"\n",
        "        x = np.asarray(x, dtype=np.float64)\n",
        "        if len(x) == 0:\n",
        "            return float(\"nan\")\n",
        "        med = np.median(x)\n",
        "        return float(np.median(np.abs(x - med)))\n",
        "\n",
        "    # ---------------------------\n",
        "    # Trials\n",
        "    # ---------------------------\n",
        "    hits_only = []    # steps for successful trials\n",
        "    all_steps = []    # steps for all trials (misses counted as max_steps)\n",
        "    misses = 0\n",
        "\n",
        "    for i in range(1, n_trials + 1):\n",
        "        steps = first_hit_steps(sampler, target10, d_indicator, d_total, device, max_steps)\n",
        "        if steps is None:\n",
        "            misses += 1\n",
        "            all_steps.append(max_steps)\n",
        "            last_str = \"miss\"\n",
        "        else:\n",
        "            s = int(steps)\n",
        "            hits_only.append(s)\n",
        "            all_steps.append(s)\n",
        "            last_str = str(s)\n",
        "\n",
        "        if verbose_every and (i % verbose_every == 0 or i == n_trials):\n",
        "            hit_rate = (i - misses) / i\n",
        "            print(f\"[trial {i:4d}] last={last_str} | hits={i - misses} | misses={misses} | hit_rate={hit_rate:.3f}\")\n",
        "\n",
        "    # Convert to numpy\n",
        "    arr_all  = np.array(all_steps, dtype=np.int64)\n",
        "    arr_hits = np.array(hits_only, dtype=np.int64)\n",
        "\n",
        "    # ---------------------------\n",
        "    # Core statistics with 2 SD CIs\n",
        "    # ---------------------------\n",
        "    print(\"\\n=== GWG First-Hit Statistics with 2 SD Confidence Intervals ===\")\n",
        "    print(f\"trials={n_trials} | hits={n_trials - misses} | misses={misses} | miss_penalty=max_steps({max_steps})\")\n",
        "\n",
        "    # Hit rate CI (binomial)\n",
        "    p, p_se, (p_lo, p_hi) = ci_prop_2sd(n_trials - misses, n_trials)\n",
        "    print(f\"Hit rate              : {p:.4f}  (±2SD CI: [{p_lo:.4f}, {p_hi:.4f}])  | SE≈{p_se:.4f}\")\n",
        "\n",
        "    # Mean (ALL trials)\n",
        "    mean_all, sd_all, (lo_all, hi_all) = ci_mean_2sd(arr_all)\n",
        "    print(f\"Mean steps (ALL)      : {mean_all:.2f}  (±2SD CI: [{lo_all:.2f}, {hi_all:.2f}])  | SD={sd_all:.2f}\")\n",
        "\n",
        "    # Median (ALL trials, misses=max_steps)\n",
        "    med_all, med_all_se_boot, (med_all_lo, med_all_hi) = ci_median_bootstrap_2sd(\n",
        "        arr_all, B=bootstrap_B, seed=rng_seed\n",
        "    )\n",
        "    print(f\"Median steps (ALL)    : {med_all:.2f}  (±2SD boot CI: [{med_all_lo:.2f}, {med_all_hi:.2f}])  | boot SD≈{med_all_se_boot:.2f}\")\n",
        "\n",
        "    # Median (HITS only)\n",
        "    if len(arr_hits) > 0:\n",
        "        med_hits, med_hits_se_boot, (med_hits_lo, med_hits_hi) = ci_median_bootstrap_2sd(\n",
        "            arr_hits, B=bootstrap_B, seed=rng_seed + 1\n",
        "        )\n",
        "        print(f\"Median steps (HITS)   : {med_hits:.2f}  (±2SD boot CI: [{med_hits_lo:.2f}, {med_hits_hi:.2f}])  | boot SD≈{med_hits_se_boot:.2f}\")\n",
        "    else:\n",
        "        print(\"Median steps (HITS)   : n/a (no successful trials)\")\n",
        "\n",
        "    # ---------------------------\n",
        "    # Additional useful stats\n",
        "    # ---------------------------\n",
        "    def q(arr, p): return float(np.percentile(arr, p)) if len(arr) else float(\"nan\")\n",
        "\n",
        "    if len(arr_all):\n",
        "        print(\"\\n-- Distribution (ALL trials) --\")\n",
        "        print(f\"min / p25 / p50 / p75 / max : {arr_all.min():.0f} / {q(arr_all,25):.0f} / {q(arr_all,50):.0f} / {q(arr_all,75):.0f} / {arr_all.max():.0f}\")\n",
        "        print(f\"IQR (p75-p25)         : {q(arr_all,75) - q(arr_all,25):.2f}\")\n",
        "        print(f\"MAD (about median)    : {robust_mad(arr_all):.2f}\")\n",
        "        for pct in (90, 95, 99):\n",
        "            print(f\"p{pct:02d}                 : {q(arr_all, pct):.2f}\")\n",
        "\n",
        "        # Probability of hitting within selected step budgets.\n",
        "        # Count genuine hits only: misses are stored in arr_all as max_steps, so\n",
        "        # testing `arr_all <= T` would report probability 1.0 for every T >= max_steps.\n",
        "        # Budgets above max_steps are dropped because no trial was run that long.\n",
        "        budgets = sorted(T for T in {100, 500, 1000, 5000, max_steps} if T <= max_steps)\n",
        "        print(\"\\nHit probability within budgets (ALL trials):\")\n",
        "        for T in budgets:\n",
        "            pr = float((arr_hits <= T).sum()) / len(arr_all)\n",
        "            print(f\"  ≤ {T:6d} steps : {pr:.4f}\")\n",
        "\n",
        "    if len(arr_hits):\n",
        "        print(\"\\n-- Distribution (successful trials ONLY) --\")\n",
        "        print(f\"min / p25 / p50 / p75 / max : {arr_hits.min():.0f} / {q(arr_hits,25):.0f} / {q(arr_hits,50):.0f} / {q(arr_hits,75):.0f} / {arr_hits.max():.0f}\")\n",
        "        print(f\"IQR (p75-p25)         : {q(arr_hits,75) - q(arr_hits,25):.2f}\")\n",
        "        print(f\"MAD (about median)    : {robust_mad(arr_hits):.2f}\")\n",
        "        for pct in (90, 95, 99):\n",
        "            print(f\"p{pct:02d}                 : {q(arr_hits, pct):.2f}\")\n",
        "\n",
        "    # Compact histogram (ALL trials). Shows censoring spike at max_steps if many misses.\n",
        "    # Sorted by descending count, ties broken by ascending step value.\n",
        "    hist = collections.Counter(arr_all.tolist())\n",
        "    most_common = sorted(hist.items(), key=lambda kv: (-kv[1], kv[0]))[:20]\n",
        "    print(\"\\nTop (step,count) bins (ALL trials, 20 most common):\")\n",
        "    for step, cnt in most_common:\n",
        "        print(f\"  {step:7d} : {cnt}\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-Rd2cBBmRjeF",
        "outputId": "7574a178-e8e4-4dcb-c4f4-102d71eec07e"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[trial    5] last=miss | hits=0 | misses=5 | hit_rate=0.000\n",
            "[trial   10] last=miss | hits=0 | misses=10 | hit_rate=0.000\n",
            "[trial   15] last=miss | hits=1 | misses=14 | hit_rate=0.067\n",
            "[trial   20] last=miss | hits=1 | misses=19 | hit_rate=0.050\n",
            "[trial   25] last=miss | hits=1 | misses=24 | hit_rate=0.040\n",
            "[trial   30] last=miss | hits=1 | misses=29 | hit_rate=0.033\n",
            "[trial   35] last=miss | hits=1 | misses=34 | hit_rate=0.029\n",
            "[trial   40] last=miss | hits=1 | misses=39 | hit_rate=0.025\n",
            "[trial   45] last=miss | hits=1 | misses=44 | hit_rate=0.022\n",
            "[trial   50] last=miss | hits=1 | misses=49 | hit_rate=0.020\n",
            "[trial   55] last=miss | hits=1 | misses=54 | hit_rate=0.018\n",
            "[trial   60] last=miss | hits=1 | misses=59 | hit_rate=0.017\n",
            "[trial   65] last=miss | hits=1 | misses=64 | hit_rate=0.015\n",
            "[trial   70] last=miss | hits=2 | misses=68 | hit_rate=0.029\n",
            "[trial   75] last=miss | hits=2 | misses=73 | hit_rate=0.027\n",
            "[trial   80] last=miss | hits=2 | misses=78 | hit_rate=0.025\n",
            "[trial   85] last=miss | hits=2 | misses=83 | hit_rate=0.024\n",
            "[trial   90] last=miss | hits=2 | misses=88 | hit_rate=0.022\n",
            "[trial   95] last=miss | hits=2 | misses=93 | hit_rate=0.021\n",
            "[trial  100] last=miss | hits=2 | misses=98 | hit_rate=0.020\n",
            "[trial  105] last=miss | hits=2 | misses=103 | hit_rate=0.019\n",
            "[trial  110] last=miss | hits=2 | misses=108 | hit_rate=0.018\n",
            "[trial  115] last=miss | hits=2 | misses=113 | hit_rate=0.017\n",
            "[trial  120] last=miss | hits=2 | misses=118 | hit_rate=0.017\n",
            "[trial  125] last=miss | hits=2 | misses=123 | hit_rate=0.016\n",
            "[trial  130] last=miss | hits=2 | misses=128 | hit_rate=0.015\n",
            "[trial  135] last=813 | hits=3 | misses=132 | hit_rate=0.022\n",
            "[trial  140] last=miss | hits=3 | misses=137 | hit_rate=0.021\n",
            "[trial  145] last=miss | hits=3 | misses=142 | hit_rate=0.021\n",
            "[trial  150] last=miss | hits=3 | misses=147 | hit_rate=0.020\n",
            "[trial  155] last=miss | hits=3 | misses=152 | hit_rate=0.019\n",
            "[trial  160] last=miss | hits=3 | misses=157 | hit_rate=0.019\n",
            "[trial  165] last=miss | hits=3 | misses=162 | hit_rate=0.018\n",
            "[trial  170] last=miss | hits=4 | misses=166 | hit_rate=0.024\n",
            "[trial  175] last=miss | hits=4 | misses=171 | hit_rate=0.023\n",
            "[trial  180] last=miss | hits=4 | misses=176 | hit_rate=0.022\n",
            "[trial  185] last=miss | hits=4 | misses=181 | hit_rate=0.022\n",
            "[trial  190] last=miss | hits=5 | misses=185 | hit_rate=0.026\n",
            "[trial  195] last=miss | hits=5 | misses=190 | hit_rate=0.026\n",
            "[trial  200] last=miss | hits=5 | misses=195 | hit_rate=0.025\n",
            "\n",
            "=== GWG First-Hit Statistics with 2 SD Confidence Intervals ===\n",
            "trials=200 | hits=5 | misses=195 | miss_penalty=max_steps(2000)\n",
            "Hit rate              : 0.0250  (±2SD CI: [0.0029, 0.0471])  | SE≈0.0110\n",
            "Mean steps (ALL)      : 1966.69  (±2SD CI: [1934.21, 1999.18])  | SD=229.71\n",
            "Median steps (ALL)    : 2000.00  (±2SD boot CI: [2000.00, 2000.00])  | boot SD≈0.00\n",
            "Median steps (HITS)   : 813.00  (±2SD boot CI: [-104.20, 1730.20])  | boot SD≈458.60\n",
            "\n",
            "-- Distribution (ALL trials) --\n",
            "min / p25 / p50 / p75 / max : 2 / 2000 / 2000 / 2000 / 2000\n",
            "IQR (p75-p25)         : 0.00\n",
            "MAD (about median)    : 0.00\n",
            "p90                 : 2000.00\n",
            "p95                 : 2000.00\n",
            "p99                 : 2000.00\n",
            "\n",
            "Hit probability within budgets (ALL trials):\n",
            "  ≤    100 steps : 0.0100\n",
            "  ≤    500 steps : 0.0100\n",
            "  ≤   1000 steps : 0.0200\n",
            "  ≤   2000 steps : 1.0000\n",
            "  ≤   5000 steps : 1.0000\n",
            "\n",
            "-- Distribution (successful trials ONLY) --\n",
            "min / p25 / p50 / p75 / max : 2 / 3 / 813 / 914 / 1607\n",
            "IQR (p75-p25)         : 911.00\n",
            "MAD (about median)    : 794.00\n",
            "p90                 : 1329.80\n",
            "p95                 : 1468.40\n",
            "p99                 : 1579.28\n",
            "\n",
            "Top (step,count) bins (ALL trials, 20 most common):\n",
            "     2000 : 195\n",
            "        2 : 1\n",
            "        3 : 1\n",
            "      813 : 1\n",
            "      914 : 1\n",
            "     1607 : 1\n"
          ]
        }
      ],
      "source": [
        "# Run the first-hit experiment for the all-ones target.\n",
        "# NOTE(review): an earlier comment here said d_total = 10 + 500, but target15 has\n",
        "# 15 entries -- confirm d_indicator / d_total against the setup cell.\n",
        "run_gwg_trials(\n",
        "    model=model,\n",
        "    target10=target15,    # parameter keeps its historical name\n",
        "    d_indicator=d_indicator,\n",
        "    d_total=d_total,      # total particle length; first d_indicator entries are matched\n",
        "    # Fall back to CPU so the cell still runs on a kernel without a GPU.\n",
        "    device=torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\"),\n",
        "    n_trials=200,\n",
        "    max_steps=2000,\n",
        "    beta=10.0,\n",
        "    verbose_every=5\n",
        ")\n"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "torch-gpu-env",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.13.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
