{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipy.stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "text_in = '''\n",
    "\n",
    "LightGBM:                                     mean t_int_l t_int_h    std                           values\n",
    "    Designed features:                                                                                    \n",
    "        Age group   |   0.626 \\pm 0.004   | 0.6323  0.6268  0.6379 0.0040  [0.628 0.630 0.630 0.635 0.638]\n",
    "        Gender      |   0.875 \\pm 0.004   | 0.8798  0.8751  0.8844 0.0033  [0.874 0.879 0.881 0.882 0.882]\n",
    "        Assessment  |   0.591 \\pm 0.003   | 0.5909  0.5872  0.5946 0.0027  [0.588 0.588 0.591 0.593 0.594]\n",
    "        Retail      |   0.545 \\pm 0.001   | 0.5454  0.5439  0.5469 0.0011  [0.544 0.545 0.545 0.546 0.547]\n",
    "    CPC embeddings:         \n",
    "        Age group   |   0.595 \\pm 0.004   | 0.5939  0.5900  0.5977 0.0028  [0.590 0.593 0.594 0.595 0.598]\n",
    "        Gender      |   0.848 \\pm 0.004   | 0.8572  0.8545  0.8600 0.0020  [0.855 0.856 0.857 0.858 0.860]\n",
    "        Assessment  |   0.584 \\pm 0.004   | 0.5838  0.5790  0.5886 0.0035  [0.578 0.584 0.585 0.586 0.587]\n",
    "        Retail      |   0.527 \\pm 0.001   | 0.5265  0.5252  0.5278 0.0009  [0.525 0.526 0.527 0.527 0.528]\n",
    "    MeLES embeddings:       \n",
    "        Age group   |   0.639 \\pm 0.006   | 0.6419  0.6372  0.6466 0.0034  [0.636 0.642 0.643 0.644 0.645]\n",
    "        Gender      |   0.872 \\pm 0.005   | 0.8821  0.8801  0.8840 0.0014  [0.880 0.882 0.882 0.882 0.884]\n",
    "        Assessment  |   0.604 \\pm 0.003   | 0.6041  0.5994  0.6088 0.0034  [0.598 0.604 0.605 0.606 0.607]\n",
    "        Retail      |   0.544 \\pm 0.001   | 0.5439  0.5421  0.5457 0.0013  [0.542 0.544 0.544 0.545 0.545]\n",
    "\n",
    "Scores:                       \n",
    "    Supervised learning:    \n",
    "        Age group   |   0.631 \\pm 0.010   | 0.6285  0.6172  0.6399 0.0082  [0.619 0.625 0.626 0.631 0.641]\n",
    "        Gender      |   0.871 \\pm 0.007   | 0.8741  0.8654  0.8828 0.0063  [0.865 0.872 0.873 0.879 0.881]\n",
    "        Assessment  |   0.601 \\pm 0.006   | 0.6010  0.5948  0.6072 0.0045  [0.596 0.600 0.600 0.601 0.608]\n",
    "        Retail      |   0.543 \\pm 0.002   | 0.5425  0.5403  0.5447 0.0016  [0.540 0.542 0.542 0.544 0.544]\n",
    "    CPC fine-tuning:        \n",
    "        Age group   |   0.621 \\pm 0.007   | 0.6210  0.6166  0.6254 0.0032  [0.617 0.618 0.622 0.623 0.625]\n",
    "        Gender      |   0.873 \\pm 0.007   | 0.8777  0.8726  0.8829 0.0037  [0.874 0.875 0.878 0.879 0.883]\n",
    "        Assessment  |   0.611 \\pm 0.005   | 0.6115  0.6047  0.6184 0.0049  [0.603 0.611 0.613 0.615 0.616]\n",
    "        Retail      |   0.546 \\pm 0.003   | 0.5461  0.5429  0.5492 0.0023  [0.542 0.546 0.546 0.547 0.548]\n",
    "    MeLES fine-tuning:      \n",
    "        Age group   |   0.643 \\pm 0.007   | 0.6383  0.6331  0.6435 0.0037  [0.632 0.637 0.641 0.641 0.641]\n",
    "        Gender      |   0.888 \\pm 0.002   | 0.8959  0.8910  0.9009 0.0036  [0.890 0.896 0.898 0.898 0.898]\n",
    "        Assessment  |   0.614 \\pm 0.003   | 0.6135  0.6095  0.6176 0.0029  [0.608 0.614 0.615 0.615 0.616]\n",
    "        Retail      |   0.549 \\pm 0.001   | 0.5490  0.5479  0.5500 0.0008  [0.548 0.549 0.549 0.550 0.550]\n",
    "'''\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_scores(l):\n",
    "    if l[:8] != ' ' * 8:\n",
    "        return (None, None)\n",
    "    \n",
    "    dataset, mean, scores = [x.strip() for x in l.split('|')]\n",
    "    scores = scores[-30:-1]\n",
    "    scores = np.array([float(x) for x in scores.split()])\n",
    "    return dataset, scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def t_interval(x, p=0.95, eps=1e-9):\n",
    "    n = len(x)\n",
    "    s = x.std(ddof=1)\n",
    "\n",
    "    return scipy.stats.t.interval(p, n - 1, loc=x.mean(), scale=(s + eps) / (n ** 0.5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_deltas(dataset, scores_x, baselines):\n",
    "    if not dataset:\n",
    "        return None, None\n",
    "    \"\"\"\n",
    "    scores_x - row for estimate\n",
    "    scores_y - baseline for this dataset\n",
    "    \n",
    "    \"\"\"\n",
    "    scores_y = baselines[dataset]\n",
    "    \n",
    "    # all possible pairs\n",
    "    pairs = (scores_x.reshape(-1, 1) - scores_y.reshape(1, -1)).flatten()\n",
    "    \n",
    "    # t-stat intervals with 95% configence\n",
    "    t_l, t_h = t_interval(pairs)\n",
    "    \n",
    "    # mean uplift for row vs. baseline\n",
    "    x = pairs.mean()\n",
    "    # delta for 95% confidence interval for `x`\n",
    "    delta = t_h - x\n",
    "    \n",
    "    # return x and delta in %%\n",
    "    return x / scores_y.mean() * 100, delta / scores_y.mean() * 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_deltas(o):\n",
    "    dataset, scores, (uplift_mean, uplift_delta) = o\n",
    "    if not dataset:\n",
    "        return ''\n",
    "    \n",
    "    t_l, t_h = t_interval(scores)\n",
    "    x = scores.mean()\n",
    "    d = t_h - x\n",
    "    \n",
    "    return ' | '.join([\n",
    "        '',\n",
    "        f'{x:.3f} \\pm {d:.3f}',\n",
    "        f'{uplift_mean:4.1f}% \\pm {uplift_delta:4.1f}%'\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "LightGBM:                                     mean t_int_l t_int_h    std                           values\n",
      "    Designed features:                                                                                    \n",
      "        Age group   |   0.626 \\pm 0.004   | 0.6323  0.6268  0.6379 0.0040  [0.628 0.630 0.630 0.635 0.638] | 0.632 \\pm 0.005 |  0.0% \\pm  0.3%\n",
      "        Gender      |   0.875 \\pm 0.004   | 0.8798  0.8751  0.8844 0.0033  [0.874 0.879 0.881 0.882 0.882] | 0.880 \\pm 0.004 |  0.0% \\pm  0.2%\n",
      "        Assessment  |   0.591 \\pm 0.003   | 0.5909  0.5872  0.5946 0.0027  [0.588 0.588 0.591 0.593 0.594] | 0.591 \\pm 0.003 |  0.0% \\pm  0.3%\n",
      "        Retail      |   0.545 \\pm 0.001   | 0.5454  0.5439  0.5469 0.0011  [0.544 0.545 0.545 0.546 0.547] | 0.545 \\pm 0.001 |  0.0% \\pm  0.1%\n",
      "    CPC embeddings:         \n",
      "        Age group   |   0.595 \\pm 0.004   | 0.5939  0.5900  0.5977 0.0028  [0.590 0.593 0.594 0.595 0.598] | 0.594 \\pm 0.004 | -6.0% \\pm  0.3%\n",
      "        Gender      |   0.848 \\pm 0.004   | 0.8572  0.8545  0.8600 0.0020  [0.855 0.856 0.857 0.858 0.860] | 0.857 \\pm 0.002 | -2.5% \\pm  0.2%\n",
      "        Assessment  |   0.584 \\pm 0.004   | 0.5838  0.5790  0.5886 0.0035  [0.578 0.584 0.585 0.586 0.587] | 0.584 \\pm 0.004 | -1.2% \\pm  0.3%\n",
      "        Retail      |   0.527 \\pm 0.001   | 0.5265  0.5252  0.5278 0.0009  [0.525 0.526 0.527 0.527 0.528] | 0.527 \\pm 0.001 | -3.4% \\pm  0.1%\n",
      "    MeLES embeddings:       \n",
      "        Age group   |   0.639 \\pm 0.006   | 0.6419  0.6372  0.6466 0.0034  [0.636 0.642 0.643 0.644 0.645] | 0.642 \\pm 0.004 |  1.6% \\pm  0.3%\n",
      "        Gender      |   0.872 \\pm 0.005   | 0.8821  0.8801  0.8840 0.0014  [0.880 0.882 0.882 0.882 0.884] | 0.882 \\pm 0.002 |  0.3% \\pm  0.2%\n",
      "        Assessment  |   0.604 \\pm 0.003   | 0.6041  0.5994  0.6088 0.0034  [0.598 0.604 0.605 0.606 0.607] | 0.604 \\pm 0.004 |  2.2% \\pm  0.3%\n",
      "        Retail      |   0.544 \\pm 0.001   | 0.5439  0.5421  0.5457 0.0013  [0.542 0.544 0.544 0.545 0.545] | 0.544 \\pm 0.002 | -0.3% \\pm  0.1%\n",
      "\n",
      "Scores:                       \n",
      "    Supervised learning:    \n",
      "        Age group   |   0.631 \\pm 0.010   | 0.6285  0.6172  0.6399 0.0082  [0.619 0.625 0.626 0.631 0.641] | 0.628 \\pm 0.010 | -0.6% \\pm  0.5%\n",
      "        Gender      |   0.871 \\pm 0.007   | 0.8741  0.8654  0.8828 0.0063  [0.865 0.872 0.873 0.879 0.881] | 0.874 \\pm 0.008 | -0.6% \\pm  0.3%\n",
      "        Assessment  |   0.601 \\pm 0.006   | 0.6010  0.5948  0.6072 0.0045  [0.596 0.600 0.600 0.601 0.608] | 0.601 \\pm 0.005 |  1.7% \\pm  0.3%\n",
      "        Retail      |   0.543 \\pm 0.002   | 0.5425  0.5403  0.5447 0.0016  [0.540 0.542 0.542 0.544 0.544] | 0.542 \\pm 0.002 | -0.6% \\pm  0.1%\n",
      "    CPC fine-tuning:        \n",
      "        Age group   |   0.621 \\pm 0.007   | 0.6210  0.6166  0.6254 0.0032  [0.617 0.618 0.622 0.623 0.625] | 0.621 \\pm 0.004 | -1.8% \\pm  0.3%\n",
      "        Gender      |   0.873 \\pm 0.007   | 0.8777  0.8726  0.8829 0.0037  [0.874 0.875 0.878 0.879 0.883] | 0.878 \\pm 0.004 | -0.2% \\pm  0.2%\n",
      "        Assessment  |   0.611 \\pm 0.005   | 0.6115  0.6047  0.6184 0.0049  [0.603 0.611 0.613 0.615 0.616] | 0.612 \\pm 0.006 |  3.5% \\pm  0.4%\n",
      "        Retail      |   0.546 \\pm 0.003   | 0.5461  0.5429  0.5492 0.0023  [0.542 0.546 0.546 0.547 0.548] | 0.546 \\pm 0.003 |  0.1% \\pm  0.2%\n",
      "    MeLES fine-tuning:      \n",
      "        Age group   |   0.643 \\pm 0.007   | 0.6383  0.6331  0.6435 0.0037  [0.632 0.637 0.641 0.641 0.641] | 0.638 \\pm 0.005 |  1.0% \\pm  0.3%\n",
      "        Gender      |   0.888 \\pm 0.002   | 0.8959  0.8910  0.9009 0.0036  [0.890 0.896 0.898 0.898 0.898] | 0.896 \\pm 0.004 |  1.9% \\pm  0.2%\n",
      "        Assessment  |   0.614 \\pm 0.003   | 0.6135  0.6095  0.6176 0.0029  [0.608 0.614 0.615 0.615 0.616] | 0.614 \\pm 0.004 |  3.9% \\pm  0.3%\n",
      "        Retail      |   0.549 \\pm 0.001   | 0.5490  0.5479  0.5500 0.0008  [0.548 0.549 0.549 0.550 0.550] | 0.549 \\pm 0.001 |  0.7% \\pm  0.1%\n",
      "\n"
     ]
    }
   ],
   "source": [
    "text = text_in.split('\\n')\n",
    "text = [(l, get_scores(l)) for l in text]\n",
    "baselines = {dataset: scores for l, (dataset, scores) in text[:8] if dataset}\n",
    "text = [(l, (dataset, scores, get_deltas(dataset, scores, baselines))) for l, (dataset, scores) in text]\n",
    "text = [l + print_deltas(o) for l, o in text]\n",
    "text_out = '\\n'.join(text)\n",
    "\n",
    "print(text_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Age group': array([0.628, 0.63 , 0.63 , 0.635, 0.638]),\n",
       " 'Gender': array([0.874, 0.879, 0.881, 0.882, 0.882]),\n",
       " 'Assessment': array([0.588, 0.588, 0.591, 0.593, 0.594]),\n",
       " 'Retail': array([0.544, 0.545, 0.545, 0.546, 0.547])}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "baselines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
