{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../')\n",
    "import datasets\n",
    "import log_reg\n",
    "from dataproc import extract_wvs\n",
    "from dataproc import get_discharge_summaries\n",
    "from dataproc import concat_and_split\n",
    "from dataproc import build_vocab\n",
    "from dataproc import vocab_index_descriptions\n",
    "from dataproc import word_embeddings\n",
    "from constants import MIMIC_3_DIR, DATA_DIR\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from collections import Counter, defaultdict\n",
    "import csv\n",
    "import math\n",
    "import operator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's do some data processing in a much better way, with a notebook.\n",
    "\n",
    "First, let's define some stuff."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Y = 'full' #use all available labels in the dataset for prediction\n",
    "notes_file = '%s/NOTEEVENTS.csv' % MIMIC_3_DIR # raw note events downloaded from MIMIC-III\n",
    "vocab_size = 'full' #don't limit the vocab size to a specific number\n",
    "vocab_min = 3 #discard tokens appearing in fewer than this many documents"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data processing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combine diagnosis and procedure codes and reformat them"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The codes in MIMIC-III are given in separate files for procedures and diagnoses, and the codes are given without periods, which might lead to collisions if we naively combine them. So we have to add the periods back in the right place."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfproc = pd.read_csv('%s/PROCEDURES_ICD.csv' % MIMIC_3_DIR)\n",
    "dfdiag = pd.read_csv('%s/DIAGNOSES_ICD.csv' % MIMIC_3_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfdiag['absolute_code'] = dfdiag.apply(lambda row: str(datasets.reformat(str(row[4]), True)), axis=1)\n",
    "dfproc['absolute_code'] = dfproc.apply(lambda row: str(datasets.reformat(str(row[4]), False)), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfcodes = pd.concat([dfdiag, dfproc])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfcodes.to_csv('%s/ALL_CODES.csv' % MIMIC_3_DIR, index=False,\n",
    "               columns=['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'SEQ_NUM', 'absolute_code'],\n",
    "               header=['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'SEQ_NUM', 'ICD9_CODE'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How many codes are there?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8994"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#In the full dataset (not just discharge summaries)\n",
    "df = pd.read_csv('%s/ALL_CODES.csv' % MIMIC_3_DIR, dtype={\"ICD9_CODE\": str})\n",
    "len(df['ICD9_CODE'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tokenize and preprocess raw text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Preprocessing time!\n",
    "\n",
    "This will:\n",
    "- Select only discharge summaries and their addenda\n",
    "- remove punctuation and numeric-only tokens, removing 500 but keeping 250mg\n",
    "- lowercase all tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "0it [00:00, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing notes file\n",
      "writing to /nethome/jmullenbach3/replication/cnn-medical-text/mimicdata/mimic3//disch_full.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2083180it [02:04, 16731.75it/s]\n"
     ]
    }
   ],
   "source": [
    "#This reads all notes, selects only the discharge summaries, and tokenizes them, returning the output filename\n",
    "disch_full_file = get_discharge_summaries.write_discharge_summaries(out_file=\"%s/disch_full.csv\" % MIMIC_3_DIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's read this in and see what kind of data we're working with"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('%s/disch_full.csv' % MIMIC_3_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52726"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#How many admissions?\n",
    "len(df['HADM_ID'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Tokens and types\n",
    "types = set()\n",
    "num_tok = 0\n",
    "for row in df.itertuples():\n",
    "    for w in row[4].split():\n",
    "        types.add(w)\n",
    "        num_tok += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num types 150854\n",
      "Num tokens 79801387\n"
     ]
    }
   ],
   "source": [
    "print(\"Num types\", len(types))\n",
    "print(\"Num tokens\", str(num_tok))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Let's sort by SUBJECT_ID and HADM_ID to make a correspondence with the MIMIC-3 label file\n",
    "df = df.sort_values(['SUBJECT_ID', 'HADM_ID'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/nethome/jmullenbach3/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2698: DtypeWarning: Columns (4) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "#Sort the label file by the same\n",
    "dfl = pd.read_csv('%s/ALL_CODES.csv' % MIMIC_3_DIR)\n",
    "dfl = dfl.sort_values(['SUBJECT_ID', 'HADM_ID'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(52726, 58976)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df['HADM_ID'].unique()), len(dfl['HADM_ID'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Consolidate labels with set of discharge summaries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Looks like there were some HADM_ID's that didn't have discharge summaries, so they weren't included with our notes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Let's filter out these HADM_ID's\n",
    "hadm_ids = set(df['HADM_ID'])\n",
    "with open('%s/ALL_CODES.csv' % MIMIC_3_DIR, 'r') as lf:\n",
    "    with open('%s/ALL_CODES_filtered.csv' % MIMIC_3_DIR, 'w') as of:\n",
    "        w = csv.writer(of)\n",
    "        w.writerow(['SUBJECT_ID', 'HADM_ID', 'ICD9_CODE', 'ADMITTIME', 'DISCHTIME'])\n",
    "        r = csv.reader(lf)\n",
    "        #header\n",
    "        next(r)\n",
    "        for i,row in enumerate(r):\n",
    "            hadm_id = int(row[2])\n",
    "            #print(hadm_id)\n",
    "            #break\n",
    "            if hadm_id in hadm_ids:\n",
    "                w.writerow(row[1:3] + [row[-1], '', ''])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/nethome/jmullenbach3/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2698: DtypeWarning: Columns (2) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "dfl = pd.read_csv('%s/ALL_CODES_filtered.csv' % MIMIC_3_DIR, index_col=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52726"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dfl['HADM_ID'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#we still need to sort it by HADM_ID\n",
    "dfl = dfl.sort_values(['SUBJECT_ID', 'HADM_ID'])\n",
    "dfl.to_csv('%s/ALL_CODES_filtered.csv' % MIMIC_3_DIR, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Append labels to notes in a single file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Now let's append each instance with all of its codes\n",
    "#this is pretty non-trivial so let's use this script I wrote, which requires the notes to be written to file\n",
    "sorted_file = '%s/disch_full.csv' % MIMIC_3_DIR\n",
    "df.to_csv(sorted_file, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CONCATENATING\n",
      "0 done\n",
      "10000 done\n",
      "20000 done\n",
      "30000 done\n",
      "40000 done\n",
      "50000 done\n"
     ]
    }
   ],
   "source": [
    "labeled = concat_and_split.concat_data('%s/ALL_CODES_filtered.csv' % MIMIC_3_DIR, sorted_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/nethome/jmullenbach3/replication/cnn-medical-text/mimicdata/mimic3//notes_labeled.csv\n"
     ]
    }
   ],
   "source": [
    "#name of the file we just made\n",
    "print(labeled)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's sanity check the combined data we just made. Do we have all hadm id's accounted for, and the same vocab stats?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfnl = pd.read_csv(labeled)\n",
    "#Tokens and types\n",
    "types = set()\n",
    "num_tok = 0\n",
    "for row in dfnl.itertuples():\n",
    "    for w in row[3].split():\n",
    "        types.add(w)\n",
    "        num_tok += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num types 150854 num tokens 79801387\n"
     ]
    }
   ],
   "source": [
    "print(\"num types\", len(types), \"num tokens\", num_tok)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52726"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dfnl['HADM_ID'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create train/dev/test splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SPLITTING\n",
      "0 read\n",
      "10000 read\n",
      "20000 read\n",
      "30000 read\n",
      "40000 read\n",
      "50000 read\n"
     ]
    }
   ],
   "source": [
    "fname = '%s/notes_labeled.csv' % MIMIC_3_DIR\n",
    "base_name = \"%s/disch\" % MIMIC_3_DIR #for output\n",
    "tr, dv, te = concat_and_split.split_data(fname, base_name=base_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build vocabulary from training data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "reading in data...\n",
      "removing rare terms\n",
      "51917 terms qualify out of 140795 total\n",
      "writing output\n"
     ]
    }
   ],
   "source": [
    "vocab_min = 3\n",
    "vname = '%s/vocab.csv' % MIMIC_3_DIR\n",
    "build_vocab.build_vocab(vocab_min, tr, vname)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Sort each data split by length for batching"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for splt in ['train', 'dev', 'test']:\n",
    "    filename = '%s/disch_%s_split.csv' % (MIMIC_3_DIR, splt)\n",
    "    df = pd.read_csv(filename)\n",
    "    df['length'] = df.apply(lambda row: len(str(row['TEXT']).split()), axis=1)\n",
    "    df = df.sort_values(['length'])\n",
    "    df.to_csv('%s/%s_full.csv' % (MIMIC_3_DIR, splt), index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pre-train word embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's train word embeddings on all words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building word2vec vocab on /nethome/jmullenbach3/replication/cnn-medical-text/mimicdata/mimic3//disch_full.csv...\n",
      "training...\n",
      "writing embeddings to /nethome/jmullenbach3/replication/cnn-medical-text/mimicdata/mimic3//processed_full.w2v\n"
     ]
    }
   ],
   "source": [
    "w2v_file = word_embeddings.word_embeddings('full', '%s/disch_full.csv' % MIMIC_3_DIR, 100, 0, 5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write pre-trained word embeddings with new vocab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 51917/51917 [02:58<00:00, 290.28it/s]\n"
     ]
    }
   ],
   "source": [
    "extract_wvs.gensim_to_embeddings('%s/processed_full.w2v' % MIMIC_3_DIR, '%s/vocab.csv' % MIMIC_3_DIR, Y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pre-process code descriptions using the vocab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 22267/22267 [00:00<00:00, 62940.71it/s]\n"
     ]
    }
   ],
   "source": [
    "vocab_index_descriptions.vocab_index_descriptions('%s/vocab.csv' % MIMIC_3_DIR,\n",
    "                                                  '%s/description_vectors.vocab' % MIMIC_3_DIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filter each split to the top 50 diagnosis/procedure codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Y = 50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#first calculate the top k\n",
    "counts = Counter()\n",
    "dfnl = pd.read_csv('%s/notes_labeled.csv' % MIMIC_3_DIR)\n",
    "for row in dfnl.itertuples():\n",
    "    for label in str(row[4]).split(';'):\n",
    "        counts[label] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "codes_50 = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "codes_50 = [code[0] for code in codes_50[:Y]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['401.9',\n",
       " '38.93',\n",
       " '428.0',\n",
       " '427.31',\n",
       " '414.01',\n",
       " '96.04',\n",
       " '96.6',\n",
       " '584.9',\n",
       " '250.00',\n",
       " '96.71',\n",
       " '272.4',\n",
       " '518.81',\n",
       " '99.04',\n",
       " '39.61',\n",
       " '599.0',\n",
       " '530.81',\n",
       " '96.72',\n",
       " '272.0',\n",
       " '285.9',\n",
       " '88.56',\n",
       " '244.9',\n",
       " '486',\n",
       " '38.91',\n",
       " '285.1',\n",
       " '36.15',\n",
       " '276.2',\n",
       " '496',\n",
       " '99.15',\n",
       " '995.92',\n",
       " 'V58.61',\n",
       " '507.0',\n",
       " '038.9',\n",
       " '88.72',\n",
       " '585.9',\n",
       " '403.90',\n",
       " '311',\n",
       " '305.1',\n",
       " '37.22',\n",
       " '412',\n",
       " '33.24',\n",
       " '39.95',\n",
       " '287.5',\n",
       " '410.71',\n",
       " '276.1',\n",
       " 'V45.81',\n",
       " '424.0',\n",
       " '45.13',\n",
       " 'V15.82',\n",
       " '511.9',\n",
       " '37.23']"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "codes_50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('%s/TOP_%s_CODES.csv' % (MIMIC_3_DIR, str(Y)), 'w') as of:\n",
    "    w = csv.writer(of)\n",
    "    for code in codes_50:\n",
    "        w.writerow([code])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train\n",
      "dev\n",
      "test\n"
     ]
    }
   ],
   "source": [
    "for splt in ['train', 'dev', 'test']:\n",
    "    print(splt)\n",
    "    hadm_ids = set()\n",
    "    with open('%s/%s_50_hadm_ids.csv' % (MIMIC_3_DIR, splt), 'r') as f:\n",
    "        for line in f:\n",
    "            hadm_ids.add(line.rstrip())\n",
    "    with open('%s/notes_labeled.csv' % MIMIC_3_DIR, 'r') as f:\n",
    "        with open('%s/%s_%s.csv' % (MIMIC_3_DIR, splt, str(Y)), 'w') as of:\n",
    "            r = csv.reader(f)\n",
    "            w = csv.writer(of)\n",
    "            #header\n",
    "            w.writerow(next(r))\n",
    "            i = 0\n",
    "            for row in r:\n",
    "                hadm_id = row[1]\n",
    "                if hadm_id not in hadm_ids:\n",
    "                    continue\n",
    "                codes = set(str(row[3]).split(';'))\n",
    "                filtered_codes = codes.intersection(set(codes_50))\n",
    "                if len(filtered_codes) > 0:\n",
    "                    w.writerow(row[:3] + [';'.join(filtered_codes)])\n",
    "                    i += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for splt in ['train', 'dev', 'test']:\n",
    "    filename = '%s/%s_%s.csv' % (MIMIC_3_DIR, splt, str(Y))\n",
    "    df = pd.read_csv(filename)\n",
    "    df['length'] = df.apply(lambda row: len(str(row['TEXT']).split()), axis=1)\n",
    "    df = df.sort_values(['length'])\n",
    "    df.to_csv('%s/%s_%s.csv' % (MIMIC_3_DIR, splt, str(Y)), index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
