{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List, Tuple, Dict\n",
    "\n",
    "import random\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import torch\n",
    "from os.path import join\n",
    "import datamol as dm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASE_PATH = \"../neurips2023/large-dataset/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df_L1000_VCAP.shape (15220, 984)\n",
      "df_L1000_MCF7.shape (11622, 984)\n",
      "df_PCBA.shape (1563664, 1332)\n",
      "df_PCQM4M.shape (3810323, 31)\n"
     ]
    }
   ],
   "source": [
    "df_L1000_VCAP = pd.read_csv(join(BASE_PATH, \"LINCS_L1000_VCAP_0-4.csv.gz\"))\n",
    "print(\"df_L1000_VCAP.shape\", df_L1000_VCAP.shape)\n",
    "df_L1000_MCF7 = pd.read_csv(join(BASE_PATH, \"LINCS_L1000_MCF7_0-4.csv.gz\"))\n",
    "print(\"df_L1000_MCF7.shape\", df_L1000_MCF7.shape)\n",
    "df_PCBA = pd.read_parquet(join(BASE_PATH, \"PCBA_1328_1564k.parquet\"))\n",
    "print(\"df_PCBA.shape\", df_PCBA.shape)\n",
    "df_PCQM4M = pd.read_parquet(join(BASE_PATH, \"PCQM4M_G25_N4.parquet\"))\n",
    "print(\"df_PCQM4M.shape\", df_PCQM4M.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def smiles_to_unique_ids(smiles: List):\n",
    "    return dm.parallelized_with_batches(loop_smiles_to_unique_ids, smiles, batch_size=100, n_jobs=32, progress=True)\n",
    "\n",
    "def loop_smiles_to_unique_ids(smiles: List):\n",
    "    unique_ids = []\n",
    "    for s in smiles:\n",
    "        if not isinstance(s, str):\n",
    "            unique_ids.append(None)\n",
    "            continue\n",
    "        mol = dm.to_mol(s)\n",
    "        if mol is None:\n",
    "            unique_ids.append(None)\n",
    "        else:\n",
    "            unique_ids.append(dm.unique_id(mol))\n",
    "    return unique_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a1bdefc329d14d288510253cc36867a8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/38103 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[16:03:00] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:10] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:37] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:37] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:37] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:44] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:44] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:44] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:44] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:44] WARNING: not removing hydrogen atom without neighbors\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7087b7c562ca46ce837695034e38cb57",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/152 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted'[16:03:49]  for input: 'restricted'\n",
      "SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f651dcb510004bb7afef0b02fb1e3b20",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/116 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n",
      "[16:03:49] SMILES Parse Error: syntax error while parsing: restricted\n",
      "[16:03:49] SMILES Parse Error: Failed parsing SMILES 'restricted' for input: 'restricted'\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1ab2cca58d1f4a36ac5994b897cfe940",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/15636 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[16:03:51] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:52] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:53] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:55] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:59] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:59] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:03:59] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:02] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:02] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:07] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:08] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:08] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:08] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:08] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:09] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:12] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:13] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:13] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:13] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:13] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:13] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:13] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:15] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:16] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:20] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:20] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:22] WARNING: not removing hydrogen atom without neighbors\n",
      "[16:04:22] WARNING: not removing hydrogen atom without neighbors\n"
     ]
    }
   ],
   "source": [
    "unique_ids_QM = smiles_to_unique_ids(df_PCQM4M[\"ordered_smiles\"])\n",
    "unique_ids_L1000_VCAP = smiles_to_unique_ids(df_L1000_VCAP[\"SMILES\"])\n",
    "unique_ids_L1000_MCF7 = smiles_to_unique_ids(df_L1000_MCF7[\"SMILES\"])\n",
    "unique_ids_PCBA = smiles_to_unique_ids(df_PCBA[\"SMILES\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "L1000_VCAP 726\n",
      "L1000_MCF7 1023\n",
      "PCBA 56512\n"
     ]
    }
   ],
   "source": [
    "# Check the number of unique ids that intersect between unique_ids_QM and the other columns\n",
    "intersection_VCAP = set(unique_ids_QM) & set(unique_ids_L1000_VCAP)\n",
    "print(\"L1000_VCAP\", len(intersection_VCAP))\n",
    "\n",
    "intersection_MCF7 = set(unique_ids_QM) & set(unique_ids_L1000_MCF7)\n",
    "print(\"L1000_MCF7\", len(intersection_MCF7))\n",
    "\n",
    "intersection_PCBA = set(unique_ids_QM) & set(unique_ids_PCBA)\n",
    "print(\"PCBA\", len(intersection_PCBA))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "QM_in_VCAP 1443 VCAP_in_QM 748\n",
      "QM_in_MCF7 2373 MCF7_in_QM 1065\n",
      "QM_in_PCBA 91174 PCBA_in_QM 56512\n"
     ]
    }
   ],
   "source": [
    "def find_indices(list_1, list_2):\n",
    "    intersection = set(list_1) & set(list_2)\n",
    "    intersection = {elem for elem in intersection if elem is not None}\n",
    "    is_2_in_1 = np.isin(list_2, list(intersection))\n",
    "    is_1_in_2 = np.isin(list_1, list(intersection))\n",
    "    return is_1_in_2, is_2_in_1\n",
    "\n",
    "QM_in_VCAP, VCAP_in_QM = find_indices(unique_ids_QM, unique_ids_L1000_VCAP)\n",
    "print(\"QM_in_VCAP\", sum(QM_in_VCAP), \"VCAP_in_QM\", sum(VCAP_in_QM))\n",
    "\n",
    "QM_in_MCF7, MCF7_in_QM = find_indices(unique_ids_QM, unique_ids_L1000_MCF7)\n",
    "print(\"QM_in_MCF7\", sum(QM_in_MCF7), \"MCF7_in_QM\", sum(MCF7_in_QM))\n",
    "\n",
    "QM_in_PCBA, PCBA_in_QM = find_indices(unique_ids_QM, unique_ids_PCBA)\n",
    "print(\"QM_in_PCBA\", sum(QM_in_PCBA), \"PCBA_in_QM\", sum(PCBA_in_QM))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_seen_VCAP = np.where(VCAP_in_QM)[0].tolist()\n",
    "test_seen_MCF7 = np.where(MCF7_in_QM)[0].tolist()\n",
    "test_seen_PCBA = np.where(PCBA_in_QM)[0].tolist()\n",
    "\n",
    "train_QM_seen = np.where(QM_in_VCAP | QM_in_MCF7 | QM_in_PCBA)[0].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_random_splits(num_elem, val_ratio, test_ratio, seed=42, ignore_idx=None):\n",
    "    \"\"\"Make random splits of the data into train, validation, and test sets.\n",
    "\n",
    "    Args:\n",
    "        num_elem (int): Number of elements in the dataset.\n",
    "        val_ratio (float): Ratio of the dataset to use for validation.\n",
    "        test_ratio (float): Ratio of the dataset to use for testing.\n",
    "        seed (int): Random seed.\n",
    "        ignore_idx (list): List of indices to ignore.\n",
    "\n",
    "    Returns:\n",
    "        train_idx (list): List of indices for the training set.\n",
    "        val_idx (list): List of indices for the validation set.\n",
    "        test_idx (list): List of indices for the test set.\n",
    "    \"\"\"\n",
    "    # Create a list of indices\n",
    "    idx = list(range(num_elem))\n",
    "    # Remove the indices to ignore\n",
    "    if ignore_idx is not None:\n",
    "        idx = list(set(idx) - set(ignore_idx))\n",
    "        num_elem = len(idx)\n",
    "    # Shuffle the list of indices\n",
    "    random.seed(seed)\n",
    "    random.shuffle(idx)\n",
    "    # Compute the number of elements in each set\n",
    "    num_val = int(num_elem * val_ratio)\n",
    "    num_test = int(num_elem * test_ratio)\n",
    "    num_train = num_elem - num_val - num_test\n",
    "    # Split the list of indices into three sets\n",
    "    train_idx = idx[:num_train]\n",
    "    val_idx = idx[num_train:(num_train + num_val)]\n",
    "    test_idx = idx[(num_train + num_val):]\n",
    "    # Return the three lists of indices\n",
    "    return train_idx, val_idx, test_idx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_VCAP, val_VCAP, test_VCAP = make_random_splits(len(df_L1000_VCAP), 0.04, 0.04, seed=42, ignore_idx=test_seen_VCAP)\n",
    "train_MCF7, val_MCF7, test_MCF7 = make_random_splits(len(df_L1000_MCF7), 0.04, 0.04, seed=42, ignore_idx=test_seen_MCF7)\n",
    "train_PCBA, val_PCBA, test_PCBA = make_random_splits(len(df_PCBA), 0.04, 0.04, seed=42, ignore_idx=test_seen_PCBA)\n",
    "train_QM, val_QM, test_QM = make_random_splits(len(df_PCQM4M), 0.04, 0.04, seed=42, ignore_idx=train_QM_seen)\n",
    "train_QM = np.concatenate((train_QM, train_QM_seen))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_random_splits_file(df, out_file, train_idx, val_idx, test_idx, test_seen_idx=None):\n",
    "    # Save the splits\n",
    "    if test_seen_idx is None:\n",
    "        splits_dict = {\"train\": train_idx, \"val\": val_idx, \"test\": test_idx}\n",
    "    else:\n",
    "        splits_dict = {\"train\": train_idx, \"val\": val_idx, \"test\": test_idx, \"test_seen\": test_seen_idx}\n",
    "\n",
    "    # Check the splits validity\n",
    "    \n",
    "    assert len(set(train_idx).intersection(set(val_idx))) == 0\n",
    "    assert len(set(train_idx).intersection(set(test_idx))) == 0\n",
    "    assert len(set(val_idx).intersection(set(test_idx))) == 0\n",
    "    assert len(train_idx) > 0\n",
    "    assert len(val_idx) > 0\n",
    "    assert len(test_idx) > 0\n",
    "\n",
    "    if test_seen_idx is None:\n",
    "        assert len(df) == len(train_idx) + len(val_idx) + len(test_idx), f\"{len(df)} != {len(train_idx)} + {len(val_idx)} + {len(test_idx)}\"\n",
    "        print(out_file, \"train\", len(train_idx), \"val\", len(val_idx), \"test\", len(test_idx))\n",
    "    else:\n",
    "        assert len(test_seen_idx) > 0\n",
    "        assert len(set(train_idx).intersection(set(test_seen_idx))) == 0\n",
    "        assert len(set(val_idx).intersection(set(test_seen_idx))) == 0\n",
    "        assert len(set(test_idx).intersection(set(test_seen_idx))) == 0\n",
    "        assert len(df) == len(train_idx) + len(val_idx) + len(test_idx) + len(test_seen_idx), f\"{len(df)} != {len(train_idx)} + {len(val_idx)} + {len(test_idx)} + {len(test_seen_idx)}\"\n",
    "        print(out_file, \"train\", len(train_idx), \"val\", len(val_idx), \"test\", len(test_idx), \"test_seen\", len(test_seen_idx))\n",
    "\n",
    "    torch.save(splits_dict, out_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "l1000_vcap_random_splits.pt train 13316 val 578 test 578 test_seen 748\n",
      "l1000_mcf7_random_splits.pt train 9713 val 422 test 422 test_seen 1065\n",
      "pcba_1328_random_splits.pt train 1386580 val 60286 test 60286 test_seen 56512\n",
      "pcqm4m_g25_n4_random_splits.pt train 3512805 val 148759 test 148759\n"
     ]
    }
   ],
   "source": [
    "make_random_splits_file(df_L1000_VCAP, \"l1000_vcap_random_splits.pt\", train_VCAP, val_VCAP, test_VCAP, test_seen_VCAP)\n",
    "make_random_splits_file(df_L1000_MCF7, \"l1000_mcf7_random_splits.pt\", train_MCF7, val_MCF7, test_MCF7, test_seen_MCF7)\n",
    "make_random_splits_file(df_PCBA, \"pcba_1328_random_splits.pt\", train_PCBA, val_PCBA, test_PCBA, test_seen_PCBA)\n",
    "make_random_splits_file(df_PCQM4M, \"pcqm4m_g25_n4_random_splits.pt\", train_QM, val_QM, test_QM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphium",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
