{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "# Standard library\n",
    "import json\n",
    "import os\n",
    "import pickle\n",
    "import sys\n",
    "\n",
    "# Third-party\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pygsti.circuits import Circuit\n",
    "from pygsti.tools.internalgates import internal_gate_unitaries\n",
    "\n",
    "# Put the directory three levels up on the import path before importing the local `ml` package.\n",
    "sys.path.insert(0, '../../../')\n",
    "\n",
    "import ml\n",
    "from ml import encoding\n",
    "\n",
    "# Return value of pyGSTi's internal_gate_unitaries().\n",
    "unitary_dict = internal_gate_unitaries()\n",
    "\n",
    "# Names of the dataset partitions; used as keys of the per-partition dictionaries built below.\n",
    "dtypes = ['train', 'validate', 'test']\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Package Circuits"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook is designed to process the raw circuits and fidelities into tensors that can be processed by our models. Note that, to aid with training, we actually train the physics-aware networks to predict $10000*[1-F(C)]$, or 10000 times the entanglement infidelity of each circuit. This notebook also partitions the dataset into training, validation, and testing subsets."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set the experiment number"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Selects the ./experiment_<exp_num>/ directory whose simulation results are processed below.\n",
    "exp_num = 4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Meta information"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following cell creates any necessary directories as well as the meta information for the dataset. The meta information contains:\n",
    "   - the number of qubits,\n",
    "   - the number of encoding channels per qubit,\n",
    "   - the maximum weight of the tracked errors,\n",
    "   - the number of hops on the connectivity graph used to generate the list of tracked errors,\n",
    "   - the minimum fidelity cutoff,\n",
    "   - the measurement encoding scheme (this is 0 for entanglement fidelity and 3 for PST),\n",
    "   - the processor's underlying geometry.\n",
    "\n",
    "The meta information should be consistent across all 10 datasets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input and output locations for this experiment; the output directory is created if missing.\n",
    "exp_path = f'./experiment_{exp_num}/'\n",
    "sim_path = exp_path + '/simulation_results/'\n",
    "proc_path = exp_path + '/processed_inputs_and_outputs/'\n",
    "os.makedirs(proc_path, exist_ok=True)\n",
    "\n",
    "# The processor spec is read from experiment_0 regardless of exp_num.\n",
    "# NOTE: unpickling can execute arbitrary code, so only load a pspec file you trust.\n",
    "with open('./experiment_0/pspec.pkl', 'rb') as pspec_file:\n",
    "    pspec = pickle.load(pspec_file)\n",
    "\n",
    "# Processor layout\n",
    "num_qubits = 4\n",
    "geometry = 'ring'\n",
    "adj_matrix = ml.newtools.ring_adj_matrix(num_qubits)\n",
    "laplace = ml.newtools.laplace_from_qubit_graph(adj_matrix)\n",
    "\n",
    "# Encoding settings\n",
    "num_channels = 4 + len(pspec.gate_names) - 1\n",
    "measurement_encoding = 0\n",
    "gate_encoding = encoding.ring_gate_to_index\n",
    "\n",
    "# Tracked-error settings\n",
    "max_error_weight = 2 # Can't set this above 2\n",
    "num_hops = 2\n",
    "error_gens = ml.newtools.up_to_weight_k_error_gens_from_qubit_graph(\n",
    "    max_error_weight, num_qubits, laplace, num_hops = num_hops)\n",
    "\n",
    "# Only circuits whose 'D:SP' value is above this cutoff are kept further down.\n",
    "cutoff = .85\n",
    "\n",
    "# Insertion order here determines the key order written to meta.json.\n",
    "meta = {\n",
    "    'cutoff': cutoff,\n",
    "    'num_hops': num_hops,\n",
    "    'max_error_weight': max_error_weight,\n",
    "    'geometry': geometry,\n",
    "    'num_qubits': num_qubits,\n",
    "    'num_channels': num_channels,\n",
    "    'measurement_encoding': measurement_encoding,\n",
    "}\n",
    "\n",
    "with open(exp_path+'/meta.json', 'w') as meta_file:\n",
    "    json.dump(meta, meta_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load the circuits"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next three cells load the circuits and entanglement fidelities. The second cell removes any low-fidelity circuits, if they exist."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulation results; the columns 'Circuit', 'D:SP' and 'F:Depth' are read further down.\n",
    "df = pd.read_csv(sim_path + '/dataframe.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "You are using 100.0 percent of the circuits.\n"
     ]
    }
   ],
   "source": [
    "# Boolean mask selecting the circuits whose 'D:SP' value exceeds the cutoff.\n",
    "f_mask = df['D:SP'] > cutoff\n",
    "print(f'You are using {100*sum(f_mask) / len(df)} percent of the circuits.')\n",
    "\n",
    "# reset_index() renumbers the kept rows from 0 and keeps the previous row labels in an 'index' column.\n",
    "df_good = df[f_mask].reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse every circuit string into a pyGSTi Circuit and collect the matching 'D:SP' values.\n",
    "circs = [Circuit(circuit_str) for circuit_str in df_good['Circuit']]\n",
    "sps = list(df_good['D:SP'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Process the circuits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0,200,400,600,800,1000,1200,1400,1600,1800,2000,2200,2400,2600,2800,3000,3200,3400,3600,3800,4000,4200,4400,4600,4800,"
     ]
    }
   ],
   "source": [
    "# Encode the retained circuits and their 'D:SP' values; xt and y are indexed by circuit number below.\n",
    "xt, y = encoding.create_input_data(\n",
    "    circs = circs,\n",
    "    fidelities = sps,\n",
    "    tracked_error_gens = error_gens,\n",
    "    measurement_encoding = measurement_encoding,\n",
    "    pspec = pspec,\n",
    "    geometry = geometry,\n",
    "    num_qubits = num_qubits,\n",
    "    num_channels = num_channels,\n",
    "    indexmapper = gate_encoding,\n",
    "    indexmapper_kwargs = {'pspec': pspec},\n",
    "    valuemapper = None,\n",
    "    valuemapper_kwargs = {},\n",
    "    max_depth = None,\n",
    "    return_separate=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Package the circuits"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next six cells: \n",
    "1. partition the dataset into training, validation, and testing sets;\n",
    "2. save the processed circuits, entanglement fidelities, and indices;\n",
    "3. and save a new dataframe that contains only the high-fidelity circuits."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-partition containers, filled below under the keys 'train', 'validate' and 'test'.\n",
    "x_data, y_data = {}, {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Seed both shuffles so that re-running the notebook reproduces the same partition instead of\n",
    "# silently overwriting the saved indices with a different one.\n",
    "split_seed = exp_num\n",
    "\n",
    "# First hold out 25% of the circuits for testing, then hold out 25% of the remainder for validation,\n",
    "# i.e. roughly 56% / 19% / 25% of the circuits end up in train / validate / test.\n",
    "train_indices, test_indices = train_test_split(np.array(df_good.index), train_size = .75, random_state = split_seed)\n",
    "train_indices, val_indices = train_test_split(train_indices, train_size = .75, random_state = split_seed)\n",
    "\n",
    "indices = {'train': train_indices, 'validate': val_indices, 'test': test_indices}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The three partitions must be pairwise disjoint.\n",
    "assert np.intersect1d(train_indices, test_indices).size == 0\n",
    "assert np.intersect1d(train_indices, val_indices).size == 0\n",
    "assert np.intersect1d(val_indices, test_indices).size == 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Multiply infidelities by 10,000 as this seems to help the optimizer (could instead play with\n",
    "# both learning rate and the weight initializations).\n",
    "for dt, partition_indices in indices.items():\n",
    "    x_data[dt] = xt[partition_indices, :, :]\n",
    "    y_data[dt] = 10000*(1 - y[partition_indices])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One compressed archive each for the encoded circuits, the scaled infidelities and the partition\n",
    "# indices; the arrays inside each archive are named after the dictionary keys.\n",
    "for archive_name, arrays in [('/processed_high_fidelity_circuits.npz', x_data),\n",
    "                             ('/processed_infidelities.npz', y_data),\n",
    "                             ('/indices.npz', indices)]:\n",
    "    np.savez_compressed(proc_path + archive_name, **arrays)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Record which partition each retained circuit belongs to. The column starts out as an object\n",
    "# column of None (rather than uninitialised floats) so the string labels fit without a dtype change.\n",
    "df_good['test_or_train'] = None\n",
    "for dt in dtypes:\n",
    "    # Single .loc call with row and column: chained indexing would write to a temporary copy.\n",
    "    df_good.loc[indices[dt], 'test_or_train'] = dt\n",
    "\n",
    "# Every retained circuit must have received a label.\n",
    "assert df_good['test_or_train'].notna().all()\n",
    "\n",
    "# Save the filtered, labelled frame; saving df here would write the unfiltered, unlabelled data.\n",
    "df_good.to_csv(sim_path + '/high-fidelity-dataframe.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Repeat with the mirror circuits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "180"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mirrored-circuit results, read from the same simulation_results directory as the main dataset.\n",
    "df_other = pd.read_csv(sim_path + '/mirrored_dataframe.csv')\n",
    "\n",
    "o_circs = [Circuit(circuit_str) for circuit_str in df_other['Circuit']]\n",
    "o_sps = list(df_other['D:SP'])\n",
    "\n",
    "# Largest 'F:Depth' in df; passed as max_depth when the mirrored circuits are encoded.\n",
    "# NOTE(review): this reads the unfiltered df rather than df_good, so it may differ from the largest\n",
    "# depth among the retained circuits whenever the cutoff removes some - confirm which is intended.\n",
    "max_depth = max(df['F:Depth'])\n",
    "max_depth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0,200,400,600,"
     ]
    }
   ],
   "source": [
    "# Encode the mirrored circuits with the same settings as the main dataset, except that max_depth\n",
    "# is given explicitly instead of being left as None.\n",
    "other_xt, other_y = encoding.create_input_data(\n",
    "    circs = o_circs,\n",
    "    fidelities = o_sps,\n",
    "    tracked_error_gens = error_gens,\n",
    "    measurement_encoding = measurement_encoding,\n",
    "    pspec = pspec,\n",
    "    geometry = geometry,\n",
    "    num_qubits = num_qubits,\n",
    "    num_channels = num_channels,\n",
    "    indexmapper = gate_encoding,\n",
    "    indexmapper_kwargs = {'pspec': pspec},\n",
    "    valuemapper = None,\n",
    "    valuemapper_kwargs = {},\n",
    "    max_depth = max_depth,\n",
    "    return_separate=False,\n",
    ")\n",
    "\n",
    "# Same scaling as the main dataset: 10000 times (1 - value).\n",
    "other_y = 10000 * (1-other_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the encoded mirrored circuits and their scaled infidelities, each in its own compressed\n",
    "# archive, under the array names 'circuits' and 'infidelities'.\n",
    "np.savez_compressed(proc_path + '/processed_mirrored_circuits.npz',\n",
    "                    circuits = other_xt)\n",
    "np.savez_compressed(proc_path + '/processed_mirrored_infidelities.npz',\n",
    "                    infidelities = other_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "QPL",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
