{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/22 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "30780445 3421367 (21853, 21853) (24919, 24919) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  5%|▍         | 1/22 [02:50<59:50, 170.96s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "33739126 3811010 (23346, 23346) (24318, 24318) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  9%|▉         | 2/22 [05:43<57:15, 171.76s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26440978 3105576 (19437, 19437) (19787, 19787) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 14%|█▎        | 3/22 [08:23<52:39, 166.28s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "21796732 2669177 (18659, 18659) (19104, 19104) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 18%|█▊        | 4/22 [10:21<44:16, 147.57s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "21837232 2653013 (17463, 17463) (18088, 18088) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 23%|██▎       | 5/22 [12:17<38:30, 135.93s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20944627 2535619 (16679, 16679) (17090, 17090) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 27%|██▋       | 6/22 [14:03<33:35, 125.97s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "18282125 2163008 (15147, 15147) (15912, 15912) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|███▏      | 7/22 [15:22<27:36, 110.40s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17389197 2142174 (14087, 14087) (14628, 14628) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 36%|███▋      | 8/22 [16:24<22:09, 94.94s/it] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11444199 1471354 (10907, 10907) (14111, 14111) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 41%|████      | 9/22 [17:11<17:20, 80.06s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16221920 1948772 (12757, 12757) (13547, 13547) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 45%|████▌     | 10/22 [18:08<14:35, 72.98s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16886863 2010265 (13016, 13016) (13485, 13485) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 50%|█████     | 11/22 [19:08<12:38, 68.99s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15097417 1891815 (13006, 13006) (13379, 13379) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 55%|█████▍    | 12/22 [20:03<10:47, 64.76s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9612992 1298672 (9519, 9519) (9609, 9609) 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 59%|█████▉    | 13/22 [20:38<08:21, 55.76s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9228479 1219710 (8722, 8722) (8829, 8829) 2.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 64%|██████▎   | 14/22 [21:10<06:28, 48.60s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8029525 1087449 (7754, 7754) (8253, 8253) 2.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 68%|██████▊   | 15/22 [21:38<04:56, 42.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7700840 968514 (7468, 7468) (9024, 9024) 2.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 73%|███████▎  | 16/22 [22:04<03:44, 37.48s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7760881 922161 (7577, 7577) (8120, 8120) 2.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 77%|███████▋  | 17/22 [22:34<02:55, 35.03s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7768547 1055924 (7420, 7420) (7801, 7801) 2.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 18/22 [23:00<02:09, 32.41s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5477190 644012 (5517, 5517) (5904, 5904) 3.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 86%|████████▋ | 19/22 [23:22<01:28, 29.41s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6109415 826787 (5939, 5939) (6292, 6292) 2.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▌| 21/22 [23:51<00:21, 21.17s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2260778 385415 (3394, 3394) (3871, 3871) 3.0\n",
      "2658889 361892 (3298, 3298) (3519, 3519) 4.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 22/22 [24:01<00:00, 65.52s/it]\n"
     ]
    }
   ],
   "source": [
    "import glob\n",
    "import os\n",
    "import random\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm.auto import tqdm\n",
    "\n",
    "# Coordinates in the input files are divided by this value to obtain bin indices.\n",
    "res = 10_000\n",
    "\n",
    "# Downsampling denominator; the per-read keep probability is its reciprocal.\n",
    "# Alternative values noted by the author: 16, 50, 100.\n",
    "denominator = 100\n",
    "rate = 1.0 / denominator\n",
    "\n",
    "\n",
    "def _assign_last_wins(mat, rows, cols, vals):\n",
    "    \"\"\"Set ``mat[rows[k], cols[k]] = vals[k]``; if a cell repeats, the latest k wins.\"\"\"\n",
    "    if rows.size == 0:\n",
    "        return\n",
    "    flat = rows * mat.shape[1] + cols\n",
    "    # np.unique reports the first occurrence, so scan the reversed array to\n",
    "    # find the last write aimed at every cell.\n",
    "    _, first_rev = np.unique(flat[::-1], return_index=True)\n",
    "    last = flat.size - 1 - first_rev\n",
    "    mat[rows[last], cols[last]] = vals[last]\n",
    "\n",
    "\n",
    "def loadBothConstraints(stria, strib, res, sample_rate=None, seed=None):\n",
    "    \"\"\"Build a downsampled, normalised dense matrix from two sparse contact files.\n",
    "\n",
    "    Both files are tab-separated without a header: columns 0 and 1 hold\n",
    "    coordinates (divided by ``res`` and truncated to get bin indices) and\n",
    "    column 2 holds the value.\n",
    "\n",
    "    Each value of file A is truncated to an integer count (NaN counts as 0)\n",
    "    and thinned by keeping every unit independently with probability\n",
    "    ``sample_rate``, i.e. one binomial draw per entry. The thinned counts are\n",
    "    written symmetrically into a dense matrix, unwanted bins are removed, and\n",
    "    the matrix is clipped at its 99.9th percentile and divided by it.\n",
    "    File B only contributes its diagonal, which takes part in the bin filter.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    stria, strib : str\n",
    "        Paths of sparse contact files A and B.\n",
    "    res : int\n",
    "        Divisor that turns coordinates into bin indices.\n",
    "    sample_rate : float, optional\n",
    "        Keep probability; values outside [0, 1] are clamped. Defaults to the\n",
    "        notebook-level ``rate``.\n",
    "    seed : int, optional\n",
    "        Seed for the NumPy random generator. Pass a value to get reproducible\n",
    "        output; the default draws fresh entropy on every call.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    mata : numpy.ndarray\n",
    "        Processed matrix A. Row/column ``k`` is removed when the diagonal of A\n",
    "        is 0 at ``k`` or the diagonal of B is 0 or NaN at ``k``. If the 99.9th\n",
    "        percentile is 0 the matrix is returned as all zeros instead of NaN.\n",
    "    matb : numpy.ndarray\n",
    "        float32 matrix containing only the diagonal entries of B. It is NOT\n",
    "        trimmed, so its shape can differ from that of ``mata``.\n",
    "    \"\"\"\n",
    "    keep_prob = rate if sample_rate is None else sample_rate\n",
    "    # `random.random() < p` keeps everything for p >= 1 and nothing for p <= 0.\n",
    "    keep_prob = min(max(float(keep_prob), 0.0), 1.0)\n",
    "    rng = np.random.default_rng(seed)\n",
    "\n",
    "    contact_mapa = pd.read_csv(stria, sep='\\t', header=None)\n",
    "    contact_mapb = pd.read_csv(strib, sep='\\t', header=None)\n",
    "\n",
    "    rowsa = (contact_mapa.iloc[:, 0] / res).astype(np.int64).to_numpy()\n",
    "    colsa = (contact_mapa.iloc[:, 1] / res).astype(np.int64).to_numpy()\n",
    "    valsa = contact_mapa.iloc[:, 2].to_numpy(dtype=float)\n",
    "\n",
    "    rowsb = (contact_mapb.iloc[:, 0] / res).astype(np.int64).to_numpy()\n",
    "    colsb = (contact_mapb.iloc[:, 1] / res).astype(np.int64).to_numpy()\n",
    "    valsb = contact_mapb.iloc[:, 2].to_numpy(dtype=float)\n",
    "\n",
    "    # Both matrices share one bin range so their diagonals line up.\n",
    "    bigbin = max(rowsa.max(), colsa.max(), rowsb.max(), colsb.max())\n",
    "    smallbin = min(rowsa.min(), colsa.min(), rowsb.min(), colsb.min())\n",
    "    size = int(bigbin - smallbin + 1)\n",
    "\n",
    "    mata = np.zeros((size, size), dtype='float32')\n",
    "    matb = np.zeros((size, size), dtype='float32')\n",
    "\n",
    "    # One binomial draw per entry replaces the per-read Python loop.\n",
    "    counts = np.where(np.isnan(valsa), 0.0, valsa).astype(np.int64)\n",
    "    counts = np.clip(counts, 0, None)  # a negative count keeps nothing\n",
    "    sampled = rng.binomial(counts, keep_prob)\n",
    "\n",
    "    # Interleave (r, c) and (c, r) so writes happen in the same order as a\n",
    "    # row-by-row loop that mirrors each entry immediately.\n",
    "    ra = rowsa - smallbin\n",
    "    ca = colsa - smallbin\n",
    "    _assign_last_wins(\n",
    "        mata,\n",
    "        np.column_stack((ra, ca)).ravel(),\n",
    "        np.column_stack((ca, ra)).ravel(),\n",
    "        np.repeat(sampled, 2),\n",
    "    )\n",
    "\n",
    "    # Only the diagonal of B is needed.\n",
    "    on_diag = rowsb == colsb\n",
    "    db = rowsb[on_diag] - smallbin\n",
    "    _assign_last_wins(matb, db, db, valsb[on_diag])\n",
    "\n",
    "    diaga = np.diag(mata)\n",
    "    diagb = np.diag(matb)\n",
    "    drop = (diaga == 0) | (diagb == 0) | np.isnan(diagb)\n",
    "    keep = np.flatnonzero(~drop)\n",
    "    # A single fancy-index selection copies the matrix once instead of twice.\n",
    "    mata = mata[np.ix_(keep, keep)]\n",
    "\n",
    "    per_a = np.percentile(mata, 99.9) if mata.size else 0.0\n",
    "    if per_a > 0:\n",
    "        mata = np.clip(mata, 0, per_a)\n",
    "        mata = mata / per_a\n",
    "    else:\n",
    "        # Clipping to [0, 0] leaves zeros; skip the 0/0 division that gives NaN.\n",
    "        mata = np.zeros_like(mata)\n",
    "\n",
    "    print(len(valsa),len(valsb), mata.shape, matb.shape, per_a)\n",
    "\n",
    "    return mata, matb\n",
    "\n",
    "# np.save does not create missing directories, so make sure the target exists.\n",
    "os.makedirs('Full_Mats', exist_ok=True)\n",
    "\n",
    "# For each chr index 1..22: build the downsampled matrix and store it as .npy.\n",
    "for i in tqdm(range(1, 23)):\n",
    "    target, _ = loadBothConstraints(\n",
    "        f'Constraints/high_chr{i}_res_{res}.txt',\n",
    "        f'Constraints/low_chr{i}_res_{res}.txt',\n",
    "        res,\n",
    "    )\n",
    "    np.save(f'Full_Mats/gm12878_mat_low_{denominator}_chr{i}_res_{res}', target)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# The matrices saved by the previous cell are named ..._chr<i>_res_<res>.npy;\n",
    "# the wildcard stands in for the index <i>. Sorted for a stable order.\n",
    "globs = sorted(glob.glob(f'Full_Mats/gm12878_mat_low_{denominator}_chr*_res_{res}.npy'))\n",
    "\n",
    "piece_size = 256  # side length of each square tile\n",
    "step = 50  # offset between the start positions of consecutive tiles\n",
    "\n",
    "def splitPieces(fn, piece_size, step):\n",
    "    \"\"\"Cut square tiles along the main diagonal of a matrix stored in a .npy file.\n",
    "\n",
    "    Tile ``k`` is ``data[s:s+piece_size, s:s+piece_size]`` with ``s = k * step``;\n",
    "    only start positions where a full tile fits in the first axis are used.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    fn : str\n",
    "        Path handed to ``np.load``. The matrix is assumed to be square.\n",
    "    piece_size : int\n",
    "        Side length of every tile.\n",
    "    step : int\n",
    "        Distance between the start positions of consecutive tiles.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Array of shape ``(n_tiles, 1, piece_size, piece_size)`` with the dtype of\n",
    "        the loaded matrix. The shape keeps all four axes even when no tile fits\n",
    "        (``n_tiles == 0``).\n",
    "    \"\"\"\n",
    "    data = np.load(fn)\n",
    "    starts = range(0, data.shape[0] - piece_size + 1, step)\n",
    "    # Preallocating fixes the rank, so an empty result is still 4-D.\n",
    "    pieces = np.empty((len(starts), 1, piece_size, piece_size), dtype=data.dtype)\n",
    "    for k, start in enumerate(starts):\n",
    "        stop = start + piece_size\n",
    "        pieces[k, 0] = data[start:stop, start:stop]\n",
    "\n",
    "    return pieces\n",
    "\n",
    "# np.save does not create missing directories, so make sure the target exists.\n",
    "os.makedirs('Splits', exist_ok=True)\n",
    "\n",
    "# For each chr index 1..22: tile the stored matrix and save the tiles.\n",
    "for i in range(1, 23):\n",
    "    target = splitPieces(\n",
    "        f'Full_Mats/gm12878_mat_low_{denominator}_chr{i}_res_{res}.npy',\n",
    "        piece_size,\n",
    "        step,\n",
    "    )\n",
    "    np.save(f'Splits/gm12878_low_{denominator}_chr_{i}_res_{res}_piece_{piece_size}', target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "JaeminKim",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
