{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6cbe4a52",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.tsv\r\n",
      "PanCanAtlas_miRNA_sample_information_list.txt\r\n",
      "TCGA-CDR-SupplementalTableS1.xlsx\r\n",
      "TCGA-RPPA-pancan-clean.txt\r\n",
      "Untitled.ipynb\r\n",
      "jhu-usc.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv\r\n",
      "jhu-usc.edu_PANCAN_merged_HumanMethylation27_HumanMethylation450.betaValue_whitelisted.tsv\r\n",
      "jhu-usc.hfQDEKjh.edu_PANCAN_HumanMethylation450.betaValue_whitelisted.tsv.part\r\n",
      "pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.csv\r\n"
     ]
    }
   ],
   "source": [
    "# List the current working directory to see which raw data files are available.\n",
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3c731261",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5f90f622",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tab-separated gene-expression file into df_gene.\n",
    "df_gene = pd.read_csv(\n",
    "    'EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.tsv',\n",
    "    sep='\\t',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b2526346",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the miRNA table; this .csv is parsed with the pandas default (comma) separator.\n",
    "df_mirna = pd.read_csv(\n",
    "    'pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.csv'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "33223735",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tab-separated merged 27k/450k methylation betaValue file into df_meth.\n",
    "df_meth = pd.read_csv(\n",
    "    'jhu-usc.edu_PANCAN_merged_HumanMethylation27_HumanMethylation450.betaValue_whitelisted.tsv',\n",
    "    sep='\\t',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "95138b7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tab-separated RPPA table into df_rppa.\n",
    "df_rppa = pd.read_csv(\n",
    "    'TCGA-RPPA-pancan-clean.txt',\n",
    "    sep='\\t',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5132c585",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(743, 10826)\n",
      "(7790, 200)\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: (rows, columns) of the miRNA and RPPA tables, one per line.\n",
    "# (The shapes of df_gene and df_meth are not printed.)\n",
    "print(df_mirna.shape, df_rppa.shape, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f57fe5bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only rows whose Correction flag equals \"Corrected\", index them by the 'Genes'\n",
    "# column and drop the flag column.\n",
    "# NOTE: df_mirna is overwritten, so this cell cannot be run twice in a row - after the\n",
    "# first run the 'Correction' column no longer exists.\n",
    "df_mirna = (\n",
    "    df_mirna.loc[df_mirna['Correction'] == \"Corrected\"]\n",
    "    .set_index('Genes')\n",
    "    .drop(columns='Correction')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "496af9eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the current df_mirna (index included) as CSV.\n",
    "df_mirna.to_csv(path_or_buf='mirna_proc.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "a3767eb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index by SampleID, transpose so the SampleID values become the columns, remove the\n",
    "# TumorType row and write the result to disk.\n",
    "(\n",
    "    df_rppa.set_index('SampleID')\n",
    "    .transpose()\n",
    "    .drop(index='TumorType')\n",
    "    .to_csv('rppa_proc.csv')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "47f30ba4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move the feature-ID column of each matrix into the index.\n",
    "# Later cells read probe IDs from df_meth.index (nsmallest(...).index) and take row-wise\n",
    "# statistics over df_meth, so the methylation frame needs this step as well.\n",
    "# The membership checks keep the cell re-runnable: set_index raises KeyError once the\n",
    "# column has already been moved into the index.\n",
    "if 'Composite Element REF' in df_meth.columns:\n",
    "    df_meth = df_meth.set_index('Composite Element REF')\n",
    "if 'gene_id' in df_gene.columns:\n",
    "    df_gene = df_gene.set_index('gene_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9999dcd7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(22601,)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean of every row across its columns; the shape shows how many rows there are.\n",
    "df_meth.mean(axis='columns').shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ac8ef454",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "KeyboardInterrupt\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Replace every value by its absolute deviation from the mean of its own row.\n",
    "# Vectorised form of apply(lambda x: (x - x.mean()).abs(), axis=1): the row means are\n",
    "# computed once and subtracted along the index, instead of calling a Python lambda per row.\n",
    "#df_meth = df_meth.sub(df_meth.mean(axis=1), axis=0).abs()\n",
    "df_gene = df_gene.sub(df_gene.mean(axis=1), axis=0).abs()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d80fc483",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace each value by its rank within its column (axis=0 is the pandas default).\n",
    "df_meth = df_meth.rank(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "6f6563b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start with an empty result frame for the per-column probe selections.\n",
    "df_meth_ranked = pd.DataFrame(data=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "a7f72d6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every column of df_meth, collect the index labels of its 800 smallest values.\n",
    "# The frame is built in a single constructor call rather than by inserting one column at\n",
    "# a time, which fragments the frame (pandas emits a PerformanceWarning for that pattern)\n",
    "# and would depend on df_meth_ranked having been created empty beforehand.\n",
    "df_meth_ranked = pd.DataFrame(\n",
    "    {column: df_meth[column].nsmallest(800).index.to_numpy() for column in df_meth}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "aadca783",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['cg15569340', 'cg03320844', 'cg22193702', 'cg20383064',\n",
       "       'cg07218192', 'cg22827640', 'cg24441911', 'cg03969997',\n",
       "       'cg15746445', 'cg18204685', 'cg16534233', 'cg20317180',\n",
       "       'cg27470406', 'cg13917504', 'cg00331433', 'cg25993152',\n",
       "       'cg10912268', 'cg13451483', 'cg26656452', 'cg06276653',\n",
       "       'cg18930892', 'cg00164898', 'cg22584335', 'cg13539030',\n",
       "       'cg25605045', 'cg15121304', 'cg24789424', 'cg06684910',\n",
       "       'cg19859270', 'cg17178888', 'cg03947362', 'cg12256228',\n",
       "       'cg18627852', 'cg25544551', 'cg21911019', 'cg15127806',\n",
       "       'cg13199822', 'cg09556292', 'cg13206017', 'cg17142134',\n",
       "       'cg04936930', 'cg01333619', 'cg19523692', 'cg22736107',\n",
       "       'cg24386906', 'cg01192952', 'cg16280667', 'cg14501253',\n",
       "       'cg21167159', 'cg10145369', 'cg23940655', 'cg12788471',\n",
       "       'cg01959848', 'cg21631754', 'cg26728422', 'cg01414934',\n",
       "       'cg03100146', 'cg26895595', 'cg22880820', 'cg05695927',\n",
       "       'cg17575811', 'cg00323915', 'cg21849289', 'cg21439450',\n",
       "       'cg18462653', 'cg20584011', 'cg03544525', 'cg07586911',\n",
       "       'cg07817783', 'cg23759710', 'cg07990939', 'cg06625414',\n",
       "       'cg06615667', 'cg04106190', 'cg19853229', 'cg07668036',\n",
       "       'cg09440340', 'cg06141123', 'cg21722680', 'cg26809911',\n",
       "       'cg12438666', 'cg22986999', 'cg12082129', 'cg25811820',\n",
       "       'cg18178963', 'cg10061247', 'cg17826679', 'cg23698058',\n",
       "       'cg01677177', 'cg11047295', 'cg13792279', 'cg26776924',\n",
       "       'cg27561421', 'cg16558203', 'cg04672450', 'cg22627950',\n",
       "       'cg13585240', 'cg03453449', 'cg04185037', 'cg02283643',\n",
       "       'cg06665984', 'cg06353720', 'cg24615251', 'cg07338205',\n",
       "       'cg17694877', 'cg23242017', 'cg07675682', 'cg25061755',\n",
       "       'cg08084415', 'cg17439694', 'cg10681065', 'cg15540054',\n",
       "       'cg24054653', 'cg07070934', 'cg26207503', 'cg20648463',\n",
       "       'cg15822411', 'cg06273903', 'cg14948436', 'cg05494459',\n",
       "       'cg23828212', 'cg22909609', 'cg15808558', 'cg09416501',\n",
       "       'cg27079104', 'cg27501380', 'cg09831311', 'cg25927124',\n",
       "       'cg20482364', 'cg12038710', 'cg23285573', 'cg06479426',\n",
       "       'cg22933847', 'cg12341414', 'cg05000446', 'cg21045388',\n",
       "       'cg15031780', 'cg16977257', 'cg04415689', 'cg17914753',\n",
       "       'cg20673075', 'cg22658979', 'cg05484458', 'cg15830940',\n",
       "       'cg12195695', 'cg00186141', 'cg23780937', 'cg27409012',\n",
       "       'cg03600318', 'cg18881269', 'cg21665774', 'cg00367438',\n",
       "       'cg19370284', 'cg03373857', 'cg01040062', 'cg01909245',\n",
       "       'cg07132517', 'cg09874600', 'cg01580044', 'cg18091964',\n",
       "       'cg20767936', 'cg01419788', 'cg17122311', 'cg17801256',\n",
       "       'cg02909042', 'cg11832722', 'cg06191203', 'cg15772361',\n",
       "       'cg12549513', 'cg06665322', 'cg18508125', 'cg20957193',\n",
       "       'cg26394737', 'cg13836627', 'cg00149976', 'cg27015174',\n",
       "       'cg01056155', 'cg14916288', 'cg05276137', 'cg17589175',\n",
       "       'cg07027075', 'cg05361811', 'cg16478792', 'cg22005565',\n",
       "       'cg03363175', 'cg12622273', 'cg23376526', 'cg11220060',\n",
       "       'cg01103730', 'cg17356733', 'cg20009671', 'cg08579995',\n",
       "       'cg16338347', 'cg00162401', 'cg13912204', 'cg14079785',\n",
       "       'cg05051316', 'cg19949550', 'cg24727568', 'cg22774127',\n",
       "       'cg10331048', 'cg05044994', 'cg11140785', 'cg22767079',\n",
       "       'cg02968557', 'cg16046376', 'cg22732672', 'cg15442534',\n",
       "       'cg21431091', 'cg18493147', 'cg04460372', 'cg06537829',\n",
       "       'cg07265300', 'cg24560809', 'cg01966465', 'cg21628553',\n",
       "       'cg05441133', 'cg13350783', 'cg23983340', 'cg10485472',\n",
       "       'cg03658707', 'cg25945961', 'cg01056568', 'cg03086563',\n",
       "       'cg05754589', 'cg06630567', 'cg09276363', 'cg05055720',\n",
       "       'cg16794633', 'cg18338296', 'cg07288587', 'cg21415604',\n",
       "       'cg01357263', 'cg20822628', 'cg13739345', 'cg06363129',\n",
       "       'cg16306381', 'cg26624273', 'cg23919568', 'cg05824432',\n",
       "       'cg13576061', 'cg17430393', 'cg02091100', 'cg04749104',\n",
       "       'cg23799276', 'cg13649728', 'cg24946544', 'cg15238224',\n",
       "       'cg12531542', 'cg25134071', 'cg10819446', 'cg23282771',\n",
       "       'cg11513856', 'cg09018862', 'cg07118243', 'cg25955899',\n",
       "       'cg15534366', 'cg26757793', 'cg10035922', 'cg14607813',\n",
       "       'cg14636377', 'cg25989745', 'cg13412615', 'cg16863447',\n",
       "       'cg05876083', 'cg23090824', 'cg23075286', 'cg11261264',\n",
       "       'cg10671066', 'cg11015241', 'cg08568512', 'cg11843760',\n",
       "       'cg01999523', 'cg04402021', 'cg06612452', 'cg13135180',\n",
       "       'cg12654845', 'cg18264687', 'cg02699167', 'cg01991383',\n",
       "       'cg00224234', 'cg13119609', 'cg16688911', 'cg24615044',\n",
       "       'cg18958584', 'cg12431196', 'cg23943360', 'cg18088775',\n",
       "       'cg06282240', 'cg19481052', 'cg13360404', 'cg14808739',\n",
       "       'cg26177629', 'cg00734993', 'cg12916723', 'cg18219226',\n",
       "       'cg15170424', 'cg20524216', 'cg11880010', 'cg19267846',\n",
       "       'cg06577658', 'cg17145806', 'cg19569684', 'cg12683641',\n",
       "       'cg14545899', 'cg17569486', 'cg25013852', 'cg00060882',\n",
       "       'cg00725777', 'cg11843691', 'cg08313788', 'cg25961684',\n",
       "       'cg01328164', 'cg19304352', 'cg04006554', 'cg14203758',\n",
       "       'cg26114571', 'cg14601284', 'cg08504049', 'cg24371157',\n",
       "       'cg08983975', 'cg05254747', 'cg08592230', 'cg25229015',\n",
       "       'cg01668383', 'cg12259256', 'cg00051979', 'cg19229991',\n",
       "       'cg01656216', 'cg22959932', 'cg12106945', 'cg12535715',\n",
       "       'cg22982528', 'cg19308222', 'cg26917625', 'cg05945622',\n",
       "       'cg00303548', 'cg21846903', 'cg03866607', 'cg20938286',\n",
       "       'cg26138224', 'cg05688084', 'cg11574612', 'cg08385610',\n",
       "       'cg14367014', 'cg15801967', 'cg15727249', 'cg21022247',\n",
       "       'cg13804316', 'cg10301967', 'cg21750589', 'cg21713257',\n",
       "       'cg12086773', 'cg17397493', 'cg16274148', 'cg01605984',\n",
       "       'cg23300897', 'cg20497627', 'cg05698069', 'cg19081759',\n",
       "       'cg07684150', 'cg14299177', 'cg08419026', 'cg19540408',\n",
       "       'cg02962299', 'cg00402366', 'cg01151699', 'cg02092466',\n",
       "       'cg10261589', 'cg03247626', 'cg11806565', 'cg22155248',\n",
       "       'cg13859216', 'cg18710985', 'cg00520135', 'cg26916607',\n",
       "       'cg17002259', 'cg12506373', 'cg14222233', 'cg27522780',\n",
       "       'cg08293760', 'cg24022829', 'cg06609475', 'cg16588061',\n",
       "       'cg10219037', 'cg26091119', 'cg24266238', 'cg04184278',\n",
       "       'cg12556134', 'cg12422683', 'cg12694870', 'cg23345864',\n",
       "       'cg15241708', 'cg06887663', 'cg15205507', 'cg01770400',\n",
       "       'cg11819013', 'cg06990379', 'cg02431964', 'cg11353032',\n",
       "       'cg00951770', 'cg01137532', 'cg17229388', 'cg16366323',\n",
       "       'cg10480741', 'cg00324733', 'cg13500819', 'cg06716436',\n",
       "       'cg26198776', 'cg18690395', 'cg04376312', 'cg03138668',\n",
       "       'cg09770154', 'cg24474998', 'cg20792833', 'cg00777555',\n",
       "       'cg20315136', 'cg21633208', 'cg17616707', 'cg04464465',\n",
       "       'cg02164386', 'cg02537909', 'cg15452204', 'cg21548414',\n",
       "       'cg18876487', 'cg00728317', 'cg16579101', 'cg23640701',\n",
       "       'cg18251360', 'cg14535518', 'cg19714749', 'cg26538349',\n",
       "       'cg10148841', 'cg01402045', 'cg13151102', 'cg27644292',\n",
       "       'cg18081881', 'cg26459500', 'cg01137065', 'cg19219366',\n",
       "       'cg08395899', 'cg27342801', 'cg14666892', 'cg05535113',\n",
       "       'cg01145396', 'cg23739036', 'cg19944367', 'cg05675373',\n",
       "       'cg18556540', 'cg02278071', 'cg09667582', 'cg08674093',\n",
       "       'cg20149766', 'cg23455897', 'cg21051086', 'cg07975813',\n",
       "       'cg04466273', 'cg19028369', 'cg07704144', 'cg07899425',\n",
       "       'cg18466173', 'cg00751288', 'cg16205058', 'cg20697204',\n",
       "       'cg15537433', 'cg14561282', 'cg05989429', 'cg26645834',\n",
       "       'cg17704839', 'cg20579480', 'cg14003467', 'cg07297178',\n",
       "       'cg18928687', 'cg12073537', 'cg04305913', 'cg05379350',\n",
       "       'cg22409383', 'cg01244871', 'cg27227797', 'cg10484958',\n",
       "       'cg14602164', 'cg02628202', 'cg01782486', 'cg04574442',\n",
       "       'cg26457013', 'cg11609760', 'cg07947016', 'cg23323879',\n",
       "       'cg17983307', 'cg21207436', 'cg21321885', 'cg09330643',\n",
       "       'cg05275605', 'cg26245202', 'cg09885086', 'cg09430689',\n",
       "       'cg18931895', 'cg23508052', 'cg18015677', 'cg19132372',\n",
       "       'cg13325666', 'cg12360373', 'cg25516544', 'cg10184881',\n",
       "       'cg25425005', 'cg03782727', 'cg18130076', 'cg22182666',\n",
       "       'cg22691736', 'cg12137206', 'cg26536164', 'cg07446572',\n",
       "       'cg24035962', 'cg00974864', 'cg06277481', 'cg07347137',\n",
       "       'cg09805010', 'cg19110523', 'cg27262488', 'cg19976011',\n",
       "       'cg25221254', 'cg09785172', 'cg22461615', 'cg04303901',\n",
       "       'cg01882150', 'cg03072102', 'cg12433691', 'cg06284322',\n",
       "       'cg01134491', 'cg10129493', 'cg18495298', 'cg21820890',\n",
       "       'cg22605342', 'cg22700017', 'cg06200339', 'cg20951444',\n",
       "       'cg07346310', 'cg13014406', 'cg09086179', 'cg21129531',\n",
       "       'cg19843429', 'cg22901146', 'cg20543571', 'cg26845278',\n",
       "       'cg06323290', 'cg10016608', 'cg03543403', 'cg17647273',\n",
       "       'cg15804118', 'cg07285167', 'cg13133148', 'cg06880557',\n",
       "       'cg06484397', 'cg17896097', 'cg27575501', 'cg01909487',\n",
       "       'cg03473294', 'cg01882179', 'cg05046097', 'cg11680158',\n",
       "       'cg17489897', 'cg11189434', 'cg13314167', 'cg18858343',\n",
       "       'cg22494492', 'cg02978542', 'cg26911787', 'cg03243506',\n",
       "       'cg23401756', 'cg13770446', 'cg26565975', 'cg00119079',\n",
       "       'cg04548841', 'cg12379775', 'cg02162412', 'cg05970437',\n",
       "       'cg01796228', 'cg10441365', 'cg00013618', 'cg19744952',\n",
       "       'cg13309018', 'cg03091512', 'cg26354413', 'cg15105660',\n",
       "       'cg24675098', 'cg12658982', 'cg03564506', 'cg00677811',\n",
       "       'cg12628956', 'cg13550608', 'cg08843314', 'cg15070798',\n",
       "       'cg12149391', 'cg01422136', 'cg09628601', 'cg17253459',\n",
       "       'cg02296128', 'cg07576541', 'cg05191792', 'cg10613381',\n",
       "       'cg15554401', 'cg16022279', 'cg26376809', 'cg13397365',\n",
       "       'cg21226234', 'cg18106189', 'cg11693019', 'cg27217148',\n",
       "       'cg11997899', 'cg13082192', 'cg23762517', 'cg10446968',\n",
       "       'cg27600136', 'cg27258787', 'cg22445920', 'cg16072030',\n",
       "       'cg18070061', 'cg03386722', 'cg21518938', 'cg23858074',\n",
       "       'cg26701826', 'cg21137417', 'cg27298262', 'cg15042806',\n",
       "       'cg06952236', 'cg24137081', 'cg11325578', 'cg19229809',\n",
       "       'cg24287460', 'cg23464269', 'cg17386700', 'cg10850119',\n",
       "       'cg10982775', 'cg16994506', 'cg21660727', 'cg09060914',\n",
       "       'cg22563697', 'cg03665785', 'cg25781162', 'cg12743894',\n",
       "       'cg19096475', 'cg12054080', 'cg27553637', 'cg10488185',\n",
       "       'cg03966541', 'cg25475443', 'cg01465620', 'cg21443584',\n",
       "       'cg08269321', 'cg17327492', 'cg09597465', 'cg02599225',\n",
       "       'cg03608974', 'cg21796825', 'cg07775501', 'cg11044823',\n",
       "       'cg27376817', 'cg12582965', 'cg17166338', 'cg14088811',\n",
       "       'cg21772838', 'cg06887414', 'cg09971646', 'cg25514503',\n",
       "       'cg24469114', 'cg03703325', 'cg08214957', 'cg11625178',\n",
       "       'cg18438300', 'cg17533847', 'cg19987219', 'cg09189322',\n",
       "       'cg02890926', 'cg04574507', 'cg26955850', 'cg07155664',\n",
       "       'cg10055471', 'cg19162106', 'cg19219437', 'cg14443380',\n",
       "       'cg06872331', 'cg11700584', 'cg05912121', 'cg07651857',\n",
       "       'cg08049198', 'cg12072803', 'cg13986130', 'cg22577136',\n",
       "       'cg11832543', 'cg03107405', 'cg03774732', 'cg15768203',\n",
       "       'cg15576195', 'cg09296212', 'cg04914305', 'cg06786424',\n",
       "       'cg08434547', 'cg08064891', 'cg05749792', 'cg12062995',\n",
       "       'cg23938476', 'cg14613832', 'cg16003913', 'cg22188495',\n",
       "       'cg13681935', 'cg22111003', 'cg13386234', 'cg17264470',\n",
       "       'cg14027234', 'cg27219973', 'cg00784357', 'cg01803810',\n",
       "       'cg20199333', 'cg04297093', 'cg01469547', 'cg10861017',\n",
       "       'cg06992027', 'cg25929421', 'cg05301852', 'cg26206598',\n",
       "       'cg01743370', 'cg11679069', 'cg00025991', 'cg24603941',\n",
       "       'cg02723533', 'cg21264207', 'cg25898500', 'cg21489873',\n",
       "       'cg08328671', 'cg07285276', 'cg09656405', 'cg27508071',\n",
       "       'cg18071006', 'cg17983064', 'cg13043862', 'cg02718725',\n",
       "       'cg21721155', 'cg05903630', 'cg24777762', 'cg25923856',\n",
       "       'cg08413469', 'cg09657265', 'cg05700681', 'cg21643045',\n",
       "       'cg21660424', 'cg08286169', 'cg19470701', 'cg07433344',\n",
       "       'cg12864581', 'cg17608500', 'cg13272258', 'cg06765947',\n",
       "       'cg19282714', 'cg07602026', 'cg09750385', 'cg01462904',\n",
       "       'cg09081544', 'cg09572106', 'cg25017304', 'cg04706338',\n",
       "       'cg20271396', 'cg07928641', 'cg23521069', 'cg14592406',\n",
       "       'cg04197135', 'cg19319069', 'cg02545106', 'cg27496506',\n",
       "       'cg08141806', 'cg26674929', 'cg09722555', 'cg18758482',\n",
       "       'cg02391387', 'cg08035942', 'cg27043630', 'cg04237003',\n",
       "       'cg07309102', 'cg17698643', 'cg08491025', 'cg02549424',\n",
       "       'cg05532892', 'cg02799466', 'cg08460026', 'cg14607011',\n",
       "       'cg04134624', 'cg20052411', 'cg13966710', 'cg16793061',\n",
       "       'cg05168404', 'cg12716838', 'cg16904599', 'cg20551517',\n",
       "       'cg00882832', 'cg10021735', 'cg02863947', 'cg06066711',\n",
       "       'cg09088508', 'cg13927251', 'cg09503975', 'cg27394046'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot check for one column: index labels of its 800 smallest values, as an array.\n",
    "df_meth['TCGA-02-0001-01C-01D-0186-05'].nsmallest(800).index.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "7d739f28",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give df_meth_ranked a fresh 0..n-1 index and discard the previous one.\n",
    "df_meth_ranked = df_meth_ranked.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "84726da8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 30.,  96., 170., 225., 234., 282., 283., 288., 315., 292., 331.,\n",
       "        335., 382., 370., 386., 436., 449., 430., 447., 401., 448., 403.,\n",
       "        347., 386., 357., 296., 307., 284., 320., 318., 284., 270., 291.,\n",
       "        317., 299., 320., 361., 334., 389., 336., 399., 413., 437., 456.,\n",
       "        454., 397., 452., 474., 447., 405., 429., 442., 420., 373., 331.,\n",
       "        331., 296., 260., 213., 171., 165., 158., 129., 108., 104.,  94.,\n",
       "         91.,  72.,  72.,  59.,  57.,  76.,  66.,  71.,  64.,  70.,  57.,\n",
       "         70.,  66.,  67.,  72.,  58.,  70.,  52.,  57.,  42.,  25.,  32.,\n",
       "         15.,   3.,   5.,   3.,   1.,   0.,   0.,   0.,   0.,   0.,   0.,\n",
       "          1.]),\n",
       " array([  10.  ,   21.78,   33.56,   45.34,   57.12,   68.9 ,   80.68,\n",
       "          92.46,  104.24,  116.02,  127.8 ,  139.58,  151.36,  163.14,\n",
       "         174.92,  186.7 ,  198.48,  210.26,  222.04,  233.82,  245.6 ,\n",
       "         257.38,  269.16,  280.94,  292.72,  304.5 ,  316.28,  328.06,\n",
       "         339.84,  351.62,  363.4 ,  375.18,  386.96,  398.74,  410.52,\n",
       "         422.3 ,  434.08,  445.86,  457.64,  469.42,  481.2 ,  492.98,\n",
       "         504.76,  516.54,  528.32,  540.1 ,  551.88,  563.66,  575.44,\n",
       "         587.22,  599.  ,  610.78,  622.56,  634.34,  646.12,  657.9 ,\n",
       "         669.68,  681.46,  693.24,  705.02,  716.8 ,  728.58,  740.36,\n",
       "         752.14,  763.92,  775.7 ,  787.48,  799.26,  811.04,  822.82,\n",
       "         834.6 ,  846.38,  858.16,  869.94,  881.72,  893.5 ,  905.28,\n",
       "         917.06,  928.84,  940.62,  952.4 ,  964.18,  975.96,  987.74,\n",
       "         999.52, 1011.3 , 1023.08, 1034.86, 1046.64, 1058.42, 1070.2 ,\n",
       "        1081.98, 1093.76, 1105.54, 1117.32, 1129.1 , 1140.88, 1152.66,\n",
       "        1164.44, 1176.22, 1188.  ]),\n",
       " <BarContainer object of 100 artists>)"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPqklEQVR4nO3df6zdd13H8eeLDsbPZZvrlq7t7IjV2JnIsJlDjFkYugmE7p+ZGtEaZ/rPjCAm0MofxD+abGoWYgRNA2iVH6Phh2tmDMzCspjARgcI60pdsbhdV9cCIqDJoPPtH+c7OGvv7T333nN6zvdzn4/k5ny/n/P93vN5396+zud8vj9uqgpJUlueN+0OSJLGz3CXpAYZ7pLUIMNdkhpkuEtSgy6YdgcALrvsstq0adO0uyFJvfLwww9/o6rWzvfcTIT7pk2bOHTo0LS7IUm9kuTfF3rOaRlJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQTFyhKk3Lpl3/8MPlr9/x+in2RBovR+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXI89zVjOFz1s/kOexabQx3rTrnehOQWuG0jCQ1yJH7DFjqJfBeMi9pMYb7DDPEzy9/3mqJ0zKS1CBH7tISOLpXXzhyl6QGOXKfEk/HGw9/jtL8HLlLUoMMd0lqkNMyM8Zphtnjv4n6yJG7JDXIcJekBhnuktQg59ylZfKCJs0yR+6S1CDDXZIa5LSMesdTE6XFOXKXpAYZ7pLUoJHDPcmaJF9Mcm+3fmmS+5I81j1eMrTt7iTHkhxNctMkOi5JWthS5tzfDBwBLurWdwEHq+qOJLu69bcn2QJsB64BrgT+KclPVtUzY+y3NFHO66vvRgr3JBuA1wN7gLd2zduAG7rlfcD9wNu79rur6mngeJJjwHXAZ8fWa/3QmSHk+daSYPSR+7uAtwEvG2q7oqpOAFTViSSXd+3rgc8NbTfXtT1Hkp3AToCrrrpqab1ehVb7SHK11y8t1aLhnuQNwMmqejjJDSN8z8zTVmc1VO0F9gJs3br1rOelcfLNQavNKCP3VwNvTPI64IXARUk+ADyVZF03al8HnOy2nwM2Du2/AXhynJ2WJJ3bomfLVNXuqtpQVZsYHCj9dFW9CTgA7Og22wHc0y0fALYnuTDJ1cBm4KGx91yStKCVXKF6B7A/yW3A48CtAFV1OMl+4FHgNHC7Z8pI0vm1pHCvqvsZnBVDVX0TuHGB7fYwOLNGWhLvtCiNh/eWOY/O90E9g1Javbz9gCQ1yJH7BDhiljRtjtwlqUGGuyQ1yHCXpAY55y6NgcdZNGsM9wnzniaSpsFpGUlqkOEuSQ0y3CWpQc65ayo8AClNliN3SWqQI/dVwpGytLo4cpekBhnuktQgw12SGuSce2O8IlYSOHKXpCYZ7pLUIMNdkhpkuEtSgzygOiYeyBw/f6bS8hnumjpDXBo/w30FDCXNZ6HfC2/7oPPJOXdJapDhLkkNMtwlqUGGuyQ1yAOqmtck7v/uAWjp/DHctailBr1/GESaPsN9BIaVpL5xzl2SGmS4S1KDDHdJatCic+5JXgg8AFzYbf/RqnpnkkuBjwCbgK8Dv1ZV/9Xtsxu4DXgG+P2q+uREeq8V83iC1KZRRu5PA6+pqp8FXgHcnOR6YBdwsKo2Awe7dZJsAbYD1wA3A+9JsmYCfZckLWDRcK+B73Wrz+++CtgG7Ova9wG3dMvbgLur6umqOg4cA64bZ6clSec20qmQ3cj7YeAngHdX1YNJrqiqEwBVdSLJ5d3m64HPDe0+17Wd+T13AjsBrrrqquVXoJnmhUvSdIwU7lX1DPCKJBcDn0jyM+fYPPN9i3m+515gL8DWrVvPel6TY+BOh8c3dD4t6WyZqvo2cD+DufSnkqwD6B5PdpvNARuHdtsAPLnSjkqSRjfK2TJrgR9U1beTvAh4LXAn
cADYAdzRPd7T7XIA+FCSu4Argc3AQxPo+1Q46pXUB6NMy6wD9nXz7s8D9lfVvUk+C+xPchvwOHArQFUdTrIfeBQ4DdzeTeuoYb7pSbNl0XCvqi8D187T/k3gxgX22QPsWXHvpsiwktRnXqEqSQ3yrpBDVvtofZT6PeND6odVH+6rPdAltclpGUlqkOEuSQ0y3CWpQYa7JDVo1R9Q1fJ5MFqaXY7cJalBhrskNchwl6QGOecuTYFX+mrSHLlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNWpX3lvE+5JJa58hdkhpkuEtSg1bNtIxTMZJWE0fuktQgw12SGmS4S1KDDHdJapDhLkkNWjVny0izyj+WrUlw5C5JDVo03JNsTPKZJEeSHE7y5q790iT3JXmse7xkaJ/dSY4lOZrkpkkWIEk62ygj99PAH1bVTwPXA7cn2QLsAg5W1WbgYLdO99x24BrgZuA9SdZMovOSpPktGu5VdaKqvtAtfxc4AqwHtgH7us32Abd0y9uAu6vq6ao6DhwDrhtzvyVJ57CkOfckm4BrgQeBK6rqBAzeAIDLu83WA08M7TbXtZ35vXYmOZTk0KlTp5bRdUnSQkYO9yQvBT4GvKWqvnOuTedpq7MaqvZW1daq2rp27dpRuyFJGsFI4Z7k+QyC/YNV9fGu+akk67rn1wEnu/Y5YOPQ7huAJ8fTXUnSKEY5WybA+4AjVXXX0FMHgB3d8g7gnqH27UkuTHI1sBl4aHxdliQtZpSLmF4N/CbwlSRf6tr+CLgD2J/kNuBx4FaAqjqcZD/wKIMzbW6vqmfG3XFJ0sIWDfeq+mfmn0cHuHGBffYAe1bQL0nSCniFqiQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDfIvMUkzxL/KpHFpOtyH/6NI0mritIwkNajpkbvUZ07RaCUcuUtSgwx3SWqQ0zJSDzhFo6Vy5C5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIe8tIPeN9ZjQKR+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgRcM9yfuTnEzyyFDbpUnuS/JY93jJ0HO7kxxLcjTJTZPquKTB1arPfknDRhm5/w1w8xltu4CDVbUZONitk2QLsB24ptvnPUnWjK23khZk0GvYouFeVQ8A3zqjeRuwr1veB9wy1H53VT1dVceBY8B14+mqJGlUy51zv6KqTgB0j5d37euBJ4a2m+vazpJkZ5JDSQ6dOnVqmd2QJM1n3AdUM09bzbdhVe2tqq1VtXXt2rVj7oYkrW7LveXvU0nWVdWJJOuAk137HLBxaLsNwJMr6eBSOd8oScsP9wPADuCO7vGeofYPJbkLuBLYDDy00k5KWhrv+a5Fwz3Jh4EbgMuSzAHvZBDq+5PcBjwO3ApQVYeT7AceBU4Dt1fVMxPqu6QRGPSr06LhXlW/vsBTNy6w/R5gz0o6JUlaGa9QlaQGGe6S1CDDXZIaZLhLUoMMd0lq0HLPc5e0SngqZT8Z7tIqcq4ruGchuH0jGR/DXdKyLBTES21fyWtpYc65S1KDHLlLGtm4bsznDf4mz3CXtGKG9exxWkaSGuTIXRIwewct/TSwMoa7pLP0ZW591t6QZonTMpLUIEfuknrF6ZrROHKXpAYZ7pLUIMNdkhrknLukJnjmzHM5cpekBhnuktQgw12SGmS4S1KDPKAqqWmr9UBrE+HuFWuS9FxOy0hSg5oYuUvSMD/NO3KXpCYZ7pLUIMNdkhrknLukVWM1nRbpyF2SGmS4S1KDDHdJatDEwj3JzUmOJjmWZNekXkeSdLaJHFBNsgZ4N/DLwBzw+SQHqurRSbyeJC3VqBc69fXA66TOlrkOOFZV/waQ5G5gG2C4S+qVSZxhcz7O2plUuK8HnhhanwN+fniDJDuB
nd3q95IcXeJrXAZ8Y9k9nD0t1dNSLWA9s+y81pI7J/49l1rPjy/0xKTCPfO01XNWqvYCe5f9Asmhqtq63P1nTUv1tFQLWM8sa6kWGG89kzqgOgdsHFrfADw5odeSJJ1hUuH+eWBzkquTvADYDhyY0GtJks4wkWmZqjqd5PeATwJrgPdX1eExv8yyp3RmVEv1tFQLWM8sa6kWGGM9qarFt5Ik9YpXqEpSgwx3SWpQ78K9j7c1SLIxyWeSHElyOMmbu/ZLk9yX5LHu8ZKhfXZ3NR5NctP0ej+/JGuSfDHJvd16n2u5OMlHk3y1+zd6Vc/r+YPu9+yRJB9O8sI+1ZPk/UlOJnlkqG3J/U/yc0m+0j3350nmO0V7GrX8afe79uUkn0hy8dBz46ulqnrzxeDg7NeAlwMvAP4F2DLtfo3Q73XAK7vllwH/CmwB/gTY1bXvAu7slrd0tV0IXN3VvGbadZxR01uBDwH3dut9rmUf8Lvd8guAi/taD4MLCI8DL+rW9wO/3ad6gF8CXgk8MtS25P4DDwGvYnDdzT8CvzojtfwKcEG3fOekaunbyP2HtzWoqu8Dz97WYKZV1Ymq+kK3/F3gCIP/hNsYBAvd4y3d8jbg7qp6uqqOA8cY1D4TkmwAXg+8d6i5r7VcxOA/4PsAqur7VfVtelpP5wLgRUkuAF7M4BqT3tRTVQ8A3zqjeUn9T7IOuKiqPluDdPzboX3Om/lqqapPVdXpbvVzDK4DgjHX0rdwn++2Buun1JdlSbIJuBZ4ELiiqk7A4A0AuLzbbNbrfBfwNuD/htr6WsvLgVPAX3fTTO9N8hJ6Wk9V/QfwZ8DjwAngv6vqU/S0niFL7f/6bvnM9lnzOwxG4jDmWvoW7ove1mCWJXkp8DHgLVX1nXNtOk/bTNSZ5A3Ayap6eNRd5mmbiVo6FzD42PyXVXUt8D8MPvYvZKbr6eaitzH4WH8l8JIkbzrXLvO0zUw9I1io/zNfV5J3AKeBDz7bNM9my66lb+He29saJHk+g2D/YFV9vGt+qvvIRfd4smuf5TpfDbwxydcZTIu9JskH6GctMOjfXFU92K1/lEHY97We1wLHq+pUVf0A+DjwC/S3nmcttf9z/Gi6Y7h9JiTZAbwB+I1uqgXGXEvfwr2XtzXojmy/DzhSVXcNPXUA2NEt7wDuGWrfnuTCJFcDmxkcUJm6qtpdVRuqahODn/+nq+pN9LAWgKr6T+CJJD/VNd3I4NbUvayHwXTM9Ule3P3e3cjgGE9f63nWkvrfTd18N8n13c/ht4b2maokNwNvB95YVf879NR4aznfR4/HcPT5dQzONvka8I5p92fEPv8ig49RXwa+1H29Dvgx4CDwWPd46dA+7+hqPMoUjvKPWNcN/Ohsmd7WArwCONT9+/w9cEnP6/lj4KvAI8DfMTj7ojf1AB9mcLzgBwxGrbctp//A1u5n8DXgL+iuyJ+BWo4xmFt/Ngv+ahK1ePsBSWpQ36ZlJEkjMNwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg/4fgyNzY8q7dIcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Count how often each probe ID occurs across all columns of df_meth_ranked.\n",
    "probe_counts = df_meth_ranked.stack().value_counts()\n",
    "\n",
    "# Explicit figure/axes with labels so the plot stands alone; the trailing ';' keeps the\n",
    "# cell from echoing a return value underneath the figure.\n",
    "fig, ax = plt.subplots()\n",
    "ax.hist(probe_counts, bins=100)\n",
    "ax.set_xlabel('Number of columns in which a probe is selected')\n",
    "ax.set_ylabel('Number of probes')\n",
    "ax.set_title('Recurrence of selected probes across columns');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3e99706a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "#df_meth = df_meth.reindex(df_meth.var(axis=1).sort_values().index, axis=0) #sort by row variance\n",
    "# Drop rows that already contain missing values, then turn exact zeros into NaN.\n",
    "# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.\n",
    "df_gene = df_gene.dropna().replace(0, np.nan)\n",
    "# Order the rows by ascending row variance, so the most variable rows come last.\n",
    "df_gene = df_gene.reindex(df_gene.var(axis=1).sort_values().index, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "acd1fc3f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Composite Element REF\n",
       "cg10474368    0.000006\n",
       "cg13946860    0.000008\n",
       "cg00770871    0.000008\n",
       "cg11246695    0.000008\n",
       "cg04589975    0.000008\n",
       "                ...   \n",
       "cg06051311    0.095170\n",
       "cg08668790    0.096515\n",
       "cg21790626    0.097518\n",
       "cg20916523    0.104639\n",
       "cg12374721    0.110510\n",
       "Length: 22601, dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Variance of every row, computed across its columns.\n",
    "df_meth.var(axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "05ce9ead",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the 800 complete (NaN-free) rows of df_meth with the largest row variance.\n",
    "# The ordering is computed here rather than assumed: the cell that used to sort df_meth\n",
    "# by row variance is commented out, so a plain tail(800) would depend on leftover kernel\n",
    "# state. A stable sort leaves an already sorted frame exactly as it is.\n",
    "# NOTE(review): an earlier cell overwrites df_meth with per-column ranks - confirm which\n",
    "# version of df_meth this export is meant to use.\n",
    "meth_complete = df_meth.dropna()\n",
    "row_variance = meth_complete.var(axis=1, numeric_only=True)\n",
    "ascending_positions = row_variance.to_numpy().argsort(kind='stable')\n",
    "meth_complete.iloc[ascending_positions].tail(800).to_csv(\"methyl_top800.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "db1e1da3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the last 1000 NaN-free rows of df_gene (index included) as CSV.\n",
    "df_gene.dropna().iloc[-1000:].to_csv(\"gene_top1000.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "bb9e3db8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([3042., 3111., 2637., 1495.,  483.,  186.,   70.,   30.,   11.,\n",
       "           4.]),\n",
       " array([ 0. ,  8.3, 16.6, 24.9, 33.2, 41.5, 49.8, 58.1, 66.4, 74.7, 83. ]),\n",
       " <BarContainer object of 10 artists>)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQ9ElEQVR4nO3df6zddX3H8efLgog/iDAKqW1Zmek2gcQymq6by8LEjU7Min+w1URpFpYaghEXl6X4j+6PJizxx0YySKo4SuZkjT9Co6KyzsWZIHhBIpRKaKSDaztaf03cHyj1vT/Op3osp/fe3tueU/p5PpKT8z3v8/l8v5/zSe/rfvs533NuqgpJUh9eMukBSJLGx9CXpI4Y+pLUEUNfkjpi6EtSR06b9ABmc+6559aKFSsmPQxJelF58MEHv1dVi4+sn/Shv2LFCqampiY9DEl6UUny36PqLu9IUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHTvpP5L4Yrdj8+Ykde+/NV03s2JJOfp7pS1JHDH1J6oihL0kdmXVNP8nLgK8CZ7T2n6qq9yc5B/g3YAWwF/jzqvph63MTcB1wCHh3VX2p1S8D7gDOBL4A3Fgn8C+zT3JtXZJORnM5038OeGNVvR5YBaxLshbYDOysqpXAzvaYJBcBG4CLgXXArUkWtX3dBmwCVrbbuuP3UiRJs5k19GvgJ+3h6e1WwHpgW6tvA65u2+uBu6rquap6EtgDrEmyBDirqu5rZ/d3DvWRJI3BnNb0kyxK8jBwALi3qu4Hzq+q/QDt/rzWfCnw9FD36VZb2raPrI863qYkU0mmDh48eAwvR5I0kzmFflUdqqpVwDIGZ+2XzNA8o3YxQ33U8bZW1eqqWr148Qv+2pckaZ6O6eqdqvoR8J8M1uKfaUs2tPsDrdk0sHyo2zJgX6svG1GXJI3JrKGfZHGSV7ftM4E3Ad8GdgAbW7ONwN1tewewIckZSS5k8IbtA20J6Nkka5MEuHaojyRpDObyNQxLgG3tCpyXANur6nNJ7gO2J7kOeAq4BqCqdiXZDjwGPA/cUFWH2r6u55eXbN7TbpKkMZk19KvqW8ClI+rfB644Sp8twJYR9SlgpvcDJEknkJ/IlaSOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUkbn8jVy9iKzY/PmJHHfvzVdN5LiSjo1n+pLUEUNfkjpi6EtSRwx9SeqIoS9JHZk19JMsT/KVJLuT7EpyY6t/IMl3kzzcbm8e6nNTkj1JHk9y5VD9siSPtOduSZIT87IkSaPM5ZLN54H3VtVDSV4FPJjk3vbcR6rqg8ONk1wEbAAuBl4D/HuS36yqQ8BtwCbg68AXgHXAPcfnpUiSZjPrmX5V7a+qh9r2s8BuYOkMXdYDd1XVc1X1JLAHWJNkCXBWVd1XVQXcCVy90BcgSZq7Y1rTT7ICuBS4v5XeleRbST6e5OxWWwo8PdRtutWWtu0j66OOsynJVJKpgwcPHssQJUkzmHPoJ3kl8GngPVX1YwZLNa8FVgH7gQ8dbjqie81Qf2GxamtVra6q1YsXL57rECVJs5hT6Cc5nUHgf6KqPgNQVc9U1aGq+jnwUWBNaz4NLB/qvgzY1+rLRtQlSWMyl6t3AtwO7K6qDw/Vlww1eyvwaNveAWxIckaSC4GVwANVtR94Nsnats9rgbuP0+uQJM3BXK7eeQPwDuCRJA+32vuAtyVZxWCJZi/wToCq2pVkO/AYgyt/bmhX7gBcD9wBnMngqh2v3JGkMZo19Kvqa4xej//CDH22AFtG1KeAS45lgJKk48dP5EpSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE
0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSR2YN/STLk3wlye4ku5Lc2OrnJLk3yRPt/uyhPjcl2ZPk8SRXDtUvS/JIe+6WJDkxL0uSNMpczvSfB95bVa8D1gI3JLkI2AzsrKqVwM72mPbcBuBiYB1wa5JFbV+3AZuAle227ji+FknSLGYN/araX1UPte1ngd3AUmA9sK012wZc3bbXA3dV1XNV9SSwB1iTZAlwVlXdV1UF3DnUR5I0Bse0pp9kBXApcD9wflXth8EvBuC81mwp8PRQt+lWW9q2j6yPOs6mJFNJpg4ePHgsQ5QkzWDOoZ/klcCngfdU1Y9najqiVjPUX1is2lpVq6tq9eLFi+c6REnSLOYU+klOZxD4n6iqz7TyM23JhnZ/oNWngeVD3ZcB+1p92Yi6JGlM5nL1ToDbgd1V9eGhp3YAG9v2RuDuofqGJGckuZDBG7YPtCWgZ5Osbfu8dqiPJGkMTptDmzcA7wAeSfJwq70PuBnYnuQ64CngGoCq2pVkO/AYgyt/bqiqQ63f9cAdwJnAPe0mSRqTWUO/qr7G6PV4gCuO0mcLsGVEfQq45FgGKEk6fvxEriR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOzhn6Sjyc5kOTRodoHknw3ycPt9uah525KsifJ40muHKpfluSR9twtSXL8X44kaSZzOdO/A1g3ov6RqlrVbl8ASHIRsAG4uPW5Ncmi1v42YBOwst1G7VOSdALNGvpV9VXgB3Pc33rgrqp6rqqeBPYAa5IsAc6qqvuqqoA7gavnOWZJ0jydtoC+70pyLTAFvLeqfggsBb4+1Ga61X7Wto+sj5RkE4P/FXDBBRcsYIgalxWbPz+xY++9+aqJHVt6sZnvG7m3Aa8FVgH7gQ+1+qh1+pqhPlJVba2q1VW1evHixfMcoiTpSPMK/ap6pqoOVdXPgY8Ca9pT08DyoabLgH2tvmxEXZI0RvMK/bZGf9hbgcNX9uwANiQ5I8mFDN6wfaCq9gPPJlnbrtq5Frh7AeOWJM3DrGv6ST4JXA6cm2QaeD9weZJVDJZo9gLvBKiqXUm2A48BzwM3VNWhtqvrGVwJdCZwT7tJksZo1tCvqreNKN8+Q/stwJYR9SngkmManSTpuPITuZLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI7MGvpJPp7kQJJHh2rnJLk3yRPt/uyh525KsifJ40muHKpfluSR9twtSXL8X44kaSZzOdO/A1h3RG0zsLOqVgI722OSXARsAC5ufW5Nsqj1uQ3YBKxstyP3KUk6wWYN/ar6KvCDI8rrgW1textw9VD9rqp6rqqeBPYAa5IsAc6qqvuqqoA7h/pIksZkvmv651fVfoB2f16rLwWeHmo33WpL2/aR9ZGSbEoylWTq4MGD8xyiJOlIx/uN3FHr9DVDfaSq2lpVq6tq9eLFi4/b4CSpd/MN/Wfakg3t/kCrTwPLh9otA/a1+rIRdUnSGM039HcAG9v2RuDuofqGJGckuZDBG7YPtCWgZ5OsbVftXDvUR5I0JqfN1iDJJ4HLgXOTTAPvB24Gtie5DngKuAagqnYl2Q48BjwP3FBVh9qurmdwJdCZwD3tJkkao1lDv6redpSnrjhK+y3AlhH1KeCSYxqdJOm48hO5ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0
Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjiwo9JPsTfJIkoeTTLXaOUnuTfJEuz97qP1NSfYkeTzJlQsdvCTp2ByPM/0/qqpVVbW6Pd4M7KyqlcDO9pgkFwEbgIuBdcCtSRYdh+NLkuboRCzvrAe2te1twNVD9buq6rmqehLYA6w5AceXJB3FQkO/gC8neTDJplY7v6r2A7T781p9KfD0UN/pVpMkjclpC+z/hqral+Q84N4k356hbUbUamTDwS+QTQAXXHDBAocoSTpsQaFfVfva/YEkn2WwXPNMkiVVtT/JEuBAaz4NLB/qvgzYd5T9bgW2AqxevXrkLwbpsBWbPz+R4+69+aqJHFdaiHkv7yR5RZJXHd4G/gR4FNgBbGzNNgJ3t+0dwIYkZyS5EFgJPDDf40uSjt1CzvTPBz6b5PB+/rWqvpjkG8D2JNcBTwHXAFTVriTbgceA54EbqurQgkYvSTom8w79qvoO8PoR9e8DVxylzxZgy3yPKUlaGD+RK0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcW+jdypW5N6s80gn+qUfPnmb4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR3xOn3pRWhSnxHw8wEvfp7pS1JHxh76SdYleTzJniSbx318SerZWJd3kiwC/gn4Y2Aa+EaSHVX12DjHIWl+XFZ68Rv3mv4aYE9VfQcgyV3AesDQl3RUfs/R8TPu0F8KPD30eBr43SMbJdkEbGoPf5Lk8Xke71zge/Ps2wvnaG6cp9mdknOUvz+uuxvnHP36qOK4Qz8javWCQtVWYOuCD5ZMVdXqhe7nVOYczY3zNDvnaHYnwxyN+43caWD50ONlwL4xj0GSujXu0P8GsDLJhUleCmwAdox5DJLUrbEu71TV80neBXwJWAR8vKp2ncBDLniJqAPO0dw4T7NzjmY38TlK1QuW1CVJpyg/kStJHTH0Jakjp2To+1UPoyVZnuQrSXYn2ZXkxlY/J8m9SZ5o92dPeqyTlmRRkm8m+Vx77BwNSfLqJJ9K8u327+n3nKMXSvLX7Wft0SSfTPKySc/TKRf6Q1/18KfARcDbklw02VGdNJ4H3ltVrwPWAje0udkM7KyqlcDO9rh3NwK7hx47R7/qH4EvVtVvA69nMFfO0ZAkS4F3A6ur6hIGF69sYMLzdMqFPkNf9VBVPwUOf9VD96pqf1U91LafZfCDupTB/GxrzbYBV09kgCeJJMuAq4CPDZWdoybJWcAfArcDVNVPq+pHOEejnAacmeQ04OUMPpc00Xk6FUN/1Fc9LJ3QWE5aSVYAlwL3A+dX1X4Y/GIAzpvg0E4G/wD8LfDzoZpz9Eu/ARwE/rktgX0syStwjn5FVX0X+CDwFLAf+N+q+jITnqdTMfTn9FUPPUvySuDTwHuq6seTHs/JJMlbgANV9eCkx3ISOw34HeC2qroU+D86X8oZpa3VrwcuBF4DvCLJ2yc7qlMz9P2qhxkkOZ1B4H+iqj7Tys8kWdKeXwIcmNT4TgJvAP4syV4GS4NvTPIvOEfDpoHpqrq/Pf4Ug18CztGvehPwZFUdrKqfAZ8Bfp8Jz9OpGPp+1cNRJAmDddjdVfXhoad2ABvb9kbg7nGP7WRRVTdV1bKqWsHg385/VNXbcY5+oar+B3g6yW+10hUMvh7dOfpVTwFrk7y8/exdweB9tInO0yn5idwkb2awLnv4qx62THZEJ4ckfwD8F/AIv1yvfh+Ddf3twAUM/qFeU1U/mMggTyJJLgf+pqrekuTXcI5+IckqBm90vxT4DvCXDE4inaMhSf4O+AsGV859E/gr4JVMcJ5OydCXJI12Ki7vSJKOwtCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHfl/jibmGFHSarAAAAAASUVORK5C
YII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "plt.hist(df_gene.tail(800).isna().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "998cb354",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(16335, 11069)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_gene.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a00abfb",
   "metadata": {},
   "source": [
    "# Processing all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1e7b3a17",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df_g=pd.read_csv('gene_top800.csv', index_col=0).T\n",
    "df_p=pd.read_csv('rppa_proc.csv', index_col=0).T\n",
    "df_m=pd.read_csv('methyl_top800.csv', index_col=0).T\n",
    "df_r=pd.read_csv('mirna_proc.csv', index_col=0).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "f8fac953",
   "metadata": {},
   "outputs": [],
   "source": [
    "def inds(x):\n",
    "    return \"_\".join(x.split('-')[1:4])\n",
    "df_g.index = df_g.index.map(inds)\n",
    "df_p.index = df_p.index.map(inds)\n",
    "df_m.index = df_m.index.map(inds)\n",
    "df_r.index = df_r.index.map(inds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "bb7bcf00",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Genes</th>\n",
       "      <th>hsa-let-7a-2-3p</th>\n",
       "      <th>hsa-let-7a-3p</th>\n",
       "      <th>hsa-let-7a-5p</th>\n",
       "      <th>hsa-let-7b-3p</th>\n",
       "      <th>hsa-let-7b-5p</th>\n",
       "      <th>hsa-let-7c-3p</th>\n",
       "      <th>hsa-let-7c-5p</th>\n",
       "      <th>hsa-let-7d-3p</th>\n",
       "      <th>hsa-let-7d-5p</th>\n",
       "      <th>hsa-let-7e-3p</th>\n",
       "      <th>...</th>\n",
       "      <th>hsa-miR-95-3p</th>\n",
       "      <th>hsa-miR-9-5p</th>\n",
       "      <th>hsa-miR-96-3p</th>\n",
       "      <th>hsa-miR-96-5p</th>\n",
       "      <th>hsa-miR-98-3p</th>\n",
       "      <th>hsa-miR-98-5p</th>\n",
       "      <th>hsa-miR-99a-3p</th>\n",
       "      <th>hsa-miR-99a-5p</th>\n",
       "      <th>hsa-miR-99b-3p</th>\n",
       "      <th>hsa-miR-99b-5p</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C4_A0F6_01A</th>\n",
       "      <td>0.980776</td>\n",
       "      <td>32.785934</td>\n",
       "      <td>17787.06993</td>\n",
       "      <td>6.725320</td>\n",
       "      <td>6875.798894</td>\n",
       "      <td>2.241773</td>\n",
       "      <td>598.553468</td>\n",
       "      <td>497.533559</td>\n",
       "      <td>321.974688</td>\n",
       "      <td>34.046932</td>\n",
       "      <td>...</td>\n",
       "      <td>0.980776</td>\n",
       "      <td>86.588493</td>\n",
       "      <td>0.280222</td>\n",
       "      <td>93.874256</td>\n",
       "      <td>2.662106</td>\n",
       "      <td>188.869399</td>\n",
       "      <td>0.420332</td>\n",
       "      <td>148.377369</td>\n",
       "      <td>195.034276</td>\n",
       "      <td>78432.081350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CU_A0YO_01A</th>\n",
       "      <td>2.750555</td>\n",
       "      <td>62.595962</td>\n",
       "      <td>20816.36637</td>\n",
       "      <td>11.835721</td>\n",
       "      <td>6014.963519</td>\n",
       "      <td>6.001211</td>\n",
       "      <td>1424.704101</td>\n",
       "      <td>340.652060</td>\n",
       "      <td>168.700702</td>\n",
       "      <td>16.336629</td>\n",
       "      <td>...</td>\n",
       "      <td>3.334006</td>\n",
       "      <td>1624.244358</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>34.506962</td>\n",
       "      <td>1.083552</td>\n",
       "      <td>100.353580</td>\n",
       "      <td>1.333602</td>\n",
       "      <td>412.749939</td>\n",
       "      <td>23.588092</td>\n",
       "      <td>13279.762530</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BT_A0S7_01A</th>\n",
       "      <td>7.085729</td>\n",
       "      <td>64.480134</td>\n",
       "      <td>64187.20949</td>\n",
       "      <td>41.238943</td>\n",
       "      <td>34652.048900</td>\n",
       "      <td>20.548614</td>\n",
       "      <td>3788.172416</td>\n",
       "      <td>335.296694</td>\n",
       "      <td>250.409661</td>\n",
       "      <td>13.321170</td>\n",
       "      <td>...</td>\n",
       "      <td>3.401150</td>\n",
       "      <td>757.039282</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>11.904025</td>\n",
       "      <td>0.850287</td>\n",
       "      <td>76.100729</td>\n",
       "      <td>2.125719</td>\n",
       "      <td>694.826582</td>\n",
       "      <td>59.803552</td>\n",
       "      <td>29537.994880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CU_A0YR_01A</th>\n",
       "      <td>0.805977</td>\n",
       "      <td>17.731490</td>\n",
       "      <td>31852.20320</td>\n",
       "      <td>14.104594</td>\n",
       "      <td>7643.480998</td>\n",
       "      <td>0.805977</td>\n",
       "      <td>147.896743</td>\n",
       "      <td>534.362619</td>\n",
       "      <td>385.659900</td>\n",
       "      <td>5.641838</td>\n",
       "      <td>...</td>\n",
       "      <td>1.208965</td>\n",
       "      <td>686.289247</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>24.985281</td>\n",
       "      <td>0.402988</td>\n",
       "      <td>223.658562</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>11.686664</td>\n",
       "      <td>29.015165</td>\n",
       "      <td>13749.158260</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BL_A0C8_01A</th>\n",
       "      <td>0.803256</td>\n",
       "      <td>32.397995</td>\n",
       "      <td>36701.57350</td>\n",
       "      <td>18.207138</td>\n",
       "      <td>5972.744450</td>\n",
       "      <td>1.874264</td>\n",
       "      <td>220.627670</td>\n",
       "      <td>380.475629</td>\n",
       "      <td>318.357159</td>\n",
       "      <td>16.332874</td>\n",
       "      <td>...</td>\n",
       "      <td>3.213024</td>\n",
       "      <td>92.106697</td>\n",
       "      <td>0.267752</td>\n",
       "      <td>65.866998</td>\n",
       "      <td>1.606512</td>\n",
       "      <td>60.511958</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>34.004507</td>\n",
       "      <td>56.763430</td>\n",
       "      <td>36226.313650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AG_3584_01A</th>\n",
       "      <td>3.262839</td>\n",
       "      <td>22.251037</td>\n",
       "      <td>23122.91869</td>\n",
       "      <td>20.626534</td>\n",
       "      <td>4760.875509</td>\n",
       "      <td>3.716744</td>\n",
       "      <td>599.077677</td>\n",
       "      <td>1011.271201</td>\n",
       "      <td>146.193566</td>\n",
       "      <td>4.721912</td>\n",
       "      <td>...</td>\n",
       "      <td>13.372890</td>\n",
       "      <td>10624.689830</td>\n",
       "      <td>0.301215</td>\n",
       "      <td>26.510692</td>\n",
       "      <td>0.987116</td>\n",
       "      <td>41.168575</td>\n",
       "      <td>1.073576</td>\n",
       "      <td>109.400542</td>\n",
       "      <td>14.142076</td>\n",
       "      <td>10462.448760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AG_3599_01A</th>\n",
       "      <td>2.002213</td>\n",
       "      <td>29.156045</td>\n",
       "      <td>17890.75858</td>\n",
       "      <td>28.611334</td>\n",
       "      <td>5660.590915</td>\n",
       "      <td>0.830712</td>\n",
       "      <td>222.252058</td>\n",
       "      <td>350.019624</td>\n",
       "      <td>125.544726</td>\n",
       "      <td>3.213865</td>\n",
       "      <td>...</td>\n",
       "      <td>25.697846</td>\n",
       "      <td>188.922382</td>\n",
       "      <td>0.791547</td>\n",
       "      <td>35.627721</td>\n",
       "      <td>0.467073</td>\n",
       "      <td>27.308137</td>\n",
       "      <td>0.144723</td>\n",
       "      <td>45.617460</td>\n",
       "      <td>15.948184</td>\n",
       "      <td>8293.054733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AG_3583_01A</th>\n",
       "      <td>0.675954</td>\n",
       "      <td>19.786044</td>\n",
       "      <td>16792.88093</td>\n",
       "      <td>22.441202</td>\n",
       "      <td>7572.840852</td>\n",
       "      <td>1.282126</td>\n",
       "      <td>143.450672</td>\n",
       "      <td>897.205621</td>\n",
       "      <td>76.690747</td>\n",
       "      <td>1.497562</td>\n",
       "      <td>...</td>\n",
       "      <td>15.811903</td>\n",
       "      <td>147.011379</td>\n",
       "      <td>-0.127951</td>\n",
       "      <td>10.352813</td>\n",
       "      <td>5.508178</td>\n",
       "      <td>51.327181</td>\n",
       "      <td>0.146054</td>\n",
       "      <td>28.874467</td>\n",
       "      <td>22.412294</td>\n",
       "      <td>6427.100677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AG_3598_01A</th>\n",
       "      <td>1.948157</td>\n",
       "      <td>25.893893</td>\n",
       "      <td>23153.80168</td>\n",
       "      <td>41.672361</td>\n",
       "      <td>6127.299751</td>\n",
       "      <td>3.081652</td>\n",
       "      <td>324.642671</td>\n",
       "      <td>498.491969</td>\n",
       "      <td>133.758828</td>\n",
       "      <td>4.168296</td>\n",
       "      <td>...</td>\n",
       "      <td>7.845190</td>\n",
       "      <td>172.341403</td>\n",
       "      <td>-0.127951</td>\n",
       "      <td>15.302715</td>\n",
       "      <td>1.541097</td>\n",
       "      <td>22.493043</td>\n",
       "      <td>0.351305</td>\n",
       "      <td>78.511647</td>\n",
       "      <td>26.015522</td>\n",
       "      <td>18854.100830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AG_3586_01A</th>\n",
       "      <td>1.542847</td>\n",
       "      <td>25.602462</td>\n",
       "      <td>16210.18450</td>\n",
       "      <td>14.560834</td>\n",
       "      <td>6729.794141</td>\n",
       "      <td>0.507704</td>\n",
       "      <td>174.197419</td>\n",
       "      <td>473.362477</td>\n",
       "      <td>100.068910</td>\n",
       "      <td>3.830974</td>\n",
       "      <td>...</td>\n",
       "      <td>5.946141</td>\n",
       "      <td>101.264741</td>\n",
       "      <td>-0.127951</td>\n",
       "      <td>41.509088</td>\n",
       "      <td>0.320099</td>\n",
       "      <td>28.040325</td>\n",
       "      <td>-0.194499</td>\n",
       "      <td>24.498010</td>\n",
       "      <td>22.437265</td>\n",
       "      <td>7005.341851</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10824 rows × 662 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Genes        hsa-let-7a-2-3p  hsa-let-7a-3p  hsa-let-7a-5p  hsa-let-7b-3p  \\\n",
       "C4_A0F6_01A         0.980776      32.785934    17787.06993       6.725320   \n",
       "CU_A0YO_01A         2.750555      62.595962    20816.36637      11.835721   \n",
       "BT_A0S7_01A         7.085729      64.480134    64187.20949      41.238943   \n",
       "CU_A0YR_01A         0.805977      17.731490    31852.20320      14.104594   \n",
       "BL_A0C8_01A         0.803256      32.397995    36701.57350      18.207138   \n",
       "...                      ...            ...            ...            ...   \n",
       "AG_3584_01A         3.262839      22.251037    23122.91869      20.626534   \n",
       "AG_3599_01A         2.002213      29.156045    17890.75858      28.611334   \n",
       "AG_3583_01A         0.675954      19.786044    16792.88093      22.441202   \n",
       "AG_3598_01A         1.948157      25.893893    23153.80168      41.672361   \n",
       "AG_3586_01A         1.542847      25.602462    16210.18450      14.560834   \n",
       "\n",
       "Genes        hsa-let-7b-5p  hsa-let-7c-3p  hsa-let-7c-5p  hsa-let-7d-3p  \\\n",
       "C4_A0F6_01A    6875.798894       2.241773     598.553468     497.533559   \n",
       "CU_A0YO_01A    6014.963519       6.001211    1424.704101     340.652060   \n",
       "BT_A0S7_01A   34652.048900      20.548614    3788.172416     335.296694   \n",
       "CU_A0YR_01A    7643.480998       0.805977     147.896743     534.362619   \n",
       "BL_A0C8_01A    5972.744450       1.874264     220.627670     380.475629   \n",
       "...                    ...            ...            ...            ...   \n",
       "AG_3584_01A    4760.875509       3.716744     599.077677    1011.271201   \n",
       "AG_3599_01A    5660.590915       0.830712     222.252058     350.019624   \n",
       "AG_3583_01A    7572.840852       1.282126     143.450672     897.205621   \n",
       "AG_3598_01A    6127.299751       3.081652     324.642671     498.491969   \n",
       "AG_3586_01A    6729.794141       0.507704     174.197419     473.362477   \n",
       "\n",
       "Genes        hsa-let-7d-5p  hsa-let-7e-3p  ...  hsa-miR-95-3p  hsa-miR-9-5p  \\\n",
       "C4_A0F6_01A     321.974688      34.046932  ...       0.980776     86.588493   \n",
       "CU_A0YO_01A     168.700702      16.336629  ...       3.334006   1624.244358   \n",
       "BT_A0S7_01A     250.409661      13.321170  ...       3.401150    757.039282   \n",
       "CU_A0YR_01A     385.659900       5.641838  ...       1.208965    686.289247   \n",
       "BL_A0C8_01A     318.357159      16.332874  ...       3.213024     92.106697   \n",
       "...                    ...            ...  ...            ...           ...   \n",
       "AG_3584_01A     146.193566       4.721912  ...      13.372890  10624.689830   \n",
       "AG_3599_01A     125.544726       3.213865  ...      25.697846    188.922382   \n",
       "AG_3583_01A      76.690747       1.497562  ...      15.811903    147.011379   \n",
       "AG_3598_01A     133.758828       4.168296  ...       7.845190    172.341403   \n",
       "AG_3586_01A     100.068910       3.830974  ...       5.946141    101.264741   \n",
       "\n",
       "Genes        hsa-miR-96-3p  hsa-miR-96-5p  hsa-miR-98-3p  hsa-miR-98-5p  \\\n",
       "C4_A0F6_01A       0.280222      93.874256       2.662106     188.869399   \n",
       "CU_A0YO_01A       0.000000      34.506962       1.083552     100.353580   \n",
       "BT_A0S7_01A       0.000000      11.904025       0.850287      76.100729   \n",
       "CU_A0YR_01A       0.000000      24.985281       0.402988     223.658562   \n",
       "BL_A0C8_01A       0.267752      65.866998       1.606512      60.511958   \n",
       "...                    ...            ...            ...            ...   \n",
       "AG_3584_01A       0.301215      26.510692       0.987116      41.168575   \n",
       "AG_3599_01A       0.791547      35.627721       0.467073      27.308137   \n",
       "AG_3583_01A      -0.127951      10.352813       5.508178      51.327181   \n",
       "AG_3598_01A      -0.127951      15.302715       1.541097      22.493043   \n",
       "AG_3586_01A      -0.127951      41.509088       0.320099      28.040325   \n",
       "\n",
       "Genes        hsa-miR-99a-3p  hsa-miR-99a-5p  hsa-miR-99b-3p  hsa-miR-99b-5p  \n",
       "C4_A0F6_01A        0.420332      148.377369      195.034276    78432.081350  \n",
       "CU_A0YO_01A        1.333602      412.749939       23.588092    13279.762530  \n",
       "BT_A0S7_01A        2.125719      694.826582       59.803552    29537.994880  \n",
       "CU_A0YR_01A        0.000000       11.686664       29.015165    13749.158260  \n",
       "BL_A0C8_01A        0.000000       34.004507       56.763430    36226.313650  \n",
       "...                     ...             ...             ...             ...  \n",
       "AG_3584_01A        1.073576      109.400542       14.142076    10462.448760  \n",
       "AG_3599_01A        0.144723       45.617460       15.948184     8293.054733  \n",
       "AG_3583_01A        0.146054       28.874467       22.412294     6427.100677  \n",
       "AG_3598_01A        0.351305       78.511647       26.015522    18854.100830  \n",
       "AG_3586_01A       -0.194499       24.498010       22.437265     7005.341851  \n",
       "\n",
       "[10824 rows x 662 columns]"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f69d39fd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([], dtype='object')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_g.index.intersection(df_p.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "569a6b9f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([], dtype='object')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_g.index.intersection(df_m.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "6113b52a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['TCGA-02-0001-01C-01D-0186-05', 'TCGA-02-0003-01A-01D-0186-05',\n",
       "       'TCGA-02-0006-01B-01D-0186-05', 'TCGA-02-0007-01A-01D-0186-05',\n",
       "       'TCGA-02-0009-01A-01D-0186-05', 'TCGA-02-0010-01A-01D-0186-05',\n",
       "       'TCGA-02-0011-01B-01D-0186-05', 'TCGA-02-0014-01A-01D-0186-05',\n",
       "       'TCGA-02-0021-01A-01D-0186-05', 'TCGA-02-0024-01B-01D-0186-05',\n",
       "       ...\n",
       "       'TCGA-W5-AA2R-11A-11D-A418-05', 'TCGA-W5-AA2U-11A-11D-A418-05',\n",
       "       'TCGA-W5-AA2X-11A-11D-A418-05', 'TCGA-W5-AA30-11A-11D-A418-05',\n",
       "       'TCGA-W5-AA31-11A-11D-A418-05', 'TCGA-W5-AA34-11A-11D-A418-05',\n",
       "       'TCGA-X7-A8D6-11A-22D-A424-05', 'TCGA-X7-A8D7-11A-11D-A424-05',\n",
       "       'TCGA-YB-A89D-11A-11D-A368-05', 'TCGA-ZU-A8S4-11A-11D-A418-05'],\n",
       "      dtype='object', length=12039)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_m.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "3d83cb7b",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "'builtin_function_or_method' object is not iterable",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Input \u001b[0;32mIn [53]\u001b[0m, in \u001b[0;36m<cell line: 5>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m#t_m=['-'.join(x.split('-')[1:4]) for x in df_m.index]\u001b[39;00m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;66;03m#t_g=['-'.join(x.split('-')[1:4]) for x in df_g.index]\u001b[39;00m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;66;03m#t_p=['-'.join(x.split('-')[1:4]) for x in df_p.index]\u001b[39;00m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;66;03m#t_r=['-'.join(x.split('-')[1:4]) for x in df_r.index]\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(df_m\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39mto_list()) \u001b[38;5;241m&\u001b[39m \u001b[38;5;28;43mset\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mt_g\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;241m&\u001b[39m \u001b[38;5;28mset\u001b[39m(t_p\u001b[38;5;241m.\u001b[39mindex) \u001b[38;5;241m&\u001b[39m \u001b[38;5;28mset\u001b[39m(t_r\u001b[38;5;241m.\u001b[39mindex))\n",
      "\u001b[0;31mTypeError\u001b[0m: 'builtin_function_or_method' object is not iterable"
     ]
    }
   ],
   "source": [
    "#t_m=['-'.join(x.split('-')[1:4]) for x in df_m.index]\n",
    "#t_g=['-'.join(x.split('-')[1:4]) for x in df_g.index]\n",
    "#t_p=['-'.join(x.split('-')[1:4]) for x in df_p.index]\n",
    "#t_r=['-'.join(x.split('-')[1:4]) for x in df_r.index]\n",
    "len(set(df_m.index.to_list()) & set(t_g.index) & set(t_p.index) & set(t_r.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "4ecdcc87",
   "metadata": {},
   "outputs": [],
   "source": [
    "joint_idx = sorted(list(set(df_m.index.to_list()) & set(df_g.index.to_list()) & set(df_p.index.to_list()) & set(df_r.index.to_list())))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "2a3864ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_m=df_m.loc[joint_idx].reset_index().drop_duplicates(subset='index', keep='last').set_index('index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "cafa2269",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_g=df_g.loc[joint_idx].reset_index().drop_duplicates(subset='index', keep='last').set_index('index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "62ddb0f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_p=df_p.loc[joint_idx].reset_index().drop_duplicates(subset='index', keep='last').set_index('index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "016717ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_r=df_r.loc[joint_idx].reset_index().drop_duplicates(subset='index', keep='last').set_index('index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "5c03f645",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_m.to_csv('unnorm_m.csv')\n",
    "df_g.to_csv('unnorm_g.csv')\n",
    "df_p.to_csv('unnorm_p.csv')\n",
    "df_r.to_csv('unnorm_r.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "d5f0bff2",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_m=(df_m-df_m.mean())/df_m.std()\n",
    "df_g=(df_g-df_g.mean())/df_g.std()\n",
    "df_p=(df_p-df_p.mean())/df_p.std()\n",
    "df_r=(df_r-df_r.mean())/df_r.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "9e6477f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_m.to_csv('norm_m.csv')\n",
    "df_g.to_csv('norm_g.csv')\n",
    "df_p.to_csv('norm_p.csv')\n",
    "df_r.to_csv('norm_r.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c5a73744",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df_m = pd.read_csv('norm_m.csv', index_col=0)\n",
    "df_g = pd.read_csv('norm_g.csv', index_col=0)\n",
    "df_p = pd.read_csv('norm_p.csv', index_col=0)\n",
    "df_r = pd.read_csv('norm_r.csv', index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "82e6e7cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14.856979902726602\n",
      "83.72562760034862\n",
      "3.70456583272228\n",
      "82.32616281526866\n",
      "-10.858894138207347\n",
      "-2.57937893807929\n",
      "-3.5166882006545452\n",
      "-1.7276260399214667\n"
     ]
    }
   ],
   "source": [
    "print(df_p.max().max())\n",
    "print(df_g.max().max())\n",
    "print(df_m.max().max())\n",
    "print(df_r.max().max())\n",
    "print(df_p.min().min())\n",
    "print(df_g.min().min())\n",
    "print(df_m.min().min())\n",
    "print(df_r.min().min())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "5ede26aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_p = df_p.fillna(-10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "c657e81c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "0\n",
      "0.0\n",
      "0\n"
     ]
    }
   ],
   "source": [
    "print(df_m.isnull().sum().sum())\n",
    "print(df_g.isnull().sum().sum())\n",
    "print(df_p.isnull().sum().sum()/df_p.size)\n",
    "print(df_r.isnull().sum().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "4595248d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generator():\n",
    "    total = df_m.shape[0]\n",
    "    for idx in range(total):\n",
    "        i = df_m.index[idx]\n",
    "        m = df_m.iloc[idx].values.tolist()\n",
    "        g = df_g.iloc[idx].values.tolist()\n",
    "        p = df_p.iloc[idx].values.tolist()\n",
    "        r = df_r.iloc[idx].values.tolist()\n",
    "        yield {\"tcga_index\":i, \"gene\": {\"values\": g},\n",
    "              \"protein\": {\"values\": p},\n",
    "              \"methylation\": {\"values\":m},\n",
    "              \"mirna\": {\"values\": r}}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "5a76f99c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "390cc732",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c56fb3ac7d954d3b8113dbd653d6c9e9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ds = datasets.Dataset.from_generator(generator)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "2dd36cab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ced4f3205ca34c2985c74c29d14137fa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/7017 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ds.save_to_disk('tcga_dataset')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "cc1761bb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gene:800\n",
      "protein:198\n",
      "methylation:800\n",
      "mirna:662\n"
     ]
    }
   ],
   "source": [
    "for col in list(ds[0].keys())[1:]:\n",
    "    print(f\"{col}:{len(ds[0][col]['data'])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "635c25a5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['gene', 'protein', 'methylation', 'mirna']"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(ds[0].keys())[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "9e063b9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7017"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "a5d7dcb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install openpyxl\n",
    "#!ls\n",
    "df=pd.read_excel('TCGA-CDR-SupplementalTableS1.xlsx', index_col=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "b8091451",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['Unnamed: 0', 'type', 'age_at_initial_pathologic_diagnosis', 'gender',\n",
      "       'race', 'ajcc_pathologic_tumor_stage', 'clinical_stage',\n",
      "       'histological_type', 'histological_grade', 'initial_pathologic_dx_year',\n",
      "       'menopause_status', 'birth_days_to', 'vital_status', 'tumor_status',\n",
      "       'last_contact_days_to', 'death_days_to', 'cause_of_death',\n",
      "       'new_tumor_event_type', 'new_tumor_event_site',\n",
      "       'new_tumor_event_site_other', 'new_tumor_event_dx_days_to',\n",
      "       'treatment_outcome_first_course', 'margin_status', 'residual_tumor',\n",
      "       'OS', 'OS.time', 'DSS', 'DSS.time', 'DFI', 'DFI.time', 'PFI',\n",
      "       'PFI.time', 'Redaction'],\n",
      "      dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:ylabel='Frequency'>"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAATW0lEQVR4nO3dfbBcd33f8fcHGfwAdrBr2RWSHNkdhURmEj8IxxTaAk5ixzTYtKUV08RuSqKUmBlImGkkkil0OppxOjwknhQHUVxsAhjxaBVwE9tlwmQGLK6JE0mWVSuxY1+kWErIxA5lDDbf/rG/G2+l1T17rbt39977fs3s7DnfPWf3+5ur8cfnnN+eTVUhSdJsnjfuBiRJk8+wkCR1MiwkSZ0MC0lSJ8NCktTppHE3MCpnn312rVu3btxtSNKict999/1VVa08ur5kw2LdunVMTU2Nuw1JWlSS/MWguqehJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ2W7De4T8S6LV8cy+c+cuPrxvK5ktTFIwtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktRpZGGRZG2SLyfZl2Rvkre1+ruTfDPJ/e1xdd8+W5McSLI/yZV99UuT7G6v3ZQko+pbknSsUd4b6mngHVX1jSSnA/cluau99v6qek//xkk2AJuAC4GXAHcn+aGqega4GdgMfA34EnAVcOcIe5ck9RnZkUVVHaqqb7TlJ4F9wOpZdrkGuL2qnqqqh4EDwGVJVgFnVNVXq6qA24BrR9W3JOlYC3LNIsk64GLg3lZ6a5I/TXJLkjNbbTXwWN9u0622ui0fXR/0OZuTTCWZOnLkyHwOQZKWtZGHRZIXAZ8B3l5VT9A7pfSPgIuAQ8B7ZzYdsHvNUj+2WLW9qjZW1caVK1eeaOuSpGakYZHk+fSC4mNV9VmAqnq8qp6pqu8DHwIua5tPA2v7dl8DHGz1NQPqkqQFMsrZUAE+DOyrqvf11Vf1bfYGYE9b3glsSnJykvOB9cCuqjoEPJnk8vae1wF3jKpvSdKxRjkb6pXAzwG7k9zfau8E3pTkInqnkh4BfgmgqvYm2QE8QG8m1Q1tJhTAW4CPAKfSmwXlTChJWkAjC4uq+iMGX2/40iz7bAO2DahPAS+bv+4kSXPhN7glSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdRhYWSdYm+XKSfUn2Jnlbq5+V5K4kD7XnM/v22ZrkQJL9Sa7sq1+aZHd77aYkGVXfkqRjjfLI4mngHVX1I8DlwA1JNgBbgHuqaj1wT1unvbYJuBC4CvhAkhXtvW4GNgPr2+OqEfYtSTrKyMKiqg5V1Tfa8pPAPmA1cA1wa9vsVuDatnwNcHtVPVVVDwMHgMuSrALOqKqvVlUBt/XtI0laAAtyzSLJOuBi4F7g3Ko6BL1AAc5pm60GHuvbbbrVVrflo+uDPmdzkqkkU0eOHJnXMUjScjbysEjyIuAzwNur6onZNh1Qq1nqxxartlfVxqrauHLlyrk3K0kaaKRhkeT59ILiY1X12VZ+vJ1aoj0fbvVpYG3f7muAg62+ZkBdkrRARjkbKsCHgX1V9b6+l3YC17fl64E7+uqbkpyc5Hx6F7J3tVNVTya5vL3ndX37SJIWwEkjfO9XAj8H7E5yf6u9E7gR2JHkzcCjwBsBqmpvkh3AA/RmUt1QVc+0/d4CfAQ4FbizPSRJC2RkYVFVf8Tg6w0AVxxnn23AtgH1KeBl89edJGku/Aa3JKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2Eh
SepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSp01BhkcRfqZOkZWzYI4vfTbIryS8nefEoG5IkTZ6hwqKqXgX8W2AtMJXk40l+cqSdSZImxtDXLKrqIeA3gF8D/hlwU5IHk/yLUTUnSZoMw16z+NEk7wf2Aa8FfqaqfqQtv3+E/UmSJsBJQ273O8CHgHdW1XdmilV1MMlvjKQzSdLEGDYsrga+U1XPACR5HnBKVf3fqvroyLqTJE2EYa9Z3A2c2rd+WqtJkpaBYcPilKr6u5mVtnzaaFqSJE2aYcPi20kumVlJcinwnVm2lyQtIcNes3g78KkkB9v6KuDfjKQjSdLEGSosqurrSX4YeCkQ4MGq+t5IO5MkTYxhjywAXg6sa/tcnISqum0kXUmSJsqwX8r7KPAe4FX0QuPlwMaOfW5JcjjJnr7au5N8M8n97XF132tbkxxIsj/JlX31S5Psbq/dlCRzHKMk6QQNe2SxEdhQVTWH9/4IvS/zHX308f6qek9/IckGYBNwIfAS4O4kP9S+13EzsBn4GvAl4Crgzjn0IUk6QcPOhtoD/MO5vHFVfQX41pCbXwPcXlVPVdXDwAHgsiSrgDOq6qstqG4Drp1LH5KkEzfskcXZwANJdgFPzRSr6vXP4TPfmuQ6YAp4R1X9DbCa3pHDjOlW+15bPro+UJLN9I5COO+8855Da5KkQYYNi3fP0+fdDPwXoNrze4F/T2+G1dFqlvpAVbUd2A6wcePGuZwykyTNYtips3+Y5AeB9VV1d5LTgBVz/bCqenxmOcmHgC+01Wl6v5UxYw1wsNXXDKhLkhbQsLOhfhH4NPDBVloNfH6uH9auQcx4A71rIQA7gU1JTk5yPrAe2FVVh4Ank1zeZkFdB9wx18+VJJ2YYU9D3QBcBtwLvR9CSnLObDsk+QTwauDsJNPAu4BXJ7mI3qmkR4Bfau+3N8kO4AHgaeCGmTvcAm+hN7PqVHqzoJwJJUkLbNiweKqqvjvzFYckJzHLtQOAqnrTgPKHZ9l+G7BtQH0KeNmQfUqSRmDYqbN/mOSdwKntt7c/BfzP0bUlSZokw4bFFuAIsJveqaMv0fs9bknSMjDsbKjv0/tZ1Q+Nth1J0iQaKiySPMyAaxRVdcG8dyRJmjhzuTfUjFOANwJnzX87kqRJNNQ1i6r6677HN6vqt4DXjrY1SdKkGPY01CV9q8+jd6Rx+kg6kiRNnGFPQ723b/lpel+o+9fz3o0kaSINOxvqNaNuRJI0uYY9DfWrs71eVe+bn3YkSZNoLrOhXk7vhn8APwN8BXhsFE1JkibLXH786JKqehJ6v6UNfKqqfmFUjUmSJsewt/s4D/hu3/p3gXXz3o0kaSINe2TxUWBXks/R+yb3G+j9HrYkaRkYdjbUtiR3Av+klX6+qv54dG1JkibJsKehAE4Dnqiq3wam2y/aSZKWgWF/VvVdwK8BW1vp+cDvjaopSdJkGfbI4g3A64FvA1TVQbzdhyQtG8OGxXerqmi3KU/ywtG1JEmaNMOGxY4kHwRenOQXgbvxh5AkadnonA2VJMAngR8GngBeCvynqrprxL1JkiZEZ1hUVSX5fFVdChgQkrQMDXsa6mtJXj7STiRJE2vYb3C/BvgPSR6hNyMq9A46fnRUjUmSJsesYZHkvKp6FPjpBepHkjSBuo4sPk/vbrN/keQzVfUvF6AnSdKE6bpmkb7lC0bZiCRpcnWFRR1nWZK0jHSdhvqxJE/QO8I4tS3Dsxe4zxhpd5KkiTDrkUVVraiqM6rq9Ko6qS3PrM8aFEluSXI4yZ6+2llJ7kryUHs+s++1rUkOJNmf5Mq++qVJdrfXbmpfEpQkLaC53KJ8rj4CXHVUbQtwT1WtB+5p6yTZAGwCLmz7fCDJirbPzcBmYH17HP2ekqQRG1lYVNVXgG8dVb4GuLUt3wpc21e/vaqeqqqHgQPAZUlWAWdU1VfbjQxv
69tHkrRARnlkMci5VXUIoD2f0+qrgcf6tptutdVt+ei6JGkBLXRYHM+g6xA1S33wmySbk0wlmTpy5Mi8NSdJy91Ch8Xj7dQS7flwq08Da/u2WwMcbPU1A+oDVdX2qtpYVRtXrlw5r41L0nK20GGxE7i+LV8P3NFX35Tk5Pbb3uuBXe1U1ZNJLm+zoK7r20eStECGvZHgnCX5BPBq4Owk08C7gBvp/ZDSm4FHgTcCVNXeJDuAB4CngRuq6pn2Vm+hN7PqVODO9pAkLaCRhUVVvek4L11xnO23AdsG1KeAl81ja5KkOZqUC9ySpAlmWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE4njbsBPWvdli+O7bMfufF1Y/tsSZPPIwtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1GksYZHkkSS7k9yfZKrVzkpyV5KH2vOZfdtvTXIgyf4kV46jZ0lazsZ5ZPGaqrqoqja29S3APVW1HrinrZNkA7AJuBC4CvhAkhXjaFiSlqtJOg11DXBrW74VuLavfntVPVVVDwMHgMsWvj1JWr7GFRYF/EGS+5JsbrVzq+oQQHs+p9VXA4/17TvdasdIsjnJVJKpI0eOjKh1SVp+xnW7j1dW1cEk5wB3JXlwlm0zoFaDNqyq7cB2gI0bNw7cRpI0d2M5sqiqg+35MPA5eqeVHk+yCqA9H26bTwNr+3ZfAxxcuG4lSQseFklemOT0mWXgp4A9wE7g+rbZ9cAdbXknsCnJyUnOB9YDuxa2a0la3sZxGupc4HNJZj7/41X1v5J8HdiR5M3Ao8AbAapqb5IdwAPA08ANVfXMGPqWpGVrwcOiqv4c+LEB9b8GrjjOPtuAbSNuTZJ0HJM0dVaSNKEMC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUacF/g1uTad2WL47lcx+58XVj+VxJc+ORhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkTn4pT2M1ri8Dgl8IlObCIwtJUqdFExZJrkqyP8mBJFvG3Y8kLSeL4jRUkhXAfwN+EpgGvp5kZ1U9MN7OtJh5PyxpeIsiLIDLgANV9ecASW4HrgEMCy0647xOMy4G5OK3WMJiNfBY3/o08ONHb5RkM7C5rf5dkv3P8fPOBv7qOe67GDi+xW3RjS+/OafNF9345mjSx/eDg4qLJSwyoFbHFKq2A9tP+MOSqaraeKLvM6kc3+Lm+Ba3xTq+xXKBexpY27e+Bjg4pl4kadlZLGHxdWB9kvOTvADYBOwcc0+StGwsitNQVfV0krcCvw+sAG6pqr0j/MgTPpU14Rzf4ub4FrdFOb5UHXPqX5Kk/89iOQ0lSRojw0KS1Mmw6LNYbymSZG2SLyfZl2Rvkre1+llJ7kryUHs+s2+frW2c+5Nc2Ve/NMnu9tpNSQZNWx6LJCuS/HGSL7T1JTO+JC9O8ukkD7a/4yuW2Ph+pf3b3JPkE0lOWczjS3JLksNJ9vTV5m08SU5O8slWvzfJugUd4CBV5aN33WYF8GfABcALgD8BNoy7ryF7XwVc0pZPB/4PsAH4r8CWVt8C/GZb3tDGdzJwfhv3ivbaLuAV9L7bcifw0+MeX984fxX4OPCFtr5kxgfcCvxCW34B8OKlMj56X6p9GDi1re8A/t1iHh/wT4FLgD19tXkbD/DLwO+25U3AJ8f+dxx3A5PyaH+w3+9b3wpsHXdfz3Esd9C7j9Z+YFWrrQL2DxobvVlmr2jbPNhXfxPwwXGPp/WyBrgHeC3PhsWSGB9wRvuPaY6qL5XxzdyB4Sx6MzC/APzU
Yh8fsO6osJi38cxs05ZPoveN74xqLMM8PA31rEG3FFk9pl6es3a4ejFwL3BuVR0CaM/ntM2ON9bVbfno+iT4LeA/At/vqy2V8V0AHAH+RzvN9t+TvJAlMr6q+ibwHuBR4BDwt1X1ByyR8fWZz/H8/T5V9TTwt8A/GFnnQzAsnjXULUUmWZIXAZ8B3l5VT8y26YBazVIfqyT/HDhcVfcNu8uA2sSOj97/OV4C3FxVFwPfpnca43gW1fjauftr6J2CeQnwwiQ/O9suA2oTO74hPJfxTNxYDYtnLepbiiR5Pr2g+FhVfbaVH0+yqr2+Cjjc6scb63RbPro+bq8EXp/kEeB24LVJfo+lM75pYLqq7m3rn6YXHktlfD8BPFxVR6rqe8BngX/M0hnfjPkcz9/vk+Qk4AeAb42s8yEYFs9atLcUaTMoPgzsq6r39b20E7i+LV9P71rGTH1Tm3FxPrAe2NUOnZ9Mcnl7z+v69hmbqtpaVWuqah29v8v/rqqfZemM7y+Bx5K8tJWuoHf7/SUxPnqnny5Pclrr6wpgH0tnfDPmczz97/Wv6P2bH+9R1DgvmEzaA7ia3kyiPwN+fdz9zKHvV9E7RP1T4P72uJreOc57gIfa81l9+/x6G+d++maUABuBPe2132HMF9UGjPXVPHuBe8mMD7gImGp/w88DZy6x8f1n4MHW20fpzQxatOMDPkHv+sv36B0FvHk+xwOcAnwKOEBvxtQF4/4bersPSVInT0NJkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSp0/8DGzBLb7lnNT4AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(df.columns)\n",
    "df.death_days_to.plot.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "bf2bf117",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 754.,   nan,   53., ..., 1089., 4269., 2553.])"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['DFI.time'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "8370be2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.index = df.index.map(lambda x: \"_\".join(x.split('-')[1:]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "1442902e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'04_1348_01A'"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ds[0]['tcga_index']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "bcc275e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "du={idx:name for idx,name in enumerate(df['type'].unique())}\n",
    "dud = {v:k for k,v in du.items()}\n",
    "pd.DataFrame.from_dict(du, orient=\"index\").to_csv(\"cancer_type.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "59893048",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ACC': 0,\n",
       " 'BLCA': 1,\n",
       " 'BRCA': 2,\n",
       " 'CESC': 3,\n",
       " 'CHOL': 4,\n",
       " 'COAD': 5,\n",
       " 'DLBC': 6,\n",
       " 'ESCA': 7,\n",
       " 'GBM': 8,\n",
       " 'HNSC': 9,\n",
       " 'KICH': 10,\n",
       " 'KIRC': 11,\n",
       " 'KIRP': 12,\n",
       " 'LAML': 13,\n",
       " 'LGG': 14,\n",
       " 'LIHC': 15,\n",
       " 'LUAD': 16,\n",
       " 'LUSC': 17,\n",
       " 'MESO': 18,\n",
       " 'OV': 19,\n",
       " 'PAAD': 20,\n",
       " 'PCPG': 21,\n",
       " 'PRAD': 22,\n",
       " 'READ': 23,\n",
       " 'SARC': 24,\n",
       " 'SKCM': 25,\n",
       " 'STAD': 26,\n",
       " 'TGCT': 27,\n",
       " 'THCA': 28,\n",
       " 'THYM': 29,\n",
       " 'UCEC': 30,\n",
       " 'UCS': 31,\n",
       " 'UVM': 32}"
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "id": "49a23582",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "62cca13d976a48f99ef1805a48f580e4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/7017 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import numpy as np\n",
    "def map_labels(batch):\n",
    "    #print(batch)\n",
    "    match_labels = \"_\".join(batch['tcga_index'].split('_')[:-1]) #for x in batch['tcga_index']\n",
    "    y1 = df.type.get(match_labels, None) #df.loc[match_labels]['type']# for x in match_labels]\n",
    "    if y1:\n",
    "        x = np.zeros(len(dud))\n",
    "        x[dud[y1]]=1\n",
    "        y1=x.tolist() #dud[y1]\n",
    "    else:\n",
    "        y1=np.zeros(len(dud))\n",
    "    y2 = df.death_days_to.get(match_labels, None)\n",
    "    return {\"Labels\": {\"data\": y1}}#, \"ttd\": y2}}\n",
    "    #return y\n",
    "ds=ds.map(map_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "08ccd285",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len([x for x in ds['cancer_type'] if x ==\"NaN\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "id": "01e1c3b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = ds.rename_column('cancer_type',\"Labels\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "id": "6a49d071",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6ce4fae95f7b4797b2403f7765c5696d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/7017 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ds.save_to_disk('/shared/tcga_dataset')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "a6880219",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['tcga_index', 'gene', 'protein', 'methylation', 'mirna', 'Labels', 'ttd'],\n",
       "    num_rows: 7017\n",
       "})"
      ]
     },
     "execution_count": 177,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "74331193",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'cancer_type': 19, 'ttd': 1483.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1348.0},\n",
       " {'cancer_type': 19, 'ttd': 1720.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 121.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 303.0},\n",
       " {'cancer_type': 16, 'ttd': 731.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 244.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 274.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 457.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 275.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 2717.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1069.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 637.0},\n",
       " {'cancer_type': 19, 'ttd': 1123.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1399.0},\n",
       " {'cancer_type': 19, 'ttd': 2182.0},\n",
       " {'cancer_type': 19, 'ttd': 2012.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1484.0},\n",
       " {'cancer_type': 19, 'ttd': 868.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 531.0},\n",
       " {'cancer_type': 19, 'ttd': 2648.0},\n",
       " {'cancer_type': 19, 'ttd': 2553.0},\n",
       " {'cancer_type': 19, 'ttd': 2012.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1039.0},\n",
       " {'cancer_type': 19, 'ttd': 1993.0},\n",
       " {'cancer_type': 19, 'ttd': 1650.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 965.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 962.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 329.0},\n",
       " {'cancer_type': 17, 'ttd': 669.0},\n",
       " {'cancer_type': 17, 'ttd': 53.0},\n",
       " {'cancer_type': 17, 'ttd': 623.0},\n",
       " {'cancer_type': 17, 'ttd': 17.0},\n",
       " {'cancer_type': 17, 'ttd': 974.0},\n",
       " {'cancer_type': 17, 'ttd': 587.0},\n",
       " {'cancer_type': 17, 'ttd': 291.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 1975.0},\n",
       " {'cancer_type': 17, 'ttd': 1933.0},\n",
       " {'cancer_type': 17, 'ttd': 445.0},\n",
       " {'cancer_type': 17, 'ttd': 1346.0},\n",
       " {'cancer_type': 17, 'ttd': 24.0},\n",
       " {'cancer_type': 17, 'ttd': 2625.0},\n",
       " {'cancer_type': 17, 'ttd': 2170.0},\n",
       " {'cancer_type': 17, 'ttd': 357.0},\n",
       " {'cancer_type': 17, 'ttd': 573.0},\n",
       " {'cancer_type': 17, 'ttd': 916.0},\n",
       " {'cancer_type': 17, 'ttd': 1912.0},\n",
       " {'cancer_type': 17, 'ttd': 1713.0},\n",
       " {'cancer_type': 17, 'ttd': 506.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1562.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1189.0},\n",
       " {'cancer_type': 19, 'ttd': 1018.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 2342.0},\n",
       " {'cancer_type': 19, 'ttd': 1646.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 863.0},\n",
       " {'cancer_type': 19, 'ttd': 568.0},\n",
       " {'cancer_type': 19, 'ttd': 1324.0},\n",
       " {'cancer_type': 19, 'ttd': 260.0},\n",
       " {'cancer_type': 19, 'ttd': 2218.0},\n",
       " {'cancer_type': 19, 'ttd': 3224.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 676.0},\n",
       " {'cancer_type': 19, 'ttd': 820.0},\n",
       " {'cancer_type': 19, 'ttd': 1746.0},\n",
       " {'cancer_type': 19, 'ttd': 1955.0},\n",
       " {'cancer_type': 19, 'ttd': 493.0},\n",
       " {'cancer_type': 19, 'ttd': 1721.0},\n",
       " {'cancer_type': 19, 'ttd': 1249.0},\n",
       " {'cancer_type': 19, 'ttd': 1579.0},\n",
       " {'cancer_type': 19, 'ttd': 1259.0},\n",
       " {'cancer_type': 19, 'ttd': 1767.0},\n",
       " {'cancer_type': 19, 'ttd': 2692.0},\n",
       " {'cancer_type': 19, 'ttd': 2148.0},\n",
       " {'cancer_type': 19, 'ttd': 1213.0},\n",
       " {'cancer_type': 19, 'ttd': 594.0},\n",
       " {'cancer_type': 19, 'ttd': 1341.0},\n",
       " {'cancer_type': 19, 'ttd': 1384.0},\n",
       " {'cancer_type': 19, 'ttd': 1451.0},\n",
       " {'cancer_type': 19, 'ttd': 787.0},\n",
       " {'cancer_type': 19, 'ttd': 524.0},\n",
       " {'cancer_type': 19, 'ttd': 2742.0},\n",
       " {'cancer_type': 19, 'ttd': 2688.0},\n",
       " {'cancer_type': 19, 'ttd': 1163.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 919.0},\n",
       " {'cancer_type': 19, 'ttd': 2467.0},\n",
       " {'cancer_type': 19, 'ttd': 4624.0},\n",
       " {'cancer_type': 19, 'ttd': 1364.0},\n",
       " {'cancer_type': 19, 'ttd': 1769.0},\n",
       " {'cancer_type': 19, 'ttd': 1059.0},\n",
       " {'cancer_type': 19, 'ttd': 3337.0},\n",
       " {'cancer_type': 19, 'ttd': 562.0},\n",
       " {'cancer_type': 19, 'ttd': 1354.0},\n",
       " {'cancer_type': 19, 'ttd': 1736.0},\n",
       " {'cancer_type': 19, 'ttd': 24.0},\n",
       " {'cancer_type': 19, 'ttd': 11.0},\n",
       " {'cancer_type': 19, 'ttd': 1446.0},\n",
       " {'cancer_type': 19, 'ttd': 962.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 25.0},\n",
       " {'cancer_type': 19, 'ttd': 2049.0},\n",
       " {'cancer_type': 19, 'ttd': 1102.0},\n",
       " {'cancer_type': 19, 'ttd': 506.0},\n",
       " {'cancer_type': 19, 'ttd': 1699.0},\n",
       " {'cancer_type': 19, 'ttd': 1620.0},\n",
       " {'cancer_type': 19, 'ttd': 31.0},\n",
       " {'cancer_type': 19, 'ttd': 820.0},\n",
       " {'cancer_type': 19, 'ttd': 1004.0},\n",
       " {'cancer_type': 19, 'ttd': 1583.0},\n",
       " {'cancer_type': 19, 'ttd': 1279.0},\n",
       " {'cancer_type': 19, 'ttd': 1064.0},\n",
       " {'cancer_type': 19, 'ttd': 1977.0},\n",
       " {'cancer_type': 19, 'ttd': 1155.0},\n",
       " {'cancer_type': 19, 'ttd': 1033.0},\n",
       " {'cancer_type': 19, 'ttd': 91.0},\n",
       " {'cancer_type': 19, 'ttd': 395.0},\n",
       " {'cancer_type': 19, 'ttd': 1249.0},\n",
       " {'cancer_type': 19, 'ttd': 2009.0},\n",
       " {'cancer_type': 19, 'ttd': 457.0},\n",
       " {'cancer_type': 19, 'ttd': 565.0},\n",
       " {'cancer_type': 19, 'ttd': 840.0},\n",
       " {'cancer_type': 19, 'ttd': 518.0},\n",
       " {'cancer_type': 19, 'ttd': 394.0},\n",
       " {'cancer_type': 19, 'ttd': 627.0},\n",
       " {'cancer_type': 19, 'ttd': 1162.0},\n",
       " {'cancer_type': 19, 'ttd': 9.0},\n",
       " {'cancer_type': 19, 'ttd': 1799.0},\n",
       " {'cancer_type': 19, 'ttd': 1891.0},\n",
       " {'cancer_type': 19, 'ttd': 1583.0},\n",
       " {'cancer_type': 19, 'ttd': 396.0},\n",
       " {'cancer_type': 19, 'ttd': 1492.0},\n",
       " {'cancer_type': 19, 'ttd': 1157.0},\n",
       " {'cancer_type': 19, 'ttd': 92.0},\n",
       " {'cancer_type': 19, 'ttd': 365.0},\n",
       " {'cancer_type': 19, 'ttd': 1369.0},\n",
       " {'cancer_type': 19, 'ttd': 608.0},\n",
       " {'cancer_type': 19, 'ttd': 1278.0},\n",
       " {'cancer_type': 19, 'ttd': 90.0},\n",
       " {'cancer_type': 19, 'ttd': 883.0},\n",
       " {'cancer_type': 19, 'ttd': 821.0},\n",
       " {'cancer_type': 19, 'ttd': 2400.0},\n",
       " {'cancer_type': 19, 'ttd': 1448.0},\n",
       " {'cancer_type': 19, 'ttd': 1470.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1187.0},\n",
       " {'cancer_type': 19, 'ttd': 1229.0},\n",
       " {'cancer_type': 19, 'ttd': 1032.0},\n",
       " {'cancer_type': 19, 'ttd': 949.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1106.0},\n",
       " {'cancer_type': 19, 'ttd': 515.0},\n",
       " {'cancer_type': 19, 'ttd': 728.0},\n",
       " {'cancer_type': 19, 'ttd': 1815.0},\n",
       " {'cancer_type': 19, 'ttd': 555.0},\n",
       " {'cancer_type': 19, 'ttd': 951.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 528.0},\n",
       " {'cancer_type': 19, 'ttd': 2634.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1104.0},\n",
       " {'cancer_type': 19, 'ttd': 2621.0},\n",
       " {'cancer_type': 19, 'ttd': 2621.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1088.0},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': 734.0},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': 3183.0},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': 618.0},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': 6972.0},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': 7, 'ttd': 784.0},\n",
       " {'cancer_type': 7, 'ttd': 435.0},\n",
       " {'cancer_type': 7, 'ttd': 180.0},\n",
       " {'cancer_type': 7, 'ttd': 128.0},\n",
       " {'cancer_type': 7, 'ttd': 987.0},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': 627.0},\n",
       " {'cancer_type': 20, 'ttd': 607.0},\n",
       " {'cancer_type': 20, 'ttd': 691.0},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': 143.0},\n",
       " {'cancer_type': 20, 'ttd': 103.0},\n",
       " {'cancer_type': 20, 'ttd': 394.0},\n",
       " {'cancer_type': 20, 'ttd': 292.0},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 3, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': 1771.0},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': 139.0},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1579.0},\n",
       " {'cancer_type': 19, 'ttd': 75.0},\n",
       " {'cancer_type': 19, 'ttd': 1366.0},\n",
       " {'cancer_type': 19, 'ttd': 1058.0},\n",
       " {'cancer_type': 19, 'ttd': 914.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 3924.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 5287.0},\n",
       " {'cancer_type': 17, 'ttd': 3149.0},\n",
       " {'cancer_type': 17, 'ttd': 428.0},\n",
       " {'cancer_type': 17, 'ttd': 47.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 143.0},\n",
       " {'cancer_type': 17, 'ttd': 9.0},\n",
       " {'cancer_type': 17, 'ttd': 211.0},\n",
       " {'cancer_type': 17, 'ttd': 3376.0},\n",
       " {'cancer_type': 17, 'ttd': 1344.0},\n",
       " {'cancer_type': 17, 'ttd': 3600.0},\n",
       " {'cancer_type': 17, 'ttd': 80.0},\n",
       " {'cancer_type': 17, 'ttd': 1000.0},\n",
       " {'cancer_type': 17, 'ttd': 1984.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 276.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 515.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 151.0},\n",
       " {'cancer_type': 17, 'ttd': 34.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 123.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 695.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 737.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 1143.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1147.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 2086.0},\n",
       " {'cancer_type': 17, 'ttd': 1679.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 52.0},\n",
       " {'cancer_type': 17, 'ttd': 740.0},\n",
       " {'cancer_type': 17, 'ttd': 59.0},\n",
       " {'cancer_type': 17, 'ttd': 1841.0},\n",
       " {'cancer_type': 17, 'ttd': 1107.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 519.0},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': 224.0},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': 458.0},\n",
       " {'cancer_type': 20, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': 308.0},\n",
       " {'cancer_type': 24, 'ttd': nan},\n",
       " {'cancer_type': 24, 'ttd': nan},\n",
       " {'cancer_type': 24, 'ttd': 599.0},\n",
       " {'cancer_type': 24, 'ttd': 437.0},\n",
       " {'cancer_type': 24, 'ttd': 567.0},\n",
       " {'cancer_type': 24, 'ttd': nan},\n",
       " {'cancer_type': 2, 'ttd': nan},\n",
       " {'cancer_type': 2, 'ttd': nan},\n",
       " {'cancer_type': 20, 'ttd': 2182.0},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': 2488.0},\n",
       " {'cancer_type': 29, 'ttd': 775.0},\n",
       " {'cancer_type': 29, 'ttd': 2910.0},\n",
       " {'cancer_type': 18, 'ttd': 1715.0},\n",
       " {'cancer_type': 18, 'ttd': 361.0},\n",
       " {'cancer_type': 18, 'ttd': 885.0},\n",
       " {'cancer_type': 18, 'ttd': 1302.0},\n",
       " {'cancer_type': 18, 'ttd': 385.0},\n",
       " {'cancer_type': 18, 'ttd': 414.0},\n",
       " {'cancer_type': 18, 'ttd': 826.0},\n",
       " {'cancer_type': 5, 'ttd': nan},\n",
       " {'cancer_type': 26, 'ttd': nan},\n",
       " {'cancer_type': 25, 'ttd': nan},\n",
       " {'cancer_type': 25, 'ttd': 395.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 24, 'ttd': 17.0},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 18, 'ttd': 142.0},\n",
       " {'cancer_type': 18, 'ttd': nan},\n",
       " {'cancer_type': 18, 'ttd': 536.0},\n",
       " {'cancer_type': 18, 'ttd': 630.0},\n",
       " {'cancer_type': 18, 'ttd': 1156.0},\n",
       " {'cancer_type': 18, 'ttd': 860.0},\n",
       " {'cancer_type': 18, 'ttd': 741.0},\n",
       " {'cancer_type': 4, 'ttd': 339.0},\n",
       " {'cancer_type': 4, 'ttd': 445.0},\n",
       " {'cancer_type': 4, 'ttd': nan},\n",
       " {'cancer_type': 11, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 684.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 1190.0},\n",
       " {'cancer_type': 17, 'ttd': 559.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 166.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 2378.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 89.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 761.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1026.0},\n",
       " {'cancer_type': 16, 'ttd': 808.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 987.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 500.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 557.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 574.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 409.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 2318.0},\n",
       " {'cancer_type': 16, 'ttd': 855.0},\n",
       " {'cancer_type': 16, 'ttd': 869.0},\n",
       " {'cancer_type': 16, 'ttd': 1081.0},\n",
       " {'cancer_type': 16, 'ttd': 1421.0},\n",
       " {'cancer_type': 16, 'ttd': 428.0},\n",
       " {'cancer_type': 16, 'ttd': 999.0},\n",
       " {'cancer_type': 16, 'ttd': 268.0},\n",
       " {'cancer_type': 16, 'ttd': 896.0},\n",
       " {'cancer_type': 16, 'ttd': 905.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 488.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 677.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 879.0},\n",
       " {'cancer_type': 16, 'ttd': 260.0},\n",
       " {'cancer_type': 16, 'ttd': 1229.0},\n",
       " {'cancer_type': 16, 'ttd': 1135.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 300.0},\n",
       " {'cancer_type': 28, 'ttd': nan},\n",
       " {'cancer_type': 30, 'ttd': nan},\n",
       " {'cancer_type': 4, 'ttd': nan},\n",
       " {'cancer_type': 2, 'ttd': nan},\n",
       " {'cancer_type': 3, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 27, 'ttd': nan},\n",
       " {'cancer_type': 22, 'ttd': nan},\n",
       " {'cancer_type': 5, 'ttd': nan},\n",
       " {'cancer_type': 5, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': 510.0},\n",
       " {'cancer_type': 1, 'ttd': 522.0},\n",
       " {'cancer_type': 1, 'ttd': 1064.0},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': 19.0},\n",
       " {'cancer_type': 1, 'ttd': 1270.0},\n",
       " {'cancer_type': 1, 'ttd': 1556.0},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': 311.0},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 1, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 624.0},\n",
       " {'cancer_type': 16, 'ttd': 2174.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 478.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1499.0},\n",
       " {'cancer_type': 16, 'ttd': 250.0},\n",
       " {'cancer_type': 16, 'ttd': 282.0},\n",
       " {'cancer_type': 16, 'ttd': 434.0},\n",
       " {'cancer_type': 16, 'ttd': 1235.0},\n",
       " {'cancer_type': 16, 'ttd': 2393.0},\n",
       " {'cancer_type': 16, 'ttd': 257.0},\n",
       " {'cancer_type': 16, 'ttd': 460.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1288.0},\n",
       " {'cancer_type': 16, 'ttd': 119.0},\n",
       " {'cancer_type': 16, 'ttd': 777.0},\n",
       " {'cancer_type': 16, 'ttd': 336.0},\n",
       " {'cancer_type': 16, 'ttd': 370.0},\n",
       " {'cancer_type': 16, 'ttd': 189.0},\n",
       " {'cancer_type': 16, 'ttd': 1268.0},\n",
       " {'cancer_type': 16, 'ttd': 22.0},\n",
       " {'cancer_type': 16, 'ttd': 308.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 1856.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 266.0},\n",
       " {'cancer_type': 17, 'ttd': 835.0},\n",
       " {'cancer_type': 16, 'ttd': 1043.0},\n",
       " {'cancer_type': 16, 'ttd': 929.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 701.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 171.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 464.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 176.0},\n",
       " {'cancer_type': 16, 'ttd': 237.0},\n",
       " {'cancer_type': 16, 'ttd': 1379.0},\n",
       " {'cancer_type': 16, 'ttd': 995.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 995.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 375.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 562.0},\n",
       " {'cancer_type': 17, 'ttd': 198.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 1.0},\n",
       " {'cancer_type': 17, 'ttd': 412.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 501.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 1045.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 286.0},\n",
       " {'cancer_type': 19, 'ttd': 1046.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 624.0},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': 5, 'ttd': 290.0},\n",
       " {'cancer_type': None, 'ttd': None},\n",
       " {'cancer_type': 1, 'ttd': 76.0},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': 1508.0},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 12, 'ttd': nan},\n",
       " {'cancer_type': 30, 'ttd': nan},\n",
       " {'cancer_type': 2, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 29, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 372.0},\n",
       " {'cancer_type': 17, 'ttd': 311.0},\n",
       " {'cancer_type': 17, 'ttd': 2945.0},\n",
       " {'cancer_type': 17, 'ttd': 1154.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 637.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1484.0},\n",
       " {'cancer_type': 19, 'ttd': 1089.0},\n",
       " {'cancer_type': 19, 'ttd': 74.0},\n",
       " {'cancer_type': 19, 'ttd': 1024.0},\n",
       " {'cancer_type': 19, 'ttd': 1329.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 479.0},\n",
       " {'cancer_type': 19, 'ttd': 1161.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1875.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 1688.0},\n",
       " {'cancer_type': 19, 'ttd': 197.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 629.0},\n",
       " {'cancer_type': 19, 'ttd': nan},\n",
       " {'cancer_type': 19, 'ttd': 676.0},\n",
       " {'cancer_type': 16, 'ttd': 139.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 444.0},\n",
       " {'cancer_type': 16, 'ttd': 1454.0},\n",
       " {'cancer_type': 16, 'ttd': 594.0},\n",
       " {'cancer_type': 16, 'ttd': 1725.0},\n",
       " {'cancer_type': 16, 'ttd': 1653.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 456.0},\n",
       " {'cancer_type': 17, 'ttd': 345.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 628.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1167.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 516.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 365.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 153.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 11, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 210.0},\n",
       " {'cancer_type': 16, 'ttd': 1600.0},\n",
       " {'cancer_type': 16, 'ttd': 711.0},\n",
       " {'cancer_type': 16, 'ttd': 922.0},\n",
       " {'cancer_type': 16, 'ttd': 281.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1531.0},\n",
       " {'cancer_type': 16, 'ttd': 340.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 2027.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1516.0},\n",
       " {'cancer_type': 16, 'ttd': 1115.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 899.0},\n",
       " {'cancer_type': 17, 'ttd': 2224.0},\n",
       " {'cancer_type': 17, 'ttd': 2284.0},\n",
       " {'cancer_type': 17, 'ttd': 708.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 2133.0},\n",
       " {'cancer_type': 17, 'ttd': 3253.0},\n",
       " {'cancer_type': 17, 'ttd': 5.0},\n",
       " {'cancer_type': 17, 'ttd': 1423.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 803.0},\n",
       " {'cancer_type': 17, 'ttd': 1655.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 3838.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 678.0},\n",
       " {'cancer_type': 17, 'ttd': 840.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 4961.0},\n",
       " {'cancer_type': 16, 'ttd': 826.0},\n",
       " {'cancer_type': 16, 'ttd': 173.0},\n",
       " {'cancer_type': 16, 'ttd': 586.0},\n",
       " {'cancer_type': 16, 'ttd': 626.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 666.0},\n",
       " {'cancer_type': 16, 'ttd': 1215.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 593.0},\n",
       " {'cancer_type': 16, 'ttd': 1171.0},\n",
       " {'cancer_type': 16, 'ttd': 976.0},\n",
       " {'cancer_type': 16, 'ttd': 179.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 697.0},\n",
       " {'cancer_type': 16, 'ttd': 291.0},\n",
       " {'cancer_type': 16, 'ttd': 3169.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 258.0},\n",
       " {'cancer_type': 16, 'ttd': 2681.0},\n",
       " {'cancer_type': 16, 'ttd': 949.0},\n",
       " {'cancer_type': 16, 'ttd': 244.0},\n",
       " {'cancer_type': 16, 'ttd': 1622.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1197.0},\n",
       " {'cancer_type': 16, 'ttd': 321.0},\n",
       " {'cancer_type': 16, 'ttd': 1528.0},\n",
       " {'cancer_type': 16, 'ttd': 3361.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 294.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 195.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 1001.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 23.0},\n",
       " {'cancer_type': 17, 'ttd': 402.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 161.0},\n",
       " {'cancer_type': 17, 'ttd': 94.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 550.0},\n",
       " {'cancer_type': 17, 'ttd': 687.0},\n",
       " {'cancer_type': 17, 'ttd': 921.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 604.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 927.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 455.0},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 376.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1046.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 124.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 694.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 444.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 116.0},\n",
       " {'cancer_type': 16, 'ttd': 737.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': nan},\n",
       " {'cancer_type': 17, 'ttd': 322.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 1258.0},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': nan},\n",
       " {'cancer_type': 16, 'ttd': 434.0},\n",
       " ...]"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "id": "73aa44ed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['Labels', 'COVAREP', 'FACET', 'OpenFace', 'glove_vectors'],\n",
       "    num_rows: 23248\n",
       "})"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datasets.load_from_disk('/shared/cmu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "id": "70c8555d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2324.8"
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "23248/10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fd419fe",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dti",
   "language": "python",
   "name": "dti"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
