{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import posixpath\n",
    "from urllib.parse import urlsplit\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "import torch\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_file(url, local_filename=None, timeout=30):\n",
    "    \"\"\"Stream ``url`` to a local file and return the path that was written.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url : str\n",
    "        Address of the resource to fetch.\n",
    "    local_filename : str, optional\n",
    "        Destination path. When omitted, the last component of the URL path is\n",
    "        used (query string and fragment excluded), relative to the current\n",
    "        working directory.\n",
    "    timeout : float or tuple, optional\n",
    "        Forwarded to ``requests.get``. Defaults to 30 seconds.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The path the response body was written to.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If no destination is given and the URL path has no final component.\n",
    "    requests.HTTPError\n",
    "        If the server answers with an error status.\n",
    "    \"\"\"\n",
    "    if local_filename is None:\n",
    "        # Use only the path component so '?query' and '#fragment' never end up in the file name.\n",
    "        local_filename = posixpath.basename(urlsplit(url).path)\n",
    "    if not local_filename:\n",
    "        raise ValueError(f'cannot derive a file name from URL: {url!r}')\n",
    "    # stream=True avoids loading the whole body into memory at once.\n",
    "    # requests applies no timeout unless one is passed, so a stalled server would block forever.\n",
    "    with requests.get(url, stream=True, timeout=timeout) as r:\n",
    "        r.raise_for_status()\n",
    "        with open(local_filename, 'wb') as f:\n",
    "            for chunk in r.iter_content(chunk_size=8192):\n",
    "                f.write(chunk)\n",
    "    return local_filename"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Saves the CSV in the working directory as 'climdiv_state_year.csv' (the name is taken from the URL).\n",
    "# NOTE(review): the next cell reads 'climdiv-tmpcst-v1.0.0-20200106', which this call does not create -- confirm how that file is obtained.\n",
    "download_file(\"https://github.com/washingtonpost/data-2C-beyond-the-limit-usa/raw/main/data/processed/climdiv_state_year.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fixed-width layout: five short code fields followed by twelve 7-character monthly values.\n",
    "field_widths = [1, 2, 1, 2, 4] + [7] * 12\n",
    "field_names = [\n",
    "    \"dont_care\", \"noaa_state_order\", \"divisional_number\", \"code\", \"year\",\n",
    "    \"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
    "    \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\",\n",
    "]\n",
    "df = pd.read_fwf('climdiv-tmpcst-v1.0.0-20200106', widths=field_widths, names=field_names)\n",
    "\n",
    "# Keep rows whose leading digit is 0 and whose state-order code is at most 48\n",
    "# (presumably the 48 contiguous states -- TODO confirm against the NOAA code table).\n",
    "keep_row = (df['dont_care'] == 0) & (df['noaa_state_order'] <= 48)\n",
    "new_df = df.loc[keep_row]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1,\n",
       " 2,\n",
       " 3,\n",
       " 4,\n",
       " 5,\n",
       " 6,\n",
       " 7,\n",
       " 8,\n",
       " 9,\n",
       " 10,\n",
       " 11,\n",
       " 12,\n",
       " 13,\n",
       " 14,\n",
       " 15,\n",
       " 16,\n",
       " 17,\n",
       " 18,\n",
       " 19,\n",
       " 20,\n",
       " 21,\n",
       " 22,\n",
       " 23,\n",
       " 24,\n",
       " 25,\n",
       " 26,\n",
       " 27,\n",
       " 28,\n",
       " 29,\n",
       " 30,\n",
       " 31,\n",
       " 32,\n",
       " 33,\n",
       " 34,\n",
       " 35,\n",
       " 36,\n",
       " 37,\n",
       " 38,\n",
       " 39,\n",
       " 40,\n",
       " 41,\n",
       " 42,\n",
       " 43,\n",
       " 44,\n",
       " 45,\n",
       " 46,\n",
       " 47,\n",
       " 48}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct state-order codes; each one becomes a separate domain further down.\n",
    "# Despite the variable name, the values come from the 'noaa_state_order' column.\n",
    "fips = set(new_df['noaa_state_order'])\n",
    "# Distinct calendar years present; the column is named 'year' in the read_fwf names list.\n",
    "years = set(new_df['year'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[43.1, 37.4, 54.5, ..., 59.7, 53.2, 44.9],\n",
       "       [43.5, 47.7, 52.5, ..., 63.2, 57.3, 46.4],\n",
       "       [41.8, 51.1, 60.2, ..., 67.1, 54.2, 47.4],\n",
       "       ...,\n",
       "       [52.4, 55.6, 58.4, ..., 65.8, 55.6, 46.9],\n",
       "       [40.4, 58. , 55.4, ..., 67.4, 50.4, 49. ],\n",
       "       [46.6, 56.1, 55. , ..., 67.8, 50.6, 51.3]])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the monthly values for state-order code 1 as a plain NumPy array.\n",
    "new_df.loc[\n",
    "    new_df['noaa_state_order'] == 1,  # rows for that single code\n",
    "    \"Jan\":,                           # every column from Jan through the last one\n",
    "].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first N_INPUT_MONTHS monthly columns form the first tensor of each dataset; the remaining ones form the second.\n",
    "N_INPUT_MONTHS = 6\n",
    "\n",
    "domain_datasets = []\n",
    "# sorted() pins the list order to ascending state-order code; iterating the set directly has no guaranteed order.\n",
    "for domain in sorted(fips):\n",
    "    monthly = new_df.loc[new_df['noaa_state_order'] == domain, \"Jan\":].to_numpy()\n",
    "    # Explicit float32 tensor instead of the legacy torch.FloatTensor constructor.\n",
    "    monthly_tensor = torch.tensor(monthly, dtype=torch.float32)\n",
    "    domain_datasets.append(\n",
    "        torch.utils.data.TensorDataset(\n",
    "            monthly_tensor[:, :N_INPUT_MONTHS],\n",
    "            monthly_tensor[:, N_INPUT_MONTHS:],\n",
    "        )\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize the list of per-domain datasets to 'tpt48.pkl' in the working directory.\n",
    "# Only unpickle this file from a trusted source: pickle.load can execute arbitrary code.\n",
    "with open('tpt48.pkl', 'wb') as f:\n",
    "    pickle.dump(domain_datasets, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n",
      "torch.Size([1, 6]) torch.Size([1, 6])\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: walk the first domain's dataset with DataLoader defaults and print each batch's tensor shapes.\n",
    "loader = torch.utils.data.DataLoader(domain_datasets[0])\n",
    "for x, y in loader:\n",
    "    print(x.shape, y.shape)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "202c7727d3250454fd1268b2df45b804534eaeca3a681973efa0ffc83187296f"
  },
  "kernelspec": {
   "display_name": "Python 3.10.4 ('pFedHN')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
