{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3",
   "language": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import rasterio\n",
    "\n",
    "def load_tiles(data_dir, state):\n",
    "    \"\"\"Load the NAIP / land-cover tile filename pairs listed for one state.\n",
    "\n",
    "    Reads ``<data_dir>/<state>_extended-test_tiles.csv`` and prints how many\n",
    "    tiles it lists.\n",
    "\n",
    "    Args:\n",
    "        data_dir: Directory (str or Path) that contains the tile index CSV.\n",
    "        state: Tile-set identifier used as the CSV filename prefix,\n",
    "            e.g. ``\"md_1m_2013\"``.\n",
    "\n",
    "    Returns:\n",
    "        numpy.ndarray of shape (n_tiles, 2): column 0 holds the\n",
    "        ``naip-new_fn`` values and column 1 the ``lc_fn`` values.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: If the CSV does not exist (raised by pandas).\n",
    "        KeyError: If either expected column is missing from the CSV.\n",
    "    \"\"\"\n",
    "    csv_name = f\"{state}_extended-test_tiles.csv\"\n",
    "    tiles = pd.read_csv(str(Path(data_dir) / csv_name))\n",
    "    # Only one CSV is read per call, so the selected columns are returned\n",
    "    # directly; there is nothing to concatenate with.\n",
    "    fns = tiles[[\"naip-new_fn\", \"lc_fn\"]].values\n",
    "    print(f\"Loaded {fns.shape[0]} test tiles from {csv_name}\")\n",
    "\n",
    "    return fns\n",
    "\n",
    "def generate_test_patches(\n",
    "    state,\n",
    "    data_dir=\"/scratch/forest/datasets/chesapeake_data\",\n",
    "    patches_per_tile=500,\n",
    "    patch_size=256,\n",
    "    seed=None,\n",
    "):\n",
    "    \"\"\"Cut random square patches from every test tile of one state and save them.\n",
    "\n",
    "    For each tile returned by ``load_tiles`` the NAIP imagery and the land-cover\n",
    "    raster are read, ``patches_per_tile`` windows are sampled uniformly at\n",
    "    random, and each window is written to\n",
    "    ``<data_dir>/<state>_extended-test_patches/<state>-<tile>-<k>.npz``.\n",
    "    The relative patch paths are finally written to\n",
    "    ``<data_dir>/<state>_extended-test_patches.csv`` under a ``patch_fn`` column.\n",
    "\n",
    "    Every saved array is float32 with shape (29, patch_size, patch_size):\n",
    "    channels 0-3 receive the NAIP bands (the tile is expected to have 4),\n",
    "    channel 8 receives band 0 of the land-cover raster, and the remaining\n",
    "    channels stay zero.\n",
    "\n",
    "    Args:\n",
    "        state: Tile-set identifier, e.g. ``\"md_1m_2013\"``.\n",
    "        data_dir: Dataset root that holds the tile index CSV and the rasters.\n",
    "            The output folder is created directly inside it.\n",
    "        patches_per_tile: Number of random patches to cut from each tile.\n",
    "        patch_size: Side length of the square patches, in pixels.\n",
    "        seed: Optional integer seed for the patch offsets. With ``None`` the\n",
    "            global ``np.random`` state is used, so results are only\n",
    "            reproducible if the caller seeded it.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If a tile is smaller than ``patch_size`` along either axis.\n",
    "    \"\"\"\n",
    "    folder = state + \"_extended-test_patches\"\n",
    "    index_csv = os.path.join(data_dir, folder + \".csv\")\n",
    "\n",
    "    output_dir = Path(data_dir) / folder\n",
    "    output_dir.mkdir(parents=False, exist_ok=True)\n",
    "\n",
    "    fns = load_tiles(data_dir, state)\n",
    "    n_tiles = fns.shape[0]\n",
    "\n",
    "    # Keep drawing from the global stream unless a seed is requested, so callers\n",
    "    # that seed np.random themselves see the same behaviour as before.\n",
    "    rng = np.random if seed is None else np.random.RandomState(seed)\n",
    "\n",
    "    patch_fns = []\n",
    "    for tile_idx, (naip_rel, lc_rel) in enumerate(fns, start=1):\n",
    "        tile_name = naip_rel.split('/')[-1].split(\"_naip-new.tif\")[0]\n",
    "        naip_fn = os.path.join(data_dir, naip_rel)\n",
    "        lc_fn = os.path.join(data_dir, lc_rel)\n",
    "\n",
    "        # Read NAIP high resolution imagery; the context manager closes the\n",
    "        # dataset even if read() raises.\n",
    "        with rasterio.open(naip_fn, \"r\") as naip_fid:\n",
    "            naip_tile = naip_fid.read().astype(np.float32)  # 4 * H * W\n",
    "\n",
    "        # Read Land Cover high resolution labels\n",
    "        with rasterio.open(lc_fn, \"r\") as lc_fid:\n",
    "            lc_tile = lc_fid.read().astype(np.uint8)\n",
    "\n",
    "        _, H, W = naip_tile.shape\n",
    "        if H < patch_size or W < patch_size:\n",
    "            raise ValueError(\n",
    "                f\"Tile {naip_fn} is {H}x{W}, smaller than the \"\n",
    "                f\"{patch_size}x{patch_size} patch size\"\n",
    "            )\n",
    "\n",
    "        for p_id in range(patches_per_tile):\n",
    "            patch_name = state + \"-\" + tile_name + f\"-{p_id}.npz\"\n",
    "            patch_fns.append(folder + \"/\" + patch_name)\n",
    "            save_path = output_dir / patch_name\n",
    "\n",
    "            # randint's upper bound is exclusive; +1 makes the last valid\n",
    "            # offset reachable and lets a tile of exactly patch_size work.\n",
    "            row = rng.randint(0, H - patch_size + 1)\n",
    "            col = rng.randint(0, W - patch_size + 1)\n",
    "            rows = slice(row, row + patch_size)\n",
    "            cols = slice(col, col + patch_size)\n",
    "\n",
    "            patch = np.zeros((29, patch_size, patch_size), dtype=np.float32)\n",
    "            patch[0:4, :, :] = naip_tile[:, rows, cols]\n",
    "            patch[8, :, :] = lc_tile[0, rows, cols]\n",
    "\n",
    "            np.savez(str(save_path), patch)\n",
    "\n",
    "        print(f\"Finished {tile_idx} / {n_tiles} tiles in state {state}\")\n",
    "\n",
    "    patch_index = pd.DataFrame({'patch_fn': patch_fns}, columns=[\"patch_fn\"])\n",
    "    patch_index.to_csv(index_csv, header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Loaded 20 test tiles from md_1m_2013_extended-test_tiles.csv\n",
      "Finished 1 / 20 tiles in state md_1m_2013\n",
      "Finished 2 / 20 tiles in state md_1m_2013\n",
      "Finished 3 / 20 tiles in state md_1m_2013\n",
      "Finished 4 / 20 tiles in state md_1m_2013\n",
      "Finished 5 / 20 tiles in state md_1m_2013\n",
      "Finished 6 / 20 tiles in state md_1m_2013\n",
      "Finished 7 / 20 tiles in state md_1m_2013\n",
      "Finished 8 / 20 tiles in state md_1m_2013\n",
      "Finished 9 / 20 tiles in state md_1m_2013\n",
      "Finished 10 / 20 tiles in state md_1m_2013\n",
      "Finished 11 / 20 tiles in state md_1m_2013\n",
      "Finished 12 / 20 tiles in state md_1m_2013\n",
      "Finished 13 / 20 tiles in state md_1m_2013\n",
      "Finished 14 / 20 tiles in state md_1m_2013\n",
      "Finished 15 / 20 tiles in state md_1m_2013\n",
      "Finished 16 / 20 tiles in state md_1m_2013\n",
      "Finished 17 / 20 tiles in state md_1m_2013\n",
      "Finished 18 / 20 tiles in state md_1m_2013\n",
      "Finished 19 / 20 tiles in state md_1m_2013\n",
      "Finished 20 / 20 tiles in state md_1m_2013\n",
      "Loaded 20 test tiles from va_1m_2014_extended-test_tiles.csv\n",
      "Finished 1 / 20 tiles in state va_1m_2014\n",
      "Finished 2 / 20 tiles in state va_1m_2014\n",
      "Finished 3 / 20 tiles in state va_1m_2014\n",
      "Finished 4 / 20 tiles in state va_1m_2014\n",
      "Finished 5 / 20 tiles in state va_1m_2014\n",
      "Finished 6 / 20 tiles in state va_1m_2014\n",
      "Finished 7 / 20 tiles in state va_1m_2014\n",
      "Finished 8 / 20 tiles in state va_1m_2014\n",
      "Finished 9 / 20 tiles in state va_1m_2014\n",
      "Finished 10 / 20 tiles in state va_1m_2014\n",
      "Finished 11 / 20 tiles in state va_1m_2014\n",
      "Finished 12 / 20 tiles in state va_1m_2014\n",
      "Finished 13 / 20 tiles in state va_1m_2014\n",
      "Finished 14 / 20 tiles in state va_1m_2014\n",
      "Finished 15 / 20 tiles in state va_1m_2014\n",
      "Finished 16 / 20 tiles in state va_1m_2014\n",
      "Finished 17 / 20 tiles in state va_1m_2014\n",
      "Finished 18 / 20 tiles in state va_1m_2014\n",
      "Finished 19 / 20 tiles in state va_1m_2014\n",
      "Finished 20 / 20 tiles in state va_1m_2014\n",
      "Loaded 20 test tiles from wv_1m_2014_extended-test_tiles.csv\n",
      "Finished 1 / 20 tiles in state wv_1m_2014\n",
      "Finished 2 / 20 tiles in state wv_1m_2014\n",
      "Finished 3 / 20 tiles in state wv_1m_2014\n",
      "Finished 4 / 20 tiles in state wv_1m_2014\n",
      "Finished 5 / 20 tiles in state wv_1m_2014\n",
      "Finished 6 / 20 tiles in state wv_1m_2014\n",
      "Finished 7 / 20 tiles in state wv_1m_2014\n",
      "Finished 8 / 20 tiles in state wv_1m_2014\n",
      "Finished 9 / 20 tiles in state wv_1m_2014\n",
      "Finished 10 / 20 tiles in state wv_1m_2014\n",
      "Finished 11 / 20 tiles in state wv_1m_2014\n",
      "Finished 12 / 20 tiles in state wv_1m_2014\n",
      "Finished 13 / 20 tiles in state wv_1m_2014\n",
      "Finished 14 / 20 tiles in state wv_1m_2014\n",
      "Finished 15 / 20 tiles in state wv_1m_2014\n",
      "Finished 16 / 20 tiles in state wv_1m_2014\n",
      "Finished 17 / 20 tiles in state wv_1m_2014\n",
      "Finished 18 / 20 tiles in state wv_1m_2014\n",
      "Finished 19 / 20 tiles in state wv_1m_2014\n",
      "Finished 20 / 20 tiles in state wv_1m_2014\n",
      "Loaded 20 test tiles from de_1m_2013_extended-test_tiles.csv\n",
      "Finished 1 / 20 tiles in state de_1m_2013\n",
      "Finished 2 / 20 tiles in state de_1m_2013\n",
      "Finished 3 / 20 tiles in state de_1m_2013\n",
      "Finished 4 / 20 tiles in state de_1m_2013\n",
      "Finished 5 / 20 tiles in state de_1m_2013\n",
      "Finished 6 / 20 tiles in state de_1m_2013\n",
      "Finished 7 / 20 tiles in state de_1m_2013\n",
      "Finished 8 / 20 tiles in state de_1m_2013\n",
      "Finished 9 / 20 tiles in state de_1m_2013\n",
      "Finished 10 / 20 tiles in state de_1m_2013\n",
      "Finished 11 / 20 tiles in state de_1m_2013\n",
      "Finished 12 / 20 tiles in state de_1m_2013\n",
      "Finished 13 / 20 tiles in state de_1m_2013\n",
      "Finished 14 / 20 tiles in state de_1m_2013\n",
      "Finished 15 / 20 tiles in state de_1m_2013\n",
      "Finished 16 / 20 tiles in state de_1m_2013\n",
      "Finished 17 / 20 tiles in state de_1m_2013\n",
      "Finished 18 / 20 tiles in state de_1m_2013\n",
      "Finished 19 / 20 tiles in state de_1m_2013\n",
      "Finished 20 / 20 tiles in state de_1m_2013\n",
      "Loaded 20 test tiles from ny_1m_2013_extended-test_tiles.csv\n",
      "Finished 1 / 20 tiles in state ny_1m_2013\n",
      "Finished 2 / 20 tiles in state ny_1m_2013\n",
      "Finished 3 / 20 tiles in state ny_1m_2013\n",
      "Finished 4 / 20 tiles in state ny_1m_2013\n",
      "Finished 5 / 20 tiles in state ny_1m_2013\n",
      "Finished 6 / 20 tiles in state ny_1m_2013\n",
      "Finished 7 / 20 tiles in state ny_1m_2013\n",
      "Finished 8 / 20 tiles in state ny_1m_2013\n",
      "Finished 9 / 20 tiles in state ny_1m_2013\n",
      "Finished 10 / 20 tiles in state ny_1m_2013\n",
      "Finished 11 / 20 tiles in state ny_1m_2013\n",
      "Finished 12 / 20 tiles in state ny_1m_2013\n",
      "Finished 13 / 20 tiles in state ny_1m_2013\n",
      "Finished 14 / 20 tiles in state ny_1m_2013\n",
      "Finished 15 / 20 tiles in state ny_1m_2013\n",
      "Finished 16 / 20 tiles in state ny_1m_2013\n",
      "Finished 17 / 20 tiles in state ny_1m_2013\n",
      "Finished 18 / 20 tiles in state ny_1m_2013\n",
      "Finished 19 / 20 tiles in state ny_1m_2013\n",
      "Finished 20 / 20 tiles in state ny_1m_2013\n",
      "Loaded 20 test tiles from pa_1m_2013_extended-test_tiles.csv\n",
      "Finished 1 / 20 tiles in state pa_1m_2013\n",
      "Finished 2 / 20 tiles in state pa_1m_2013\n",
      "Finished 3 / 20 tiles in state pa_1m_2013\n",
      "Finished 4 / 20 tiles in state pa_1m_2013\n",
      "Finished 5 / 20 tiles in state pa_1m_2013\n",
      "Finished 6 / 20 tiles in state pa_1m_2013\n",
      "Finished 7 / 20 tiles in state pa_1m_2013\n",
      "Finished 8 / 20 tiles in state pa_1m_2013\n",
      "Finished 9 / 20 tiles in state pa_1m_2013\n",
      "Finished 10 / 20 tiles in state pa_1m_2013\n",
      "Finished 11 / 20 tiles in state pa_1m_2013\n",
      "Finished 12 / 20 tiles in state pa_1m_2013\n",
      "Finished 13 / 20 tiles in state pa_1m_2013\n",
      "Finished 14 / 20 tiles in state pa_1m_2013\n",
      "Finished 15 / 20 tiles in state pa_1m_2013\n",
      "Finished 16 / 20 tiles in state pa_1m_2013\n",
      "Finished 17 / 20 tiles in state pa_1m_2013\n",
      "Finished 18 / 20 tiles in state pa_1m_2013\n",
      "Finished 19 / 20 tiles in state pa_1m_2013\n",
      "Finished 20 / 20 tiles in state pa_1m_2013\n"
     ]
    }
   ],
   "source": [
    "# Tile sets (state, resolution, year) to cut extended-test patches for.\n",
    "states = [\n",
    "    \"md_1m_2013\",\n",
    "    \"va_1m_2014\",\n",
    "    \"wv_1m_2014\",\n",
    "    \"de_1m_2013\",\n",
    "    \"ny_1m_2013\",\n",
    "    \"pa_1m_2013\",\n",
    "]\n",
    "\n",
    "for state in states:\n",
    "    generate_test_patches(state)"
   ]
  }
 ]
}