{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import PIL\n",
    "from PIL import Image\n",
    "import os, re, random, pickle\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import torch.nn\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dir: ./data/CroppedYale/yaleB09\n",
      "dir: ./data/CroppedYale/yaleB30\n",
      "dir: ./data/CroppedYale/yaleB11\n",
      "dir: ./data/CroppedYale/yaleB01\n",
      "dir: ./data/CroppedYale/yaleB08\n",
      "dir: ./data/CroppedYale/yaleB02\n",
      "dir: ./data/CroppedYale/yaleB29\n",
      "dir: ./data/CroppedYale/yaleB16\n",
      "dir: ./data/CroppedYale/yaleB10\n",
      "dir: ./data/CroppedYale/yaleB24\n",
      "dir: ./data/CroppedYale/yaleB04\n",
      "dir: ./data/CroppedYale/yaleB26\n",
      "dir: ./data/CroppedYale/yaleB15\n",
      "dir: ./data/CroppedYale/yaleB27\n",
      "dir: ./data/CroppedYale/yaleB37\n",
      "dir: ./data/CroppedYale/yaleB03\n",
      "dir: ./data/CroppedYale/yaleB23\n",
      "dir: ./data/CroppedYale/yaleB36\n",
      "dir: ./data/CroppedYale/yaleB31\n",
      "dir: ./data/CroppedYale/yaleB13\n",
      "dir: ./data/CroppedYale/yaleB18\n",
      "dir: ./data/CroppedYale/yaleB12\n",
      "dir: ./data/CroppedYale/yaleB05\n",
      "dir: ./data/CroppedYale/yaleB39\n",
      "dir: ./data/CroppedYale/yaleB06\n",
      "dir: ./data/CroppedYale/yaleB25\n",
      "dir: ./data/CroppedYale/yaleB21\n",
      "dir: ./data/CroppedYale/yaleB32\n",
      "dir: ./data/CroppedYale/yaleB17\n",
      "dir: ./data/CroppedYale/yaleB28\n",
      "dir: ./data/CroppedYale/yaleB22\n",
      "dir: ./data/CroppedYale/yaleB33\n",
      "dir: ./data/CroppedYale/yaleB20\n",
      "dir: ./data/CroppedYale/yaleB38\n",
      "dir: ./data/CroppedYale/yaleB07\n",
      "dir: ./data/CroppedYale/yaleB34\n",
      "dir: ./data/CroppedYale/yaleB35\n",
      "dir: ./data/CroppedYale/yaleB19\n",
      "person 0\n",
      "person 1\n",
      "person 2\n",
      "person 3\n",
      "person 4\n",
      "person 5\n",
      "person 6\n",
      "person 7\n",
      "person 8\n",
      "person 9\n",
      "person 10\n",
      "person 11\n",
      "person 12\n",
      "person 13\n",
      "person 14\n",
      "person 15\n",
      "person 16\n",
      "person 17\n",
      "person 18\n",
      "person 19\n",
      "person 20\n",
      "person 21\n",
      "person 22\n",
      "person 23\n",
      "person 24\n",
      "person 25\n",
      "person 26\n",
      "person 27\n",
      "person 28\n",
      "person 29\n",
      "person 30\n",
      "person 31\n",
      "person 32\n",
      "person 33\n",
      "person 34\n",
      "person 35\n",
      "person 36\n",
      "person 37\n"
     ]
    }
   ],
   "source": [
    "# Download .zip file from [http://vision.ucsd.edu/extyaleb/CroppedYaleBZip/CroppedYale.zip]\n",
    "\n",
    "data_home = './data/CroppedYale'\n",
    "IMAGE_SIDE = 128  # every image is resized to IMAGE_SIDE x IMAGE_SIDE and flattened\n",
    "TEST_FRACTION = 0.3  # share of each person's images held out for the test set\n",
    "SPLIT_SEED = 123  # fixed seed for the per-person train/test split\n",
    "\n",
    "# Image files are expected to be named like 'yaleB09_P00A+000E+00.pgm':\n",
    "# group 1 = person id, group 2 = signed azimuth, group 3 = signed elevation.\n",
    "FILE_PATTERN = re.compile('yaleB([0-9]+)_P[0-9]+A([+-][0-9]+)E([+-][0-9]+)[.]pgm$')\n",
    "\n",
    "x_list = []\n",
    "for (root, dirs, files) in os.walk(data_home):\n",
    "    if len(files) > 0:\n",
    "        print('dir: %s' % (root))\n",
    "        # os.walk lists files in arbitrary order; sorting keeps each person's list,\n",
    "        # and therefore the seeded split below, identical from one machine to the next.\n",
    "        images = ['%s/%s' % (root, name) for name in sorted(files) if FILE_PATTERN.search(name)]\n",
    "        # A folder without any matching image would make train_test_split raise.\n",
    "        if images:\n",
    "            x_list.append(images)\n",
    "\n",
    "# Person ids expected in the file names (1-13 and 15-39, no 14); the position of an id\n",
    "# in this array is the label stored for that person.\n",
    "p_list = np.array(list(range(1, 14)) + list(range(15, 40)))\n",
    "\n",
    "\n",
    "def load_images(paths):\n",
    "    \"\"\"Read the given images and parse their labels from the file names.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    paths : list of str\n",
    "        Image paths whose file names match FILE_PATTERN.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pixels : ndarray of shape (len(paths), IMAGE_SIDE * IMAGE_SIDE)\n",
    "        One resized, flattened image per row.\n",
    "    persons : list\n",
    "        Position of each image's person id within p_list.\n",
    "    azimuths, elevations : list of float\n",
    "        Signed azimuth and elevation values taken from the file names.\n",
    "    \"\"\"\n",
    "    pixels = np.zeros((len(paths), IMAGE_SIDE * IMAGE_SIDE))\n",
    "    persons, azimuths, elevations = [], [], []\n",
    "    for row, path in enumerate(paths):\n",
    "        # The context manager closes the file once the pixels have been copied out.\n",
    "        with Image.open(path) as img:\n",
    "            pixels[row, :] = np.array(img.resize((IMAGE_SIDE, IMAGE_SIDE), Image.BILINEAR)).flatten()\n",
    "        person_id, azimuth, elevation = FILE_PATTERN.search(os.path.basename(path)).groups()\n",
    "        persons.append(np.argwhere(p_list == int(person_id))[0, 0])\n",
    "        azimuths.append(float(azimuth))\n",
    "        elevations.append(float(elevation))\n",
    "    return pixels, persons, azimuths, elevations\n",
    "\n",
    "\n",
    "train_blocks, ptr, atr, etr = [], [], [], []\n",
    "test_blocks, pte, ate, ete = [], [], [], []\n",
    "for i, images in enumerate(x_list):\n",
    "    print('person %i' % i)\n",
    "    train, test = train_test_split(images, test_size=TEST_FRACTION, random_state=SPLIT_SEED)\n",
    "    for paths, blocks, persons, azimuths, elevations in (\n",
    "        (train, train_blocks, ptr, atr, etr),\n",
    "        (test, test_blocks, pte, ate, ete),\n",
    "    ):\n",
    "        pixels, new_persons, new_azimuths, new_elevations = load_images(paths)\n",
    "        blocks.append(pixels)\n",
    "        persons.extend(new_persons)\n",
    "        azimuths.extend(new_azimuths)\n",
    "        elevations.extend(new_elevations)\n",
    "\n",
    "# Stack once at the end instead of re-copying the growing matrix for every person.\n",
    "# The zero-row seed keeps the (0, IMAGE_SIDE * IMAGE_SIDE) shape when nothing was found.\n",
    "no_rows = np.empty([0, IMAGE_SIDE * IMAGE_SIDE])\n",
    "mattr = np.vstack([no_rows] + train_blocks)\n",
    "matte = np.vstack([no_rows] + test_blocks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the train and test splits as pickled dicts of NumPy arrays.\n",
    "# pickle, os and np come from the import cell at the top of the notebook.\n",
    "output_dir = './data/YaleBFace128'\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "splits = (\n",
    "    ('YaleBFaceTrain.dat', mattr, ptr, atr, etr),\n",
    "    ('YaleBFaceTest.dat', matte, pte, ate, ete),\n",
    ")\n",
    "for file_name, images, persons, azimuths, elevations in splits:\n",
    "    data_path = '%s/%s' % (output_dir, file_name)\n",
    "    all_data = {\n",
    "        'image': images.astype(np.int32),\n",
    "        'person': np.array(persons),\n",
    "        'azimuth': np.array(azimuths),\n",
    "        'elevation': np.array(elevations),\n",
    "    }\n",
    "    with open(data_path, 'wb') as f:\n",
    "        pickle.dump(all_data, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the imports are repeated here, presumably so the saved data can be\n",
    "# reloaded in a fresh kernel without re-running the preprocessing above - confirm.\n",
    "import pickle\n",
    "import numpy as np\n",
    "\n",
    "# This file is written by the save cell of this notebook; pickle.load can run arbitrary\n",
    "# code, so it must not be pointed at files from untrusted sources.\n",
    "train_path = './data/YaleBFace128/YaleBFaceTrain.dat'\n",
    "with open(train_path, 'rb') as train_file:\n",
    "    train_data = pickle.load(train_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 8,  8,  8, ..., 17, 17, 17])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Person label of every training row (the 'person' array saved with the training split).\n",
    "train_data['person']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1664, 16384)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the training image matrix: one flattened image per row.\n",
    "train_data['image'].shape"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch",
   "language": "python",
   "name": "torch"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
