{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "from embeds import fix_corrupt, multi_inner_align\n",
    "import numpy as np\n",
    "from hypertools.tools import align"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "def read_fmri(path: str) -> list:\n",
    "    \"\"\"Load every embedding file found in a directory as a DataFrame.\n",
    "\n",
    "    Each non-blank line of a file is split on whitespace: the first token\n",
    "    becomes the row label and the remaining tokens become that row's values.\n",
    "    The raw dict is passed through `fix_corrupt` before being cast to float.\n",
    "\n",
    "    Args:\n",
    "        path: Directory holding the embedding files (a trailing separator\n",
    "            is accepted but no longer required).\n",
    "\n",
    "    Returns:\n",
    "        A list with the per-file DataFrames as returned by `multi_inner_align`.\n",
    "        NOTE(review): presumably that helper restricts the frames to a shared\n",
    "        index -- confirm against the `embeds` module.\n",
    "    \"\"\"\n",
    "    dfs = []\n",
    "\n",
    "    # os.listdir yields entries in arbitrary order; sorting keeps the order of\n",
    "    # the returned list (and anything downstream that depends on it) reproducible.\n",
    "    for f_name in sorted(os.listdir(path)):\n",
    "        f_path = os.path.join(path, f_name)\n",
    "        if not os.path.isfile(f_path):\n",
    "            continue  # sub-directories cannot be parsed as embedding files\n",
    "\n",
    "        pulled = {}\n",
    "        with open(f_path, 'r') as f:\n",
    "            for line in f:\n",
    "                tokens = line.split()\n",
    "                if not tokens:\n",
    "                    continue  # blank line: there is no word to unpack\n",
    "                word, *vec = tokens\n",
    "                pulled[word] = vec\n",
    "        pulled = fix_corrupt(pulled)\n",
    "        dfs.append(pd.DataFrame(pulled).T.astype(float))\n",
    "\n",
    "    return list(multi_inner_align(dfs))\n",
    "\n",
    "# Load the frames from the fmri_text and fmri_speech training directories.\n",
    "fmris_text = read_fmri(path='../../data/embeds_train/fmri_text/')\n",
    "fmris_speech = read_fmri(path='../../data/embeds_train/fmri_speech/')\n",
    "(len(fmris_text), len(fmris_speech))"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "fb6a39731d1dc10e",
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Hyperalignment approach"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "42baae307f1f2602"
  },
  {
   "cell_type": "code",
   "source": [
    "def hyper_align(dfs: list) -> pd.DataFrame:\n",
    "    \"\"\"Hyperalign a list of frames and average them into a single frame.\n",
    "\n",
    "    Args:\n",
    "        dfs: Frames handed to hypertools' `align` with `align='hyper'`.\n",
    "\n",
    "    Returns:\n",
    "        The mean over axis 0 of the aligned output, wrapped in a DataFrame\n",
    "        that reuses the index of the first input frame.\n",
    "    \"\"\"\n",
    "    aligned = align(dfs, align='hyper')\n",
    "    averaged = np.mean(aligned, axis=0)\n",
    "    return pd.DataFrame(averaged, index=dfs[0].index)\n",
    "    \n",
    "    \n",
    "# Align and average the frames loaded into fmris_text.\n",
    "fMRI_text_hyper_align = hyper_align(dfs=fmris_text)\n",
    "fMRI_text_hyper_align"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "4858a2731330e93f",
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "# Align and average the frames loaded into fmris_speech.\n",
    "fMRI_speech_hyper_align = hyper_align(dfs=fmris_speech)\n",
    "fMRI_speech_hyper_align"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "dbef8d952f534a2c",
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Saving"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e7c7a5b947a4e640"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# Keep only the words that also appear in the psychNorms index.\n",
    "to_pull = set(\n",
    "    pd.read_csv(\n",
    "        '../../data/psychNorms/psychNorms.zip',\n",
    "        index_col=0,\n",
    "        low_memory=False,\n",
    "        compression='zip',\n",
    "    ).index\n",
    ")\n",
    "text_in_norms = fMRI_text_hyper_align.index.isin(to_pull)\n",
    "speech_in_norms = fMRI_speech_hyper_align.index.isin(to_pull)\n",
    "fMRI_text_hyper_align = fMRI_text_hyper_align.loc[text_in_norms].astype(float)\n",
    "fMRI_speech_hyper_align = fMRI_speech_hyper_align.loc[speech_in_norms].astype(float)\n",
    "\n",
    "# Write the filtered embeddings to CSV.\n",
    "fMRI_text_hyper_align.to_csv('../../data/embeds/fMRI_text_hyper_align.csv')\n",
    "fMRI_speech_hyper_align.to_csv('../../data/embeds/fMRI_speech_hyper_align.csv')"
   ],
   "id": "fe9fc85ee929a493",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
