{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import random\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import scipy.io\n",
    "\n",
    "from src.utility import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_dfs(data_df, metadata_df, fn_root, min_instances_per_task=5):\n",
    "    \"\"\"Clean a data/metadata pair and save it in four CSV layouts.\n",
    "\n",
    "    Processing steps:\n",
    "      1. Inner-join `data_df` and `metadata_df` on `task` == `_task`.\n",
    "      2. Drop every joined row containing a NaN (a warning is printed if any exist).\n",
    "      3. Drop tasks with fewer than `min_instances_per_task` remaining rows.\n",
    "      4. Renumber tasks from 0 (largest instance count first) and recompute\n",
    "         `task`, `_task` and `_task_count`.\n",
    "      5. Drop feature columns whose values are all equal. Columns starting with\n",
    "         an underscore, and `task`, are always kept.\n",
    "\n",
    "    Files written (all without the index):\n",
    "      * {fn_root}_data.csv          data columns plus `task`\n",
    "      * {fn_root}_metadata.csv      de-duplicated metadata columns plus `_task`,\n",
    "                                    `_task_count` and `_task_name`\n",
    "      * {fn_root}_STL_metadata.csv  joined table without underscore-prefixed columns\n",
    "      * {fn_root}_STL_onehot.csv    data table plus 0/1 indicator columns `t_{task}`\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data_df : pandas.DataFrame\n",
    "        Instance-level data; must contain a `task` column.\n",
    "    metadata_df : pandas.DataFrame\n",
    "        Task-level metadata; must contain a `_task` column. If it has no\n",
    "        `_task_name` column, the renumbered `_task` id is used as the name.\n",
    "    fn_root : str\n",
    "        Path prefix of the output files.\n",
    "    min_instances_per_task : int, default 5\n",
    "        Tasks with fewer rows than this (after NaN filtering) are discarded.\n",
    "\n",
    "    Column names are expected to be strings (they are tested with `startswith`).\n",
    "    \"\"\"\n",
    "\n",
    "    # Merge dataframes; the data-side key is dropped because \"_task\" holds the same values\n",
    "    join_df = pd.merge(data_df, metadata_df, left_on=\"task\", right_on=\"_task\")\n",
    "    join_df = join_df.drop(columns=[\"task\"])\n",
    "\n",
    "    # Filter instances for problems\n",
    "    has_nan = join_df.isna().any(axis=1)\n",
    "    if has_nan.any():\n",
    "        print(\"WARNING: Removing NaN rows of data and/or metadata.\")\n",
    "    # .copy() so the column assignments below act on an independent frame\n",
    "    join_df = join_df[~has_nan].copy()\n",
    "\n",
    "    # Fix task-related meta-variables\n",
    "    join_df[\"_task_count\"] = join_df.groupby(\"_task\")[\"_task\"].transform(\"count\")\n",
    "    join_df = join_df[join_df[\"_task_count\"] >= min_instances_per_task]\n",
    "\n",
    "    # Renumber tasks in order of first appearance after sorting by size (largest first)\n",
    "    join_df = join_df.sort_values(by=\"_task_count\", ascending=False)\n",
    "    join_df[\"_task\"] = join_df.groupby(\"_task\", sort=False).ngroup()\n",
    "    join_df[\"task\"] = join_df[\"_task\"]\n",
    "    join_df = join_df.sort_values(by=\"_task\")\n",
    "\n",
    "    # Filter features where all vals are equal. \"task\" is exempt: it is constant when a\n",
    "    # single task survives, and dropping it made the selections below raise a KeyError.\n",
    "    keep_cols = [c for c in join_df.columns\n",
    "                 if c == \"task\" or c.startswith(\"_\") or join_df[c].nunique() > 1]\n",
    "    join_df = join_df[keep_cols]\n",
    "\n",
    "    # Data df\n",
    "    data_cols = [c for c in data_df.columns if c in join_df.columns]\n",
    "    if \"task\" not in data_cols:\n",
    "        data_cols.append(\"task\")\n",
    "    data_out = join_df[data_cols]\n",
    "\n",
    "    # Metadata df\n",
    "    metadata_cols = [c for c in metadata_df.columns if c in join_df.columns]\n",
    "    for required in (\"_task\", \"_task_count\"):\n",
    "        if required not in metadata_cols:\n",
    "            metadata_cols.append(required)\n",
    "    metadata_out = join_df[metadata_cols].drop_duplicates()\n",
    "    if \"_task_name\" not in metadata_out.columns:\n",
    "        metadata_out = metadata_out.assign(_task_name=metadata_out[\"_task\"])\n",
    "\n",
    "    # Joined (STL) df: everything except the underscore-prefixed bookkeeping columns\n",
    "    stl_out = join_df[[c for c in join_df.columns if not c.startswith(\"_\")]]\n",
    "\n",
    "    # 1hot encoded STL df. dtype=int keeps the indicators as 0/1 in the CSV;\n",
    "    # recent pandas versions default to bool, which would be written as True/False.\n",
    "    onehot = pd.get_dummies(data_out[\"task\"].astype(object), prefix=\"t\", dtype=int)\n",
    "    data_out_1H = pd.concat([data_out, onehot], axis=1)\n",
    "\n",
    "    # Save dataframes\n",
    "    data_out.to_csv(fn_root+\"_data.csv\", index=False)\n",
    "    metadata_out.to_csv(fn_root+\"_metadata.csv\", index=False)\n",
    "    stl_out.to_csv(fn_root+\"_STL_metadata.csv\", index=False)\n",
    "    data_out_1H.to_csv(fn_root+\"_STL_onehot.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Cubic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward slashes work on Windows and POSIX alike, and need no escaping in the notebook JSON\n",
    "out_filepath = \"data/cubic/cubic\"\n",
    "\n",
    "np.random.seed(0)\n",
    "\n",
    "n_tasks = 100\n",
    "n_data_per_task = 500\n",
    "domain = [-1, 1]         # range x is sampled from\n",
    "coef_range = [-1, 1]     # range the polynomial coefficients are sampled from\n",
    "degree = 3\n",
    "sigma = 0.1              # scale of the Gaussian noise added to the labels\n",
    "sigma_metadata = 0       # scale of the Gaussian noise added to the saved coefficients\n",
    "\n",
    "# Create metadata table: one polynomial per task with coefficients a, b, c, ...\n",
    "# (highest power first)\n",
    "coefs = [chr(i) for i in range(97, 98+degree)]\n",
    "metadata = np.random.rand(n_tasks, degree+1)\n",
    "metadata = metadata * (coef_range[1] - coef_range[0]) + coef_range[0]\n",
    "metadata_df = pd.DataFrame(metadata, columns=coefs)\n",
    "metadata_df[\"_task\"] = metadata_df.index\n",
    "\n",
    "# Readable task name built from the rounded coefficients, e.g. \"0.1x3 + 0.43x2 + 0.21x + 0.09\"\n",
    "metadata_df[\"_task_name\"] = \"\"\n",
    "for i in range(0, len(coefs)):\n",
    "    if i > 0:\n",
    "        metadata_df[\"_task_name\"] += \" + \"\n",
    "    metadata_df[\"_task_name\"] += metadata_df[coefs[i]].round(2).astype(str)\n",
    "    if i < len(coefs)-1:\n",
    "        metadata_df[\"_task_name\"] += \"x\"\n",
    "    if i < len(coefs)-2:\n",
    "        metadata_df[\"_task_name\"] += str(len(coefs)-1-i)\n",
    "\n",
    "# Create data table\n",
    "n_rows = n_tasks * n_data_per_task\n",
    "x = np.random.rand(n_rows)\n",
    "x = x * (domain[1] - domain[0]) + domain[0]\n",
    "task = np.repeat(np.arange(n_tasks), n_data_per_task)\n",
    "\n",
    "# Labels: the task's polynomial evaluated at x plus Gaussian noise, computed from the\n",
    "# noise-free coefficients. One vectorised pass per power replaces the former per-row\n",
    "# iterrows() loop; noise is still drawn once per row, in row order, and the terms are\n",
    "# accumulated in the same order (highest power first).\n",
    "label = np.random.normal(size=n_rows) * sigma\n",
    "for k, power in enumerate(range(degree, -1, -1)):\n",
    "    label += metadata[task, k] * x**power\n",
    "\n",
    "feature_df = pd.DataFrame({\"x\": x, \"task\": task, \"label\": label})\n",
    "\n",
    "# Add noise to metadata (after the labels were generated from the clean coefficients).\n",
    "# Zero-mean Gaussian noise, matching how sigma is used for the labels; the previous\n",
    "# uniform [0, 1) draw could only shift coefficients upwards.\n",
    "metadata_df[coefs] += np.random.normal(size=(n_tasks, degree+1)) * sigma_metadata\n",
    "\n",
    "save_dfs(feature_df, metadata_df, out_filepath)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Robot Arm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "save = True\n",
    "plot = False\n",
    "\n",
    "num_tasks = 100\n",
    "num_instances_per_task = 500\n",
    "dof = 3                        # Degrees of freedom; number of joints in arm\n",
    "join_length = [0, 1]           # [low, high] range the link lengths are sampled from\n",
    "# (low, high) sampling range of each joint angle in radians; one pair per joint\n",
    "angle_ranges = [(-np.pi, 0), (-np.pi, 0), (-np.pi/2, np.pi/2)]\n",
    "if len(angle_ranges) != dof:\n",
    "    raise ValueError(\"angle_ranges needs one (low, high) pair per joint, got {} for dof={}\".format(len(angle_ranges), dof))\n",
    "\n",
    "np.random.seed(0)\n",
    "\n",
    "# Forward slashes work on Windows and POSIX alike, and need no escaping in the notebook JSON\n",
    "output_filename_root = \"data/robot_arm/robot_arm\"\n",
    "\n",
    "# Metadata: one arm per task, described by its link lengths l1..l{dof}\n",
    "metadata_colnames = [\"l{}\".format(i) for i in range(1,dof+1)]\n",
    "lengths = np.random.uniform(low=join_length[0], high=join_length[1], size=(num_tasks, dof))\n",
    "metadata_df = pd.DataFrame(lengths, columns=metadata_colnames)\n",
    "metadata_df[\"_task\"] = np.arange(num_tasks)\n",
    "# Name each task after its original id (save_dfs renumbers \"_task\" but leaves \"_task_name\" alone)\n",
    "metadata_df[\"_task_name\"] = metadata_df[\"_task\"]\n",
    "\n",
    "# Joint angles: one column per joint, drawn joint by joint\n",
    "shp = (num_tasks*num_instances_per_task,)\n",
    "angles = np.stack([np.random.uniform(low=low, high=high, size=shp) for low, high in angle_ranges], axis=1)\n",
    "\n",
    "# End position of the arm: link i points along the sum of the first i+1 joint angles\n",
    "x = np.zeros(shp)\n",
    "y = np.zeros(shp)\n",
    "for i in range(dof):\n",
    "    link_length = np.repeat(lengths[:,i], num_instances_per_task)\n",
    "    heading = np.sum(angles[:,:i+1], axis=1)\n",
    "    x += link_length * np.cos(heading)\n",
    "    y += link_length * np.sin(heading)\n",
    "\n",
    "# Features are the end position (x, y); the joint angles are stored as label1..label{dof}\n",
    "feature_vals = np.concatenate([x.reshape(-1,1), y.reshape(-1,1), angles], axis=1)\n",
    "feature_colnames = [\"x\", \"y\"] + [\"label{}\".format(i) for i in range(1,dof+1)]\n",
    "feature_df = pd.DataFrame(feature_vals, columns=feature_colnames)\n",
    "feature_df[\"task\"] = np.repeat(np.arange(num_tasks), num_instances_per_task)\n",
    "\n",
    "\n",
    "if plot:\n",
    "    # One scatter plot of reachable end positions for each of the first (up to) 10 tasks\n",
    "    for i in range(min(10, num_tasks)):\n",
    "        rows = slice(i*num_instances_per_task, (i+1)*num_instances_per_task)\n",
    "        plt.scatter(x[rows], y[rows])\n",
    "        plt.show()\n",
    "\n",
    "if save:\n",
    "    save_dfs(feature_df, metadata_df, output_filename_root)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
