{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e17df356-981b-49a4-9725-b605b4322a97",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np\n",
    "sys.path.append(\"../config/\")\n",
    "import config_path\n",
    "import dataset_info\n",
    "sys.path.append(\"../methods/emmix\")\n",
    "import sp_tensor\n",
    "from tabulate import tabulate\n",
    "\n",
    "def load_data_real(dataset_name, tvt=\"train\", normalize=True, check_empty=False):\n",
    "    assert tvt in [\"train\", \"valid\", \"test\"], \"tvt need to be train/valid/test\"\n",
    "    \n",
    "    if not(dataset_name in dataset_info.real_datasets_list):\n",
    "        error_message = f\"please chose one of exsiting real datasets name {dataset_info.real_datasets_list}\"\n",
    "        raise NameError(error_message)\n",
    "    \n",
    "    coords_path = os.path.join(config_path.data_repo_real, dataset_name,f\"X_{tvt}_coords.npy\")\n",
    "    values_path = os.path.join(config_path.data_repo_real, dataset_name,f\"X_{tvt}_values.npy\")\n",
    "    \n",
    "    coords = np.load(coords_path)\n",
    "    values = np.load(values_path)\n",
    "\n",
    "    tensor_size = dataset_info.tensor_sizes[dataset_name]\n",
    "\n",
    "    T = sp_tensor.Sp_tensor(coords, values, tensor_size, normalize=normalize, check_empty=False)\n",
    "    return T\n",
    "\n",
    "def show_dataset_detail():\n",
    "    nnz   = dict()\n",
    "    shape = dict()\n",
    "    sizes = dict()\n",
    "    dims  = dict()\n",
    "    datas = []\n",
    "    for dataset_name in sorted(dataset_info.real_datasets_list):\n",
    "        T = load_data_real(dataset_name)\n",
    "        data = dict()\n",
    "\n",
    "        data[\"Name\"] = dataset_name\n",
    "        data[\"Tensor dim\"] = T.tensor_dim\n",
    "        data[\"NNZ\"] = T.nnz\n",
    "        data[\"Tensor size\"] = np.prod(T.tensor_size)\n",
    "        data[\"sparsity\"] =  T.nnz / np.prod(T.tensor_size)\n",
    "        data[\"class\"] = T.tensor_size[-1]\n",
    "        #data[\"Tensor shape\"] = T.tensor_size\n",
    "        datas.append(data)\n",
    "        \n",
    "    print(tabulate(datas, headers='keys', floatfmt=[\"g\", \"g\", \"g\", \".2e\", \".2e\", \"g\"]))\n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "816c2659-76b0-424b-acff-f36b518b1718",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(os.getcwd())\n",
    "os.chdir('../')\n",
    "print(os.getcwd())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cdb45339-65ee-4a3a-abb4-6b0954f5c08b",
   "metadata": {},
   "outputs": [],
   "source": [
    "show_dataset_detail()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
