{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[],"dockerImageVersionId":30698,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"#@title Imports\n%reset -f\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom tqdm import tqdm\nimport copy\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.axes_grid1 import make_axes_locatable\nfrom itertools import product as cartesian_prod\nfrom sklearn.metrics import pairwise_distances\n\nfrom sklearn import tree\nfrom sklearn import cluster, mixture\nimport zipfile\nimport shutil\nimport urllib.request\nimport numpy as np\nimport pandas as pd\nfrom sklearn.preprocessing import StandardScaler, LabelEncoder\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split, GridSearchCV\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.svm import SVC\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.neighbors import NearestCentroid\nfrom scipy.io import arff\n\n\nnp.set_printoptions(precision=4)\n\n\n#@title Importing Packages\nimport os\nimport random\nfrom copy import deepcopy\nimport torchvision\nimport torchvision.transforms as transforms","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-04-22T22:36:14.963637Z","iopub.execute_input":"2024-04-22T22:36:14.964076Z","iopub.status.idle":"2024-04-22T22:36:24.571821Z","shell.execute_reply.started":"2024-04-22T22:36:14.964042Z","shell.execute_reply":"2024-04-22T22:36:24.570670Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"DATA_NAME =\"syn\"","metadata":{"execution":{"iopub.status.busy":"2024-04-22T22:36:27.733566Z","iopub.execute_input":"2024-04-22T22:36:27.734177Z","iopub.status.idle":"2024-04-22T22:36:27.740687Z","shell.execute_reply.started":"2024-04-22T22:36:27.734141Z","shell.execute_reply":"2024-04-22T22:36:27.739453Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\nprint(device)","metadata":{"execution":{"iopub.status.busy":"2024-04-22T22:36:27.939110Z","iopub.execute_input":"2024-04-22T22:36:27.939507Z","iopub.status.idle":"2024-04-22T22:36:27.946246Z","shell.execute_reply.started":"2024-04-22T22:36:27.939478Z","shell.execute_reply":"2024-04-22T22:36:27.944938Z"},"trusted":true},"execution_count":3,"outputs":[{"name":"stdout","text":"cpu\n","output_type":"stream"}]},{"cell_type":"code","source":"!git clone https://github.com/anonymousgithub09/pygln.git\n%cd pygln\n!pip install -e 
.","metadata":{"execution":{"iopub.status.busy":"2024-04-22T22:36:32.041098Z","iopub.execute_input":"2024-04-22T22:36:32.041487Z","iopub.status.idle":"2024-04-22T22:36:56.562648Z","shell.execute_reply.started":"2024-04-22T22:36:32.041457Z","shell.execute_reply":"2024-04-22T22:36:56.561571Z"},"trusted":true},"execution_count":4,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/pty.py:89: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n  pid, fd = os.forkpty()\n","output_type":"stream"},{"name":"stdout","text":"Cloning into 'pygln'...\nremote: Enumerating objects: 102, done.\u001b[K\nremote: Counting objects: 100% (102/102), done.\u001b[K\nremote: Compressing objects: 100% (69/69), done.\u001b[K\nremote: Total 102 (delta 35), reused 88 (delta 29), pack-reused 0\u001b[K\nReceiving objects: 100% (102/102), 653.31 KiB | 11.26 MiB/s, done.\nResolving deltas: 100% (35/35), done.\n/kaggle/working/pygln\nObtaining file:///kaggle/working/pygln\n  Preparing metadata (setup.py) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: jax in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (0.4.26)\nRequirement already satisfied: jaxlib in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (0.4.26)\nCollecting jupyter (from PyGLN==0.1.0)\n  Downloading jupyter-1.0.0-py2.py3-none-any.whl.metadata (995 bytes)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (1.26.4)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (2.2.2)\nRequirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (1.2.2)\nRequirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (1.11.4)\nRequirement already satisfied: tensorflow in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (2.15.0)\nRequirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (2.1.2+cpu)\nRequirement already satisfied: torchvision in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (0.16.2+cpu)\nRequirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from PyGLN==0.1.0) (4.66.1)\nRequirement already satisfied: ml-dtypes>=0.2.0 in /opt/conda/lib/python3.10/site-packages (from jax->PyGLN==0.1.0) (0.2.0)\nRequirement already satisfied: opt-einsum in /opt/conda/lib/python3.10/site-packages (from jax->PyGLN==0.1.0) (3.3.0)\nRequirement already satisfied: notebook in /opt/conda/lib/python3.10/site-packages (from jupyter->PyGLN==0.1.0) (6.5.4)\nRequirement already satisfied: qtconsole in /opt/conda/lib/python3.10/site-packages (from jupyter->PyGLN==0.1.0) (5.5.1)\nRequirement already satisfied: jupyter-console in /opt/conda/lib/python3.10/site-packages (from jupyter->PyGLN==0.1.0) (6.6.3)\nRequirement already satisfied: nbconvert in /opt/conda/lib/python3.10/site-packages (from jupyter->PyGLN==0.1.0) (6.4.5)\nRequirement already satisfied: ipykernel in /opt/conda/lib/python3.10/site-packages (from jupyter->PyGLN==0.1.0) (6.28.0)\nRequirement already satisfied: ipywidgets in /opt/conda/lib/python3.10/site-packages (from jupyter->PyGLN==0.1.0) (7.7.1)\nRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->PyGLN==0.1.0) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in 
/opt/conda/lib/python3.10/site-packages (from pandas->PyGLN==0.1.0) (2023.3.post1)\nRequirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas->PyGLN==0.1.0) (2023.4)\nRequirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->PyGLN==0.1.0) (1.4.0)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->PyGLN==0.1.0) (3.2.0)\nRequirement already satisfied: absl-py>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (1.4.0)\nRequirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (1.6.3)\nRequirement already satisfied: flatbuffers>=23.5.26 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (23.5.26)\nRequirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (0.5.4)\nRequirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (0.2.0)\nRequirement already satisfied: h5py>=2.9.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (3.10.0)\nRequirement already satisfied: libclang>=13.0.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (16.0.6)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (21.3)\nRequirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (3.20.3)\nRequirement already satisfied: setuptools in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (69.0.3)\nRequirement already satisfied: six>=1.12.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (1.16.0)\nRequirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (2.4.0)\nRequirement already satisfied: typing-extensions>=3.6.6 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (4.9.0)\nRequirement already satisfied: wrapt<1.15,>=1.11.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (1.14.1)\nRequirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (0.35.0)\nRequirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (1.60.0)\nRequirement already satisfied: tensorboard<2.16,>=2.15 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (2.15.1)\nRequirement already satisfied: tensorflow-estimator<2.16,>=2.15.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow->PyGLN==0.1.0) (2.15.0)\nCollecting keras<2.16,>=2.15.0 (from tensorflow->PyGLN==0.1.0)\n  Downloading keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch->PyGLN==0.1.0) (3.13.1)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch->PyGLN==0.1.0) (1.12)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch->PyGLN==0.1.0) (3.2.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages 
(from torch->PyGLN==0.1.0) (3.1.2)\nRequirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch->PyGLN==0.1.0) (2024.2.0)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from torchvision->PyGLN==0.1.0) (2.31.0)\nRequirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/conda/lib/python3.10/site-packages (from torchvision->PyGLN==0.1.0) (9.5.0)\nRequirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from astunparse>=1.6.0->tensorflow->PyGLN==0.1.0) (0.42.0)\nRequirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (2.26.1)\nRequirement already satisfied: google-auth-oauthlib<2,>=0.5 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (1.2.0)\nRequirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (3.5.2)\nRequirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (0.7.2)\nRequirement already satisfied: werkzeug>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (3.0.2)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->torchvision->PyGLN==0.1.0) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->torchvision->PyGLN==0.1.0) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->torchvision->PyGLN==0.1.0) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->torchvision->PyGLN==0.1.0) (2024.2.2)\nRequirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (0.2.1)\nRequirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (1.8.0)\nRequirement already satisfied: ipython>=7.23.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (8.20.0)\nRequirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (7.4.9)\nRequirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (5.7.1)\nRequirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (0.1.6)\nRequirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (1.5.8)\nRequirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (5.9.3)\nRequirement already satisfied: pyzmq>=24 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (24.0.1)\nRequirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (6.3.3)\nRequirement already satisfied: traitlets>=5.4.0 in /opt/conda/lib/python3.10/site-packages (from ipykernel->jupyter->PyGLN==0.1.0) (5.9.0)\nRequirement already satisfied: 
ipython-genutils~=0.2.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets->jupyter->PyGLN==0.1.0) (0.2.0)\nRequirement already satisfied: widgetsnbextension~=3.6.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets->jupyter->PyGLN==0.1.0) (3.6.6)\nRequirement already satisfied: jupyterlab-widgets>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets->jupyter->PyGLN==0.1.0) (3.0.9)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch->PyGLN==0.1.0) (2.1.3)\nRequirement already satisfied: prompt-toolkit>=3.0.30 in /opt/conda/lib/python3.10/site-packages (from jupyter-console->jupyter->PyGLN==0.1.0) (3.0.42)\nRequirement already satisfied: pygments in /opt/conda/lib/python3.10/site-packages (from jupyter-console->jupyter->PyGLN==0.1.0) (2.17.2)\nRequirement already satisfied: mistune<2,>=0.8.1 in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (0.8.4)\nRequirement already satisfied: jupyterlab-pygments in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (0.3.0)\nRequirement already satisfied: nbformat>=4.4 in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (5.9.2)\nRequirement already satisfied: entrypoints>=0.2.2 in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (0.4)\nRequirement already satisfied: bleach in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (6.1.0)\nRequirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (1.5.0)\nRequirement already satisfied: testpath in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (0.6.0)\nRequirement already satisfied: defusedxml in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (0.7.1)\nRequirement already satisfied: beautifulsoup4 in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (4.12.2)\nRequirement already satisfied: nbclient<0.6.0,>=0.5.0 in /opt/conda/lib/python3.10/site-packages (from nbconvert->jupyter->PyGLN==0.1.0) (0.5.13)\nRequirement already satisfied: argon2-cffi in /opt/conda/lib/python3.10/site-packages (from notebook->jupyter->PyGLN==0.1.0) (23.1.0)\nRequirement already satisfied: Send2Trash>=1.8.0 in /opt/conda/lib/python3.10/site-packages (from notebook->jupyter->PyGLN==0.1.0) (1.8.2)\nRequirement already satisfied: terminado>=0.8.3 in /opt/conda/lib/python3.10/site-packages (from notebook->jupyter->PyGLN==0.1.0) (0.18.0)\nRequirement already satisfied: prometheus-client in /opt/conda/lib/python3.10/site-packages (from notebook->jupyter->PyGLN==0.1.0) (0.19.0)\nRequirement already satisfied: nbclassic>=0.4.7 in /opt/conda/lib/python3.10/site-packages (from notebook->jupyter->PyGLN==0.1.0) (1.0.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging->tensorflow->PyGLN==0.1.0) (3.1.1)\nRequirement already satisfied: qtpy>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from qtconsole->jupyter->PyGLN==0.1.0) (2.4.1)\nRequirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch->PyGLN==0.1.0) (1.3.0)\nRequirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (4.2.4)\nRequirement already 
satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (0.3.0)\nRequirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (4.9)\nRequirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (1.3.1)\nRequirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (5.1.1)\nRequirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (0.19.1)\nRequirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (0.6.2)\nRequirement already satisfied: exceptiongroup in /opt/conda/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (1.2.0)\nRequirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (4.8.0)\nRequirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter->PyGLN==0.1.0) (4.2.0)\nRequirement already satisfied: jupyter-server>=1.8 in /opt/conda/lib/python3.10/site-packages (from nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (2.12.5)\nRequirement already satisfied: notebook-shim>=0.2.3 in /opt/conda/lib/python3.10/site-packages (from nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (0.2.3)\nRequirement already satisfied: fastjsonschema in /opt/conda/lib/python3.10/site-packages (from nbformat>=4.4->nbconvert->jupyter->PyGLN==0.1.0) (2.19.1)\nRequirement already satisfied: jsonschema>=2.6 in /opt/conda/lib/python3.10/site-packages (from nbformat>=4.4->nbconvert->jupyter->PyGLN==0.1.0) (4.20.0)\nRequirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit>=3.0.30->jupyter-console->jupyter->PyGLN==0.1.0) (0.2.13)\nRequirement already satisfied: ptyprocess in /opt/conda/lib/python3.10/site-packages (from terminado>=0.8.3->notebook->jupyter->PyGLN==0.1.0) (0.7.0)\nRequirement already satisfied: argon2-cffi-bindings in /opt/conda/lib/python3.10/site-packages (from argon2-cffi->notebook->jupyter->PyGLN==0.1.0) (21.2.0)\nRequirement already satisfied: soupsieve>1.2 in /opt/conda/lib/python3.10/site-packages (from beautifulsoup4->nbconvert->jupyter->PyGLN==0.1.0) (2.5)\nRequirement already satisfied: webencodings in /opt/conda/lib/python3.10/site-packages (from bleach->nbconvert->jupyter->PyGLN==0.1.0) (0.5.1)\nRequirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (0.8.3)\nRequirement already satisfied: attrs>=22.2.0 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat>=4.4->nbconvert->jupyter->PyGLN==0.1.0) (23.2.0)\nRequirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat>=4.4->nbconvert->jupyter->PyGLN==0.1.0) (2023.12.1)\nRequirement already satisfied: referencing>=0.28.4 in /opt/conda/lib/python3.10/site-packages (from 
jsonschema>=2.6->nbformat>=4.4->nbconvert->jupyter->PyGLN==0.1.0) (0.32.1)\nRequirement already satisfied: rpds-py>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat>=4.4->nbconvert->jupyter->PyGLN==0.1.0) (0.16.2)\nRequirement already satisfied: anyio>=3.1.0 in /opt/conda/lib/python3.10/site-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (4.2.0)\nRequirement already satisfied: jupyter-events>=0.9.0 in /opt/conda/lib/python3.10/site-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (0.9.0)\nRequirement already satisfied: jupyter-server-terminals in /opt/conda/lib/python3.10/site-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (0.5.1)\nRequirement already satisfied: overrides in /opt/conda/lib/python3.10/site-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (7.4.0)\nRequirement already satisfied: websocket-client in /opt/conda/lib/python3.10/site-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (1.7.0)\nRequirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (0.5.1)\nRequirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow->PyGLN==0.1.0) (3.2.2)\nRequirement already satisfied: cffi>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->notebook->jupyter->PyGLN==0.1.0) (1.16.0)\nRequirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (2.0.1)\nRequirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (2.4.1)\nRequirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyter->PyGLN==0.1.0) (0.2.2)\nRequirement already satisfied: sniffio>=1.1 in /opt/conda/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (1.3.0)\nRequirement already satisfied: pycparser in /opt/conda/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook->jupyter->PyGLN==0.1.0) (2.21)\nRequirement already satisfied: python-json-logger>=2.0.4 in /opt/conda/lib/python3.10/site-packages (from jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (2.0.7)\nRequirement already satisfied: pyyaml>=5.3 in /opt/conda/lib/python3.10/site-packages (from jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (6.0.1)\nRequirement already satisfied: rfc3339-validator in /opt/conda/lib/python3.10/site-packages (from jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (0.1.4)\nRequirement already satisfied: rfc3986-validator>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (0.1.1)\nRequirement already satisfied: fqdn in /opt/conda/lib/python3.10/site-packages (from 
jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (1.5.1)\nRequirement already satisfied: isoduration in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (20.11.0)\nRequirement already satisfied: jsonpointer>1.13 in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (2.4)\nRequirement already satisfied: uri-template in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (1.3.0)\nRequirement already satisfied: webcolors>=1.11 in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (1.13)\nRequirement already satisfied: arrow>=0.15.0 in /opt/conda/lib/python3.10/site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (1.3.0)\nRequirement already satisfied: types-python-dateutil>=2.8.10 in /opt/conda/lib/python3.10/site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook->jupyter->PyGLN==0.1.0) (2.8.19.20240106)\nDownloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)\nDownloading keras-2.15.0-py3-none-any.whl (1.7 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n\u001b[?25hInstalling collected packages: keras, jupyter, PyGLN\n  Attempting uninstall: keras\n    Found existing installation: keras 3.2.1\n    Uninstalling keras-3.2.1:\n      Successfully uninstalled keras-3.2.1\n  Running setup.py develop for PyGLN\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ntensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed PyGLN-0.1.0 jupyter-1.0.0 keras-2.15.0\n","output_type":"stream"}]},{"cell_type":"code","source":"if DATA_NAME ==\"syn\":\n    #@title Synthetic data\n    def set_npseed(seed):\n        np.random.seed(seed)\n\n\n    def set_torchseed(seed):\n        torch.manual_seed(seed)\n        torch.cuda.manual_seed(seed)\n        torch.cuda.manual_seed_all(seed)\n        torch.backends.cudnn.deterministic = True\n        torch.backends.cudnn.benchmark = False\n\n\n    #classification data\n\n    def data_gen_decision_tree(num_data=1000, dim=2, seed=0, w_list=None, b_list=None,vals=None, num_levels=2):        \n        set_npseed(seed=seed)\n\n        # Construct a complete decision tree with 2**num_levels-1 internal nodes,\n        # e.g. 
num_levels=2 means there are 3 internal nodes.\n        # w_list, b_list is a list of size equal to num_internal_nodes\n        # vals is a list of size equal to num_leaf_nodes, with values +1 or 0\n        num_internal_nodes = 2**num_levels - 1\n        num_leaf_nodes = 2**num_levels\n        stats = np.zeros(num_internal_nodes+num_leaf_nodes) #stores the num of datapoints at each node so at 0(root) all data points will be present\n\n        if vals is None: #when val i.e., labels are not provided make the labels dynamically\n            vals = np.arange(0,num_internal_nodes+num_leaf_nodes,1,dtype=np.int32)%2 #assign 0 or 1 label to the node based on whether its numbering is even or odd\n            vals[:num_internal_nodes] = -99 #we put -99 to the internal nodes as only the values of leaf nodes are counted\n\n        if w_list is None: #if the w values of the nodes (hyperplane eqn) are not provided then generate dynamically\n            w_list = np.random.standard_normal((num_internal_nodes, dim))\n            w_list = w_list/np.linalg.norm(w_list, axis=1)[:, None] #unit norm w vects\n            b_list = np.zeros((num_internal_nodes))\n\n        '''\n        np.random.random_sample\n        ========================\n        Return random floats in the half-open interval [0.0, 1.0).\n\n        Results are from the \"continuous uniform\" distribution over the\n        stated interval.  To sample :math:`Unif[a, b), b > a` multiply\n        the output of `random_sample` by `(b-a)` and add `a`::\n\n            (b - a) * random_sample() + a\n        '''\n\n    #     data_x = np.random.random_sample((num_data, dim))*2 - 1. #generate the datas in range -1 to +1\n    #     relevant_stats = data_x @ w_list.T + b_list #stores the x.wT+b value of each nodes for all data points(num_data x num_nodes) to check if > 0 i.e will follow right sub tree route or <0 and will follow left sub tree route\n    #     curr_index = np.zeros(shape=(num_data), dtype=int) #stores the curr index for each data point from root to leaf. So initially a datapoint starts from root but then it can go to right or left if it goes to right its curr index will become 2 from 0 else 1 from 0 then in next iteration from say 2 it goes to right then it will become 6\n\n        data_x = np.random.standard_normal((num_data, dim))\n        data_x /= np.sqrt(np.sum(data_x**2, axis=1, keepdims=True))\n        relevant_stats = data_x @ w_list.T + b_list\n        curr_index = np.zeros(shape=(num_data), dtype=int)\n\n        for level in range(num_levels):\n            nodes_curr_level=list(range(2**level - 1,2**(level+1)-1  ))\n            for el in nodes_curr_level:\n    #             b_list[el]=-1*np.median(relevant_stats[curr_index==el,el])\n                relevant_stats[:,el] += b_list[el]\n            decision_variable = np.choose(curr_index, relevant_stats.T) #based on the curr index will choose the corresponding node value of the datapoint\n\n            # Go down and right if wx+b>0 down and left otherwise.\n            # i.e. 0 -> 1 if w[0]x+b[0]<0 and 0->2 otherwise\n            curr_index = (curr_index+1)*2 - (1-(decision_variable > 0)) #update curr index based on the desc_variable\n\n\n        bound_dist = np.min(np.abs(relevant_stats), axis=1) #finds the abs value of the minm node value of a datapoint. 
If some node value of a datapoint is 0 then that data point exactly passes through a hyperplane and we remove all such datapoints\n        thres = threshold\n        labels = vals[curr_index] #finally labels for each datapoint is assigned after traversing the whole tree\n\n        data_x_pruned = data_x[bound_dist>thres] #to distingush the hyperplanes seperately for 0 1 labels (classification)\n        #removes all the datapoints that passes through a node hyperplane\n        labels_pruned = labels[bound_dist>thres]\n        relevant_stats = np.sign(data_x_pruned @ w_list.T + b_list) #storing only +1 or -1 for a particular node if it is active or not\n        nodes_active = np.zeros((len(data_x_pruned),  num_internal_nodes+num_leaf_nodes), dtype=np.int32) #stores node actv or not for a data\n\n        for node in range(num_internal_nodes+num_leaf_nodes):\n            if node==0:\n                stats[node]=len(relevant_stats) #for root node all datapoints are present\n                nodes_active[:,0]=1 #root node all data points active status is +1\n                continue\n            parent = (node-1)//2\n            nodes_active[:,node]=nodes_active[:,parent]\n            right_child = node-(parent*2)-1 # 0 means left, 1 means right 1 has children 3,4\n            #finds if it is a right child or left of the parent\n            if right_child==1:\n                nodes_active[:,node] *= relevant_stats[:,parent]>0 #if parent node val was >0 then this right child of parent is active\n            if right_child==0:\n                nodes_active[:,node] *= relevant_stats[:,parent]<0 #else left is active\n            stats = nodes_active.sum(axis=0) #updates the status i.e., no of datapoints active in that node (root has all active then gradually divided in left right)\n        return ((data_x_pruned, labels_pruned), (w_list, b_list, vals), stats)\n\n    # Define dictionaries\n    ##Data creation\n    seed=365\n    num_levels=4\n    threshold = 0 #data seperation distance\n    input_dim= 500\n    num_data= 100000\n\n    print(f\"Running code for input_dim={input_dim}, num_data={num_data}\")\n\n    ((data_x, labels), (w_list, b_list, vals), stats) = data_gen_decision_tree(\n                                                dim=input_dim, seed=seed, num_levels=num_levels,\n                                                num_data=num_data)\n    seed_set=seed\n    w_list_old = np.array(w_list)\n    b_list_old = np.array(b_list)\n    print(sum(labels==1))\n    print(sum(labels==0))\n    print(\"Seed= \",seed_set)\n    num_data = len(data_x)\n    num_train= num_data//2\n    num_vali = num_data//4\n    num_test = num_data//4\n    train_data = data_x[:num_train,:]\n    train_data_labels = labels[:num_train]\n\n    vali_data = data_x[num_train:num_train+num_vali,:]\n    vali_data_labels = labels[num_train:num_train+num_vali]\n\n    test_data = data_x[num_train+num_vali :,:]\n    test_data_labels = labels[num_train+num_vali :]\n    \n#     n_polytopes_list = [100]\n#     m_list = [100]\n#     for n_polytopes in n_polytopes_list:\n#         for m in m_list:\n#             print(f\"Running code for n_polytopes={n_polytopes}, m={m}\")\n\n    X_train = train_data\n    X_test = test_data\n\n    y_train = train_data_labels.astype(int)\n    y_test = test_data_labels.astype(int)\n    from pygln import GLN\n\n    model_3 = GLN(backend='numpy', layer_sizes=[10, 10, 10, 1], input_size=X_train.shape[1],context_map_size=4,learning_rate=2.5e-4)\n    batch_size = 100\n#     for n in range(X_train.shape[0]):\n#      
   pred = model_3.predict(X_train[n:n+1], target=y_train[n:n+1])\n    for epoch in range(10):\n        for n in range(np.ceil(X_train.shape[0] / batch_size).astype(int)):\n            batch_x = X_train[n * batch_size: (n + 1) * batch_size]\n            batch_y = y_train[n * batch_size: (n + 1) * batch_size]\n            pred = model_3.predict(batch_x,batch_y)\n        print(epoch)\n    print(\"Training done----------------------------------------\")\n    preds = []\n    \n    for n in range(np.ceil(X_test.shape[0] / batch_size).astype(int)):\n        batch = X_test[n * batch_size: (n + 1) * batch_size]\n        pred = model_3.predict(batch)\n        preds.append(pred)\n\n\n    print(accuracy_score(y_test, np.concatenate(preds, axis=0)))","metadata":{"execution":{"iopub.status.busy":"2024-04-23T01:54:04.860827Z","iopub.execute_input":"2024-04-23T01:54:04.862414Z","iopub.status.idle":"2024-04-23T01:56:59.645462Z","shell.execute_reply.started":"2024-04-23T01:54:04.862340Z","shell.execute_reply":"2024-04-23T01:56:59.644056Z"},"trusted":true},"execution_count":29,"outputs":[{"name":"stdout","text":"Running code for input_dim=500, num_data=100000\n48718\n51282\nSeed=  365\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTraining done----------------------------------------\n0.59608\n","output_type":"stream"}]},{"cell_type":"code","source":"if DATA_NAME ==\"UCI\":\n    import random\n    import requests\n    import os\n    from tqdm import tqdm\n    import numpy as np\n    import gzip\n    import shutil\n    import tarfile\n    import bz2\n    import pandas as pd\n    import gzip\n    import shutil\n    import warnings\n\n    from pathlib import Path\n    from sklearn.datasets import load_svmlight_file\n    from sklearn.model_selection import train_test_split\n    from sklearn.datasets import make_swiss_roll\n    from sklearn.preprocessing import QuantileTransformer\n\n    from category_encoders import LeaveOneOutEncoder\n    from category_encoders.ordinal import OrdinalEncoder\n    import os\n    import zipfile\n    import shutil\n    import urllib.request\n    import numpy as np\n    import pandas as pd\n    from sklearn.preprocessing import StandardScaler, LabelEncoder\n    from sklearn.metrics import accuracy_score\n    from sklearn.model_selection import train_test_split, GridSearchCV\n    from sklearn.linear_model import LogisticRegression\n    from sklearn.svm import SVC\n    from sklearn.naive_bayes import GaussianNB\n    from sklearn.neighbors import KNeighborsClassifier\n    from sklearn.neural_network import MLPClassifier\n    from sklearn.tree import DecisionTreeClassifier\n    from sklearn.neighbors import NearestCentroid\n    from scipy.io import arff\n    !pip install xlrd\n\n\n    def preprocess_data_adult(data_path):\n    # Read the data into a DataFrame\n        columns = [\n            \"age\", \"workclass\", \"fnlwgt\", \"education\", \"education-num\", \"marital-status\",\n            \"occupation\", \"relationship\", \"race\", \"sex\", \"capital-gain\", \"capital-loss\",\n            \"hours-per-week\", \"native-country\", \"income\"\n        ]\n        df = pd.read_csv(data_path, names=columns, na_values=[\" ?\"])\n\n        # Drop rows with missing values\n        df.dropna(inplace=True)\n\n        # Convert categorical features using Label Encoding\n        categorical_columns = [\"workclass\", \"education\", \"marital-status\", \"occupation\", \"relationship\", \"race\", \"sex\", \"native-country\"]\n        label_encoders = {}\n        for col in categorical_columns:\n            le = 
LabelEncoder()\n            df[col] = le.fit_transform(df[col])\n            label_encoders[col] = le\n\n        # Encode the target variable\n        df[\"income\"] = df[\"income\"].apply(lambda x: 1 if x == \" >50K\" else 0)\n\n        return df\n\n    def preprocess_data_bank_marketing(data):\n        # Convert categorical features using Label Encoding\n        label_encoders = {}\n        for col in data.select_dtypes(include=['object']).columns:\n            le = LabelEncoder()\n            data[col] = le.fit_transform(data[col])\n            label_encoders[col] = le\n\n        return data\n\n    def preprocess_data_credit_card_defaults(data):\n        # Convert categorical features using one-hot encoding\n        data = pd.get_dummies(data, columns=[\"SEX\", \"EDUCATION\", \"MARRIAGE\"], drop_first=True)\n\n        # Standardize numerical features\n        scaler = StandardScaler()\n        data[[\"LIMIT_BAL\", \"AGE\", \"PAY_0\", \"PAY_2\", \"PAY_3\", \"PAY_4\", \"PAY_5\", \"PAY_6\", \"BILL_AMT1\",\n            \"BILL_AMT2\", \"BILL_AMT3\", \"BILL_AMT4\", \"BILL_AMT5\", \"BILL_AMT6\", \"PAY_AMT1\", \"PAY_AMT2\",\n            \"PAY_AMT3\", \"PAY_AMT4\", \"PAY_AMT5\", \"PAY_AMT6\"]] = scaler.fit_transform(\n            data[[\"LIMIT_BAL\", \"AGE\", \"PAY_0\", \"PAY_2\", \"PAY_3\", \"PAY_4\", \"PAY_5\", \"PAY_6\", \"BILL_AMT1\",\n                \"BILL_AMT2\", \"BILL_AMT3\", \"BILL_AMT4\", \"BILL_AMT5\", \"BILL_AMT6\", \"PAY_AMT1\", \"PAY_AMT2\",\n                \"PAY_AMT3\", \"PAY_AMT4\", \"PAY_AMT5\", \"PAY_AMT6\"]])\n\n        return data\n    \n\n    def fetch_ADULT(data_dir=\"./ADULT_DATA\"):\n        print(\"---------------------ADULT--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n            \n        # URL of the dataset zip file\n        url = \"https://archive.ics.uci.edu/static/public/2/adult.zip\"\n        zip_file_path = os.path.join(data_dir, \"adult.zip\")\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n\n        # Preprocess the data\n        train_data_path = os.path.join(data_dir, \"adult.data\")\n    #     test_data_path = os.path.join(data_dir, \"adult.test\")\n        df_train = preprocess_data_adult(train_data_path)\n    #     df_test = preprocess_data_adult(test_data_path)\n\n        # Split the data into train, validation, and test sets\n        X = df_train.drop(\"income\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df_train[\"income\"]\n        \n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n    #     X_test = df_test.drop(\"income\", axis=1)\n    #     y_test = df_test[\"income\"]\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents using shutil.rmtree()\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), 
y_train=y_train, X_valid=X_val.astype('float32'), y_valid=y_val, X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n    def fetch_bank_marketing(data_dir=\"./BANK\"):\n        print(\"---------------------BANK--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the dataset zip file\n        url = \"https://archive.ics.uci.edu/static/public/222/bank+marketing.zip\"\n        zip_file_path = os.path.join(data_dir, \"bank_marketing.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n        \n        zip_file_path_bank_add = os.path.join(data_dir, \"bank-additional.zip\")\n        with zipfile.ZipFile(zip_file_path_bank_add, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n\n        # Get the extracted directory path\n        extracted_dir = os.path.join(data_dir, \"bank-additional\")\n\n        # Read the dataset\n        data = pd.read_csv(os.path.join(extracted_dir, \"bank-additional-full.csv\"), sep=';')\n\n        # Preprocess the data\n        data = preprocess_data_bank_marketing(data)\n\n        # Split the data into train, validation, and test sets\n        X = data.drop(\"y\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = data[\"y\"]\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,X_test=X_test.astype('float32'), y_test=y_test, X_valid = X_val.astype('float32'), y_valid = y_val\n        )\n\n    def fetch_credit_card_defaults(data_dir=\"./CREDIT\"):\n        print(\"---------------------CREDIT--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the dataset zip file\n        url = \"https://archive.ics.uci.edu/static/public/350/default+of+credit+card+clients.zip\"\n        zip_file_path = os.path.join(data_dir, \"credit_card_defaults.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n\n    #     # Get the extracted directory path\n    #     extracted_dir = os.path.join(data_dir, \"default+of+credit+card+clients\")\n\n        # Read the dataset\n        data = pd.read_excel(os.path.join(data_dir, \"default of credit card clients.xls\"), skiprows=1)\n\n        # Preprocess the data\n        data = preprocess_data_credit_card_defaults(data)\n\n        # Split the data into train, validation, and test sets\n        X = data.drop(\"default payment next month\", axis=1)\n 
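       # Note: here, as in the other fetch_* helpers, the StandardScaler is fit on the full feature\n        # matrix before the train/validation/test split, so scaling statistics are shared across splits;\n        # fitting the scaler on X_train only and transforming each split separately would avoid this.\n 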
       scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = data[\"default payment next month\"]\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train, X_valid=X_val.astype('float32'), y_valid=y_val , X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_gamma_telescope(data_dir=\"./TELESCOPE\"):\n        print(\"---------------------TELESCOPE--------------------------------------\")\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the dataset zip file\n        url = \"https://archive.ics.uci.edu/static/public/159/magic+gamma+telescope.zip\"\n        zip_file_path = os.path.join(data_dir, \"magic_gamma_telescope.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n        \n        # Load the data from CSV\n        data_path = os.path.join(data_dir, \"magic04.data\")\n        columns = [\n            \"fLength\", \"fWidth\", \"fSize\", \"fConc\", \"fConc1\", \"fAsym\", \"fM3Long\",\n            \"fM3Trans\", \"fAlpha\", \"fDist\", \"class\"\n        ]\n        data = pd.read_csv(data_path, header=None, names=columns)\n        \n        # Convert the class labels to binary format (g = gamma, h = hadron)\n        data[\"class\"] = data[\"class\"].map({\"g\": 1, \"h\": 0})\n        \n        # Split the data into features (X) and target (y)\n        X = data.drop(\"class\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = data[\"class\"]\n        \n        # Split the data into train, test, and validation sets\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')\n        \n        # Create a dictionary to store the data splits\n        data_splits = {\n            \"X_train\": X_train.astype('float32'), \"y_train\": y_train,\n            \"X_valid\": X_valid.astype('float32'), \"y_valid\": y_valid,\n            \"X_test\": X_test.astype('float32'), \"y_test\": y_test\n        }\n        \n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir)\n        \n        return data_splits\n\n    def fetch_rice_dataset(data_dir=\"./RICE\"):\n        print(\"---------------------RICE--------------------------------------\")\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the 
dataset zip file\n        url = \"https://archive.ics.uci.edu/static/public/545/rice+cammeo+and+osmancik.zip\"\n        zip_file_path = os.path.join(data_dir, \"rice_dataset.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n            \n        # Load the data from CSV\n        arff_file_name = os.path.join(data_dir, \"Rice_Cammeo_Osmancik.arff\")\n\n        \n        # Load the ARFF file using SciPy\n        data, meta = arff.loadarff(arff_file_name)\n        \n        df = pd.DataFrame(data)\n        df[\"Class\"] = df[\"Class\"].map({b'Cammeo': 1, b'Osmancik': 0})\n        \n        # Split the data into features (X) and target (y)\n        X = df.drop(\"Class\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[\"Class\"]\n        \n        # Split the data into train, test, and validation sets\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')\n        \n        # Create a dictionary to store the data splits\n        data_splits = {\n            \"X_train\": X_train.astype('float32'), \"y_train\": y_train,\n            \"X_valid\": X_valid.astype('float32'), \"y_valid\": y_valid,\n            \"X_test\": X_test.astype('float32'), \"y_test\": y_test\n        }\n        \n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir)\n        \n        return data_splits\n\n    def fetch_german_credit_data(data_dir=\"./GERMAN\"):\n        print(\"---------------------GERMAN--------------------------------------\")\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the dataset zip file\n        url = \"http://archive.ics.uci.edu/static/public/144/statlog+german+credit+data.zip\"\n        zip_file_path = os.path.join(data_dir, \"german_credit_data.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n            \n        # Load the data from CSV\n        data_path = os.path.join(data_dir, \"german.data\")\n\n        columns = [\n            \"checking_account_status\", \"duration_months\", \"credit_history\", \"purpose\",\n            \"credit_amount\", \"savings_account_bonds\", \"employment\", \"installment_rate\",\n            \"personal_status_sex\", \"other_debtors_guarantors\", \"present_residence\",\n            \"property\", \"age\", \"other_installment_plans\", \"housing\", \"existing_credits\",\n            \"job\", \"num_dependents\", \"own_telephone\", \"foreign_worker\", \"class\"\n        ]\n        data = pd.read_csv(data_path, sep=' ', header=None, names=columns)\n        \n        # Convert the class labels to binary format (1 = Good, 2 = Bad)\n        data[\"class\"] = data[\"class\"].map({1: 1, 2: 0})\n        \n        # Handle null values (replace 
with appropriate values)\n        data.fillna(method='ffill', inplace=True)  # Forward fill\n        \n        # Convert categorical variables to dummy variables\n        categorical_columns = [\n            \"checking_account_status\", \"credit_history\", \"purpose\", \"savings_account_bonds\",\n            \"employment\", \"personal_status_sex\", \"other_debtors_guarantors\", \"property\",\n            \"other_installment_plans\", \"housing\", \"job\", \"own_telephone\", \"foreign_worker\"\n        ]\n        data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)\n        \n        # Split the data into features (X) and target (y)\n        X = data.drop(\"class\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = data[\"class\"]\n        \n        # Split the data into train, test, and validation sets\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')\n        \n        # Create a dictionary to store the data splits\n        data_splits = {\n            \"X_train\": X_train.astype('float32'), \"y_train\": y_train,\n            \"X_valid\": X_valid.astype('float32'), \"y_valid\": y_valid,\n            \"X_test\": X_test.astype('float32'), \"y_test\": y_test\n        }\n        \n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir)\n        \n        return data_splits\n\n    def fetch_spambase_dataset(data_dir=\"./SPAM\"):\n        print(\"---------------------SPAM--------------------------------------\")\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the dataset zip file\n        url = \"http://archive.ics.uci.edu/static/public/94/spambase.zip\"\n        zip_file_path = os.path.join(data_dir, \"spambase.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n            \n        # Load the data from CSV\n        data_path = os.path.join(data_dir, \"spambase.data\")\n\n        columns = [\n            f\"f{i}\" for i in range(57)\n        ] + [\"spam\"]\n        data = pd.read_csv(data_path, header=None, names=columns)\n        \n        # Split the data into features (X) and target (y)\n        X = data.drop(\"spam\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = data[\"spam\"]\n        \n        # Split the data into train, test, and validation sets\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')\n        \n        # Create a dictionary to store the data splits\n        data_splits = {\n            \"X_train\": 
X_train.astype('float32'), \"y_train\": y_train,\n            \"X_valid\": X_valid.astype('float32'), \"y_valid\": y_valid,\n            \"X_test\": X_test.astype('float32'), \"y_test\": y_test\n        }\n        \n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir)\n        \n        return data_splits\n\n    def fetch_accelerometer_gyro_dataset(data_dir=\"./GYRO\"):\n        print(\"---------------------GYRO--------------------------------------\")\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the dataset zip file\n        url = \"https://archive.ics.uci.edu/static/public/755/accelerometer+gyro+mobile+phone+dataset.zip\"\n        zip_file_path = os.path.join(data_dir, \"accelerometer_gyro_dataset.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n            \n        # Load the data from CSV\n        data_path = os.path.join(data_dir, \"accelerometer_gyro_mobile_phone_dataset.csv\")\n        \n        data = pd.read_csv(data_path)\n        \n        # Convert categorical column to numeric (e.g., label encoding)\n        data[\"timestamp\"] = data[\"timestamp\"].astype(\"category\").cat.codes\n        \n        # Split the data into features (X) and target (y)\n        X = data.drop(\"Activity\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = data[\"Activity\"]\n        \n        # Split the data into train, test, and validation sets\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')\n        \n        # Create a dictionary to store the data splits\n        data_splits = {\n            \"X_train\": X_train.astype('float32'), \"y_train\": y_train,\n            \"X_valid\": X_valid.astype('float32'), \"y_valid\": y_valid,\n            \"X_test\": X_test.astype('float32'), \"y_test\": y_test\n        }\n        \n        # Remove the zip file\n        os.remove(zip_file_path)\n\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir)\n        \n        return data_splits\n\n    def fetch_swarm_behaviour(data_dir=\"./SWARM\"):\n        print(\"---------------------SWARM--------------------------------------\")\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        # URL of the dataset zip file\n        url = \"https://archive.ics.uci.edu/static/public/524/swarm+behaviour.zip\"\n        zip_file_path = os.path.join(data_dir, \"swarm_behaviour.zip\")\n\n        # Download the zip file\n        urllib.request.urlretrieve(url, zip_file_path)\n\n        # Extract the zip file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            zip_ref.extractall(data_dir)\n            \n        # Load the data from CSV\n        data_path = os.path.join(data_dir, \"Swarm Behavior Data/Grouped.csv\")\n        \n        data = pd.read_csv(data_path)\n        \n        # Split 
the data into features (X) and target (y)\n        X = data.drop(\"Class\", axis=1)\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = data[\"Class\"]\n        \n        # Split the data into train, test, and validation sets\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n        X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n        \n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_valid = (y_valid.values.reshape(-1) == 1).astype('int64')\n        \n        # Create a dictionary to store the data splits\n        data_splits = {\n            \"X_train\": X_train.astype('float32'), \"y_train\": y_train,\n            \"X_valid\": X_valid.astype('float32'), \"y_valid\": y_valid,\n            \"X_test\": X_test.astype('float32'), \"y_test\": y_test\n        }\n        \n        # Remove the zip file\n        os.remove(zip_file_path)\n        # Remove the extracted directory and its contents\n        shutil.rmtree(data_dir) \n        return data_splits\n\n    def fetch_openml_credit_data(data_dir=\"./OpenML_Credit\"):\n        print(\"---------------------OpenML_Credit DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22103185/credit.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"credit.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n\n        df[last_column] = df[last_column].astype(int)\n        \n    #     print(\"df\",df)\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_electricity_data(data_dir=\"./OpenML_Electricity\"):\n        print(\"---------------------OpenML_Electricity DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        
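# This fetcher (like the other fetch_openml_* helpers) downloads the ARFF file,\n        # loads it into a DataFrame, binarizes the last column (the target) to {0, 1},\n        # standardizes the features, and splits the rows into roughly 70% train,\n        # 21% test and 9% validation before deleting the downloaded files.\n        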
data_url = \"https://api.openml.org/data/v1/download/22103245/electricity.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"electricity.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n\n        df[last_column] = df[last_column].map({b'DOWN': 0, b'UP': 1})\n        \n    #     print(\"df\",df)\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_covertype_data(data_dir=\"./OpenML_Covertype\"):\n        print(\"---------------------OpenML_Covertype DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22103246/covertype.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"covertype.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n\n        df[last_column] = df[last_column].astype(int)\n        \n    #     print(\"df\",df)\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n 
       return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_pol_data(data_dir=\"./OpenML_Pol\"):\n        print(\"---------------------OpenML_Pol DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22103247/pol.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"pol.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n\n    #     print(\"df\",df)\n        \n        df[last_column] = df[last_column].map({b'N':0,b'P':1})\n        \n        \n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_house_16H_data(data_dir=\"./OpenML_House_16H\"):\n        print(\"---------------------OpenML_House_16H DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22103248/house_16H.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"house_16H.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n\n    #     print(\"df\",df)\n        df[last_column] = df[last_column].map({b'N':0,b'P':1})\n        \n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = 
train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_MiniBooNE_data(data_dir=\"./OpenML_MiniBooNE\"):\n        print(\"---------------------OpenML_MiniBooNE DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22103253/MiniBooNE.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"MiniBooNE.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n\n    #     print(\"df\",df)\n        \n        df[last_column] = df[last_column].map({b'False':0,b'True':1})\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_eye_movements_data(data_dir=\"./OpenML_Eye_movements\"):\n        print(\"---------------------OpenML_Eye_movements DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22103255/eye_movements.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"eye_movements.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert 
target variable to int\n        last_column = df.columns[-1]\n\n    #     print(\"df\",df)\n        df[last_column] = df[last_column].astype(int)\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_Diabetes130US_data(data_dir=\"./OpenML_Diabetes130US\"):\n        print(\"---------------------OpenML_Diabetes130US DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22111908/Diabetes130US.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"Diabetes130US.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n    #     print(\"df\",df)\n        df[last_column] = df[last_column].astype(int)\n        \n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_jannis_data(data_dir=\"./OpenML_Jannis\"):\n        print(\"---------------------OpenML_Jannis DATASET--------------------------------------\")\n        # 
Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22111907/jannis.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"jannis.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n    #     print(\"df\",df)\n\n        df[last_column] = df[last_column].astype(int)\n\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_Bioresponse_data(data_dir=\"./OpenML_Bioresponse\"):\n        print(\"---------------------OpenML_Bioresponse DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22111905/Bioresponse.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"Bioresponse.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n    #     print(\"df\",df)\n\n        df[last_column] = df[last_column].astype(int)\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        
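# (downloads are not cached: each call re-fetches the ARFF file and deletes it again afterwards)\n        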
os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_california_data(data_dir=\"./OpenML_California\"):\n        print(\"---------------------OpenML_California DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22111914/california.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"california.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n    #     print(\"df\",df)\n\n        df[last_column] = df[last_column].astype(int)\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n\n\n    def fetch_openml_heloc_data(data_dir=\"./OpenML_Heloc\"):\n        print(\"---------------------OpenML_Heloc DATASET--------------------------------------\")\n        # Create the data directory if it doesn't exist\n        if not os.path.exists(data_dir):\n            os.makedirs(data_dir)\n\n        data_url = \"https://api.openml.org/data/v1/download/22111912/heloc.arff\"\n        # Download the ARFF file\n        arff_file_path = os.path.join(data_dir, \"heloc.arff\")\n        urllib.request.urlretrieve(data_url, arff_file_path)\n\n        # Load ARFF file into DataFrame\n        data, meta = arff.loadarff(arff_file_path)\n        df = pd.DataFrame(data)\n        # Convert target variable to int\n        last_column = df.columns[-1]\n    #     print(\"df\",df)\n\n        df[last_column] = df[last_column].astype(int)\n\n        # Split the data into train, validation, and test sets\n        X = df.drop(last_column, axis=1)  # Assuming \"SeriousDlqin2yrs\" is the target variable\n        scaler = StandardScaler()\n        X = scaler.fit_transform(X)\n        y = df[last_column]\n\n        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, 
random_state=42)\n        X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.3, random_state=42)\n\n    #     y_train = y_train.astype('int64')\n    #     y_test = y_test.astype('int64')\n    #     y_val = y_val.astype('int64')\n\n        y_train = (y_train.values.reshape(-1) == 1).astype('int64')\n        y_test = (y_test.values.reshape(-1) == 1).astype('int64')\n        y_val = (y_val.values.reshape(-1) == 1).astype('int64')\n\n        # Remove the ARFF file\n        os.remove(arff_file_path)\n\n        # Remove the data directory\n        shutil.rmtree(data_dir)\n\n        return dict(\n            X_train=X_train.astype('float32'), y_train=y_train,\n            X_valid=X_val.astype('float32'), y_valid=y_val,\n            X_test=X_test.astype('float32'), y_test=y_test\n        )\n    \n    REAL_DATASETS = {\n        ####### UCI datasets ########\n        'ADULT': fetch_ADULT,\n        'bank_marketing': fetch_bank_marketing,\n        'credit_card_defaults': fetch_credit_card_defaults,\n        'gamma_telescope': fetch_gamma_telescope,\n        'rice_dataset': fetch_rice_dataset,\n        'german_credit_data': fetch_german_credit_data,\n        'spambase_dataset': fetch_spambase_dataset,\n        'accelerometer_gyro_dataset': fetch_accelerometer_gyro_dataset,\n        'swarm_behaviour': fetch_swarm_behaviour,\n        ######## OpenML Tabular Datasets ##########\n        'OpenML_Credit': fetch_openml_credit_data,\n        'OpenML_Electricity': fetch_openml_electricity_data,\n        'OpenML_Covertype': fetch_openml_covertype_data,\n        'OpenML_Pol': fetch_openml_pol_data,\n        'OpenML_House_16H': fetch_openml_house_16H_data,\n        'OpenML_MiniBooNE': fetch_openml_MiniBooNE_data,\n        'OpenML_Eye_movements': fetch_openml_eye_movements_data,\n        'OpenML_Diabetes130US': fetch_openml_Diabetes130US_data,\n        'OpenML_Jannis': fetch_openml_jannis_data,\n        'OpenML_Bioresponse': fetch_openml_Bioresponse_data,\n        'OpenML_California': fetch_openml_california_data,\n        'OpenML_Heloc': fetch_openml_heloc_data\n    }\n\n    class Dataset:\n        def __init__(self, dataset, data_path='./DATA', normalize=False, normalize_target=False, quantile_transform=False, quantile_noise=1e-3, in_features=None, out_features=None, flatten=False, **kwargs):\n            \"\"\"\n            Dataset is a dataclass that contains all training and evaluation data required for an experiment\n            :param dataset: a pre-defined dataset name (see REAL_DATASETS) or a custom dataset\n                Your dataset should be at (or will be downloaded into) {data_path}/{dataset}\n            :param data_path: a shared data folder path where the dataset is stored (or will be downloaded into)\n            :param normalize: standardize features by removing the mean and scaling to unit variance\n            :param normalize_target: standardize the target values in the same way\n            :param quantile_transform: whether to transform the feature distributions into normal distributions using a quantile transform\n            :param quantile_noise: magnitude of the quantile noise\n            :param in_features: which features to use as inputs\n            :param out_features: which features to reconstruct as output\n            :param flatten: whether to flatten instances into vectors\n            :param kwargs: depending on the dataset, you may select train size, test size or other params\n            \"\"\"\n\n            if dataset in REAL_DATASETS:\n                data_dict = REAL_DATASETS[dataset](Path(data_path) / dataset, **kwargs)\n\n               
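 # Unpack the precomputed train / valid / test splits returned by the fetcher\n               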
 self.X_train = data_dict['X_train']\n                self.y_train = data_dict['y_train']\n                self.X_valid = data_dict['X_valid']\n                self.y_valid = data_dict['y_valid']\n                self.X_test = data_dict['X_test']\n                self.y_test = data_dict['y_test']\n\n                if flatten:\n                    self.X_train, self.X_valid, self.X_test = self.X_train.reshape(len(self.X_train), -1), self.X_valid.reshape(len(self.X_valid), -1), self.X_test.reshape(len(self.X_test), -1)\n\n                if normalize:\n\n                    print(\"Normalize dataset\")\n                    axis = [0] + [i + 2 for i in range(self.X_train.ndim - 2)]\n                    self.mean = np.mean(self.X_train, axis=tuple(axis), dtype=np.float32)\n                    self.std = np.std(self.X_train, axis=tuple(axis), dtype=np.float32)\n\n                    # if constants, set std to 1\n                    self.std[self.std == 0.] = 1.\n\n                    if dataset not in ['ALOI']:\n                        self.X_train = (self.X_train - self.mean) / self.std\n                        self.X_valid = (self.X_valid - self.mean) / self.std\n                        self.X_test = (self.X_test - self.mean) / self.std\n\n                if quantile_transform:\n                    quantile_train = np.copy(self.X_train)\n                    if quantile_noise:\n                        stds = np.std(quantile_train, axis=0, keepdims=True)\n                        noise_std = quantile_noise / np.maximum(stds, quantile_noise)\n                        quantile_train += noise_std * np.random.randn(*quantile_train.shape)\n\n                    qt = QuantileTransformer(output_distribution='normal').fit(quantile_train)\n                    self.X_train = qt.transform(self.X_train)\n                    self.X_valid = qt.transform(self.X_valid)\n                    self.X_test = qt.transform(self.X_test)\n\n                if normalize_target:\n\n                    print(\"Normalize target value\")\n                    self.mean_y = np.mean(self.y_train, axis=0, dtype=np.float32)\n                    self.std_y = np.std(self.y_train, axis=0, dtype=np.float32)\n\n                    # if constants, set std to 1\n                    if self.std_y == 0.:\n                        self.std_y = 1.\n\n                    self.y_train = (self.y_train - self.mean_y) / self.std_y\n                    self.y_valid = (self.y_valid - self.mean_y) / self.std_y\n                    self.y_test = (self.y_test - self.mean_y) / self.std_y\n\n                if in_features is not None:\n                    self.X_train_in, self.X_valid_in, self.X_test_in = self.X_train[:, in_features], self.X_valid[:, in_features], self.X_test[:, in_features]\n\n                if out_features is not None:\n                    self.X_train_out, self.X_valid_out, self.X_test_out = self.X_train[:, out_features], self.X_valid[:, out_features], self.X_test[:, out_features]\n\n            elif dataset in TOY_DATASETS:\n                data_dict = toy_dataset(distr=dataset, **kwargs)\n\n                self.X = data_dict['X']\n                self.Y = data_dict['Y']\n                if 'labels' in data_dict:\n                    self.labels = data_dict['labels']\n\n            self.data_path = data_path\n            self.dataset = dataset\n\n#     n_polytopes_list = [2,5,10,12]\n#     m_list = [2,5,10,12]\n#     for n_polytopes in n_polytopes_list:\n#         for m in m_list:\n#             print(f\"Running code for 
n_polytopes={n_polytopes}, m={m}\")\n\n    # Alternative dataset lists (uncomment to evaluate more of the fetchers defined above):\n    # DATA_NAME_UCI = [\"ADULT\", \"bank_marketing\", \"credit_card_defaults\", \"gamma_telescope\", \"rice_dataset\", \"german_credit_data\", \"spambase_dataset\", \"accelerometer_gyro_dataset\", \"swarm_behaviour\"]\n    # DATA_NAME_UCI = [\"OpenML_Credit\", \"OpenML_Electricity\", \"OpenML_Covertype\", \"OpenML_Pol\", \"OpenML_House_16H\", \"OpenML_MiniBooNE\", \"OpenML_Eye_movements\", \"OpenML_Diabetes130US\", \"OpenML_Jannis\", \"OpenML_Bioresponse\", \"OpenML_California\", \"OpenML_Heloc\"]\n    DATA_NAME_UCI = [\"ADULT\"]\n\n    for data_name in DATA_NAME_UCI:\n        data = Dataset(data_name)\n        X_train = data.X_train\n        X_test = data.X_test\n\n        y_train = data.y_train\n        y_test = data.y_test\n\n#         dnn = DisjunctiveNormalNetwork(n_polytopes=n_polytopes, m=m)\n#         dnn.fit(X_train, y_train)\n#         y_pred = dnn.predict(X_test)\n#         print(accuracy_score(y_test, y_pred))\n\n        from pygln import GLN\n\n        model_3 = GLN(backend='numpy', layer_sizes=[10, 10, 10, 1], input_size=X_train.shape[1])\n        batch_size = 100\n\n        # In pygln, predict() updates the model online whenever a target is passed,\n        # so this loop both predicts on and trains from each mini-batch.\n        # (A purely online variant would call model_3.predict(X_train[n:n+1], target=y_train[n:n+1]) per sample.)\n        for epoch in range(100):\n            for n in range(np.ceil(X_train.shape[0] / batch_size).astype(int)):\n                batch_x = X_train[n * batch_size: (n + 1) * batch_size]\n                batch_y = y_train[n * batch_size: (n + 1) * batch_size]\n                model_3.predict(batch_x, target=batch_y)\n            print(f\"Epoch {epoch + 1}/100\")\n        print(\"Training done----------------------------------------\")\n\n        # Without a target, predict() only returns predictions (no update).\n        preds = []\n        for n in range(np.ceil(X_test.shape[0] / batch_size).astype(int)):\n            batch = X_test[n * batch_size: (n + 1) * batch_size]\n            pred = model_3.predict(batch)\n            preds.append(pred)\n\n        print(\"Test accuracy:\", accuracy_score(y_test, np.concatenate(preds, axis=0)))\n","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}