{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "79027f14",
   "metadata": {},
   "outputs": [],
   "source": [
    "import librosa\n",
    "import os\n",
    "import logging\n",
    "import numpy\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4bb455c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root logger: DEBUG and above goes to baseline.log. basicConfig does nothing when\n",
    "# the root logger already has handlers, so re-running this cell is harmless.\n",
    "logging.basicConfig(level=logging.DEBUG, filename=\"baseline.log\")\n",
    "logger = logging.getLogger(' ')\n",
    "# Echo records to the notebook output too. Attach the stream handler only once:\n",
    "# re-running the cell would otherwise stack handlers and print every message repeatedly.\n",
    "if not logger.handlers:\n",
    "    handler = logging.StreamHandler()\n",
    "    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')\n",
    "    handler.setFormatter(formatter)\n",
    "    logger.addHandler(handler)\n",
    "def file_load(wav_name, mono=False):\n",
    "    \"\"\"\n",
    "    Load a .wav file at its native sampling rate.\n",
    "\n",
    "    wav_name : str\n",
    "        target .wav file\n",
    "    mono : boolean\n",
    "        When loading a multi-channel file with this param True, the channels are merged into mono data\n",
    "\n",
    "    return : tuple( numpy.array( float ), sampling rate )\n",
    "        the (audio data, sampling rate) pair returned by librosa.load\n",
    "\n",
    "    raises : Exception\n",
    "        the original loading error, re-raised after it has been logged\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # sr=None keeps the file's own sampling rate instead of resampling\n",
    "        return librosa.load(wav_name, sr=None, mono=mono)\n",
    "    except Exception:\n",
    "        logger.error(\"file_broken or not exists!! : {}\".format(wav_name))\n",
    "        # Re-raise: falling through would return None, and callers that unpack\n",
    "        # the result into (y, sr) would then fail with an unrelated TypeError.\n",
    "        raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "98071a20",
   "metadata": {},
   "outputs": [],
   "source": [
    "#convert the wave files to the vectors\n",
    "def _img_loader(file_name,\n",
    "                n_mels=64,\n",
    "                frames=5,\n",
    "                n_fft=1024,\n",
    "                hop_length=512,\n",
    "                power=2.0):\n",
    "    \"\"\"\n",
    "    Convert a .wav file into an array of multi-frame log-mel feature vectors.\n",
    "\n",
    "    file_name : str\n",
    "        target .wav file\n",
    "    n_mels : int\n",
    "        number of mel bands\n",
    "    frames : int\n",
    "        number of consecutive spectrogram frames concatenated into one vector\n",
    "    n_fft : int\n",
    "        FFT window length passed to librosa\n",
    "    hop_length : int\n",
    "        hop between successive frames passed to librosa\n",
    "    power : float\n",
    "        exponent passed to librosa for the mel spectrogram\n",
    "\n",
    "    return : numpy.array( numpy.array( float32 ) )\n",
    "        vector array\n",
    "        * dataset.shape = (dataset_size, feature_vector_length)\n",
    "        * feature_vector_length = n_mels * frames\n",
    "        * shape (0, feature_vector_length) when the clip has fewer than `frames` frames\n",
    "    \"\"\"\n",
    "    # 01 calculate the number of dimensions\n",
    "    dims = n_mels * frames\n",
    "\n",
    "    # 02 generate melspectrogram using librosa\n",
    "    # NOTE(review): file_load defaults to mono=False; the slicing below treats the\n",
    "    # spectrogram as 2-D (n_mels, time), i.e. assumes a single-channel file - confirm.\n",
    "    y, sr = file_load(file_name)\n",
    "    mel_spectrogram = librosa.feature.melspectrogram(y=y,\n",
    "                                                     sr=sr,\n",
    "                                                     n_fft=n_fft,\n",
    "                                                     hop_length=hop_length,\n",
    "                                                     n_mels=n_mels,\n",
    "                                                     power=power)\n",
    "\n",
    "    # 03 convert melspectrogram to log mel energy (epsilon keeps log10 away from zero)\n",
    "    log_mel_spectrogram = 20.0 / power * numpy.log10(mel_spectrogram + sys.float_info.epsilon)\n",
    "\n",
    "    # 04 number of sliding windows of `frames` consecutive spectrogram frames\n",
    "    vector_array_size = log_mel_spectrogram.shape[1] - frames + 1\n",
    "\n",
    "    # 05 skip too short clips (same dtype as the normal return value)\n",
    "    if vector_array_size < 1:\n",
    "        return numpy.empty((0, dims), dtype='float32')\n",
    "\n",
    "    # 06 generate feature vectors by concatenating multiframes;\n",
    "    # allocated as float32 up front so no second copy is needed on return\n",
    "    vector_array = numpy.zeros((vector_array_size, dims), dtype='float32')\n",
    "    for t in range(frames):\n",
    "        # column block t holds frame (row + t) of every window\n",
    "        vector_array[:, n_mels * t: n_mels * (t + 1)] = log_mel_spectrogram[:, t: t + vector_array_size].T\n",
    "\n",
    "    return vector_array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "c6f93548",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample clip used to try out the feature extractor\n",
    "file = r'C:\\gkw\\experiments\\dataset\\fan\\anomaly\\anomaly_id_00_00000000.wav'\n",
    "img = _img_loader(\n",
    "    file,\n",
    "    n_mels=64,\n",
    "    frames=5,\n",
    "    n_fft=1024,\n",
    "    hop_length=512,\n",
    "    power=2.0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "a5ad0fc6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "a1d27e0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(309, 320)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numpy.shape(img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "c9c81480",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepend a length-1 leading axis: (rows, cols) -> (1, rows, cols)\n",
    "im = img.reshape((1, img.shape[0], img.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "6b264603",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 309, 320)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numpy.shape(im)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "71d2042a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "309"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "img.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "ec45bded",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[-17.968945, -16.779787, -23.098763, ..., -50.22804 ,\n",
       "         -49.933784, -51.86027 ],\n",
       "        [-14.07316 , -13.656538, -16.31966 , ..., -46.25612 ,\n",
       "         -48.73854 , -51.363716],\n",
       "        [ -8.643088,  -8.637122, -13.724204, ..., -48.76415 ,\n",
       "         -47.702866, -50.281937],\n",
       "        ...,\n",
       "        [ -6.805975,  -8.433449, -11.873254, ..., -49.922806,\n",
       "         -48.842865, -48.9484  ],\n",
       "        [ -9.982384, -13.013801, -19.881628, ..., -50.446342,\n",
       "         -52.01807 , -52.71633 ],\n",
       "        [-17.953579, -15.628152, -16.120722, ..., -47.86665 ,\n",
       "         -49.879562, -51.732254]]], dtype=float32)"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "im"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "d64f2ace",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extraction settings, then the mel spectrogram computed on its own for inspection\n",
    "n_mels, frames, n_fft, hop_length, power = 64, 5, 1024, 512, 2.0\n",
    "y, sr = file_load(file)\n",
    "mel_spectrogram = librosa.feature.melspectrogram(\n",
    "    y=y,\n",
    "    sr=sr,\n",
    "    n_fft=n_fft,\n",
    "    hop_length=hop_length,\n",
    "    n_mels=n_mels,\n",
    "    power=power,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "dc908dad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(64, 313)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mel_spectrogram.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "634ff914",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Log-mel energy; epsilon keeps log10 away from zero\n",
    "log_mel_spectrogram = (20.0 / power) * numpy.log10(\n",
    "    mel_spectrogram + sys.float_info.epsilon\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "6aed3ae9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(64, 313)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "log_mel_spectrogram.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "7ca8b94e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "313"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(log_mel_spectrogram[0, :])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "840fb810",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
