{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import time\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from claspy.segmentation import BinaryClaSPSegmentation\n",
    "from claspy.data_loader import load_tssb_dataset, load_has_dataset\n",
    "import ruptures as rpt \n",
    "from utilsCPD import *\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_TS_CL = load_has_dataset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n",
      "14\n",
      "7\n",
      "182\n",
      "225\n",
      "19\n",
      "185\n",
      "33\n",
      "36\n",
      "87\n",
      "88\n",
      "210\n",
      "11\n",
      "20\n",
      "23\n",
      "243\n",
      "247\n",
      "91\n",
      "95\n",
      "96\n",
      "100\n",
      "141\n",
      "91\n",
      "95\n",
      "245\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([0.6093832204863007,\n",
       "  0.6370112084403755,\n",
       "  0.6571305605290371,\n",
       "  0.6571305605290371,\n",
       "  0.6571305605290371],\n",
       " [15.782608695652174,\n",
       "  15.043478260869565,\n",
       "  13.956521739130435,\n",
       "  12.565217391304348,\n",
       "  11.652173913043478])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "METRICS = {}\n",
    "METRICS_25 = {}\n",
    "METRICS_50 = {}\n",
    "METRICS_150 = {}\n",
    "METRICS_200 = {}\n",
    "IDs = [10,14,7,182,225,19,185,33,36,87,88,210,11,20,23,243,247,91,95,96,100,141,91,95,245]\n",
    "\n",
    "\n",
    "for id in IDs:\n",
    "    print(id)\n",
    "    METRICS[id]  = {}\n",
    "    METRICS_25[id]  = {}\n",
    "    METRICS_50[id]  = {}\n",
    "    METRICS_150[id]  = {}\n",
    "    METRICS_200[id]  = {}\n",
    "    dataset, window_size, true_cps,labels, time_series = df_TS_CL.iloc[id,:]\n",
    "    algo_c = rpt.KernelCPD(kernel=\"rbf\", min_size=2).fit(\n",
    "        time_series\n",
    "    ) \n",
    "    penalty_value = 10 # beta\n",
    "    cps_KCPA_PELT = algo_c.predict(pen=penalty_value)\n",
    "    cps = np.array(cps_KCPA_PELT)[:-1]\n",
    "    f1,AUC_run,fp = f_measure({'0':true_cps},cps,100)\n",
    "    METRICS[id]['F1'] = f1\n",
    "    METRICS[id]['AUC'] = AUC_run\n",
    "    METRICS[id]['FP'] = fp\n",
    "    METRICS[id]['Covering'] = covering({'0':true_cps},cps,time_series.shape[0])\n",
    "    METRICS[id]['DD'] = detection_delay(true_cps,cps)[1]\n",
    "\n",
    "    f1,AUC_run,fp = f_measure({'0':true_cps},cps,25)\n",
    "    METRICS_25[id]['F1'] = f1\n",
    "    METRICS_25[id]['AUC'] = AUC_run\n",
    "    METRICS_25[id]['FP'] = fp\n",
    "    METRICS_25[id]['Covering'] = covering({'0':true_cps},cps,time_series.shape[0])\n",
    "    METRICS_25[id]['DD'] = detection_delay(true_cps,cps)[1]\n",
    "\n",
    "    f1,AUC_run,fp = f_measure({'0':true_cps},cps,50)\n",
    "    METRICS_50[id]['F1'] = f1\n",
    "    METRICS_50[id]['AUC'] = AUC_run\n",
    "    METRICS_50[id]['FP'] = fp\n",
    "    METRICS_50[id]['Covering'] = covering({'0':true_cps},cps,time_series.shape[0])\n",
    "    METRICS_50[id]['DD'] = detection_delay(true_cps,cps)[1]\n",
    "\n",
    "\n",
    "    f1,AUC_run,fp = f_measure({'0':true_cps},cps,150)\n",
    "    METRICS_150[id]['F1'] = f1\n",
    "    METRICS_150[id]['AUC'] = AUC_run\n",
    "    METRICS_150[id]['FP'] = fp\n",
    "    METRICS_150[id]['Covering'] = covering({'0':true_cps},cps,time_series.shape[0])\n",
    "    METRICS_150[id]['DD'] = detection_delay(true_cps,cps)[1]\n",
    "\n",
    "    f1,AUC_run,fp = f_measure({'0':true_cps},cps,200)\n",
    "    METRICS_200[id]['F1'] = f1\n",
    "    METRICS_200[id]['AUC'] = AUC_run\n",
    "    METRICS_200[id]['FP'] = fp\n",
    "    METRICS_200[id]['Covering'] = covering({'0':true_cps},cps,time_series.shape[0])\n",
    "    METRICS_200[id]['DD'] = detection_delay(true_cps,cps)[1]\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "F1,Covering,AUC,DD,FP = [],[],[],[],[]\n",
    "\n",
    "for k,metrics in METRICS.items():\n",
    "    if len(metrics)>1:\n",
    "        F1.append(metrics['F1'])\n",
    "        Covering.append(metrics['Covering'])\n",
    "        AUC.append(metrics['AUC'])\n",
    "        DD.append(metrics['DD'])\n",
    "        FP.append(metrics['FP'])\n",
    "\n",
    "\n",
    "\n",
    "def AUC_analysis(METRICS_SWD):\n",
    "    f1,cov,AUC,DD,FP = [],[],[],[],[]\n",
    "    for k,metrics in METRICS_SWD.items():\n",
    "        if len(metrics)>1:\n",
    "            f1.append(metrics['F1'])\n",
    "            cov.append(metrics['Covering'])\n",
    "            AUC.append(metrics['AUC'])\n",
    "            DD.append(metrics['DD'])\n",
    "            FP.append(metrics['FP'])\n",
    "\n",
    "    return np.mean(AUC),np.mean(FP)\n",
    "\n",
    "MEAN ,FPs= [], []\n",
    "\n",
    "for res in [METRICS_25,METRICS_50,METRICS,METRICS_150,METRICS_200]:\n",
    "    m,fp =AUC_analysis(res)\n",
    "    MEAN.append(m)\n",
    "    FPs.append(fp)\n",
    "\n",
    "MEAN,FPs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AUC 0.6571305605290371 0.1433144575916974\n",
      "COV 0.585138565707187 0.3198593082526968\n",
      "DD 334.4526572522555 428.20860009809405 0.0 1540.7948717948718\n",
      "FP 13.956521739130435 14.50180876535471 0 47\n"
     ]
    }
   ],
   "source": [
    "print('AUC',np.mean(AUC),np.std(AUC))\n",
    "print('COV',np.mean(Covering),np.std(Covering))\n",
    "print('DD',np.mean(DD),np.std(DD),np.min(DD),np.max(DD))\n",
    "print('FP',np.mean(FP),np.std(FP),np.min(FP),np.max(FP))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 153  827  919 1454 1527 2269 2358 2896 2960 3806 4107 6654 6742 7035\n",
      " 7213 7364 7492 8086]\n",
      "COV 0.6407256925917354\n",
      "AUC 0.5182186234817814\n",
      "FP 11\n",
      "DD 77.22222222222223\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[11, 11, 11, 8, 5]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Occupancy = pd.read_csv('./datasets/Occupancy/Occupancy.csv').iloc[:,1:]\n",
    "algo_c = rpt.KernelCPD(kernel=\"rbf\", min_size=2).fit(\n",
    "        Occupancy.to_numpy()\n",
    "    ) \n",
    "penalty_value = 50 # beta\n",
    "cps_KCPA_PELT = algo_c.predict(pen=penalty_value)\n",
    "cps = np.array(cps_KCPA_PELT)[:-1]\n",
    "print(cps)\n",
    "GroundTruth = [15,831,1455,2271,2895,3743,4271,5183,5759,6655,7215,8095]\n",
    "f1,AUC_run,fp = f_measure({'0':GroundTruth},cps,30)\n",
    "print('COV',covering({'0':GroundTruth},cps,Occupancy.shape[0]))\n",
    "print('AUC',AUC_run)\n",
    "print('FP',fp)\n",
    "print('DD',detection_delay(GroundTruth,cps)[1])\n",
    "\n",
    "\n",
    "\n",
    "KCPA_AUCS = [f_measure({'0':GroundTruth},cps,tau)[2] for tau in [10,30,50,75,100]]\n",
    "\n",
    "KCPA_AUCS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import zipfile\n",
    "import json\n",
    "\n",
    "zip_file_path = './datasets/MNISTSeq.zip'\n",
    "\n",
    "with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:\n",
    "    zip_contents = zip_ref.namelist()\n",
    "    json_file_name = zip_contents[0]\n",
    "    with zip_ref.open(json_file_name) as json_file:\n",
    "        data = json.load(json_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[200 400]\n",
      "[  4 200 374 381 400]\n",
      "[200 400]\n",
      "[  4 200 287 400]\n",
      "[200 400]\n",
      "[200 287 400 600]\n",
      "[200 400 404 600 774 781]\n",
      "[ 87 200 204 400 600]\n",
      "[200 287 400 600]\n",
      "[  4 200 287 400 600]\n",
      "[200 204 400 600 687 800]\n",
      "[174 181 200 400 600 800]\n",
      "[200 400 600 800]\n",
      "[200 204 400 600 800]\n",
      "[200 400 600 800]\n",
      "AUC 0.8955555555555555 0.08489407684399873\n",
      "COV 0.9463222222222224 0.05303691819250082\n",
      "DD 21.716666666666672 0.0 70.75\n",
      "FP 0.6666666666666666 0 2\n",
      "RT 0.03258961041768392 0.008632984247586958\n"
     ]
    }
   ],
   "source": [
    "\n",
    "F1,Covering,AUC,DD,FP,RT = [],[],[],[],[],[]\n",
    "F1_5 = []\n",
    "F1_10 = []\n",
    "F1_15 = []\n",
    "F1_30 = []\n",
    "for id, k in enumerate(data.keys()):\n",
    "    ground_truths = data[k]['target']\n",
    "    ts = data[k]['data']\n",
    "    n = ground_truths[-1] + 200\n",
    "    algo_c = rpt.KernelCPD(kernel=\"rbf\", min_size=1).fit(\n",
    "        np.array(ts)\n",
    "    ) \n",
    "    penalty_value = 1# beta\n",
    "    start_time = time.time()\n",
    "    cps_KCPA_PELT = algo_c.predict(pen=penalty_value)\n",
    "    cps = np.array(cps_KCPA_PELT)[:-1]\n",
    "    print(cps)\n",
    "    f1, AUC_score,fp = f_measure({'0':ground_truths},cps,20)\n",
    "    covering_score = covering({'0':ground_truths},cps,n)\n",
    "    delay = detection_delay(ground_truths,cps)[1]\n",
    "    end_time = time.time()\n",
    "    RT.append(end_time-start_time)\n",
    "    F1.append(f1)\n",
    "    AUC.append(AUC_score)\n",
    "    Covering.append(covering_score)\n",
    "    DD.append(delay)\n",
    "    FP.append(fp)\n",
    "\n",
    "    f1, AUC_score,fp = f_measure({'0':ground_truths},cps,5)\n",
    "    F1_5.append(fp)\n",
    "\n",
    "    f1, AUC_score,fp = f_measure({'0':ground_truths},cps,10)\n",
    "    F1_10.append(fp)\n",
    "\n",
    "    f1, AUC_score,fp = f_measure({'0':ground_truths},cps,15)\n",
    "    F1_15.append(fp)\n",
    "\n",
    "    f1, AUC_score,fp = f_measure({'0':ground_truths},cps,30)\n",
    "    F1_30.append(fp)\n",
    "\n",
    "\n",
    "print('AUC',np.mean(AUC),np.std(AUC))\n",
    "print('COV',np.mean(Covering),np.std(Covering))\n",
    "print('DD',np.mean(DD),np.min(DD),np.max(DD))\n",
    "print('FP',np.mean(FP),np.min(FP),np.max(FP))\n",
    "print('RT',np.mean(RT),np.std(RT))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.8, 0.8, 0.8, 0.6666666666666666, 0.5333333333333333]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[np.mean(f) for f in [F1_5,F1_10,F1_15,FP,F1_30]]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "CodePaper",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
