{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e329ed08",
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.io as scio\n",
    "import numpy as np\n",
    "import time\n",
    "import pandas as pd\n",
    "from OT_CPD.OtSingleDimStatLib import *\n",
    "import time\n",
    "from utilsCPD import *\n",
    "from OT_CPD.WassersteinChangePointDetectionLib import w2SampClustering "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ec938d50",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `window` fixes how coarsely the stored filter is subsampled below;\n",
    "# `stride` is passed to w2SampClustering by the evaluation cells.\n",
    "window = 14\n",
    "stride = 1\n",
    "\n",
    "# Load the stored filter and flatten it to one dimension.\n",
    "w2ConvFilter = scio.loadmat(\"OT_CPD/TwoSampConvFilter.mat\")[\"filter2\"].flatten()\n",
    "\n",
    "# Keep every `filter_step`-th tap (at most 2*window taps remain) and\n",
    "# subtract the fixed offset 0.166 from each of them.\n",
    "filter_step = int(np.ceil(len(w2ConvFilter) / (2 * window)))\n",
    "w2ConvFilter = w2ConvFilter[::filter_step] - 0.166\n",
    "\n",
    "# Rescale so that the taps sum to one.\n",
    "w2ConvFilter = w2ConvFilter / w2ConvFilter.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8f0103b1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'F1 & AUC & FP': (0.4000000000000001, 0.40723981900452494, 11), 'DD': 129.6875, 'Covering': 0.7380780308864959}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\Sven Jacob\\miniconda3\\envs\\CodePaper\\Lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1429: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.\n",
      "  warnings.warn(\n",
      "c:\\Users\\Sven Jacob\\miniconda3\\envs\\CodePaper\\Lib\\site-packages\\sklearn\\base.py:1473: ConvergenceWarning: Number of distinct clusters (2) found smaller than n_clusters (11). Possibly due to duplicate points in X.\n",
      "  return fit_method(estimator, *args, **kwargs)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[13, 11, 10, 9, 8]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pathlib\n",
    "import os\n",
    "\n",
    "# Both Occupancy files are resolved relative to the current working directory.\n",
    "occupancy_dir = os.path.join(os.getcwd(), 'datasets/Occupancy')\n",
    "file_name = 'Occupancy.csv'\n",
    "file_path = os.path.normpath(os.path.join(occupancy_dir, file_name))\n",
    "targets_path = os.path.normpath(os.path.join(occupancy_dir, 'OccupancyTargets.csv'))\n",
    "\n",
    "# The first column of each CSV is dropped (`iloc[:, 1:]`).\n",
    "Occupancy = pd.read_csv(file_path).iloc[:, 1:]\n",
    "Targets = pd.read_csv(targets_path).iloc[:, 1:]\n",
    "\n",
    "GroundTruth = list(Targets.to_numpy().flatten())\n",
    "data = Occupancy.to_numpy()\n",
    "N_obs = len(data)\n",
    "\n",
    "# Window sizes to evaluate; candidates listed for a wider sweep: 200, 500, 750, 1000.\n",
    "window_size = [750]\n",
    "# Margins at which the AUC is reported once the sweep has finished.\n",
    "auc_margins = [10, 30, 50, 75, 100]\n",
    "\n",
    "METRICS_OCCUPANCY = {}\n",
    "\n",
    "# A dedicated loop variable leaves the global `window` (which sized w2ConvFilter) untouched.\n",
    "for occ_window in window_size:\n",
    "    metrics = {}\n",
    "    METRICS_OCCUPANCY[str(occ_window)] = metrics\n",
    "    res = w2SampClustering(data, 'occupancy', occ_window, stride, 11, w2ConvFilter)\n",
    "    cp_pred = res[2]\n",
    "    metrics['F1 & AUC & FP'] = f_measure({'0': GroundTruth}, list(cp_pred), margin=30)\n",
    "    metrics['DD'] = detection_delay(GroundTruth, cp_pred)[1]\n",
    "    metrics['Covering'] = covering({'0': GroundTruth}, cp_pred, n_obs=N_obs)\n",
    "\n",
    "    print(metrics)\n",
    "\n",
    "# f_measure returns (F1, AUC, FP): index 1 is the AUC, index 2 would be the\n",
    "# false-positive count. `cp_pred` holds the predictions of the last window size evaluated.\n",
    "OT_Occ_AUCs = [f_measure({'0': GroundTruth}, list(cp_pred), margin=tau)[1] for tau in auc_margins]\n",
    "OT_Occ_AUCs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "81f02096",
   "metadata": {},
   "outputs": [],
   "source": [
    "import zipfile\n",
    "import json\n",
    "\n",
    "zip_file_path = './datasets/MNISTSeq.zip'\n",
    "\n",
    "# Parse the first member of the archive as JSON; the parsed object is kept in `data`.\n",
    "with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:\n",
    "    zip_contents = zip_ref.namelist()\n",
    "    json_file_name = zip_contents[0]\n",
    "    data = json.loads(zip_ref.read(json_file_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d08f1171",
   "metadata": {},
   "outputs": [],
   "source": [
    "F1, Covering, AUC, DD, FP, RT = [], [], [], [], [], []\n",
    "\n",
    "window = 150\n",
    "stride = 1\n",
    "\n",
    "AUC_5, AUC_10, AUC_15, AUC_30 = [], [], [], []\n",
    "FP_5, FP_10, FP_15, FP_30 = [], [], [], []\n",
    "# NOTE(review): the Cov_* lists are never filled in this cell; kept so the names stay defined.\n",
    "Cov_5, Cov_10, Cov_15, Cov_30 = [], [], [], []\n",
    "\n",
    "# Additional margins scored for every sequence, with the lists collecting their AUC / FP.\n",
    "extra_margins = {\n",
    "    5: (AUC_5, FP_5),\n",
    "    10: (AUC_10, FP_10),\n",
    "    15: (AUC_15, FP_15),\n",
    "    30: (AUC_30, FP_30),\n",
    "}\n",
    "\n",
    "# Iterate over the sequences directly; no index is needed, so the builtin `id` is not rebound.\n",
    "for sequence in data.values():\n",
    "    ground_truths = sequence['target']\n",
    "    ts = np.array(sequence['data'])\n",
    "    # Length handed to `covering`: last annotated change point plus 200.\n",
    "    n = ground_truths[-1] + 200\n",
    "\n",
    "    # The recorded runtime covers detection plus the margin-20 metrics.\n",
    "    start_time = time.time()\n",
    "    res = w2SampClustering(ts, 'MNIST', window, stride, 11, w2ConvFilter)\n",
    "    cps = res[2]\n",
    "    f1, AUC_score, false_alarms = f_measure({'0': ground_truths}, cps, 20)\n",
    "    covering_score = covering({'0': ground_truths}, cps, n)\n",
    "    delay = detection_delay(ground_truths, cps)[1]\n",
    "    end_time = time.time()\n",
    "\n",
    "    F1.append(f1)\n",
    "    AUC.append(AUC_score)\n",
    "    Covering.append(covering_score)\n",
    "    DD.append(delay)\n",
    "    FP.append(false_alarms)\n",
    "    RT.append(end_time - start_time)\n",
    "    print(AUC_score, delay, false_alarms)\n",
    "\n",
    "    # Re-score the same predictions at each additional margin.\n",
    "    for margin, (auc_list, fp_list) in extra_margins.items():\n",
    "        margin_f1, margin_auc, margin_fp = f_measure({'0': ground_truths}, cps, margin)\n",
    "        auc_list.append(margin_auc)\n",
    "        fp_list.append(margin_fp)\n",
    "\n",
    "\n",
    "print(np.mean(AUC))\n",
    "print(np.std(AUC))\n",
    "print(np.mean(Covering))\n",
    "print(np.std(Covering))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2eaf33a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_TS = load_master_data('./datasets/has2023_master.csv.zip')\n",
    "\n",
    "def prepare_data(df_TS, channels_start, id):\n",
    "    \"\"\"Build a standardised channel DataFrame for one row of `df_TS`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df_TS : DataFrame whose columns from `channels_start` onward hold one\n",
    "        array-like per cell.\n",
    "    channels_start : positional index of the first channel column.\n",
    "    id : positional row index (name kept for existing callers although it\n",
    "        shadows the builtin).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (data, GroundTruth) : `data` is a float32 DataFrame with one column per\n",
    "        non-empty channel, each column shifted by its mean and divided by its\n",
    "        standard deviation; `GroundTruth` is the value at column position 5\n",
    "        of the selected row.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError : if every channel of the selected row is empty.\n",
    "    \"\"\"\n",
    "    channels = {}\n",
    "    for name, values in zip(df_TS.columns[channels_start:], df_TS.iloc[id, channels_start:]):\n",
    "        values = np.array(values)\n",
    "        # Empty channels are skipped.\n",
    "        if values.shape[0] != 0:\n",
    "            channels[name] = values.astype(np.float32)\n",
    "    if not channels:\n",
    "        raise ValueError(f'row {id} has no non-empty channel from column {channels_start} onward')\n",
    "    data = pd.DataFrame(channels)\n",
    "    # For the 'outdoor' group the last three channel columns are discarded.\n",
    "    if df_TS.iloc[id].group == 'outdoor':\n",
    "        data = data.iloc[:, :-3]\n",
    "    GroundTruth = df_TS.iloc[id, 5]\n",
    "\n",
    "    data = (data - data.mean()) / data.std()\n",
    "    return data, GroundTruth\n",
    "\n",
    "IDs = [10,14,7,182,225,19,185,33,36,87,88,210,11,20,23,243,247,91,95,96,100,141,91,95,245]\n",
    "\n",
    "\n",
    "window = 1000\n",
    "stride = 1\n",
    "\n",
    "\n",
    "METRICS_HASC = {}\n",
    "METRICS_HASC_25 = {}\n",
    "METRICS_HASC_50 = {}\n",
    "METRICS_HASC_150 = {}\n",
    "METRICS_HASC_200 = {}\n",
    "\n",
    "# f_measure margin -> dictionary that stores the metrics obtained with it.\n",
    "metrics_by_margin = {\n",
    "    100: METRICS_HASC,\n",
    "    25: METRICS_HASC_25,\n",
    "    50: METRICS_HASC_50,\n",
    "    150: METRICS_HASC_150,\n",
    "    200: METRICS_HASC_200,\n",
    "}\n",
    "\n",
    "# NOTE(review): IDs lists 91 and 95 twice. Results are keyed by id, so each id\n",
    "# is evaluated only once here; confirm the repeats were not meant to be other ids.\n",
    "for ts_id in dict.fromkeys(IDs):\n",
    "    # Every store gets an entry, which stays empty when the series is skipped.\n",
    "    for store in metrics_by_margin.values():\n",
    "        store[ts_id] = {}\n",
    "    df, GroundTruth = prepare_data(df_TS, 8, ts_id)\n",
    "    if df.shape[0] <= 1200:\n",
    "        print('yes')\n",
    "        continue\n",
    "    n_cl = len(GroundTruth) if len(GroundTruth) > 0 else 1\n",
    "    try:\n",
    "        res = w2SampClustering(df.to_numpy(), 'HASC', window, stride, n_cl, w2ConvFilter)\n",
    "    except Exception as exc:\n",
    "        # Best effort: report the failure and move on instead of hiding it.\n",
    "        print(f'w2SampClustering failed for id {ts_id}: {exc!r}')\n",
    "        continue\n",
    "    print(res)\n",
    "\n",
    "    cps = res[2]\n",
    "    print('CPD:', cps)\n",
    "\n",
    "    # Covering and detection delay take no margin argument, so they are computed once.\n",
    "    covering_score = covering({'0': GroundTruth}, cps, len(df))\n",
    "    delay = detection_delay(GroundTruth, cps)[1]\n",
    "\n",
    "    for margin, store in metrics_by_margin.items():\n",
    "        f1, AUC_score, false_alarms = f_measure({'0': GroundTruth}, cps, margin)\n",
    "        store[ts_id]['F1'] = f1\n",
    "        store[ts_id]['Covering'] = covering_score\n",
    "        store[ts_id]['AUC'] = AUC_score\n",
    "        store[ts_id]['DD'] = delay\n",
    "        store[ts_id]['FP'] = false_alarms\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "CodePaper",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
