{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 227,
   "id": "a67adbf9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import fitz\n",
    "import re\n",
    "import json\n",
    "from datetime import datetime\n",
    "from typing import Optional, List, Callable, Any, Union, Dict, Hashable\n",
    "from abc import abstractmethod, ABC\n",
    "import numpy as np\n",
    "import math\n",
    "import pandas as pd\n",
    "import copy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "id": "f21bfb15",
   "metadata": {},
   "outputs": [],
   "source": [
    "#parameter \n",
    "\n",
    "extract_fol = \"extracted\"\n",
    "data_fol = \"data\"\n",
    "dataset_loc = os.path.join(extract_fol,\"observations.rules.json\")\n",
    "alert_event_loc = os.path.join(data_fol, \"alert_events.csv\")\n",
    "equipment_profile_loc = os.path.join(data_fol, \"equipment_profile.csv\")\n",
    "save_equipment_info_fol = \"extracted/equipment_info\"\n",
    "\n",
    "os.makedirs(save_equipment_info_fol, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 229,
   "id": "a224b3a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(dataset_loc, \"r\") as f0:\n",
    "    data = json.load(f0)\n",
    "\n",
    "def text_write(text, filename= \"debug.test\"):\n",
    "    with open(filename, \"w\") as f0:\n",
    "        f0.write(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "id": "232b234b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>asset</th>\n",
       "      <th>#instances</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AHU</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Chiller</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>CRAC</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>VAV</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Boiler</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Pump</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Cooling Tower</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>HXU</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Air Compressor</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Meter</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>PDU</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>PIU</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Plate &amp; Frame</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>None</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Lighting</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Fan</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AHU Humidity</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>UPS</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             asset  #instances\n",
       "1              AHU          55\n",
       "5          Chiller          11\n",
       "4             CRAC          10\n",
       "15             VAV           8\n",
       "3           Boiler           6\n",
       "14            Pump           5\n",
       "6    Cooling Tower           4\n",
       "8              HXU           4\n",
       "0   Air Compressor           3\n",
       "11           Meter           3\n",
       "12             PDU           3\n",
       "16             PIU           2\n",
       "13   Plate & Frame           1\n",
       "9             None           1\n",
       "10        Lighting           1\n",
       "7              Fan           1\n",
       "2     AHU Humidity           1\n",
       "17             UPS           1"
      ]
     },
     "execution_count": 230,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "asset_list = {}\n",
    "rule_ids = set()\n",
    "for d in data[\"rule_set\"]:\n",
    "    asset = d[\"asset\"]\n",
    "    rule_ids.add(d[\"id\"].strip())\n",
    "    if asset is None:\n",
    "        asset = \"None\"\n",
    "    try:\n",
    "        asset_list[asset] += 1\n",
    "    except KeyError:\n",
    "        asset_list[asset] = 1\n",
    "        \n",
    "stats = pd.DataFrame.from_dict(asset_list, orient=\"index\").reset_index()\n",
    "stats.columns = columns=[\"asset\",\"#instances\"]\n",
    "stats.sort_values(\"#instances\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "id": "e21f1875",
   "metadata": {},
   "outputs": [],
   "source": [
    "alert_events = pd.read_csv(alert_event_loc)\n",
    "equipment_profile = pd.read_csv(equipment_profile_loc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "id": "f75f55c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Intersection of rules:{'CT00003', 'CT00005', 'CR00012', 'CR00007', 'CR00011', 'CR00008', 'CR00002'}\n",
      "Num of rules without equipments:113\n"
     ]
    }
   ],
   "source": [
    "#only several rules have equipment mapping.\n",
    "print(\"Intersection of rules:{}\".format(set(alert_events[\"rule_id\"].to_list()).intersection(rule_ids)))\n",
    "print(\"Num of rules without equipments:{}\".format(len(rule_ids-set(alert_events[\"rule_id\"].to_list()))))\n",
    "\n",
    "\n",
    "text_write(\"\\n\".join(rule_ids-set(alert_events[\"rule_id\"].to_list())), os.path.join(save_equipment_info_fol, \"rules_without_equipment_info.txt\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "id": "181afd90",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>equipment_id</th>\n",
       "      <th>equipment_name</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>rule_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>CR00008</th>\n",
       "      <td>617</td>\n",
       "      <td>617</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CT00003</th>\n",
       "      <td>528</td>\n",
       "      <td>528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CR00012</th>\n",
       "      <td>362</td>\n",
       "      <td>362</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CR00007</th>\n",
       "      <td>255</td>\n",
       "      <td>255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CR00011</th>\n",
       "      <td>127</td>\n",
       "      <td>127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CR00002</th>\n",
       "      <td>105</td>\n",
       "      <td>105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CT00005</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         equipment_id  equipment_name\n",
       "rule_id                              \n",
       "CR00008           617             617\n",
       "CT00003           528             528\n",
       "CR00012           362             362\n",
       "CR00007           255             255\n",
       "CR00011           127             127\n",
       "CR00002           105             105\n",
       "CT00005             1               1"
      ]
     },
     "execution_count": 233,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "count_rule_applied_equipment = alert_events[\n",
    "    [\"equipment_id\", \"equipment_name\", \"rule_id\"]\n",
    "    ].groupby(\"rule_id\").count().sort_values([\"equipment_id\"], ascending=False)\n",
    "\n",
    "count_rule_applied_equipment\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 234,
   "id": "22627f41",
   "metadata": {},
   "outputs": [],
   "source": [
    "equipment_info_to_retrieve = pd.DataFrame()#index=count_rule_applied_equipment.index\n",
    "rule_id_group = alert_events[[\"equipment_id\", \"equipment_name\", \"rule_id\"]].groupby(\"rule_id\")\n",
    "equipment_info_to_retrieve[\"rule_id\"] = rule_id_group[\"rule_id\"].transform(lambda x: \"||\".join(set(x.to_list())))\n",
    "equipment_info_to_retrieve[\"equipment_ids\"] = rule_id_group[\"equipment_id\"].transform(lambda x: \"||\".join(set(x.to_list())))\n",
    "equipment_info_to_retrieve[\"equipment_names\"] = rule_id_group[\"equipment_name\"].transform(lambda x: \"||\".join(set(x.to_list())))\n",
    "equipment_info_to_retrieve.drop_duplicates(inplace=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "id": "3b3915a3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rule_id</th>\n",
       "      <th>equipment_ids</th>\n",
       "      <th>equipment_names</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>CT00003</td>\n",
       "      <td>B027 Cooling Towers</td>\n",
       "      <td>B027 Cooling Towers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>384</th>\n",
       "      <td>CT00005</td>\n",
       "      <td>B027 Cooling Towers</td>\n",
       "      <td>B027 Cooling Towers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>529</th>\n",
       "      <td>CR00011</td>\n",
       "      <td>CU02701||CU02703||CU02704||CU02006</td>\n",
       "      <td>Chiller 1||Chiller 6||Chiller 3||Chiller 4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>542</th>\n",
       "      <td>CR00008</td>\n",
       "      <td>CU02010||CU02009||CU02702||CU02704||CU02012||C...</td>\n",
       "      <td>Chiller 10||Chiller 12||Chiller 6||Chiller 9||...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>CR00007</td>\n",
       "      <td>CU02009||CU02704||CU02701||CU02703||CU02006</td>\n",
       "      <td>Chiller 3||Chiller 6||Chiller 1||Chiller 9||Ch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544</th>\n",
       "      <td>CR00002</td>\n",
       "      <td>CU02704||CU02012||CU02701||CU02007||CU02703||C...</td>\n",
       "      <td>Chiller 12||Chiller 7||Chiller 3||Chiller 6||C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>657</th>\n",
       "      <td>CR00012</td>\n",
       "      <td>CU02013||CU02010||CU02009||CU02702||CU02704||C...</td>\n",
       "      <td>Chiller 2||Chiller 10||Chiller 12||Chiller 7||...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     rule_id                                      equipment_ids  \\\n",
       "0    CT00003                                B027 Cooling Towers   \n",
       "384  CT00005                                B027 Cooling Towers   \n",
       "529  CR00011                 CU02701||CU02703||CU02704||CU02006   \n",
       "542  CR00008  CU02010||CU02009||CU02702||CU02704||CU02012||C...   \n",
       "543  CR00007        CU02009||CU02704||CU02701||CU02703||CU02006   \n",
       "544  CR00002  CU02704||CU02012||CU02701||CU02007||CU02703||C...   \n",
       "657  CR00012  CU02013||CU02010||CU02009||CU02702||CU02704||C...   \n",
       "\n",
       "                                       equipment_names  \n",
       "0                                  B027 Cooling Towers  \n",
       "384                                B027 Cooling Towers  \n",
       "529         Chiller 1||Chiller 6||Chiller 3||Chiller 4  \n",
       "542  Chiller 10||Chiller 12||Chiller 6||Chiller 9||...  \n",
       "543  Chiller 3||Chiller 6||Chiller 1||Chiller 9||Ch...  \n",
       "544  Chiller 12||Chiller 7||Chiller 3||Chiller 6||C...  \n",
       "657  Chiller 2||Chiller 10||Chiller 12||Chiller 7||...  "
      ]
     },
     "execution_count": 235,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "equipment_info_to_retrieve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 236,
   "id": "255eac90",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rule: CT00003 | #equipment_profiles: 0\n",
      "Rule: CT00005 | #equipment_profiles: 0\n",
      "Rule: CR00011 | #equipment_profiles: 4\n",
      "Rule: CR00008 | #equipment_profiles: 8\n",
      "Rule: CR00007 | #equipment_profiles: 5\n",
      "Rule: CR00002 | #equipment_profiles: 6\n",
      "Rule: CR00012 | #equipment_profiles: 11\n"
     ]
    }
   ],
   "source": [
    "equipment_info_of_rule_ids = {}\n",
    "for _, rule in equipment_info_to_retrieve.to_dict('index').items():\n",
    "    _temp = {}\n",
    "    equipments_per_rule = equipment_profile[equipment_profile[\"equipment_id\"].isin(rule[\"equipment_ids\"].split(\"||\"))]\n",
    "    \n",
    "    _temp[\"rule_id\"] = rule[\"rule_id\"]\n",
    "    _temp[\"equipment_profiles\"] = [v for _,v in equipments_per_rule.to_dict('index').items()]\n",
    "\n",
    "    print(\"Rule: {} | #equipment_profiles: {}\".format(\n",
    "        _temp [\"rule_id\"],\n",
    "        len(_temp[\"equipment_profiles\"])\n",
    "        ))\n",
    "    \n",
    "    equipment_info_of_rule_ids[rule[\"rule_id\"]] = _temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 237,
   "id": "7be8faea",
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_list(d:Dict[str,Any]) -> Dict[str,Any]:\n",
    "    _temp = {}\n",
    "    for k,v in d.items():\n",
    "        if isinstance(v, dict):\n",
    "            _temp[k] = replace_list(v)\n",
    "        else:\n",
    "            _temp[k] = []\n",
    "\n",
    "    return _temp\n",
    "\n",
    "def remove_nan_in_list(d:Dict[str,Any]) -> Dict[str,Any]:\n",
    "    _temp = copy.deepcopy(d)\n",
    "    for k,v in d.items():\n",
    "        if isinstance(v, dict):\n",
    "            _temp[k] = remove_nan_in_list(v)\n",
    "        else:\n",
    "            #print(k,_temp[k])\n",
    "            #print([str(x) for x in _temp[k]])\n",
    "            _temp[k] = [x for x in _temp[k] if not str(x) in ['NaN', 'nan']]\n",
    "\n",
    "    return _temp\n",
    "\n",
    "def unique_elements(d:Dict[str,Any]) -> Dict[str,Any]:\n",
    "    _temp = copy.deepcopy(d)\n",
    "    for k,v in d.items():\n",
    "        if isinstance(v, dict):\n",
    "            _temp[k] = remove_nan_in_list(v)\n",
    "        else:\n",
    "            _temp[k] = list(set(_temp[k]))\n",
    "\n",
    "    return _temp\n",
    "\n",
    "def concat_dict(d:List[Dict[str,Any]], template:Dict[str,Any]) -> None:\n",
    "\n",
    "    for k,v in template.items():\n",
    "        if isinstance(v, dict):\n",
    "            #print(k,[x[k] for x in d], template[k] )\n",
    "            concat_dict([x[k] for x in d], template[k])\n",
    "        else:\n",
    "            #print(k,d)\n",
    "            template[k] = [x[k] for x in d]\n",
    "            \n",
    "def save_json(data:Dict[Hashable,Any], loc:str) -> None:\n",
    "    with open(loc, \"w\") as f0:\n",
    "        json.dump(data, f0)\n",
    "\n",
    "def aggregate_profile(profiles:List[dict]) -> dict:\n",
    "    \n",
    "    if len(profiles)<1:\n",
    "        return {}\n",
    "    \n",
    "    if len(profiles)<2:\n",
    "        return profiles[0]\n",
    "\n",
    "    _temp = copy.deepcopy(profiles[0])\n",
    "    _temp = replace_list(_temp)\n",
    "\n",
    "    #aggregate values to list\n",
    "    concat_dict(profiles,_temp)\n",
    "    _temp = remove_nan_in_list(_temp)\n",
    "    _temp = unique_elements(_temp)\n",
    "    \n",
    "\n",
    "    return _temp\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 238,
   "id": "0ffe39bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "rule_to_equipment_profile = {}\n",
    "for k,v in equipment_info_of_rule_ids.items():\n",
    "    check = aggregate_profile(v['equipment_profiles'])\n",
    "    rule_to_equipment_profile[k] = check\n",
    "\n",
    "save_json(rule_to_equipment_profile, os.path.join(save_equipment_info_fol,\"rule_to_equipment_map.json\"))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51821e85",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "id": "9187c448",
   "metadata": {},
   "outputs": [],
   "source": [
    "gre_asset_description = pd.read_csv(os.path.join(data_fol, \"GRE_Asset_Description.csv\"))\n",
    "gre_asset_description.set_index(['Asset Type Name'], inplace=True)\n",
    "\n",
    "gre_asset_description.columns = ['desc', 'detailed_desc']\n",
    "gre_asset_description = gre_asset_description.to_dict(orient=\"index\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "id": "da6ec6d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "save_json(gre_asset_description, os.path.join(save_equipment_info_fol, \"gre_desc.json\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
