{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "8657ca12",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import fitz\n",
    "import re\n",
    "import json\n",
    "from datetime import datetime\n",
    "from typing import Optional, List, Callable, Any\n",
    "from abc import abstractmethod, ABC\n",
    "from functools import partial\n",
    "import copy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "69daaa29",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parameters: the folder to scan and the list of dataset paths built from it.\n",
    "\n",
    "extract_fol = \"extracted\"\n",
    "# Keep every directory entry whose name does not contain \"extracted_data\";\n",
    "# each resulting path is prefixed with extract_fol.\n",
    "dataset_locs = [os.path.join(extract_fol,x) for x in os.listdir(extract_fol) if \"extracted_data\" not in x]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5575d5bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_json(data:dict, loc:str) -> None:\n",
    "    \"\"\"Serialize ``data`` as JSON and write it to the file at ``loc``.\n",
    "\n",
    "    The document is serialized in memory before the file is opened, so a\n",
    "    value that ``json`` cannot encode raises without truncating a file that\n",
    "    already exists at ``loc``.\n",
    "\n",
    "    Args:\n",
    "        data: JSON-serializable mapping to persist.\n",
    "        loc: Destination path; an existing file is overwritten.\n",
    "\n",
    "    Raises:\n",
    "        TypeError: If ``data`` contains a value ``json`` cannot serialize.\n",
    "        OSError: If ``loc`` cannot be opened for writing.\n",
    "    \"\"\"\n",
    "    # Serialize first: opening with \"w\" truncates immediately, and callers\n",
    "    # in this notebook overwrite their own input file by default.\n",
    "    payload = json.dumps(data)\n",
    "    with open(loc, \"w\", encoding=\"utf-8\") as f0:\n",
    "        f0.write(payload)\n",
    "\n",
    "def load_json(loc:str) -> dict:\n",
    "    \"\"\"Read the JSON document stored at ``loc`` and return the parsed object.\n",
    "\n",
    "    The file is decoded as UTF-8 rather than with the platform's locale\n",
    "    encoding, so non-ASCII text (for example an en dash) is read the same\n",
    "    way on every system.\n",
    "\n",
    "    Args:\n",
    "        loc: Path of the JSON file to read.\n",
    "\n",
    "    Returns:\n",
    "        The decoded JSON content.\n",
    "\n",
    "    Raises:\n",
    "        OSError: If ``loc`` cannot be opened for reading.\n",
    "        json.JSONDecodeError: If the file does not contain valid JSON.\n",
    "    \"\"\"\n",
    "    with open(loc, \"r\", encoding=\"utf-8\") as f0:\n",
    "        return json.load(f0)\n",
    "    \n",
    "def save_json_with_outputfile(data:dict, file_path:str, output_file:Optional[str] = None) -> None:\n",
    "    \"\"\"Write ``data`` as JSON to ``output_file``, or to ``file_path`` when ``output_file`` is falsy.\n",
    "\n",
    "    Args:\n",
    "        data: Content handed to ``save_json``.\n",
    "        file_path: Fallback destination, used when no output file is given.\n",
    "        output_file: Preferred destination; ``None`` or an empty string selects ``file_path``.\n",
    "    \"\"\"\n",
    "    destination = output_file or file_path\n",
    "    save_json(data, destination)\n",
    "    \n",
    "\n",
    "def add_asset(file_path:str, output_file:Optional[str]=None) -> None:\n",
    "    \"\"\"Give every rule an ``asset`` field taken from the prefix of its name.\n",
    "\n",
    "    The stripped ``name`` is cut at whichever of the separators ' - ' (hyphen)\n",
    "    or ' – ' (en dash) occurs first, and the text before it is stored under\n",
    "    ``asset``. A rule whose name contains neither separator gets\n",
    "    ``asset = None`` and has its name and ``#n`` value printed.\n",
    "\n",
    "    Args:\n",
    "        file_path: JSON file whose top-level ``rule_set`` entries are updated.\n",
    "        output_file: Where to write the result; ``file_path`` is overwritten when omitted.\n",
    "    \"\"\"\n",
    "    data = load_json(file_path)\n",
    "    separators = (\" - \", \" – \")\n",
    "\n",
    "    for rule in data[\"rule_set\"]:\n",
    "        name = rule[\"name\"].strip()\n",
    "        # Position of each separator that actually occurs in the name.\n",
    "        found_at = {sep: name.find(sep) for sep in separators if sep in name}\n",
    "        if not found_at:\n",
    "            rule[\"asset\"] = None\n",
    "            print(name, rule[\"#n\"])\n",
    "            continue\n",
    "        # When both separators are present the one appearing first wins.\n",
    "        first_sep = min(found_at, key=found_at.get)\n",
    "        rule[\"asset\"] = name.split(first_sep)[0]\n",
    "\n",
    "    save_json_with_outputfile(data, file_path, output_file)\n",
    "\n",
    "def sep_observations(file_path:str, output_file:Optional[str]=None) -> None:\n",
    "    \"\"\"Split each rule's ``display_text.observations`` string into a list of items.\n",
    "\n",
    "    The text is split wherever a newline is followed by ' -'; the first two\n",
    "    characters of the first item are dropped, and every item is stripped and\n",
    "    has its remaining newlines removed. Values that are empty or are not\n",
    "    strings (for example lists produced by an earlier run on the same file)\n",
    "    are left unchanged, so the function can be re-run safely.\n",
    "\n",
    "    Args:\n",
    "        file_path: JSON file whose top-level ``rule_set`` entries are updated.\n",
    "        output_file: Where to write the result; ``file_path`` is overwritten when omitted.\n",
    "    \"\"\"\n",
    "    data = load_json(file_path)\n",
    "\n",
    "    for d in data[\"rule_set\"]:\n",
    "        observations = d[\"display_text\"][\"observations\"]\n",
    "        # Skip empty values and anything already split by a previous run.\n",
    "        if not observations or not isinstance(observations, str):\n",
    "            continue\n",
    "        parts = observations.split(\"\\n -\")\n",
    "        # NOTE(review): presumably removes a leading \"- \" bullet marker from the\n",
    "        # first item -- confirm every observations string starts with one.\n",
    "        parts[0] = parts[0][2:]\n",
    "        d[\"display_text\"][\"observations\"] = [x.strip().replace(\"\\n\", \"\") for x in parts]\n",
    "\n",
    "    save_json_with_outputfile(data,file_path, output_file)\n",
    "\n",
    "def create_tree_node(file_path:str, output_file:Optional[str]=None) -> None:\n",
    "    \"\"\"Placeholder for a per-dataset processing step that is not written yet.\n",
    "\n",
    "    The signature matches the other dataset functions so it can be passed to\n",
    "    ``handle_multiple_datasets`` once implemented.\n",
    "\n",
    "    Args:\n",
    "        file_path: Path of the dataset JSON file.\n",
    "        output_file: Optional destination path.\n",
    "\n",
    "    Raises:\n",
    "        NotImplementedError: Always, until the function is implemented.\n",
    "    \"\"\"\n",
    "    # TODO: implement. An explicit stub keeps the cell importable; a def with\n",
    "    # no body is a syntax error that prevents every function here from loading.\n",
    "    raise NotImplementedError(\"create_tree_node is not implemented yet\")\n",
    "\n",
    "def handle_multiple_datasets(locs:List[str], func:Callable[[str, Optional[str]], None], sep_out:Optional[Callable[[str],str]]=None) -> None:\n",
    "    \"\"\"Apply ``func`` to every dataset path in ``locs``.\n",
    "\n",
    "    Args:\n",
    "        locs: Paths of the dataset files to process, handled in order.\n",
    "        func: Called as ``func(path, output_path)`` for each entry.\n",
    "        sep_out: Optional callable mapping an input path to a separate output\n",
    "            path. When omitted (or falsy), ``func`` receives ``None`` as its\n",
    "            second argument.\n",
    "    \"\"\"\n",
    "    for f in locs:\n",
    "        # Example of what sep_out could return:\n",
    "        #   f\"{f.split('.')[0]}.{func.__name__}.{f.split('.')[-1]}\"\n",
    "        output_file = sep_out(f) if sep_out else None\n",
    "        func(f, output_file)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "7e6cfba8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 118\n",
      "Finds periods when a target's point histories do not change by a specified \n",
      "threshold for at least a duration or longer. 0\n",
      "Finds periods when a target's point histories contain two or more values \n",
      "within an interval for at least a duration or longer. 1\n",
      "Finds periods when a target's point histories contain zero values within an \n",
      "interval for at least a duration or longer. 2\n",
      "Finds periods when a target's point histories contain values outside of the \n",
      "acceptable range that could be generated by the system for at least a duration \n",
      "or longer. 3\n",
      "Finds periods when a target's point histories contain values outside of the \n",
      "acceptable range that could be generated by the system for at least a duration \n",
      "or longer. 6\n",
      "Finds periods when a target's point histories do not change by a specified \n",
      "threshold for at least a duration or longer. 77\n",
      "Whoever it was that changed this rule on March 3rd at 1pm Eastern, be glad \n",
      "the logs didn't record your name. Or maybe not, based off what I'm about to \n",
      "say. 78\n",
      "Finds points with the cmd tag that have more than a certain number of peaks \n",
      "& valleys in a 3 hr period. 107\n",
      "Finds periods when a target's point histories contain two or more values \n",
      "within an interval for at least a duration or longer. 108\n",
      "Finds periods when a target's point histories contain zero values within an \n",
      "interval for at least a duration or longer. 109\n",
      "Finds periods when a target's point histories contain zero values within an \n",
      "interval for at least a duration or longer. 110\n",
      "Finds periods when a target's point histories contain values outside of the \n",
      "acceptable range that could be generated by the system for at least a duration \n",
      "or longer. 111\n",
      "KPI for site PUE 135\n",
      "Spark if data has been missing from a meter that is part of a PUE calculation \n",
      "for more than 4 hours. 136\n",
      "Pauls rule to test his latest crackpot idea 137\n",
      "Pauls rule to test his latest crackpot idea 138\n",
      "Spark if equipment has reached equipment maintenance runtime. 139\n",
      "Finds the periods of time for a site where a certain percentage (default: 85%) \n",
      "of the site's total points have flatlined for 4 hours or more. 140\n",
      "TEST rule to determine if a random number from the BMS will work as a \n",
      "flatlined indicator to help prevent false alerts. 141\n",
      "TEST rule to determine if we can have a \"null flatlined\" rule to prevent false \n",
      "alerts if a sites' points are not reporting. This will generally apply to only the \n",
      "DB sites and not Niagara sites 142\n",
      "Finds periods when the target meter's THD is above a threshold. This rule runs \n",
      "on electric meters with points that have the \"thd\" tag. 143\n",
      "Finds periods when a target's point histories do not change by a specified \n",
      "threshold for at least a duration or longer. 156\n",
      "Finds periods when a target's point histories contain zero values within an \n",
      "interval for at least a duration or longer. 157\n",
      "kW min max 158\n",
      "kW/ton min to max 159\n",
      "KPI rule for kW/ton, run on sites. 160\n"
     ]
    }
   ],
   "source": [
    "# Run add_asset on every dataset path. No sep_out is given, so add_asset\n",
    "# receives None as its output file and writes back to each input file.\n",
    "handle_multiple_datasets(dataset_locs, add_asset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "26943b32",
   "metadata": {},
   "outputs": [],
   "source": [
    "#handle_multiple_datasets(dataset_locs, sep_observations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec0aa3a0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
