# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Any, Mapping, Optional, Union, List
from collections import OrderedDict
import logging
import time
import itertools
import yaml
import os
import pathlib
TItems = Union[Mapping, str]
# do not reference the common module here or we will end up with circular deps
def _fmt(val:Any)->str:
if isinstance(val, float):
return f'{val:.4g}'
return str(val)
class OrderedDictLogger:
"""The purpose of the structured logging is to store logs as key value pair. However, when you have loop and sub routine calls, what you need is hierarchical dictionaries where the value for a key could be a dictionary. The idea is that you set one of the nodes in tree as current node and start logging your values. You can then use pushd to create and go to child node and popd to come back to parent. To implement this mechanism we use two main variables: _stack allows us to push each node on stack when pushd is called. The node is OrderedDictionary. As a convinience, we let specify child path in pushd in which case child hierarchy is created and current node will be set to the last node in specified path. When popd is called, we go back to original parent instead of parent of current node. To implement this we use _paths variable which stores subpath when each pushd call was made.
"""
def __init__(self, filepath:Optional[str], logger:Optional[logging.Logger],
save_delay:Optional[float]=30.0, yaml_log=True) -> None:
super().__init__()
self.reset(filepath, logger, save_delay, yaml_log=yaml_log)
    def reset(self, filepath:Optional[str], logger:Optional[logging.Logger],
save_delay:Optional[float]=30.0,
load_existing_file=False, backup_existing_file=True, yaml_log=True) -> None:
self._logger = logger
self._yaml_log = yaml_log
        # _stack stores the OrderedDict node for each path
        # _paths stores the subpath passed to each pushd call
self._paths = [['']]
self._save_delay = save_delay
self._call_count = 0
self._last_save = time.time()
self._filepath = filepath
        # load and/or back up the file if it already exists
        root_od = OrderedDict()
        if self._yaml_log and filepath and os.path.exists(filepath):
            if load_existing_file:
                with open(filepath, 'r') as f:
                    root_od = yaml.load(f, Loader=yaml.Loader)
            if backup_existing_file:
                # rename the existing file by appending a timestamp to its stem
                cur_p = pathlib.Path(filepath)
                new_p = cur_p.with_name(cur_p.stem + '.' + str(int(time.time()))
                                        + cur_p.suffix)
                if os.path.exists(str(new_p)):
                    raise RuntimeError(f'Cannot backup file {filepath} because the new name {new_p} already exists')
                cur_p.rename(new_p)
self._stack:List[Optional[OrderedDict]] = [root_od]
    def debug(self, dict:TItems, level:Optional[int]=logging.DEBUG, exists_ok=False)->None:
self.info(dict, level, exists_ok)
    def warn(self, dict:TItems, level:Optional[int]=logging.WARN, exists_ok=False)->None:
self.info(dict, level, exists_ok)
    def info(self, dict:TItems, level:Optional[int]=logging.INFO, exists_ok=False)->None:
self._call_count += 1 # provides default key when key is not specified
if isinstance(dict, Mapping): # if logging dict then just update current section
self._update(dict, exists_ok)
msg = ', '.join(f'{k}={_fmt(v)}' for k, v in dict.items())
else:
msg = dict
key = '_warnings' if level==logging.WARN else '_messages'
self._update_key(self._call_count, msg, node=self._root(), path=[key])
if level is not None and self._logger:
self._logger.log(msg=self.path() + ' ' + msg, level=level)
if self._save_delay is not None and \
time.time() - self._last_save > self._save_delay:
self.save()
self._last_save = time.time()
def _root(self)->OrderedDict:
r = self._stack[0]
assert r is not None
return r
def _cur(self)->OrderedDict:
self._ensure_paths()
c = self._stack[-1]
assert c is not None
return c
    def save(self, filepath:Optional[str]=None)->None:
filepath = filepath or self._filepath
if filepath:
with open(filepath, 'w') as f:
yaml.dump(self._root(), f)
    def load(self, filepath:str)->None:
with open(filepath, 'r') as f:
od = yaml.load(f, Loader=yaml.Loader)
self._stack = [od]
    def close(self)->None:
self.save()
if self._logger:
for h in self._logger.handlers:
h.flush()
def _insert(self, dict:Mapping):
self._update(dict, exists_ok=False)
def _update(self, dict:Mapping, exists_ok=True):
for k,v in dict.items():
self._update_key(k, v, exists_ok)
    def _update_key(self, key:Any, val:Any, exists_ok=True,
                    node:Optional[OrderedDict]=None, path:Optional[List[str]]=None):
        if not self._yaml_log:
            return
        if not exists_ok and str(key) in self._cur():
            raise KeyError(f'Key "{key}" at path "{self.path()}" already has the value '
                           f'"{self._cur()[str(key)]}" and cannot be overwritten with "{val}". '
                           f'The log is being saved at "{self._filepath}".')
        node = node if node is not None else self._cur()
        for p in (path or []): # optionally descend into (and create) the given subpath
            if p not in node:
                node[p] = OrderedDict()
            node = node[p]
        node[str(key)] = val
def _ensure_paths(self)->None:
if not self._yaml_log:
return
if self._stack[-1] is not None:
return
last_od = None
for i, (path, od) in enumerate(zip(self._paths, self._stack)):
if od is None: # if corresponding dict is being delayed created
od = last_od
for key in path:
if key not in od:
od[key] = OrderedDict()
if not isinstance(od[key], OrderedDict):
                        raise RuntimeError(f'Key "{key}" is being used both for a scalar value and as a child node created by pushd')
od = od[key]
self._stack[i] = od
last_od = od
    def pushd(self, *keys:Any)->'OrderedDictLogger':
        """Creates a new child path as specified by the sequence of keys and makes it the current node."""
        if not self._yaml_log:
            return self
        self._paths.append([str(k) for k in keys])
        self._stack.append(None) # delay creation of the node until it is first used
        return self # allows use as a context manager via __enter__
    def popd(self):
if not self._yaml_log:
return
if len(self._stack)==1:
            raise RuntimeError('popd() called at the root node; there is no parent to return to')
self._stack.pop()
self._paths.pop()
    def path(self)->str:
if not self._yaml_log:
return '/'
# flatten array of array
return '/'.join(itertools.chain.from_iterable(self._paths[1:]))
def __enter__(self)->'OrderedDictLogger':
return self
def __exit__(self, type, value, traceback):
self.popd()
def __contains__(self, key:Any):
return key in self._cur()
def __len__(self)->int:
return len(self._cur())
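
# Illustrative usage sketch (not part of the original module): it assumes only the
# class defined above. The file name 'example_log.yaml' and the logger name are
# arbitrary choices for this demo.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    py_logger = logging.getLogger('ordereddict_logger_demo')

    log = OrderedDictLogger(filepath='example_log.yaml', logger=py_logger)
    log.info({'run_name': 'demo', 'lr': 0.025})        # logged at the root node

    with log.pushd('epochs', '0'):                     # descend into epochs/0
        log.info({'train_loss': 1.234, 'val_loss': 1.456})
        with log.pushd('timings'):                     # nested child: epochs/0/timings
            log.info({'fwd_sec': 0.12, 'bwd_sec': 0.34})

    log.info('epoch 0 finished')                       # plain message, stored under _messages
    log.close()                                        # flush handlers and save the YAML file

    # The logged tree now has run_name and lr at the root, train_loss and val_loss
    # under epochs/0, the timing values under epochs/0/timings, and the formatted
    # messages keyed by call count under _messages.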