# Copyright 2019 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility functions used by generate_graph.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import hashlib
import itertools

import numpy as np


def gen_is_edge_fn(bits):
  """Builds a vectorized edge predicate from an integer bit mask.

  The bits of the mask populate the strict upper triangle of an adjacency
  matrix one column at a time, beginning with the least significant bit. For a
  bitstring FEDCBA and a 4x4 matrix the result is
    [[0, A, B, D],
     [0, 0, C, E],
     [0, 0, 0, F],
     [0, 0, 0, 0]]

  Because the bit position of entry (x, y) depends only on x and y, the same
  mask can be applied to a matrix of any size; bits that are not set simply
  leave the extra entries empty. The bitstring FEDCBA (0-padded) on a 5x5
  matrix gives
    [[0, A, B, D, 0],
     [0, 0, C, E, 0],
     [0, 0, 0, F, 0],
     [0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0]]

  Args:
    bits: integer which will be interpreted as a bit mask.

  Returns:
    np.vectorize-wrapped function of (x, y) that is truthy when the mask
    contains an edge from vertex x to vertex y.
  """
  def is_edge(x, y):
    """Reports whether the mask holds an edge from x to y (0-indexed)."""
    if x < y:
      # Column y is preceded by y * (y - 1) / 2 upper-triangular entries, and
      # x is the offset of the entry inside that column.
      bit_position = y * (y - 1) // 2 + x
      return (bits >> bit_position) % 2 == 1
    # On or below the diagonal there are never edges. The integer 0 (rather
    # than False) is returned on purpose: np.vectorize is given no otypes, so
    # the value returned for the first element it evaluates decides the dtype
    # of the array it produces.
    return 0

  return np.vectorize(is_edge)


def is_full_dag(matrix):
  """Checks that every vertex lies on a path from vertex 0 to vertex V-1.

  In other words, the graph has no disconnected or "hanging" vertices. For an
  upper-triangular adjacency matrix it is enough to verify that:
    1) every row except the last has a non-zero entry (only the output vertex
       may lack out-edges), and
    2) every column except the first has a non-zero entry (only the input
       vertex may lack in-edges).

  Args:
    matrix: V x V upper-triangular adjacency matrix (np.ndarray).

  Returns:
    True if there are no dangling vertices, False otherwise.
  """
  num_vertices = np.shape(matrix)[0]

  # One flag per vertex other than the output: does it have any out-edge?
  has_out_edge = np.any(matrix[:num_vertices - 1, :] != 0, axis=1)
  # One flag per vertex other than the input: does it have any in-edge?
  has_in_edge = np.any(matrix[:, 1:] != 0, axis=0)

  return bool(np.all(has_out_edge) and np.all(has_in_edge))


def num_edges(matrix):
  """Returns the edge count of a graph given its adjacency matrix.

  Args:
    matrix: adjacency matrix whose entries are summed; with 0/1 entries the
      sum equals the number of edges.

  Returns:
    Sum of all entries of matrix, as returned by np.sum.
  """
  edge_total = np.sum(matrix)
  return edge_total


def hash_module(matrix, labeling):
  """Computes an isomorphism-invariant MD5 fingerprint of a labeled graph.

  Every vertex starts with a hash of its (out-degree, in-degree, label)
  triple. The vertex hashes are then refined once per vertex in the graph:
  each round replaces a vertex's hash with a digest of the sorted hashes of
  its in-neighbors, the sorted hashes of its out-neighbors and its own
  previous hash. The fingerprint is the digest of the sorted final hashes, so
  it does not depend on the order in which the vertices are numbered.

  Args:
    matrix: np.ndarray square upper-triangular adjacency matrix.
    labeling: list of int labels of length equal to both dimensions of
      matrix.

  Returns:
    MD5 hash of the matrix and labeling, as a hexadecimal string.
  """
  def md5_hex(text):
    """Returns the hexadecimal MD5 digest of a UTF-8 encoded string."""
    return hashlib.md5(text.encode('utf-8')).hexdigest()

  num_vertices = np.shape(matrix)[0]
  in_degrees = np.sum(matrix, axis=0).tolist()
  out_degrees = np.sum(matrix, axis=1).tolist()
  assert len(in_degrees) == len(out_degrees) == len(labeling)

  vertex_ids = range(num_vertices)
  # The neighborhoods never change between rounds, so look them up only once.
  predecessors = [[w for w in vertex_ids if matrix[w, v]] for v in vertex_ids]
  successors = [[w for w in vertex_ids if matrix[v, w]] for v in vertex_ids]

  hashes = [md5_hex(str(signature))
            for signature in zip(out_degrees, in_degrees, labeling)]

  # Refining up to the graph diameter is probably enough, but the rounds are
  # cheap, so one round per vertex is run instead.
  for _ in vertex_ids:
    hashes = [
        md5_hex('|'.join((
            ''.join(sorted(hashes[w] for w in predecessors[v])),
            ''.join(sorted(hashes[w] for w in successors[v])),
            hashes[v],
        )))
        for v in vertex_ids
    ]

  return md5_hex(str(sorted(hashes)))


def permute_graph(graph, label, permutation):
  """Permutes the vertices of a graph together with their labels.

  Vertex v of the input graph becomes vertex permutation[v] of the output.

  Args:
    graph: square adjacency matrix (np.ndarray or anything np.asarray
      accepts as a 2-D array).
    label: list of labels of same length as graph dimensions.
    permutation: a permutation sequence of ints of same length as graph
      dimensions.

  Returns:
    Tuple (new_matrix, new_label). new_matrix is a boolean np.ndarray whose
    entry [permutation[u], permutation[v]] is True exactly when
    graph[u, v] == 1. new_label is a list with
    new_label[permutation[v]] == label[v].
  """
  graph = np.asarray(graph)

  # inverse_perm[i] is the old vertex that ends up at position i in the new
  # graph. The sort is stable, so ties are broken by the old vertex index.
  inverse_perm = sorted(range(len(permutation)), key=lambda v: permutation[v])

  # The output is sized by the label list; indexing position by position keeps
  # the IndexError raised when there are more labels than permutation entries.
  sources = [inverse_perm[i] for i in range(len(label))]

  # Select rows and columns with one fancy-indexing operation instead of
  # evaluating a Python callback per entry. Unlike a vectorized callback over
  # an int8 index grid, this also handles empty graphs and graphs with more
  # than 127 vertices. The explicit intp dtype keeps an empty index list from
  # becoming a float array.
  index = np.array(sources, dtype=np.intp)
  new_matrix = graph[np.ix_(index, index)] == 1

  new_label = [label[v] for v in sources]
  return new_matrix, new_label


def is_isomorphic(graph1, graph2):
  """Brute-force test of whether two labeled graphs are isomorphic.

  Every vertex permutation of graph1 is tried until one reproduces both the
  adjacency matrix and the labels of graph2, so the running time grows
  factorially with the number of vertices.

  Args:
    graph1: (adjacency matrix, labels) pair describing the first graph.
    graph2: (adjacency matrix, labels) pair describing the second graph; its
      matrix must have the same shape as graph1's and its labels the same
      length.

  Returns:
    True if some permutation of graph1 equals graph2, False otherwise.
  """
  adjacency_a, labels_a = np.array(graph1[0]), graph1[1]
  adjacency_b, labels_b = np.array(graph2[0]), graph2[1]
  assert np.shape(adjacency_a) == np.shape(adjacency_b)
  assert len(labels_a) == len(labels_b)

  num_vertices = np.shape(adjacency_a)[0]

  def maps_onto_second(ordering):
    """Does permuting the first graph by ordering yield the second graph?"""
    permuted_matrix, permuted_labels = permute_graph(
        adjacency_a, labels_a, ordering)
    return (np.array_equal(permuted_matrix, adjacency_b) and
            permuted_labels == labels_b)

  # Note: input and output in our constrained graphs always map to themselves
  # but this script does not enforce that.
  return any(maps_onto_second(ordering)
             for ordering in itertools.permutations(range(0, num_vertices)))
