# coding=utf-8
# Copyright 2022 The Mixed Fl Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for data_utils."""

import collections
from typing import Any, Dict

import tensorflow as tf

from mixed_fl.experiments.celeba import data_utils

# Default number of synthetic examples generated per client dataset; the tests
# below also pass it as the `batch_size` argument of `preprocess_img_dataset`.
BATCH_SIZE = 3


def _get_synthetic_data_dict(num_images=BATCH_SIZE):
  """Builds random CelebA-like examples for use in tests.

  Args:
    num_images: How many synthetic examples to generate.

  Returns:
    An ordered dictionary suitable for `tf.data.Dataset.from_tensor_slices`.
    Its `image` entry is a `tf.uint8` tensor of shape
    `[num_images, 84, 84, 3]` holding random pixel values, and its `smiling`
    entry is a list of `num_images` `True` values.
  """
  image_shape = [num_images, 84, 84, 3]
  # Pixels are sampled as int64 and narrowed to uint8 afterwards; `maxval` is
  # exclusive, so the sampled values fit in a byte.
  random_pixels = tf.random.uniform(
      shape=image_shape, minval=0, maxval=256, dtype=tf.int64)
  all_smiling = [True] * num_images

  features = collections.OrderedDict()
  features['image'] = tf.cast(random_pixels, dtype=tf.uint8)
  features['smiling'] = all_smiling
  return features


def _get_example_client_dataset():
  """Returns a `tf.data.Dataset` sliced from the default synthetic examples."""
  synthetic_examples = _get_synthetic_data_dict()
  return tf.data.Dataset.from_tensor_slices(synthetic_examples)


class DataUtilsTest(tf.test.TestCase):
  """Tests for `data_utils.preprocess_img_dataset` on synthetic client data."""

  def test_preprocess_img_dataset(self):
    """Checks image value range, image shape, and label values per batch."""
    raw_images_ds = _get_example_client_dataset()
    standard_images_ds = data_utils.preprocess_img_dataset(
        raw_images_ds, label_attribute='smiling', batch_size=BATCH_SIZE)
    num_batches = 0
    for image_batch, label_batch in standard_images_ds:
      num_batches += 1
      for image in image_batch:
        self.assertAllGreaterEqual(image, -1.0)
        self.assertAllLessEqual(image, 1.0)
        self.assertEqual(image.shape, [84, 84, 3])
      for label in label_batch:
        # Every synthetic example has `smiling=True`.
        self.assertEqual(label, 1.0)
    # All assertions above live inside the loop, so an empty dataset would
    # otherwise let this test pass without checking anything.
    self.assertGreater(num_batches, 0)

  def test_preprocess_img_dataset_batches_correctly(self):
    """Checks that the first batch holds `BATCH_SIZE` images and labels."""
    raw_images_ds = _get_example_client_dataset()
    standard_images_ds = data_utils.preprocess_img_dataset(
        raw_images_ds, label_attribute='smiling', batch_size=BATCH_SIZE)
    image_batch, label_batch = next(iter(standard_images_ds))
    self.assertEqual(BATCH_SIZE, image_batch.shape[0])
    self.assertEqual(BATCH_SIZE, label_batch.shape[0])

  def test_preprocess_img_dataset_with_bad_label_attribute_raises_error(self):
    """Checks that a label attribute absent from the data is rejected."""
    raw_images_ds = _get_example_client_dataset()
    # 'foo' is not a key of the synthetic examples (only 'image' and
    # 'smiling' are), so the call itself is expected to raise.
    with self.assertRaises(ValueError):
      data_utils.preprocess_img_dataset(
          raw_images_ds, label_attribute='foo', batch_size=BATCH_SIZE)


# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
