# Copyright 2023 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrappers that take care of casting."""

import chex
import contextlib
import haiku as hk
import jax
import jax.numpy as jnp
import numpy as np
import xarray

from typing import Any, Mapping, Tuple

from . import predictor_base

PyTree = Any


class Bfloat16Cast(predictor_base.Predictor):
    """Wrapper that casts all inputs to bfloat16 and outputs to targets dtype."""

    def __init__(self, predictor: predictor_base.Predictor, enabled: bool = True):
        """Inits the wrapper.

        Args:
          predictor: predictor being wrapped.
          enabled: disables the wrapper if False, for simpler hyperparameter scans.

        """
        self._enabled = enabled
        self._predictor = predictor

    def __call__(
        self,
        inputs: xarray.Dataset,
        targets_template: xarray.Dataset,
        forcings: xarray.Dataset,
        **kwargs,
    ) -> xarray.Dataset:
        if not self._enabled:
            return self._predictor(inputs, targets_template, forcings, **kwargs)

        with bfloat16_variable_view():
            predictions = self._predictor(
                *_all_inputs_to_bfloat16(inputs, targets_template, forcings),
                **kwargs,
            )

        predictions_dtype = infer_floating_dtype(predictions)  # pytype: disable=wrong-arg-types
        if predictions_dtype != jnp.bfloat16:
            raise ValueError(f"Expected bfloat16 output, got {predictions_dtype}")

        targets_dtype = infer_floating_dtype(targets_template)  # pytype: disable=wrong-arg-types
        return tree_map_cast(predictions, input_dtype=jnp.bfloat16, output_dtype=targets_dtype)

    def loss(
        self,
        inputs: xarray.Dataset,
        targets: xarray.Dataset,
        forcings: xarray.Dataset,
        **kwargs,
    ) -> predictor_base.LossAndDiagnostics:
        if not self._enabled:
            return self._predictor.loss(inputs, targets, forcings, **kwargs)

        with bfloat16_variable_view():
            loss, scalars = self._predictor.loss(
                *_all_inputs_to_bfloat16(inputs, targets, forcings), **kwargs
            )

        if loss.dtype != jnp.bfloat16:
            raise ValueError(f"Expected bfloat16 loss, got {loss.dtype}")

        targets_dtype = infer_floating_dtype(targets)  # pytype: disable=wrong-arg-types

        # Note that casting back the loss to e.g. float32 should not affect data
        # types of the backwards pass, because the first thing the backwards pass
        # should do is to go backwards the casting op and cast back to bfloat16
        # (and xprofs seem to confirm this).
        return tree_map_cast((loss, scalars), input_dtype=jnp.bfloat16, output_dtype=targets_dtype)

    def loss_and_predictions(  # pytype: disable=signature-mismatch  # jax-ndarray
        self,
        inputs: xarray.Dataset,
        targets: xarray.Dataset,
        forcings: xarray.Dataset,
        **kwargs,
    ) -> Tuple[predictor_base.LossAndDiagnostics, xarray.Dataset]:
        if not self._enabled:
            return self._predictor.loss_and_predictions(
                inputs,
                targets,
                forcings,  # pytype: disable=bad-return-type  # jax-ndarray
                **kwargs,
            )

        with bfloat16_variable_view():
            (loss, scalars), predictions = self._predictor.loss_and_predictions(
                *_all_inputs_to_bfloat16(inputs, targets, forcings), **kwargs
            )

        if loss.dtype != jnp.bfloat16:
            raise ValueError(f"Expected bfloat16 loss, got {loss.dtype}")

        predictions_dtype = infer_floating_dtype(predictions)  # pytype: disable=wrong-arg-types
        if predictions_dtype != jnp.bfloat16:
            raise ValueError(f"Expected bfloat16 output, got {predictions_dtype}")

        targets_dtype = infer_floating_dtype(targets)  # pytype: disable=wrong-arg-types
        return tree_map_cast(
            ((loss, scalars), predictions), input_dtype=jnp.bfloat16, output_dtype=targets_dtype
        )


def infer_floating_dtype(data_vars: Mapping[str, chex.Array]) -> np.dtype:
    """Infers a floating dtype from an input mapping of data."""
    dtypes = {v.dtype for k, v in data_vars.items() if jnp.issubdtype(v.dtype, np.floating)}
    if len(dtypes) != 1:
        dtypes_and_shapes = {
            k: (v.dtype, v.shape)
            for k, v in data_vars.items()
            if jnp.issubdtype(v.dtype, np.floating)
        }
        raise ValueError(
            f"Did not found exactly one floating dtype {dtypes} in input variables:"
            f"{dtypes_and_shapes}"
        )
    return list(dtypes)[0]


def _all_inputs_to_bfloat16(
    inputs: xarray.Dataset,
    targets: xarray.Dataset,
    forcings: xarray.Dataset,
) -> Tuple[xarray.Dataset, xarray.Dataset, xarray.Dataset]:
    return (
        inputs.astype(jnp.bfloat16),
        jax.tree.map(lambda x: x.astype(jnp.bfloat16), targets),
        forcings.astype(jnp.bfloat16),
    )


def tree_map_cast(
    inputs: PyTree,
    input_dtype: np.dtype,
    output_dtype: np.dtype,
) -> PyTree:
    def cast_fn(x):
        if x.dtype == input_dtype:
            return x.astype(output_dtype)

    return jax.tree.map(cast_fn, inputs)


@contextlib.contextmanager
def bfloat16_variable_view(enabled: bool = True):
    """Context for Haiku modules with float32 params, but bfloat16 activations.

    It works as follows:
    * Every time a variable is requested to be created/set as np.bfloat16,
      it will create an underlying float32 variable, instead.
    * Every time a variable a variable is requested as bfloat16, it will check the
      variable is of float32 type, and cast the variable to bfloat16.

    Note the gradients are still computed and accumulated as float32, because
    the params returned by init are float32, so the gradient function with
    respect to the params will already include an implicit casting to float32.

    Args:
      enabled: Only enables bfloat16 behavior if True.

    Yields:
      None
    """

    if enabled:
        with (
            hk.custom_creator(_bfloat16_creator, state=True),
            hk.custom_getter(_bfloat16_getter, state=True),
            hk.custom_setter(_bfloat16_setter),
        ):
            yield
    else:
        yield


def _bfloat16_creator(next_creator, shape, dtype, init, context):
    """Creates float32 variables when bfloat16 is requested."""
    if context.original_dtype == jnp.bfloat16:
        dtype = jnp.float32
    return next_creator(shape, dtype, init)


def _bfloat16_getter(next_getter, value, context):
    """Casts float32 to bfloat16 when bfloat16 was originally requested."""
    if context.original_dtype == jnp.bfloat16:
        assert value.dtype == jnp.float32
        value = value.astype(jnp.bfloat16)
    return next_getter(value)


def _bfloat16_setter(next_setter, value, context):
    """Casts bfloat16 to float32 when bfloat16 was originally set."""
    if context.original_dtype == jnp.bfloat16:
        value = value.astype(jnp.float32)
    return next_setter(value)
