import functools

import jax
from jax import nn as jnn
from jax import numpy as jnp
from jax import random as jrandom

from neural_networks_chomsky_hierarchy.tasks import task

class RightDeterministic(task.GeneralizationTask):
  """Binary classification of strings over a 4-token alphabet.

  Tokens are the integers 0..3. As implemented in `sample_batch`, a string is
  labelled class 1 iff all of the following hold:
    * it contains at least one token 0;
    * no token 1 occurs before the last token 0;
    * no token 2 occurs after the last token 0.
  Every other string is labelled class 0.

  With the naming 0=a, 1=b, 2=c, 3=d (assumed -- TODO confirm) this is the
  language {a,c,d}*a{b,d}*, i.e. {b,d}*a{a,c,d}* read from right to left.

  Examples (under that naming):
    A, CAB, DADB -> class 1
    BCD, ABCD, BA -> class 0

  Note the sampling is jittable so this task is fast.
  """

  def __init__(self, *args, **kwargs):
    """Initializes the task with a fixed vocabulary of 4 tokens.

    Args:
      *args: Args for the base task class.
      **kwargs: Kwargs for the base task class.
    """
    super().__init__(*args, **kwargs)
    self._vocab_size = 4

  @staticmethod
  def _violation_masks(strings):
    """Locates the tokens that prevent a string from being class 1.

    Args:
      strings: Integer token array indexed as (row, position).

    Returns:
      A tuple of three boolean arrays with the same shape as `strings`:
        up_to_last_zero: True at every position up to and including the last
          token 0 of the row (all False when the row has no token 0).
        ones_before: True where a token 1 sits inside that prefix.
        twos_after: True where a token 2 sits outside that prefix.
    """
    # A reversed cumulative sum counts the zeros at or after each position;
    # the count is non-zero exactly on the prefix ending at the last zero.
    zeros_from_right = jnp.cumsum((strings == 0)[:, ::-1], axis=1)[:, ::-1]
    up_to_last_zero = zeros_from_right.astype(bool)
    ones_before = (strings == 1) & up_to_last_zero
    twos_after = (strings == 2) & ~up_to_last_zero
    return up_to_last_zero, ones_before, twos_after

  @functools.partial(jax.jit, static_argnums=(0, 2, 3))
  def sample_batch(self, rng: jnp.ndarray, batch_size: int,
                   length: int) -> task.Batch:
    """Returns a batch of strings and the expected class.

    Strings are first drawn uniformly at random and then partially repaired
    so that the batch is not dominated by class 0:
      * rows [0, batch_size // 3) get every offending token 1 and token 2
        replaced, so they are class 1 whenever they contain a token 0;
      * the next half of the remaining rows keep only their last offending
        token 1 (if they had one);
      * the final rows keep only their first offending token 2 (if they had
        one).
    Labels are recomputed from the final strings, so they are always
    consistent with the returned input.

    Args:
      rng: The JAX PRNG key used for sampling.
      batch_size: Number of strings in the batch.
      length: Number of tokens in each string.

    Returns:
      A dict with 'input', the one-hot strings of shape
      (batch_size, length, vocab_size), and 'output', the one-hot class of
      shape (batch_size, 2).
    """
    # Every random draw gets its own key. Reusing a single key would derive
    # the replacement tokens from the same random bits as the strings.
    rng_strings, rng_fix_ones, rng_fix_twos = jrandom.split(rng, 3)

    strings = jrandom.randint(
        rng_strings,
        shape=(batch_size, length),
        minval=0,
        maxval=self._vocab_size,
    )

    reserve1 = batch_size // 3
    reserve2 = reserve1 + (batch_size - reserve1) // 2

    _, fix_ones, fix_twos = self._violation_masks(strings)

    # Rows [reserve1, reserve2): drop the last offending 1 from the repair
    # mask so that it survives. When a row has no offending 1 the argmax is 0,
    # the one-hot lands on the final position, and the mask is unchanged.
    last_one = length - fix_ones[:, ::-1].argmax(axis=1) - 1
    keep_last_one = jnn.one_hot(last_one, length).astype(bool)
    fix_ones = fix_ones.at[reserve1:reserve2].set(
        (fix_ones & ~keep_last_one)[reserve1:reserve2])

    # Rows [reserve2, batch_size): likewise keep the first offending 2.
    first_two = fix_twos.argmax(axis=1)
    keep_first_two = jnn.one_hot(first_two, length).astype(bool)
    fix_twos = fix_twos.at[reserve2:batch_size].set(
        (fix_twos & ~keep_first_two)[reserve2:batch_size])

    # Replacement alphabets. `self` is a static argument, so these are plain
    # constants at trace time. An offending 1 may become anything but 1; an
    # offending 2 may become neither 2 nor 0, since a new 0 would move the
    # last-zero boundary.
    not_one = jnp.array([t for t in range(self._vocab_size) if t != 1])
    not_zero_or_two = jnp.array(
        [t for t in range(self._vocab_size) if t not in (0, 2)])

    strings = jnp.where(
        fix_ones,
        jrandom.choice(rng_fix_ones, not_one, shape=(batch_size, length)),
        strings,
    )
    strings = jnp.where(
        fix_twos,
        jrandom.choice(
            rng_fix_twos, not_zero_or_two, shape=(batch_size, length)),
        strings,
    )

    one_hot_strings = jnn.one_hot(strings, num_classes=self._vocab_size)

    # Recompute the masks on the final strings to derive the labels.
    up_to_last_zero, ones_before, twos_after = self._violation_masks(strings)
    labels = (
        jnp.any(up_to_last_zero, axis=1)
        & ~jnp.any(ones_before, axis=1)
        & ~jnp.any(twos_after, axis=1)
    )

    # Boolean membership -> class index (0 or 1) -> one-hot target.
    labels = jnn.one_hot(labels.astype(jnp.int32), num_classes=2)
    return {
        'input': one_hot_strings,
        'output': labels,
    }

  @property
  def input_size(self) -> int:
    """Returns the input size for the models."""
    return self._vocab_size

  @property
  def output_size(self) -> int:
    """Returns the output size for the models."""
    return 2