"""Test functions for 1D array set operations.

"""
import pytest

import numpy as np
from numpy import ediff1d, intersect1d, isin, setdiff1d, setxor1d, union1d, unique
from numpy.dtypes import StringDType
from numpy.exceptions import AxisError
from numpy.testing import (
    assert_array_equal,
    assert_equal,
    assert_raises,
    assert_raises_regex,
)


class TestSetOps:

    def test_intersect1d(self):
        # unique inputs
        a = np.array([5, 7, 1, 2])
        b = np.array([2, 4, 3, 1, 5])

        ec = np.array([1, 2, 5])
        c = intersect1d(a, b, assume_unique=True)
        assert_array_equal(c, ec)

        # non-unique inputs
        a = np.array([5, 5, 7, 1, 2])
        b = np.array([2, 1, 4, 3, 3, 1, 5])

        ed = np.array([1, 2, 5])
        c = intersect1d(a, b)
        assert_array_equal(c, ed)
        assert_array_equal([], intersect1d([], []))

    def test_intersect1d_array_like(self):
        # See gh-11772
        class Test:
            def __array__(self, dtype=None, copy=None):
                return np.arange(3)

        a = Test()
        res = intersect1d(a, a)
        assert_array_equal(res, a)
        res = intersect1d([1, 2, 3], [1, 2, 3])
        assert_array_equal(res, [1, 2, 3])

    def test_intersect1d_indices(self):
        # unique inputs
        a = np.array([1, 2, 3, 4])
        b = np.array([2, 1, 4, 6])
        c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
        ee = np.array([1, 2, 4])
        assert_array_equal(c, ee)
        assert_array_equal(a[i1], ee)
        assert_array_equal(b[i2], ee)

        # non-unique inputs
        a = np.array([1, 2, 2, 3, 4, 3, 2])
        b = np.array([1, 8, 4, 2, 2, 3, 2, 3])
        c, i1, i2 = intersect1d(a, b, return_indices=True)
        ef = np.array([1, 2, 3, 4])
        assert_array_equal(c, ef)
        assert_array_equal(a[i1], ef)
        assert_array_equal(b[i2], ef)

        # non1d, unique inputs
        a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]])
        b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]])
        c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
        ui1 = np.unravel_index(i1, a.shape)
        ui2 = np.unravel_index(i2, b.shape)
        ea = np.array([2, 6, 7, 8])
        assert_array_equal(ea, a[ui1])
        assert_array_equal(ea, b[ui2])

        # non1d, not assumed to be uniqueinputs
        a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]])
        b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]])
        c, i1, i2 = intersect1d(a, b, return_indices=True)
        ui1 = np.unravel_index(i1, a.shape)
        ui2 = np.unravel_index(i2, b.shape)
        ea = np.array([2, 7, 8])
        assert_array_equal(ea, a[ui1])
        assert_array_equal(ea, b[ui2])

    def test_setxor1d(self):
        a = np.array([5, 7, 1, 2])
        b = np.array([2, 4, 3, 1, 5])

        ec = np.array([3, 4, 7])
        c = setxor1d(a, b)
        assert_array_equal(c, ec)

        a = np.array([1, 2, 3])
        b = np.array([6, 5, 4])

        ec = np.array([1, 2, 3, 4, 5, 6])
        c = setxor1d(a, b)
        assert_array_equal(c, ec)

        a = np.array([1, 8, 2, 3])
        b = np.array([6, 5, 4, 8])

        ec = np.array([1, 2, 3, 4, 5, 6])
        c = setxor1d(a, b)
        assert_array_equal(c, ec)

        assert_array_equal([], setxor1d([], []))

    def test_setxor1d_unique(self):
        a = np.array([1, 8, 2, 3])
        b = np.array([6, 5, 4, 8])

        ec = np.array([1, 2, 3, 4, 5, 6])
        c = setxor1d(a, b, assume_unique=True)
        assert_array_equal(c, ec)

        a = np.array([[1], [8], [2], [3]])
        b = np.array([[6, 5], [4, 8]])

        ec = np.array([1, 2, 3, 4, 5, 6])
        c = setxor1d(a, b, assume_unique=True)
        assert_array_equal(c, ec)

    def test_ediff1d(self):
        zero_elem = np.array([])
        one_elem = np.array([1])
        two_elem = np.array([1, 2])

        assert_array_equal([], ediff1d(zero_elem))
        assert_array_equal([0], ediff1d(zero_elem, to_begin=0))
        assert_array_equal([0], ediff1d(zero_elem, to_end=0))
        assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0))
        assert_array_equal([], ediff1d(one_elem))
        assert_array_equal([1], ediff1d(two_elem))
        assert_array_equal([7, 1, 9], ediff1d(two_elem, to_begin=7, to_end=9))
        assert_array_equal([5, 6, 1, 7, 8],
                           ediff1d(two_elem, to_begin=[5, 6], to_end=[7, 8]))
        assert_array_equal([1, 9], ediff1d(two_elem, to_end=9))
        assert_array_equal([1, 7, 8], ediff1d(two_elem, to_end=[7, 8]))
        assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7))
        assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6]))

    @pytest.mark.parametrize("ary, prepend, append, expected", [
        # should fail because trying to cast
        # np.nan standard floating point value
        # into an integer array:
        (np.array([1, 2, 3], dtype=np.int64),
         None,
         np.nan,
         'to_end'),
        # should fail because attempting
        # to downcast to int type:
        (np.array([1, 2, 3], dtype=np.int64),
         np.array([5, 7, 2], dtype=np.float32),
         None,
         'to_begin'),
        # should fail because attempting to cast
        # two special floating point values
        # to integers (on both sides of ary),
        # `to_begin` is in the error message as the impl checks this first:
        (np.array([1., 3., 9.], dtype=np.int8),
         np.nan,
         np.nan,
         'to_begin'),
         ])
    def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected):
        # verify resolution of gh-11490

        # specifically, raise an appropriate
        # Exception when attempting to append or
        # prepend with an incompatible type
        msg = f'dtype of `{expected}` must be compatible'
        with assert_raises_regex(TypeError, msg):
            ediff1d(ary=ary,
                    to_end=append,
                    to_begin=prepend)

    @pytest.mark.parametrize(
        "ary,prepend,append,expected",
        [
         (np.array([1, 2, 3], dtype=np.int16),
          2**16,  # will be cast to int16 under same kind rule.
          2**16 + 4,
          np.array([0, 1, 1, 4], dtype=np.int16)),
         (np.array([1, 2, 3], dtype=np.float32),
          np.array([5], dtype=np.float64),
          None,
          np.array([5, 1, 1], dtype=np.float32)),
         (np.array([1, 2, 3], dtype=np.int32),
          0,
          0,
          np.array([0, 1, 1, 0], dtype=np.int32)),
         (np.array([1, 2, 3], dtype=np.int64),
          3,
          -9,
          np.array([3, 1, 1, -9], dtype=np.int64)),
        ]
    )
    def test_ediff1d_scalar_handling(self,
                                     ary,
                                     prepend,
                                     append,
                                     expected):
        # maintain backwards-compatibility
        # of scalar prepend / append behavior
        # in ediff1d following fix for gh-11490
        actual = np.ediff1d(ary=ary,
                            to_end=append,
                            to_begin=prepend)
        assert_equal(actual, expected)
        assert actual.dtype == expected.dtype

    @pytest.mark.parametrize("kind", [None, "sort", "table"])
    def test_isin(self, kind):
        def _isin_slow(a, b):
            b = np.asarray(b).flatten().tolist()
            return a in b
        isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})

        def assert_isin_equal(a, b):
            x = isin(a, b, kind=kind)
            y = isin_slow(a, b)
            assert_array_equal(x, y)

        # multidimensional arrays in both arguments
        a = np.arange(24).reshape([2, 3, 4])
        b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]])
        assert_isin_equal(a, b)

        # array-likes as both arguments
        c = [(9, 8), (7, 6)]
        d = (9, 7)
        assert_isin_equal(c, d)

        # zero-d array:
        f = np.array(3)
        assert_isin_equal(f, b)
        assert_isin_equal(a, f)
        assert_isin_equal(f, f)

        # scalar:
        assert_isin_equal(5, b)
        assert_isin_equal(a, 6)
        assert_isin_equal(5, 6)

        # empty array-like:
        if kind != "table":
            # An empty list will become float64,
            # which is invalid for kind="table"
            x = []
            assert_isin_equal(x, b)
            assert_isin_equal(a, x)
            assert_isin_equal(x, x)

        # empty array with various types:
        for dtype in [bool, np.int64, np.float64]:
            if kind == "table" and dtype == np.float64:
                continue

            if dtype in {np.int64, np.float64}:
                ar = np.array([10, 20, 30], dtype=dtype)
            elif dtype in {bool}:
                ar = np.array([True, False, False])

            empty_array = np.array([], dtype=dtype)

            assert_isin_equal(empty_array, ar)
            assert_isin_equal(ar, empty_array)
            assert_isin_equal(empty_array, empty_array)

    @pytest.mark.parametrize("kind", [None, "sort", "table"])
    def test_isin_additional(self, kind):
        # we use two different sizes for the b array here to test the
        # two different paths in isin().
        for mult in (1, 10):
            # One check without np.array to make sure lists are handled correct
            a = [5, 7, 1, 2]
            b = [2, 4, 3, 1, 5] * mult
            ec = np.array([True, False, True, True])
            c = isin(a, b, assume_unique=True, kind=kind)
            assert_array_equal(c, ec)

            a[0] = 8
            ec = np.array([False, False, True, True])
            c = isin(a, b, assume_unique=True, kind=kind)
            assert_array_equal(c, ec)

            a[0], a[3] = 4, 8
            ec = np.array([True, False, True, False])
            c = isin(a, b, assume_unique=True, kind=kind)
            assert_array_equal(c, ec)

            a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
            b = [2, 3, 4] * mult
            ec = [False, True, False, True, True, True, True, True, True,
                  False, True, False, False, False]
            c = isin(a, b, kind=kind)
            assert_array_equal(c, ec)

            b = b + [5, 5, 4] * mult
            ec = [True, True, True, True, True, True, True, True, True, True,
                  True, False, True, True]
            c = isin(a, b, kind=kind)
            assert_array_equal(c, ec)

            a = np.array([5, 7, 1, 2])
            b = np.array([2, 4, 3, 1, 5] * mult)
            ec = np.array([True, False, True, True])
            c = isin(a, b, kind=kind)
            assert_array_equal(c, ec)

            a = np.array([5, 7, 1, 1, 2])
            b = np.array([2, 4, 3, 3, 1, 5] * mult)
            ec = np.array([True, False, True, True, True])
            c = isin(a, b, kind=kind)
            assert_array_equal(c, ec)

            a = np.array([5, 5])
            b = np.array([2, 2] * mult)
            ec = np.array([False, False])
            c = isin(a, b, kind=kind)
            assert_array_equal(c, ec)

        a = np.array([5])
        b = np.array([2])
        ec = np.array([False])
        c = isin(a, b, kind=kind)
        assert_array_equal(c, ec)

        if kind in {None, "sort"}:
            assert_array_equal(isin([], [], kind=kind), [])

    def test_isin_char_array(self):
        a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
        b = np.array(['a', 'c'])

        ec = np.array([True, False, True, False, False, True, False, False])
        c = isin(a, b)

        assert_array_equal(c, ec)

    @pytest.mark.parametrize("kind", [None, "sort", "table"])
    def test_isin_invert(self, kind):
        "Test isin's invert parameter"
        # We use two different sizes for the b array here to test the
        # two different paths in isin().
        for mult in (1, 10):
            a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
            b = [2, 3, 4] * mult
            assert_array_equal(np.invert(isin(a, b, kind=kind)),
                               isin(a, b, invert=True, kind=kind))

        # float:
        if kind in {None, "sort"}:
            for mult in (1, 10):
                a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5],
                            dtype=np.float32)
                b = [2, 3, 4] * mult
                b = np.array(b, dtype=np.float32)
                assert_array_equal(np.invert(isin(a, b, kind=kind)),
                                   isin(a, b, invert=True, kind=kind))

    def test_isin_hit_alternate_algorithm(self):
        """Hit the standard isin code with integers"""
        # Need extreme range to hit standard code
        # This hits it without the use of kind='table'
        a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
        b = np.array([2, 3, 4, 1e9], dtype=np.int64)
        expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
        assert_array_equal(expected, isin(a, b))
        assert_array_equal(np.invert(expected), isin(a, b, invert=True))

        a = np.array([5, 7, 1, 2], dtype=np.int64)
        b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64)
        ec = np.array([True, False, True, True])
        c = isin(a, b, assume_unique=True)
        assert_array_equal(c, ec)

    @pytest.mark.parametrize("kind", [None, "sort", "table"])
    def test_isin_boolean(self, kind):
        """Test that isin works for boolean input"""
        a = np.array([True, False])
        b = np.array([False, False, False])
        expected = np.array([False, True])
        assert_array_equal(expected,
                           isin(a, b, kind=kind))
        assert_array_equal(np.invert(expected),
                           isin(a, b, invert=True, kind=kind))

    @pytest.mark.parametrize("kind", [None, "sort"])
    def test_isin_timedelta(self, kind):
        """Test that isin works for timedelta input"""
        rstate = np.random.RandomState(0)
        a = rstate.randint(0, 100, size=10)
        b = rstate.randint(0, 100, size=10)
        truth = isin(a, b)
        a_timedelta = a.astype("timedelta64[s]")
        b_timedelta = b.astype("timedelta64[s]")
        assert_array_equal(truth, isin(a_timedelta, b_timedelta, kind=kind))

    def test_isin_table_timedelta_fails(self):
        a = np.array([0, 1, 2], dtype="timedelta64[s]")
        b = a
        # Make sure it raises a value error:
        with pytest.raises(ValueError):
            isin(a, b, kind="table")

    @pytest.mark.parametrize(
        "dtype1,dtype2",
        [
            (np.int8, np.int16),
            (np.int16, np.int8),
            (np.uint8, np.uint16),
            (np.uint16, np.uint8),
            (np.uint8, np.int16),
            (np.int16, np.uint8),
            (np.uint64, np.int64),
        ]
    )
    @pytest.mark.parametrize("kind", [None, "sort", "table"])
    def test_isin_mixed_dtype(self, dtype1, dtype2, kind):
        """Test that isin works as expected for mixed dtype input."""
        is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger)
        ar1 = np.array([0, 0, 1, 1], dtype=dtype1)

        if is_dtype2_signed:
            ar2 = np.array([-128, 0, 127], dtype=dtype2)
        else:
            ar2 = np.array([127, 0, 255], dtype=dtype2)

        expected = np.array([True, True, False, False])

        expect_failure = kind == "table" and (
            dtype1 == np.int16 and dtype2 == np.int8)

        if expect_failure:
            with pytest.raises(RuntimeError, match="exceed the maximum"):
                isin(ar1, ar2, kind=kind)
        else:
            assert_array_equal(isin(ar1, ar2, kind=kind), expected)

    @pytest.mark.parametrize("data", [
        np.array([2**63, 2**63 + 1], dtype=np.uint64),
        np.array([-2**62, -2**62 - 1], dtype=np.int64),
    ])
    @pytest.mark.parametrize("kind", [None, "sort", "table"])
    def test_isin_mixed_huge_vals(self, kind, data):
        """Test values outside intp range (negative ones if 32bit system)"""
        query = data[1]
        res = np.isin(data, query, kind=kind)
        assert_array_equal(res, [False, True])
        # Also check that nothing weird happens for values can't possibly
        # in range.
        data = data.astype(np.int32)  # clearly different values
        res = np.isin(data, query, kind=kind)
        assert_array_equal(res, [False, False])

    @pytest.mark.parametrize("kind", [None, "sort", "table"])
    def test_isin_mixed_boolean(self, kind):
        """Test that isin works as expected for bool/int input."""
        for dtype in np.typecodes["AllInteger"]:
            a = np.array([True, False, False], dtype=bool)
            b = np.array([0, 0, 0, 0], dtype=dtype)
            expected = np.array([False, True, True], dtype=bool)
            assert_array_equal(isin(a, b, kind=kind), expected)

            a, b = b, a
            expected = np.array([True, True, True, True], dtype=bool)
            assert_array_equal(isin(a, b, kind=kind), expected)

    def test_isin_first_array_is_object(self):
        ar1 = [None]
        ar2 = np.array([1] * 10)
        expected = np.array([False])
        result = np.isin(ar1, ar2)
        assert_array_equal(result, expected)

    def test_isin_second_array_is_object(self):
        ar1 = 1
        ar2 = np.array([None] * 10)
        expected = np.array([False])
        result = np.isin(ar1, ar2)
        assert_array_equal(result, expected)

    def test_isin_both_arrays_are_object(self):
        ar1 = [None]
        ar2 = np.array([None] * 10)
        expected = np.array([True])
        result = np.isin(ar1, ar2)
        assert_array_equal(result, expected)

    def test_isin_both_arrays_have_structured_dtype(self):
        # Test arrays of a structured data type containing an integer field
        # and a field of dtype `object` allowing for arbitrary Python objects
        dt = np.dtype([('field1', int), ('field2', object)])
        ar1 = np.array([(1, None)], dtype=dt)
        ar2 = np.array([(1, None)] * 10, dtype=dt)
        expected = np.array([True])
        result = np.isin(ar1, ar2)
        assert_array_equal(result, expected)

    def test_isin_with_arrays_containing_tuples(self):
        ar1 = np.array([(1,), 2], dtype=object)
        ar2 = np.array([(1,), 2], dtype=object)
        expected = np.array([True, True])
        result = np.isin(ar1, ar2)
        assert_array_equal(result, expected)
        result = np.isin(ar1, ar2, invert=True)
        assert_array_equal(result, np.invert(expected))

        # An integer is added at the end of the array to make sure
        # that the array builder will create the array with tuples
        # and after it's created the integer is removed.
        # There's a bug in the array constructor that doesn't handle
        # tuples properly and adding the integer fixes that.
        ar1 = np.array([(1,), (2, 1), 1], dtype=object)
        ar1 = ar1[:-1]
        ar2 = np.array([(1,), (2, 1), 1], dtype=object)
        ar2 = ar2[:-1]
        expected = np.array([True, True])
        result = np.isin(ar1, ar2)
        assert_array_equal(result, expected)
        result = np.isin(ar1, ar2, invert=True)
        assert_array_equal(result, np.invert(expected))

        ar1 = np.array([(1,), (2, 3), 1], dtype=object)
        ar1 = ar1[:-1]
        ar2 = np.array([(1,), 2], dtype=object)
        expected = np.array([True, False])
        result = np.isin(ar1, ar2)
        assert_array_equal(result, expected)
        result = np.isin(ar1, ar2, invert=True)
        assert_array_equal(result, np.invert(expected))

    def test_isin_errors(self):
        """Test that isin raises expected errors."""

        # Error 1: `kind` is not one of 'sort' 'table' or None.
        ar1 = np.array([1, 2, 3, 4, 5])
        ar2 = np.array([2, 4, 6, 8, 10])
        assert_raises(ValueError, isin, ar1, ar2, kind='quicksort')

        # Error 2: `kind="table"` does not work for non-integral arrays.
        obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
        obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
        assert_raises(ValueError, isin, obj_ar1, obj_ar2, kind='table')

        for dtype in [np.int32, np.int64]:
            ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
            # The range of this array will overflow:
            overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)

            # Error 3: `kind="table"` will trigger a runtime error
            #  if there is an integer overflow expected when computing the
            #  range of ar2
            assert_raises(
                RuntimeError,
                isin, ar1, overflow_ar2, kind='table'
            )

            # Non-error: `kind=None` will *not* trigger a runtime error
            #  if there is an integer overflow, it will switch to
            #  the `sort` algorithm.
            result = np.isin(ar1, overflow_ar2, kind=None)
            assert_array_equal(result, [True] + [False] * 4)
            result = np.isin(ar1, overflow_ar2, kind='sort')
            assert_array_equal(result, [True] + [False] * 4)

    def test_union1d(self):
        a = np.array([5, 4, 7, 1, 2])
        b = np.array([2, 4, 3, 3, 2, 1, 5])

        ec = np.array([1, 2, 3, 4, 5, 7])
        c = union1d(a, b)
        assert_array_equal(c, ec)

        # Tests gh-10340, arguments to union1d should be
        # flattened if they are not already 1D
        x = np.array([[0, 1, 2], [3, 4, 5]])
        y = np.array([0, 1, 2, 3, 4])
        ez = np.array([0, 1, 2, 3, 4, 5])
        z = union1d(x, y)
        assert_array_equal(z, ez)

        assert_array_equal([], union1d([], []))

    def test_setdiff1d(self):
        a = np.array([6, 5, 4, 7, 1, 2, 7, 4])
        b = np.array([2, 4, 3, 3, 2, 1, 5])

        ec = np.array([6, 7])
        c = setdiff1d(a, b)
        assert_array_equal(c, ec)

        a = np.arange(21)
        b = np.arange(19)
        ec = np.array([19, 20])
        c = setdiff1d(a, b)
        assert_array_equal(c, ec)

        assert_array_equal([], setdiff1d([], []))
        a = np.array((), np.uint32)
        assert_equal(setdiff1d(a, []).dtype, np.uint32)

    def test_setdiff1d_unique(self):
        a = np.array([3, 2, 1])
        b = np.array([7, 5, 2])
        expected = np.array([3, 1])
        actual = setdiff1d(a, b, assume_unique=True)
        assert_equal(actual, expected)

    def test_setdiff1d_char_array(self):
        a = np.array(['a', 'b', 'c'])
        b = np.array(['a', 'b', 's'])
        assert_array_equal(setdiff1d(a, b), np.array(['c']))

    def test_manyways(self):
        a = np.array([5, 7, 1, 2, 8])
        b = np.array([9, 8, 2, 4, 3, 1, 5])

        c1 = setxor1d(a, b)
        aux1 = intersect1d(a, b)
        aux2 = union1d(a, b)
        c2 = setdiff1d(aux2, aux1)
        assert_array_equal(c1, c2)


class TestUnique:

    def check_all(self, a, b, i1, i2, c, dt):
        base_msg = 'check {0} failed for type {1}'

        msg = base_msg.format('values', dt)
        v = unique(a)
        assert_array_equal(v, b, msg)
        assert type(v) == type(b)

        msg = base_msg.format('return_index', dt)
        v, j = unique(a, True, False, False)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i1, msg)
        assert type(v) == type(b)

        msg = base_msg.format('return_inverse', dt)
        v, j = unique(a, False, True, False)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i2, msg)
        assert type(v) == type(b)

        msg = base_msg.format('return_counts', dt)
        v, j = unique(a, False, False, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, c, msg)
        assert type(v) == type(b)

        msg = base_msg.format('return_index and return_inverse', dt)
        v, j1, j2 = unique(a, True, True, False)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)
        assert type(v) == type(b)

        msg = base_msg.format('return_index and return_counts', dt)
        v, j1, j2 = unique(a, True, False, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, c, msg)
        assert type(v) == type(b)

        msg = base_msg.format('return_inverse and return_counts', dt)
        v, j1, j2 = unique(a, False, True, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i2, msg)
        assert_array_equal(j2, c, msg)
        assert type(v) == type(b)

        msg = base_msg.format(('return_index, return_inverse '
                                'and return_counts'), dt)
        v, j1, j2, j3 = unique(a, True, True, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)
        assert_array_equal(j3, c, msg)
        assert type(v) == type(b)

    def get_types(self):
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        return types

    def test_unique_1d(self):

        a = [5, 7, 1, 2, 1, 5, 7] * 10
        b = [1, 2, 5, 7]
        i1 = [2, 3, 0, 1]
        i2 = [2, 3, 0, 1, 0, 2, 3] * 10
        c = np.multiply([2, 1, 2, 2], 10)

        # test for numeric arrays
        types = self.get_types()
        for dt in types:
            aa = np.array(a, dt)
            bb = np.array(b, dt)
            self.check_all(aa, bb, i1, i2, c, dt)

        # test for object arrays
        dt = 'O'
        aa = np.empty(len(a), dt)
        aa[:] = a
        bb = np.empty(len(b), dt)
        bb[:] = b
        self.check_all(aa, bb, i1, i2, c, dt)

        # test for structured arrays
        dt = [('', 'i'), ('', 'i')]
        aa = np.array(list(zip(a, a)), dt)
        bb = np.array(list(zip(b, b)), dt)
        self.check_all(aa, bb, i1, i2, c, dt)

        # test for ticket #2799
        aa = [1. + 0.j, 1 - 1.j, 1]
        assert_array_equal(
            np.sort(np.unique(aa)),
            [1. - 1.j, 1.],
        )

        # test for ticket #4785
        a = [(1, 2), (1, 2), (2, 3)]
        unq = [1, 2, 3]
        inv = [[0, 1], [0, 1], [1, 2]]
        a1 = unique(a)
        assert_array_equal(a1, unq)
        a2, a2_inv = unique(a, return_inverse=True)
        assert_array_equal(a2, unq)
        assert_array_equal(a2_inv, inv)

        # test for chararrays with return_inverse (gh-5099)
        a = np.char.chararray(5)
        a[...] = ''
        a2, a2_inv = np.unique(a, return_inverse=True)
        assert_array_equal(a2_inv, np.zeros(5))

        # test for ticket #9137
        a = []
        a1_idx = np.unique(a, return_index=True)[1]
        a2_inv = np.unique(a, return_inverse=True)[1]
        a3_idx, a3_inv = np.unique(a, return_index=True,
                                   return_inverse=True)[1:]
        assert_equal(a1_idx.dtype, np.intp)
        assert_equal(a2_inv.dtype, np.intp)
        assert_equal(a3_idx.dtype, np.intp)
        assert_equal(a3_inv.dtype, np.intp)

        # test for ticket 2111 - float
        a = [2.0, np.nan, 1.0, np.nan]
        ua = [1.0, 2.0, np.nan]
        ua_idx = [2, 0, 1]
        ua_inv = [1, 2, 0, 2]
        ua_cnt = [1, 1, 2]
        # order of unique values is not guaranteed
        assert_equal(np.sort(np.unique(a)), np.sort(ua))
        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))

        # test for ticket 2111 - complex
        a = [2.0 - 1j, np.nan, 1.0 + 1j, complex(0.0, np.nan), complex(1.0, np.nan)]
        ua = [1.0 + 1j, 2.0 - 1j, complex(0.0, np.nan)]
        ua_idx = [2, 0, 3]
        ua_inv = [1, 2, 0, 2, 2]
        ua_cnt = [1, 1, 3]
        # order of unique values is not guaranteed
        assert_equal(np.sort(np.unique(a)), np.sort(ua))
        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))

        # test for ticket 2111 - datetime64
        nat = np.datetime64('nat')
        a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
        ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
        ua_idx = [2, 0, 1]
        ua_inv = [1, 2, 0, 2]
        ua_cnt = [1, 1, 2]
        assert_equal(np.unique(a), ua)
        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))

        # test for ticket 2111 - timedelta
        nat = np.timedelta64('nat')
        a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
        ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
        ua_idx = [2, 0, 1]
        ua_inv = [1, 2, 0, 2]
        ua_cnt = [1, 1, 2]
        assert_equal(np.unique(a), ua)
        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))

        # test for gh-19300
        all_nans = [np.nan] * 4
        ua = [np.nan]
        ua_idx = [0]
        ua_inv = [0, 0, 0, 0]
        ua_cnt = [4]
        assert_equal(np.unique(all_nans), ua)
        assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx))
        assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
        assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))

    def test_unique_zero_sized(self):
        # test for zero-sized arrays
        types = self.get_types()
        types.extend('SU')
        for dt in types:
            a = np.array([], dt)
            b = np.array([], dt)
            i1 = np.array([], np.int64)
            i2 = np.array([], np.int64)
            c = np.array([], np.int64)
            self.check_all(a, b, i1, i2, c, dt)

    def test_unique_subclass(self):
        class Subclass(np.ndarray):
            pass

        i1 = [2, 3, 0, 1]
        i2 = [2, 3, 0, 1, 0, 2, 3] * 10
        c = np.multiply([2, 1, 2, 2], 10)

        # test for numeric arrays
        types = self.get_types()
        for dt in types:
            a = np.array([5, 7, 1, 2, 1, 5, 7] * 10, dtype=dt)
            b = np.array([1, 2, 5, 7], dtype=dt)
            aa = Subclass(a.shape, dtype=dt, buffer=a)
            bb = Subclass(b.shape, dtype=dt, buffer=b)
            self.check_all(aa, bb, i1, i2, c, dt)

    def test_unique_byte_string_hash_based(self):
        # test for byte string arrays
        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape']
        unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']

        a1 = unique(arr, sorted=False)
        # the result varies depending on the impl of std::unordered_set,
        # so we check them by sorting
        assert_array_equal(sorted(a1.tolist()), unq_sorted)

    def test_unique_unicode_string_hash_based(self):
        # test for unicode string arrays
        arr = [
            'café', 'cafe', 'café', 'naïve', 'naive',
            'résumé', 'naïve', 'resume', 'résumé',
        ]
        unq_sorted = ['cafe', 'café', 'naive', 'naïve', 'resume', 'résumé']

        a1 = unique(arr, sorted=False)
        # the result varies depending on the impl of std::unordered_set,
        # so we check them by sorting
        assert_array_equal(sorted(a1.tolist()), unq_sorted)

    def test_unique_vstring_hash_based_equal_nan(self):
        # test for unicode and nullable string arrays (equal_nan=True)
        a = np.array([
                # short strings
                'straße',
                None,
                'strasse',
                'straße',
                None,
                'niño',
                'nino',
                'élève',
                'eleve',
                'niño',
                'élève',
                # medium strings
                'b' * 20,
                'ß' * 30,
                None,
                'é' * 30,
                'e' * 20,
                'ß' * 30,
                'n' * 30,
                'ñ' * 20,
                None,
                'e' * 20,
                'ñ' * 20,
                # long strings
                'b' * 300,
                'ß' * 400,
                None,
                'é' * 400,
                'e' * 300,
                'ß' * 400,
                'n' * 400,
                'ñ' * 300,
                None,
                'e' * 300,
                'ñ' * 300,
            ],
            dtype=StringDType(na_object=None)
        )
        unq_sorted_wo_none = [
            'b' * 20,
            'b' * 300,
            'e' * 20,
            'e' * 300,
            'eleve',
            'nino',
            'niño',
            'n' * 30,
            'n' * 400,
            'strasse',
            'straße',
            'ß' * 30,
            'ß' * 400,
            'élève',
            'é' * 30,
            'é' * 400,
            'ñ' * 20,
            'ñ' * 300,
        ]

        a1 = unique(a, sorted=False, equal_nan=True)
        # the result varies depending on the impl of std::unordered_set,
        # so we check them by sorting

        # a1 should have exactly one None
        count_none = sum(x is None for x in a1)
        assert_equal(count_none, 1)

        a1_wo_none = sorted(x for x in a1 if x is not None)
        assert_array_equal(a1_wo_none, unq_sorted_wo_none)

    def test_unique_vstring_hash_based_not_equal_nan(self):
        # test for unicode and nullable string arrays (equal_nan=False)
        a = np.array([
                # short strings
                'straße',
                None,
                'strasse',
                'straße',
                None,
                'niño',
                'nino',
                'élève',
                'eleve',
                'niño',
                'élève',
                # medium strings
                'b' * 20,
                'ß' * 30,
                None,
                'é' * 30,
                'e' * 20,
                'ß' * 30,
                'n' * 30,
                'ñ' * 20,
                None,
                'e' * 20,
                'ñ' * 20,
                # long strings
                'b' * 300,
                'ß' * 400,
                None,
                'é' * 400,
                'e' * 300,
                'ß' * 400,
                'n' * 400,
                'ñ' * 300,
                None,
                'e' * 300,
                'ñ' * 300,
            ],
            dtype=StringDType(na_object=None)
        )
        unq_sorted_wo_none = [
            'b' * 20,
            'b' * 300,
            'e' * 20,
            'e' * 300,
            'eleve',
            'nino',
            'niño',
            'n' * 30,
            'n' * 400,
            'strasse',
            'straße',
            'ß' * 30,
            'ß' * 400,
            'élève',
            'é' * 30,
            'é' * 400,
            'ñ' * 20,
            'ñ' * 300,
        ]

        a1 = unique(a, sorted=False, equal_nan=False)
        # the result varies depending on the impl of std::unordered_set,
        # so we check them by sorting

        # a1 should have exactly one None
        count_none = sum(x is None for x in a1)
        assert_equal(count_none, 6)

        a1_wo_none = sorted(x for x in a1 if x is not None)
        assert_array_equal(a1_wo_none, unq_sorted_wo_none)

    def test_unique_vstring_errors(self):
        a = np.array(
            [
                'apple', 'banana', 'apple', None, 'cherry',
                'date', 'banana', 'fig', None, 'grape',
            ] * 2,
            dtype=StringDType(na_object=None)
        )
        assert_raises(ValueError, unique, a, equal_nan=False)

    @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
    def test_unsupported_hash_based(self, arg):
        """These currently never use the hash-based solution.  However,
        it seems easier to just allow it.

        When the hash-based solution is added, this test should fail and be
        replaced with something more comprehensive.
        """
        a = np.array([1, 5, 2, 3, 4, 8, 199, 1, 3, 5])

        res_not_sorted = np.unique([1, 1], sorted=False, **{arg: True})
        res_sorted = np.unique([1, 1], sorted=True, **{arg: True})
        # The following should fail without first sorting `res_not_sorted`.
        for arr, expected in zip(res_not_sorted, res_sorted):
            assert_array_equal(arr, expected)

    def test_unique_axis_errors(self):
        """Check the exceptions raised for unsupported dtypes and bad axes."""
        # The axis checks must raise TypeError for dtypes involving `object`.
        for bad_dtype in (object, [('a', int), ('b', object)]):
            with assert_raises(TypeError):
                self._run_axis_tests(bad_dtype)

        # A 1-d array has no axis 2 or -2.
        for bad_axis in (2, -2):
            with assert_raises(AxisError):
                unique(np.arange(10), axis=bad_axis)

    def test_unique_axis_list(self):
        msg = "Unique failed on list of lists"
        inp = [[0, 1, 0], [0, 1, 0]]
        inp_arr = np.asarray(inp)
        assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
        assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)

    def test_unique_axis(self):
        """Run the axis checks for many dtypes, plus two equality corner cases."""
        dtypes = [
            *np.typecodes['AllInteger'],
            *np.typecodes['AllFloat'],
            'datetime64[D]',
            'timedelta64[D]',
            [('a', int), ('b', int)],
            [('a', int), ('b', float)],
        ]
        for dtype in dtypes:
            self._run_axis_tests(dtype)

        # uint8 values 0..9 reinterpreted as bool: the rows differ bytewise
        # but must still collapse to the two boolean rows below.
        raw_bytes = np.arange(10, dtype=np.uint8).reshape(-1, 2)
        bool_rows = raw_bytes.view(bool)
        expected = np.array([[False, True], [True, True]], dtype=bool)
        assert_array_equal(
            unique(bool_rows, axis=0),
            expected,
            'Non-bitwise-equal booleans test failed',
        )

        # Rows that differ only in the sign of zero must compare equal.
        zero_rows = np.array(
            [[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
        expected = np.array([[-0.0, 0.0]])
        assert_array_equal(
            unique(zero_rows, axis=0),
            expected,
            'Negative zero equality test failed',
        )

    @pytest.mark.parametrize("axis", [0, -1])
    def test_unique_1d_with_axis(self, axis):
        x = np.array([4, 3, 2, 3, 2, 1, 2, 2])
        uniq = unique(x, axis=axis)
        assert_array_equal(uniq, [1, 2, 3, 4])

    @pytest.mark.parametrize("axis", [None, 0, -1])
    def test_unique_inverse_with_axis(self, axis):
        x = np.array([[4, 4, 3], [2, 2, 1], [2, 2, 1], [4, 4, 3]])
        uniq, inv = unique(x, return_inverse=True, axis=axis)
        assert_equal(inv.ndim, x.ndim if axis is None else 1)
        assert_array_equal(x, np.take(uniq, inv, axis=axis))

    def test_unique_axis_zeros(self):
        # issue 15559
        single_zero = np.empty(shape=(2, 0), dtype=np.int8)
        uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True,
                                     return_inverse=True, return_counts=True)

        # there's 1 element of shape (0,) along axis 0
        assert_equal(uniq.dtype, single_zero.dtype)
        assert_array_equal(uniq, np.empty(shape=(1, 0)))
        assert_array_equal(idx, np.array([0]))
        assert_array_equal(inv, np.array([0, 0]))
        assert_array_equal(cnt, np.array([2]))

        # there's 0 elements of shape (2,) along axis 1
        uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True,
                                     return_inverse=True, return_counts=True)

        assert_equal(uniq.dtype, single_zero.dtype)
        assert_array_equal(uniq, np.empty(shape=(2, 0)))
        assert_array_equal(idx, np.array([]))
        assert_array_equal(inv, np.array([]))
        assert_array_equal(cnt, np.array([]))

        # test a "complicated" shape
        shape = (0, 2, 0, 3, 0, 4, 0)
        multiple_zeros = np.empty(shape=shape)
        for axis in range(len(shape)):
            expected_shape = list(shape)
            if shape[axis] == 0:
                expected_shape[axis] = 0
            else:
                expected_shape[axis] = 1

            assert_array_equal(unique(multiple_zeros, axis=axis),
                               np.empty(shape=expected_shape))

    def test_unique_masked(self):
        # issue 8664
        x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0],
                     dtype='uint8')
        y = np.ma.masked_equal(x, 0)

        v = np.unique(y)
        v2, i, c = np.unique(y, return_index=True, return_counts=True)

        msg = 'Unique returned different results when asked for index'
        assert_array_equal(v.data, v2.data, msg)
        assert_array_equal(v.mask, v2.mask, msg)

    def test_unique_sort_order_with_axis(self):
        # These tests fail if sorting along axis is done by treating subarrays
        # as unsigned byte strings.  See gh-10495.
        fmt = "sort order incorrect for integer type '%s'"
        for dt in 'bhilq':
            a = np.array([[-1], [0]], dt)
            b = np.unique(a, axis=0)
            assert_array_equal(a, b, fmt % dt)

    def _run_axis_tests(self, dtype):
        data = np.array([[0, 1, 0, 0],
                         [1, 0, 0, 0],
                         [0, 1, 0, 0],
                         [1, 0, 0, 0]]).astype(dtype)

        msg = 'Unique with 1d array and axis=0 failed'
        result = np.array([0, 1])
        assert_array_equal(unique(data), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=0 failed'
        result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
        assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=1 failed'
        result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
        assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)

        msg = 'Unique with 3d array and axis=2 failed'
        data3d = np.array([[[1, 1],
                            [1, 0]],
                           [[0, 1],
                            [0, 0]]]).astype(dtype)
        result = np.take(data3d, [1, 0], axis=2)
        assert_array_equal(unique(data3d, axis=2), result, msg)

        uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
                                     return_inverse=True, return_counts=True)
        msg = "Unique's return_index=True failed with axis=0"
        assert_array_equal(data[idx], uniq, msg)
        msg = "Unique's return_inverse=True failed with axis=0"
        assert_array_equal(np.take(uniq, inv, axis=0), data)
        msg = "Unique's return_counts=True failed with axis=0"
        assert_array_equal(cnt, np.array([2, 2]), msg)

        uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
                                     return_inverse=True, return_counts=True)
        msg = "Unique's return_index=True failed with axis=1"
        assert_array_equal(data[:, idx], uniq)
        msg = "Unique's return_inverse=True failed with axis=1"
        assert_array_equal(np.take(uniq, inv, axis=1), data)
        msg = "Unique's return_counts=True failed with axis=1"
        assert_array_equal(cnt, np.array([2, 1, 1]), msg)

    def test_unique_nanequals(self):
        # issue 20326
        a = np.array([1, 1, np.nan, np.nan, np.nan])
        unq = np.unique(a)
        not_unq = np.unique(a, equal_nan=False)
        assert_array_equal(unq, np.array([1, np.nan]))
        assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan]))

    def test_unique_array_api_functions(self):
        arr = np.array(
            [
                np.nan, 1.0, 0.0, 4.0, -np.nan,
                -0.0, 1.0, 3.0, 4.0, np.nan,
                5.0, -0.0, 1.0, -np.nan, 0.0,
            ],
        )

        for res_unique_array_api, res_unique in [
            (
                np.unique_values(arr),
                np.unique(arr, equal_nan=False)
            ),
            (
                np.unique_counts(arr),
                np.unique(arr, return_counts=True, equal_nan=False)
            ),
            (
                np.unique_inverse(arr),
                np.unique(arr, return_inverse=True, equal_nan=False)
            ),
            (
                np.unique_all(arr),
                np.unique(
                    arr,
                    return_index=True,
                    return_inverse=True,
                    return_counts=True,
                    equal_nan=False
                )
            )
        ]:
            assert len(res_unique_array_api) == len(res_unique)
            if not isinstance(res_unique_array_api, tuple):
                res_unique_array_api = (res_unique_array_api,)
            if not isinstance(res_unique, tuple):
                res_unique = (res_unique,)

            for actual, expected in zip(res_unique_array_api, res_unique):
                # Order of output is not guaranteed
                assert_equal(np.sort(actual), np.sort(expected))

    def test_unique_inverse_shape(self):
        # Regression test for https://github.com/numpy/numpy/issues/25552
        arr = np.array([[1, 2, 3], [2, 3, 1]])
        expected_values, expected_inverse = np.unique(arr, return_inverse=True)
        expected_inverse = expected_inverse.reshape(arr.shape)
        for func in np.unique_inverse, np.unique_all:
            result = func(arr)
            assert_array_equal(expected_values, result.values)
            assert_array_equal(expected_inverse, result.inverse_indices)
            assert_array_equal(arr, result.values[result.inverse_indices])

    @pytest.mark.parametrize(
        'data',
        [[[1, 1, 1],
          [1, 1, 1]],
         [1, 3, 2],
         1],
    )
    @pytest.mark.parametrize('transpose', [False, True])
    @pytest.mark.parametrize('dtype', [np.int32, np.float64])
    def test_unique_with_matrix(self, data, transpose, dtype):
        mat = np.matrix(data).astype(dtype)
        if transpose:
            mat = mat.T
        u = np.unique(mat)
        expected = np.unique(np.asarray(mat))
        assert_array_equal(u, expected, strict=True)

    def test_unique_axis0_equal_nan_on_1d_array(self):
        # Test Issue #29336
        arr1d = np.array([np.nan, 0, 0, np.nan])
        expected = np.array([0., np.nan])
        result = np.unique(arr1d, axis=0, equal_nan=True)
        assert_array_equal(result, expected)

    def test_unique_axis_minus1_eq_on_1d_array(self):
        arr1d = np.array([np.nan, 0, 0, np.nan])
        expected = np.array([0., np.nan])
        result = np.unique(arr1d, axis=-1, equal_nan=True)
        assert_array_equal(result, expected)

    def test_unique_axis_float_raises_typeerror(self):
        arr1d = np.array([np.nan, 0, 0, np.nan])
        with pytest.raises(TypeError, match="integer argument expected"):
            np.unique(arr1d, axis=0.0, equal_nan=False)

    @pytest.mark.parametrize('dt', [np.dtype('F'), np.dtype('D')])
    @pytest.mark.parametrize('values', [[complex(0.0, -1), complex(-0.0, -1), 0],
                                        [-200, complex(-200, -0.0), -1],
                                        [-25, 3, -5j, complex(-25, -0.0), 3j]])
    def test_unique_complex_signed_zeros(self, dt, values):
        z = np.array(values, dtype=dt)
        u = np.unique(z)
        assert len(u) == len(values) - 1
