From 4c75cc8eb688622c7ffd46c0eca1798082a8b017 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 18 Oct 2019 17:25:50 -0700 Subject: [PATCH 1/2] commit so i can rebase --- pandas/_libs/algos_take_helper.pxi.in | 1 - pandas/_libs/join.pyx | 32 --------------------------- pandas/core/indexes/datetimes.py | 8 +++---- pandas/core/indexes/timedeltas.py | 8 +++---- pandas/tests/test_join.py | 30 ++++++++++++------------- 5 files changed, 23 insertions(+), 56 deletions(-) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 3a3adc71875ed..f10061a417c03 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -276,7 +276,6 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx): Py_ssize_t i, j, N, K ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx ndarray[take_t, ndim=2] result - object val N, K = (values).shape diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 238bfd0be0aa7..e5e058658d9b7 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -296,14 +296,6 @@ def left_join_indexer_unique(join_t[:] left, join_t[:] right): return indexer -left_join_indexer_unique_float64 = left_join_indexer_unique["float64_t"] -left_join_indexer_unique_float32 = left_join_indexer_unique["float32_t"] -left_join_indexer_unique_object = left_join_indexer_unique["object"] -left_join_indexer_unique_int32 = left_join_indexer_unique["int32_t"] -left_join_indexer_unique_int64 = left_join_indexer_unique["int64_t"] -left_join_indexer_unique_uint64 = left_join_indexer_unique["uint64_t"] - - @cython.wraparound(False) @cython.boundscheck(False) def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right): @@ -407,14 +399,6 @@ def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right): return result, lindexer, rindexer -left_join_indexer_float64 = left_join_indexer["float64_t"] -left_join_indexer_float32 = left_join_indexer["float32_t"] -left_join_indexer_object = left_join_indexer["object"] -left_join_indexer_int32 = left_join_indexer["int32_t"] -left_join_indexer_int64 = left_join_indexer["int64_t"] -left_join_indexer_uint64 = left_join_indexer["uint64_t"] - - @cython.wraparound(False) @cython.boundscheck(False) def inner_join_indexer(ndarray[join_t] left, ndarray[join_t] right): @@ -508,14 +492,6 @@ def inner_join_indexer(ndarray[join_t] left, ndarray[join_t] right): return result, lindexer, rindexer -inner_join_indexer_float64 = inner_join_indexer["float64_t"] -inner_join_indexer_float32 = inner_join_indexer["float32_t"] -inner_join_indexer_object = inner_join_indexer["object"] -inner_join_indexer_int32 = inner_join_indexer["int32_t"] -inner_join_indexer_int64 = inner_join_indexer["int64_t"] -inner_join_indexer_uint64 = inner_join_indexer["uint64_t"] - - @cython.wraparound(False) @cython.boundscheck(False) def outer_join_indexer(ndarray[join_t] left, ndarray[join_t] right): @@ -645,14 +621,6 @@ def outer_join_indexer(ndarray[join_t] left, ndarray[join_t] right): return result, lindexer, rindexer -outer_join_indexer_float64 = outer_join_indexer["float64_t"] -outer_join_indexer_float32 = outer_join_indexer["float32_t"] -outer_join_indexer_object = outer_join_indexer["object"] -outer_join_indexer_int32 = outer_join_indexer["int32_t"] -outer_join_indexer_int64 = outer_join_indexer["int64_t"] -outer_join_indexer_uint64 = outer_join_indexer["uint64_t"] - - # ---------------------------------------------------------------------- # asof_join_by # ---------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0b20df38e7d42..7973eaaef6579 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -230,11 +230,11 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin): def _join_i8_wrapper(joinf, **kwargs): return DatetimeIndexOpsMixin._join_i8_wrapper(joinf, dtype="M8[ns]", **kwargs) - _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer_int64) - _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer_int64) - _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64) + _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer) + _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer) + _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer) _left_indexer_unique = _join_i8_wrapper( - libjoin.left_join_indexer_unique_int64, with_indexers=False + libjoin.left_join_indexer_unique, with_indexers=False ) _engine_type = libindex.DatetimeEngine diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c6dce77c4d078..3370db0754697 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -152,11 +152,11 @@ class TimedeltaIndex( def _join_i8_wrapper(joinf, **kwargs): return DatetimeIndexOpsMixin._join_i8_wrapper(joinf, dtype="m8[ns]", **kwargs) - _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer_int64) - _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer_int64) - _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64) + _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer) + _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer) + _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer) _left_indexer_unique = _join_i8_wrapper( - libjoin.left_join_indexer_unique_int64, with_indexers=False + libjoin.left_join_indexer_unique, with_indexers=False ) _engine_type = libindex.TimedeltaEngine diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index e750193abb71a..62254a016568d 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -10,11 +10,11 @@ class TestIndexer: def test_outer_join_indexer(self): typemap = [ - ("int32", _join.outer_join_indexer_int32), - ("int64", _join.outer_join_indexer_int64), - ("float32", _join.outer_join_indexer_float32), - ("float64", _join.outer_join_indexer_float64), - ("object", _join.outer_join_indexer_object), + ("int32", _join.outer_join_indexer), + ("int64", _join.outer_join_indexer), + ("float32", _join.outer_join_indexer), + ("float64", _join.outer_join_indexer), + ("object", _join.outer_join_indexer), ] for dtype, indexer in typemap: @@ -51,7 +51,7 @@ def test_left_join_indexer_unique(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([2, 2, 3, 4, 4], dtype=np.int64) - result = _join.left_join_indexer_unique_int64(b, a) + result = _join.left_join_indexer_unique(b, a) expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) @@ -182,7 +182,7 @@ def test_inner_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = _join.inner_join_indexer_int64(a, b) + index, ares, bres = _join.inner_join_indexer(a, b) index_exp = np.array([3, 5], dtype=np.int64) assert_almost_equal(index, index_exp) @@ -195,7 +195,7 @@ def test_inner_join_indexer(): a = np.array([5], dtype=np.int64) b = np.array([5], dtype=np.int64) - index, ares, bres = _join.inner_join_indexer_int64(a, b) + index, ares, bres = _join.inner_join_indexer(a, b) tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) @@ -205,7 +205,7 @@ def test_outer_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = _join.outer_join_indexer_int64(a, b) + index, ares, bres = _join.outer_join_indexer(a, b) index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64) assert_almost_equal(index, index_exp) @@ -218,7 +218,7 @@ def test_outer_join_indexer(): a = np.array([5], dtype=np.int64) b = np.array([5], dtype=np.int64) - index, ares, bres = _join.outer_join_indexer_int64(a, b) + index, ares, bres = _join.outer_join_indexer(a, b) tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) @@ -228,7 +228,7 @@ def test_left_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = _join.left_join_indexer_int64(a, b) + index, ares, bres = _join.left_join_indexer(a, b) assert_almost_equal(index, a) @@ -240,7 +240,7 @@ def test_left_join_indexer(): a = np.array([5], dtype=np.int64) b = np.array([5], dtype=np.int64) - index, ares, bres = _join.left_join_indexer_int64(a, b) + index, ares, bres = _join.left_join_indexer(a, b) tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) @@ -250,7 +250,7 @@ def test_left_join_indexer2(): idx = Index([1, 1, 2, 5]) idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = _join.left_join_indexer_int64(idx2.values, idx.values) + res, lidx, ridx = _join.left_join_indexer(idx2.values, idx.values) exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) assert_almost_equal(res, exp_res) @@ -266,7 +266,7 @@ def test_outer_join_indexer2(): idx = Index([1, 1, 2, 5]) idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = _join.outer_join_indexer_int64(idx2.values, idx.values) + res, lidx, ridx = _join.outer_join_indexer(idx2.values, idx.values) exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) assert_almost_equal(res, exp_res) @@ -282,7 +282,7 @@ def test_inner_join_indexer2(): idx = Index([1, 1, 2, 5]) idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = _join.inner_join_indexer_int64(idx2.values, idx.values) + res, lidx, ridx = _join.inner_join_indexer(idx2.values, idx.values) exp_res = np.array([1, 1, 2, 5], dtype=np.int64) assert_almost_equal(res, exp_res) From 7b88515e423b2a35c75961b89d917dee1a999c29 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 20 Oct 2019 10:53:57 -0700 Subject: [PATCH 2/2] parametrize test --- pandas/tests/test_join.py | 71 +++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 62254a016568d..2de70ceb53647 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas._libs import join as _join @@ -8,43 +9,39 @@ class TestIndexer: - def test_outer_join_indexer(self): - typemap = [ - ("int32", _join.outer_join_indexer), - ("int64", _join.outer_join_indexer), - ("float32", _join.outer_join_indexer), - ("float64", _join.outer_join_indexer), - ("object", _join.outer_join_indexer), - ] - - for dtype, indexer in typemap: - left = np.arange(3, dtype=dtype) - right = np.arange(2, 5, dtype=dtype) - empty = np.array([], dtype=dtype) - - result, lindexer, rindexer = indexer(left, right) - assert isinstance(result, np.ndarray) - assert isinstance(lindexer, np.ndarray) - assert isinstance(rindexer, np.ndarray) - tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype)) - exp = np.array([0, 1, 2, -1, -1], dtype=np.int64) - tm.assert_numpy_array_equal(lindexer, exp) - exp = np.array([-1, -1, 0, 1, 2], dtype=np.int64) - tm.assert_numpy_array_equal(rindexer, exp) - - result, lindexer, rindexer = indexer(empty, right) - tm.assert_numpy_array_equal(result, right) - exp = np.array([-1, -1, -1], dtype=np.int64) - tm.assert_numpy_array_equal(lindexer, exp) - exp = np.array([0, 1, 2], dtype=np.int64) - tm.assert_numpy_array_equal(rindexer, exp) - - result, lindexer, rindexer = indexer(left, empty) - tm.assert_numpy_array_equal(result, left) - exp = np.array([0, 1, 2], dtype=np.int64) - tm.assert_numpy_array_equal(lindexer, exp) - exp = np.array([-1, -1, -1], dtype=np.int64) - tm.assert_numpy_array_equal(rindexer, exp) + @pytest.mark.parametrize( + "dtype", ["int32", "int64", "float32", "float64", "object"] + ) + def test_outer_join_indexer(self, dtype): + indexer = _join.outer_join_indexer + + left = np.arange(3, dtype=dtype) + right = np.arange(2, 5, dtype=dtype) + empty = np.array([], dtype=dtype) + + result, lindexer, rindexer = indexer(left, right) + assert isinstance(result, np.ndarray) + assert isinstance(lindexer, np.ndarray) + assert isinstance(rindexer, np.ndarray) + tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype)) + exp = np.array([0, 1, 2, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([-1, -1, 0, 1, 2], dtype=np.int64) + tm.assert_numpy_array_equal(rindexer, exp) + + result, lindexer, rindexer = indexer(empty, right) + tm.assert_numpy_array_equal(result, right) + exp = np.array([-1, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([0, 1, 2], dtype=np.int64) + tm.assert_numpy_array_equal(rindexer, exp) + + result, lindexer, rindexer = indexer(left, empty) + tm.assert_numpy_array_equal(result, left) + exp = np.array([0, 1, 2], dtype=np.int64) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([-1, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(rindexer, exp) def test_left_join_indexer_unique():