From 4b132cfd6b895727be03f7232ec563bfdaa3bc32 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 13 May 2018 19:34:37 -0400 Subject: [PATCH 01/20] ENH: add in extension dtype registry --- doc/source/whatsnew/v0.24.0.txt | 9 ++ pandas/core/algorithms.py | 4 +- pandas/core/arrays/base.py | 45 +++++++++- pandas/core/arrays/categorical.py | 4 + pandas/core/dtypes/base.py | 6 ++ pandas/core/dtypes/cast.py | 5 ++ pandas/core/dtypes/common.py | 38 ++------ pandas/core/dtypes/dtypes.py | 86 +++++++++++++++++++ pandas/core/internals.py | 21 +++-- pandas/core/series.py | 8 +- pandas/io/formats/format.py | 1 - pandas/tests/dtypes/test_dtypes.py | 23 ++++- pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/constructors.py | 12 +++ pandas/tests/extension/base/methods.py | 3 +- pandas/tests/extension/base/missing.py | 5 ++ pandas/tests/extension/base/ops.py | 6 ++ pandas/tests/extension/base/reshaping.py | 9 ++ .../extension/category/test_categorical.py | 4 + pandas/tests/extension/decimal/array.py | 8 +- .../tests/extension/decimal/test_decimal.py | 14 ++- pandas/tests/extension/json/array.py | 7 +- pandas/tests/extension/json/test_json.py | 10 ++- 23 files changed, 274 insertions(+), 55 deletions(-) create mode 100644 pandas/tests/extension/base/ops.py diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 7baeadb967819..57cddc327c21a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -119,6 +119,15 @@ Datetimelike API Changes - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`) - :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) +.. 
_whatsnew_0240.api.extension: + +ExtensionType Changes +^^^^^^^^^^^^^^^^^^^^^ + +- ``ExtensionArray`` has gained the abstract methods ``.dropna()`` and ``.append()``, and attribute ``array_type`` (:issue:`21185`) +- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instaniate a registered ``DecimalDtype`` (:issue:`21185`) +- The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) + .. _whatsnew_0240.api.other: Other API Changes diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index dc726a736d34f..c937dcf0429d3 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -154,7 +154,7 @@ def _reconstruct_data(values, dtype, original): """ from pandas import Index if is_extension_array_dtype(dtype): - pass + values = dtype.array_type._from_sequence(values) elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): values = Index(original)._shallow_copy(values, name=None) elif is_bool_dtype(dtype): @@ -705,7 +705,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, else: - if is_categorical_dtype(values) or is_sparse(values): + if is_extension_array_dtype(values) or is_sparse(values): # handle Categorical and sparse, result = Series(values)._values.value_counts(dropna=dropna) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 30949ca6d1d6b..6a9a90c9dd53c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -36,7 +36,9 @@ class ExtensionArray(object): * isna * take * copy + * append * _concat_same_type + * array_type An additional method is available to satisfy pandas' internal, private block API. 
@@ -49,6 +51,7 @@ class ExtensionArray(object): methods: * fillna + * dropna * unique * factorize / _values_for_factorize * argsort / _values_for_argsort @@ -82,7 +85,7 @@ class ExtensionArray(object): # Constructors # ------------------------------------------------------------------------ @classmethod - def _from_sequence(cls, scalars): + def _from_sequence(cls, scalars, copy=False): """Construct a new ExtensionArray from a sequence of scalars. Parameters @@ -90,6 +93,8 @@ def _from_sequence(cls, scalars): scalars : Sequence Each element will be an instance of the scalar type for this array, ``cls.dtype.type``. + copy : boolean, default False + if True, copy the underlying data Returns ------- ExtensionArray @@ -379,6 +384,16 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values + def dropna(self): + """ Return ExtensionArray without NA values + + Returns + ------- + valid : ExtensionArray + """ + + return self[~self.isna()] + def unique(self): """Compute the ExtensionArray of unique values. 
@@ -567,6 +582,34 @@ def copy(self, deep=False): """ raise AbstractMethodError(self) + def append(self, other): + """ + Append a collection of Arrays together + + Parameters + ---------- + other : ExtensionArray or list/tuple of ExtensionArrays + + Returns + ------- + appended : ExtensionArray + """ + + to_concat = [self] + cls = self.__class__ + + if isinstance(other, (list, tuple)): + to_concat = to_concat + list(other) + else: + to_concat.append(other) + + for obj in to_concat: + if not isinstance(obj, cls): + raise TypeError('all inputs must be of type {}'.format( + cls.__name__)) + + return cls._concat_same_type(to_concat) + # ------------------------------------------------------------------------ # Block-related methods # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0252b5b52ae94..c7069b266ef91 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2411,6 +2411,10 @@ def isin(self, values): return algorithms.isin(self.codes, code_values) +# inform the Dtype about us +CategoricalDtype.array_type = Categorical + + # The Series.cat accessor diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 49e98c16c716e..ba359c9ef4982 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -156,6 +156,12 @@ def name(self): """ raise AbstractMethodError(self) + @property + def array_type(self): + """Return the array type associated with this dtype + """ + raise AbstractMethodError(self) + @classmethod def construct_from_string(cls, string): """Attempt to construct this type from a string. 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 65328dfc7347e..2cd8144e43cea 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -648,6 +648,11 @@ def conv(r, dtype): def astype_nansafe(arr, dtype, copy=True): """ return a view if copy is False, but need to be very careful as the result shape could change! """ + + # dispatch on extension dtype if needed + if is_extension_array_dtype(dtype): + return dtype.array_type._from_sequence(arr, copy=copy) + if not isinstance(dtype, np.dtype): dtype = pandas_dtype(dtype) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 05f82c67ddb8b..a794adfad271a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -5,6 +5,7 @@ PY3, PY36) from pandas._libs import algos, lib from pandas._libs.tslibs import conversion + from pandas.core.dtypes.dtypes import ( CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, @@ -1977,38 +1978,13 @@ def pandas_dtype(dtype): np.dtype or a pandas dtype """ - if isinstance(dtype, DatetimeTZDtype): - return dtype - elif isinstance(dtype, PeriodDtype): - return dtype - elif isinstance(dtype, CategoricalDtype): - return dtype - elif isinstance(dtype, IntervalDtype): - return dtype - elif isinstance(dtype, string_types): - try: - return DatetimeTZDtype.construct_from_string(dtype) - except TypeError: - pass - - if dtype.startswith('period[') or dtype.startswith('Period['): - # do not parse string like U as period[U] - try: - return PeriodDtype.construct_from_string(dtype) - except TypeError: - pass - - elif dtype.startswith('interval') or dtype.startswith('Interval'): - try: - return IntervalDtype.construct_from_string(dtype) - except TypeError: - pass + # registered extension types + result = registry.find(dtype) + if result is not None: + return result - try: - return CategoricalDtype.construct_from_string(dtype) - except TypeError: - 
pass - elif isinstance(dtype, (PandasExtensionDtype, ExtensionDtype)): + # un-registered extension types + if isinstance(dtype, ExtensionDtype): return dtype try: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1e762c2be92a6..102f4b99704a2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2,12 +2,70 @@ import re import numpy as np +from collections import OrderedDict from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex from .base import ExtensionDtype, _DtypeOpsMixin +class Registry(object): + """ Registry for dtype inference + + We can directly construct dtypes in pandas_dtypes if they are + a type; the registry allows us to register an extension dtype + to try inference from a string or a dtype class + + These are tried in order for inference. + """ + dtypes = OrderedDict() + + @classmethod + def register(self, dtype, constructor=None): + """ + Parameters + ---------- + dtype : PandasExtension Dtype + """ + if not issubclass(dtype, (PandasExtensionDtype, ExtensionDtype)): + raise ValueError("can only register pandas extension dtypes") + + if constructor is None: + constructor = dtype.construct_from_string + + self.dtypes[dtype] = constructor + + def find(self, dtype): + """ + Parameters + ---------- + dtype : PandasExtensionDtype or string + + Returns + ------- + return the first matching dtype, otherwise return None + """ + if not isinstance(dtype, compat.string_types): + dtype_type = dtype + if not isinstance(dtype, type): + dtype_type = type(dtype) + if issubclass(dtype_type, (PandasExtensionDtype, ExtensionDtype)): + return dtype + + return None + + for dtype_type, constructor in self.dtypes.items(): + try: + return constructor(dtype) + except TypeError: + pass + + return None + + +registry = Registry() + + class PandasExtensionDtype(_DtypeOpsMixin): """ A np.dtype duck-typed class, suitable for holding a custom dtype. 
@@ -568,6 +626,17 @@ def construct_from_string(cls, string): pass raise TypeError("could not construct PeriodDtype") + @classmethod + def construct_from_string_strict(cls, string): + """ + Strict construction from a string, raise a TypeError if not + possible + """ + if string.startswith('period[') or string.startswith('Period['): + # do not parse string like U as period[U] + return PeriodDtype.construct_from_string(string) + raise TypeError("could not construct PeriodDtype") + def __unicode__(self): return "period[{freq}]".format(freq=self.freq.freqstr) @@ -687,6 +756,16 @@ def construct_from_string(cls, string): msg = "a string needs to be passed, got type {typ}" raise TypeError(msg.format(typ=type(string))) + @classmethod + def construct_from_string_strict(cls, string): + """ + Strict construction from a string, raise a TypeError if not + possible + """ + if string.startswith('interval') or string.startswith('Interval'): + return IntervalDtype.construct_from_string(string) + raise TypeError("cannot construct IntervalDtype") + def __unicode__(self): if self.subtype is None: return "interval" @@ -727,3 +806,10 @@ def is_dtype(cls, dtype): else: return False return super(IntervalDtype, cls).is_dtype(dtype) + + +# register the dtypes in search order +registry.register(DatetimeTZDtype) +registry.register(PeriodDtype, PeriodDtype.construct_from_string_strict) +registry.register(IntervalDtype, IntervalDtype.construct_from_string_strict) +registry.register(CategoricalDtype) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fe508dc1bb0bc..a5e9107b8a660 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -633,8 +633,9 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, return self.make_block(Categorical(self.values, dtype=dtype)) # astype processing - dtype = np.dtype(dtype) - if self.dtype == dtype: + if not is_extension_array_dtype(dtype): + dtype = np.dtype(dtype) + if is_dtype_equal(self.dtype, dtype): if copy: 
return self.copy() return self @@ -662,7 +663,13 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, # _astype_nansafe works fine with 1-d only values = astype_nansafe(values.ravel(), dtype, copy=True) - values = values.reshape(self.shape) + + # TODO(extension) + # should we make this attribute? + try: + values = values.reshape(self.shape) + except AttributeError: + pass newb = make_block(values, placement=self.mgr_locs, klass=klass) @@ -3170,6 +3177,10 @@ def get_block_type(values, dtype=None): cls = TimeDeltaBlock elif issubclass(vtype, np.complexfloating): cls = ComplexBlock + elif is_categorical(values): + cls = CategoricalBlock + elif is_extension_array_dtype(values): + cls = ExtensionBlock elif issubclass(vtype, np.datetime64): assert not is_datetimetz(values) cls = DatetimeBlock @@ -3179,10 +3190,6 @@ def get_block_type(values, dtype=None): cls = IntBlock elif dtype == np.bool_: cls = BoolBlock - elif is_categorical(values): - cls = CategoricalBlock - elif is_extension_array_dtype(values): - cls = ExtensionBlock else: cls = ObjectBlock return cls diff --git a/pandas/core/series.py b/pandas/core/series.py index cdb901d18767c..ff6fa52725f8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4092,11 +4092,9 @@ def _try_cast(arr, take_fast_path): subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered) elif is_extension_array_dtype(dtype): - # We don't allow casting to third party dtypes, since we don't - # know what array belongs to which type. - msg = ("Cannot cast data to extension dtype '{}'. 
" - "Pass the extension array directly.".format(dtype)) - raise ValueError(msg) + # create an extension array from its dtype + array_type = dtype.array_type + subarr = array_type(subarr, copy=copy) elif dtype is not None and raise_cast_failure: raise diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 12201f62946ac..adb4bf3f47572 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -514,7 +514,6 @@ def _to_str_columns(self): Render a DataFrame to a list of columns (as lists of strings). """ frame = self.tr_frame - # may include levels names also str_index = self._get_formatted_index(frame) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index eee53a2fcac6a..10d400ec2880d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,7 +10,7 @@ from pandas.compat import string_types from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype) + IntervalDtype, CategoricalDtype, registry) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, @@ -767,3 +767,24 @@ def test_update_dtype_errors(self, bad_dtype): msg = 'a CategoricalDtype must be passed to perform an update, ' with tm.assert_raises_regex(ValueError, msg): dtype.update_dtype(bad_dtype) + + +@pytest.mark.parametrize( + 'dtype', + [DatetimeTZDtype, CategoricalDtype, + PeriodDtype, IntervalDtype]) +def test_registry(dtype): + assert dtype in registry.dtypes + + +@pytest.mark.parametrize( + 'dtype, expected', + [('int64', None), + ('interval', IntervalDtype()), + ('interval[int64]', IntervalDtype()), + ('category', CategoricalDtype()), + ('period[D]', PeriodDtype('D')), + ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) +def test_registry_find(dtype, expected): + + assert registry.find(dtype) == expected diff --git a/pandas/tests/extension/base/__init__.py 
b/pandas/tests/extension/base/__init__.py index 9da985625c4ee..1f42de6737528 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -45,6 +45,7 @@ class TestMyDtype(BaseDtypeTests): from .dtype import BaseDtypeTests # noqa from .getitem import BaseGetitemTests # noqa from .groupby import BaseGroupbyTests # noqa +from .ops import BaseOpsTests # noqa from .interface import BaseInterfaceTests # noqa from .methods import BaseMethodsTests # noqa from .missing import BaseMissingTests # noqa diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 489a430bb4020..972ef7f37acca 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -1,5 +1,6 @@ import pytest +import numpy as np import pandas as pd import pandas.util.testing as tm from pandas.core.internals import ExtensionBlock @@ -45,3 +46,14 @@ def test_series_given_mismatched_index_raises(self, data): msg = 'Length of passed values is 3, index implies 5' with tm.assert_raises_regex(ValueError, msg): pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + def test_from_dtype(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + expected = pd.Series(data) + result = pd.Series(np.array(data), dtype=dtype) + self.assert_series_equal(result, expected) + + result = pd.Series(np.array(data), dtype=str(dtype)) + self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 23227867ee4d7..c660687f16590 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -19,7 +19,8 @@ def test_value_counts(self, all_data, dropna): other = all_data result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts( + 
dropna=dropna).sort_index() self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index af26d83df3fe2..43b2702c72193 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -23,6 +23,11 @@ def test_isna(self, data_missing): expected = pd.Series([], dtype=bool) self.assert_series_equal(result, expected) + def test_dropna_array(self, data_missing): + result = data_missing.dropna() + expected = data_missing[[1]] + self.assert_extension_array_equal(result, expected) + def test_dropna_series(self, data_missing): ser = pd.Series(data_missing) result = ser.dropna() diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py new file mode 100644 index 0000000000000..3742f342e4346 --- /dev/null +++ b/pandas/tests/extension/base/ops.py @@ -0,0 +1,6 @@ +from .base import BaseExtensionTests + + +class BaseOpsTests(BaseExtensionTests): + """Various Series and DataFrame ops methos.""" + pass diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index fe920a47ab740..ff739c97f2785 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -26,6 +26,14 @@ def test_concat(self, data, in_frame): assert dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) + def test_append(self, data): + + wrapped = pd.Series(data) + result = wrapped.append(wrapped) + expected = pd.concat([wrapped, wrapped]) + + self.assert_series_equal(result, expected) + @pytest.mark.parametrize('in_frame', [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) @@ -84,6 +92,7 @@ def test_concat_columns(self, data, na_value): expected = pd.DataFrame({ 'A': data._from_sequence(list(data[:3]) + [na_value]), 'B': [np.nan, 1, 2, 3]}) + result = pd.concat([df1, df2], axis=1) 
self.assert_frame_equal(result, expected) result = pd.concat([df1['A'], df2['B']], axis=1) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 61fdb8454b542..9145d4a551fb4 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -65,6 +65,10 @@ class TestDtype(base.BaseDtypeTests): pass +class TestOps(base.BaseOpsTests): + pass + + class TestInterface(base.BaseInterfaceTests): @pytest.mark.skip(reason="Memory usage doesn't match") def test_memory_usage(self): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index cc6fadc483d5e..24bb97af739a7 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -27,10 +27,11 @@ def construct_from_string(cls, string): class DecimalArray(ExtensionArray): dtype = DecimalDtype() - def __init__(self, values): + def __init__(self, values, copy=False): for val in values: if not isinstance(val, self.dtype.type): raise TypeError + values = np.asarray(values, dtype=object) self._data = values @@ -42,7 +43,7 @@ def __init__(self, values): # self._values = self.values = self.data @classmethod - def _from_sequence(cls, scalars): + def _from_sequence(cls, scalars, copy=False): return cls(scalars) @classmethod @@ -103,5 +104,8 @@ def _concat_same_type(cls, to_concat): return cls(np.concatenate([x._data for x in to_concat])) +DecimalDtype.array_type = DecimalArray + + def make_data(): return [decimal.Decimal(random.random()) for _ in range(100)] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index f74b4d7e94f11..68c36395a94f5 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -107,10 +107,18 @@ class TestInterface(BaseDecimal, base.BaseInterfaceTests): pass -class 
TestConstructors(BaseDecimal, base.BaseConstructorsTests): +class TestOps(BaseDecimal, base.BaseOpsTests): pass +class TestConstructors(BaseDecimal, base.BaseConstructorsTests): + + @pytest.mark.xfail(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + pass + + class TestReshaping(BaseDecimal, base.BaseReshapingTests): pass @@ -155,6 +163,10 @@ class TestGroupby(BaseDecimal, base.BaseGroupbyTests): pass +# TODO(extension) +@pytest.mark.xfail(reason=( + "raising AssertionError as this is not implemented, " + "though easy enough to do")) def test_series_constructor_coerce_data_to_extension_dtype_raises(): xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the " "extension array directly.") diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 10be7836cb8d7..97d579b362ee2 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -44,7 +44,7 @@ def construct_from_string(cls, string): class JSONArray(ExtensionArray): dtype = JSONDtype() - def __init__(self, values): + def __init__(self, values, copy=False): for val in values: if not isinstance(val, self.dtype.type): raise TypeError @@ -58,7 +58,7 @@ def __init__(self, values): # self._values = self.values = self.data @classmethod - def _from_sequence(cls, scalars): + def _from_sequence(cls, scalars, copy=False): return cls(scalars) @classmethod @@ -171,6 +171,9 @@ def _values_for_argsort(self): return np.array(frozen, dtype=object)[1:] +JSONDtype.array_type = JSONArray + + def make_data(): # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer return [collections.UserDict([ diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 85a282ae4007f..1b17da32b511c 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -129,10 +129,18 @@ def test_custom_asserts(self): self.assert_frame_equal(a.to_frame(), b.to_frame()) -class TestConstructors(BaseJSON, base.BaseConstructorsTests): +class TestOps(BaseJSON, base.BaseOpsTests): pass +class TestConstructors(BaseJSON, base.BaseConstructorsTests): + + @pytest.mark.xfail(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + pass + + class TestReshaping(BaseJSON, base.BaseReshapingTests): pass From 90fccb097c8911ef517078e67fbf4913fb650d95 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 24 May 2018 18:43:51 -0400 Subject: [PATCH 02/20] review comments --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/arrays/base.py | 1 - pandas/core/dtypes/base.py | 8 +++++ pandas/core/dtypes/dtypes.py | 56 ++++++++++-------------------- pandas/core/indexes/interval.py | 2 +- pandas/tests/extension/base/ops.py | 6 ---- 6 files changed, 29 insertions(+), 46 deletions(-) delete mode 100644 pandas/tests/extension/base/ops.py diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 57cddc327c21a..1714ec508285e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -125,7 +125,7 @@ ExtensionType Changes ^^^^^^^^^^^^^^^^^^^^^ - ``ExtensionArray`` has gained the abstract methods ``.dropna()`` and ``.append()``, and attribute ``array_type`` (:issue:`21185`) -- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instaniate a registered ``DecimalDtype`` (:issue:`21185`) +- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. 
``decimal`` would instantiate a registered ``DecimalDtype`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) .. _whatsnew_0240.api.other: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6a9a90c9dd53c..dd36b5d5cdf43 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -38,7 +38,6 @@ class ExtensionArray(object): * copy * append * _concat_same_type - * array_type An additional method is available to satisfy pandas' internal, private block API. diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index ba359c9ef4982..701863a2595aa 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -109,6 +109,12 @@ class ExtensionDtype(_DtypeOpsMixin): * name * construct_from_string + + Optionally one can assign an array_type for construction with the name + of this dtype via the Registry + + * array_type + The `na_value` class attribute can be used to set the default NA value for this type. :attr:`numpy.nan` is used by default. @@ -118,6 +124,8 @@ class ExtensionDtype(_DtypeOpsMixin): provided for registering virtual subclasses. """ + array_type = None + def __str__(self): return self.name diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 102f4b99704a2..4ab9795ab6863 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2,7 +2,6 @@ import re import numpy as np -from collections import OrderedDict from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex @@ -18,22 +17,19 @@ class Registry(object): These are tried in order for inference. 
""" - dtypes = OrderedDict() + dtypes = [] @classmethod - def register(self, dtype, constructor=None): + def register(self, dtype): """ Parameters ---------- - dtype : PandasExtension Dtype + dtype : ExtensionDtype """ if not issubclass(dtype, (PandasExtensionDtype, ExtensionDtype)): raise ValueError("can only register pandas extension dtypes") - if constructor is None: - constructor = dtype.construct_from_string - - self.dtypes[dtype] = constructor + self.dtypes.append(dtype) def find(self, dtype): """ @@ -54,9 +50,9 @@ def find(self, dtype): return None - for dtype_type, constructor in self.dtypes.items(): + for dtype_type in self.dtypes: try: - return constructor(dtype) + return dtype_type.construct_from_string(dtype) except TypeError: pass @@ -614,11 +610,16 @@ def _parse_dtype_strict(cls, freq): @classmethod def construct_from_string(cls, string): """ - attempt to construct this type from a string, raise a TypeError - if its not possible + Strict construction from a string, raise a TypeError if not + possible """ from pandas.tseries.offsets import DateOffset - if isinstance(string, (compat.string_types, DateOffset)): + + if (isinstance(string, compat.string_types) and + (string.startswith('period[') or + string.startswith('Period[')) or + isinstance(string, DateOffset)): + # do not parse string like U as period[U] # avoid tuple to be regarded as freq try: return cls(freq=string) @@ -626,17 +627,6 @@ def construct_from_string(cls, string): pass raise TypeError("could not construct PeriodDtype") - @classmethod - def construct_from_string_strict(cls, string): - """ - Strict construction from a string, raise a TypeError if not - possible - """ - if string.startswith('period[') or string.startswith('Period['): - # do not parse string like U as period[U] - return PeriodDtype.construct_from_string(string) - raise TypeError("could not construct PeriodDtype") - def __unicode__(self): return "period[{freq}]".format(freq=self.freq.freqstr) @@ -751,21 +741,13 @@ def 
construct_from_string(cls, string): attempt to construct this type from a string, raise a TypeError if its not possible """ - if isinstance(string, compat.string_types): + if (isinstance(string, compat.string_types) and + (string.startswith('interval') or + string.startswith('Interval'))): return cls(string) msg = "a string needs to be passed, got type {typ}" raise TypeError(msg.format(typ=type(string))) - @classmethod - def construct_from_string_strict(cls, string): - """ - Strict construction from a string, raise a TypeError if not - possible - """ - if string.startswith('interval') or string.startswith('Interval'): - return IntervalDtype.construct_from_string(string) - raise TypeError("cannot construct IntervalDtype") - def __unicode__(self): if self.subtype is None: return "interval" @@ -810,6 +792,6 @@ def is_dtype(cls, dtype): # register the dtypes in search order registry.register(DatetimeTZDtype) -registry.register(PeriodDtype, PeriodDtype.construct_from_string_strict) -registry.register(IntervalDtype, IntervalDtype.construct_from_string_strict) +registry.register(PeriodDtype) +registry.register(IntervalDtype) registry.register(CategoricalDtype) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 80619c7beb28c..44c56eece4d1e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -796,7 +796,7 @@ def astype(self, dtype, copy=True): @cache_readonly def dtype(self): """Return the dtype object of the underlying data""" - return IntervalDtype.construct_from_string(str(self.left.dtype)) + return IntervalDtype(str(self.left.dtype)) @property def inferred_type(self): diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py deleted file mode 100644 index 3742f342e4346..0000000000000 --- a/pandas/tests/extension/base/ops.py +++ /dev/null @@ -1,6 +0,0 @@ -from .base import BaseExtensionTests - - -class BaseOpsTests(BaseExtensionTests): - """Various Series and DataFrame ops 
methos.""" - pass From 80e15fdcf918e35a2d834612b06ee68bab90d70c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 25 May 2018 19:26:08 -0400 Subject: [PATCH 03/20] remove ops --- pandas/tests/extension/base/__init__.py | 1 - pandas/tests/extension/category/test_categorical.py | 4 ---- pandas/tests/extension/decimal/test_decimal.py | 4 ---- pandas/tests/extension/json/test_json.py | 4 ---- 4 files changed, 13 deletions(-) diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 1f42de6737528..9da985625c4ee 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -45,7 +45,6 @@ class TestMyDtype(BaseDtypeTests): from .dtype import BaseDtypeTests # noqa from .getitem import BaseGetitemTests # noqa from .groupby import BaseGroupbyTests # noqa -from .ops import BaseOpsTests # noqa from .interface import BaseInterfaceTests # noqa from .methods import BaseMethodsTests # noqa from .missing import BaseMissingTests # noqa diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 9145d4a551fb4..61fdb8454b542 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -65,10 +65,6 @@ class TestDtype(base.BaseDtypeTests): pass -class TestOps(base.BaseOpsTests): - pass - - class TestInterface(base.BaseInterfaceTests): @pytest.mark.skip(reason="Memory usage doesn't match") def test_memory_usage(self): diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 68c36395a94f5..3ab728f7dc1aa 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -107,10 +107,6 @@ class TestInterface(BaseDecimal, base.BaseInterfaceTests): pass -class TestOps(BaseDecimal, base.BaseOpsTests): - pass - - class TestConstructors(BaseDecimal, base.BaseConstructorsTests): 
@pytest.mark.xfail(reason="not implemented constructor from dtype") diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 1b17da32b511c..e6880ca0d160c 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -129,10 +129,6 @@ def test_custom_asserts(self): self.assert_frame_equal(a.to_frame(), b.to_frame()) -class TestOps(BaseJSON, base.BaseOpsTests): - pass - - class TestConstructors(BaseJSON, base.BaseConstructorsTests): @pytest.mark.xfail(reason="not implemented constructor from dtype") From 1c3d02326a5b9fd4e964b7b02faf50bdd2c42c06 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 25 May 2018 19:44:14 -0400 Subject: [PATCH 04/20] move array_type -> construct_array_type --- doc/source/whatsnew/v0.24.0.txt | 5 +++-- pandas/core/algorithms.py | 2 +- pandas/core/arrays/categorical.py | 4 ---- pandas/core/dtypes/base.py | 23 ++++++++++++++--------- pandas/core/dtypes/dtypes.py | 15 +++++++++++++++ pandas/core/series.py | 2 +- pandas/tests/extension/decimal/array.py | 17 ++++++++++++++--- pandas/tests/extension/json/array.py | 17 ++++++++++++++--- 8 files changed, 62 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1714ec508285e..909543a100054 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -124,8 +124,9 @@ Datetimelike API Changes ExtensionType Changes ^^^^^^^^^^^^^^^^^^^^^ -- ``ExtensionArray`` has gained the abstract methods ``.dropna()`` and ``.append()``, and attribute ``array_type`` (:issue:`21185`) -- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype`` (:issue:`21185`) +- ``ExtensionArray`` has gained the abstract methods ``.dropna()`` and ``.append()`` (:issue:`21185`) +- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. 
``decimal`` would instantiate a registered ``DecimalDtype``; furthermore + the dtype has gained the ``construct_array_type`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) .. _whatsnew_0240.api.other: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c937dcf0429d3..ff12405704006 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -154,7 +154,7 @@ def _reconstruct_data(values, dtype, original): """ from pandas import Index if is_extension_array_dtype(dtype): - values = dtype.array_type._from_sequence(values) + values = dtype.construct_array_type(values)._from_sequence(values) elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): values = Index(original)._shallow_copy(values, name=None) elif is_bool_dtype(dtype): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c7069b266ef91..0252b5b52ae94 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2411,10 +2411,6 @@ def isin(self, values): return algorithms.isin(self.codes, code_values) -# inform the Dtype about us -CategoricalDtype.array_type = Categorical - - # The Series.cat accessor diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 701863a2595aa..f09151f88c2c9 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -109,11 +109,10 @@ class ExtensionDtype(_DtypeOpsMixin): * name * construct_from_string + Optionally one can override construct_array_type for construction + with the name of this dtype via the Registry - Optionally one can assign an array_type for construction with the name - of this dtype via the Registry - - * array_type + * construct_array_type The `na_value` class attribute can be used to set the default NA value for this type. :attr:`numpy.nan` is used by default. 
@@ -124,8 +123,6 @@ class ExtensionDtype(_DtypeOpsMixin): provided for registering virtual subclasses. """ - array_type = None - def __str__(self): return self.name @@ -164,11 +161,19 @@ def name(self): """ raise AbstractMethodError(self) - @property - def array_type(self): + @classmethod + def construct_array_type(cls, array): """Return the array type associated with this dtype + + Parameters + ---------- + string : str + + Returns + ------- + type """ - raise AbstractMethodError(self) + return type(array) @classmethod def construct_from_string(cls, string): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 4ab9795ab6863..76d57f80f1a6c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -319,6 +319,21 @@ def _hash_categories(categories, ordered=True): else: return np.bitwise_xor.reduce(hashed) + @classmethod + def construct_array_type(cls, array): + """Return the array type associated with this dtype + + Parameters + ---------- + string : str + + Returns + ------- + type + """ + from pandas import Categorical + return Categorical + @classmethod def construct_from_string(cls, string): """ attempt to construct this type from a string, raise a TypeError if diff --git a/pandas/core/series.py b/pandas/core/series.py index ff6fa52725f8c..fc4669bf83e6f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4093,7 +4093,7 @@ def _try_cast(arr, take_fast_path): ordered=dtype.ordered) elif is_extension_array_dtype(dtype): # create an extension array from its dtype - array_type = dtype.array_type + array_type = dtype.construct_array_type(subarr) subarr = array_type(subarr, copy=copy) elif dtype is not None and raise_cast_failure: diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 24bb97af739a7..39e5a75913f12 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -15,6 +15,20 @@ class 
DecimalDtype(ExtensionDtype): name = 'decimal' na_value = decimal.Decimal('NaN') + @classmethod + def construct_array_type(cls, array): + """Return the array type associated with this dtype + + Parameters + ---------- + string : str + + Returns + ------- + type + """ + return DecimalArray + @classmethod def construct_from_string(cls, string): if string == cls.name: @@ -104,8 +118,5 @@ def _concat_same_type(cls, to_concat): return cls(np.concatenate([x._data for x in to_concat])) -DecimalDtype.array_type = DecimalArray - - def make_data(): return [decimal.Decimal(random.random()) for _ in range(100)] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 97d579b362ee2..9dee52c043750 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -32,6 +32,20 @@ class JSONDtype(ExtensionDtype): # source compatibility with Py2. na_value = {} + @classmethod + def construct_array_type(cls, array): + """Return the array type associated with this dtype + + Parameters + ---------- + string : str + + Returns + ------- + type + """ + return JSONArray + @classmethod def construct_from_string(cls, string): if string == cls.name: @@ -171,9 +185,6 @@ def _values_for_argsort(self): return np.array(frozen, dtype=object)[1:] -JSONDtype.array_type = JSONArray - - def make_data(): # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer return [collections.UserDict([ From 4b05a1c468aa3dfa174e413f3a5a6f7b2f74f089 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 28 May 2018 16:29:53 -0400 Subject: [PATCH 05/20] interval dtypes --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/dtypes/dtypes.py | 3 ++- pandas/tests/dtypes/test_dtypes.py | 22 ++++++++++++++-------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 909543a100054..0efe023b205d8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -161,6 +161,7 @@ Current Behavior: OverflowError: Trying to coerce negative values to unsigned integers - :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) +- Invalid consruction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`) - - diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 76d57f80f1a6c..e2f03fe153809 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -734,7 +734,7 @@ def __new__(cls, subtype=None): try: subtype = pandas_dtype(subtype) except TypeError: - raise ValueError("could not construct IntervalDtype") + raise TypeError("could not construct IntervalDtype") if is_categorical_dtype(subtype) or is_string_dtype(subtype): # GH 19016 @@ -760,6 +760,7 @@ def construct_from_string(cls, string): (string.startswith('interval') or string.startswith('Interval'))): return cls(string) + msg = "a string needs to be passed, got type {typ}" raise TypeError(msg.format(typ=type(string))) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 10d400ec2880d..e969e30cfa4d2 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -7,7 +7,6 @@ from pandas import ( Series, Categorical, 
CategoricalIndex, IntervalIndex, date_range) -from pandas.compat import string_types from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, IntervalDtype, CategoricalDtype, registry) @@ -448,7 +447,7 @@ def test_construction_not_supported(self, subtype): def test_construction_errors(self): msg = 'could not construct IntervalDtype' - with tm.assert_raises_regex(ValueError, msg): + with tm.assert_raises_regex(TypeError, msg): IntervalDtype('xx') def test_construction_from_string(self): @@ -458,14 +457,21 @@ def test_construction_from_string(self): assert is_dtype_equal(self.dtype, result) @pytest.mark.parametrize('string', [ - 'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None]) + 'foo', 'foo[int64]', 0, 3.14, ('a', 'b'), None]) def test_construction_from_string_errors(self, string): - if isinstance(string, string_types): - error, msg = ValueError, 'could not construct IntervalDtype' - else: - error, msg = TypeError, 'a string needs to be passed, got type' + # these are invalid entirely + msg = 'a string needs to be passed, got type' + + with tm.assert_raises_regex(TypeError, msg): + IntervalDtype.construct_from_string(string) - with tm.assert_raises_regex(error, msg): + @pytest.mark.parametrize('string', [ + 'interval[foo]']) + def test_construction_from_string_error_subtype(self, string): + # this is an invalid subtype + msg = 'could not construct IntervalDtype' + + with tm.assert_raises_regex(TypeError, msg): IntervalDtype.construct_from_string(string) def test_subclass(self): From 9cfbc07d52df9eccb9ad0b6aedc8f0a760994f55 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 28 May 2018 16:32:07 -0400 Subject: [PATCH 06/20] doc in construct_array_type --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e2f03fe153809..1c8e169d3baa5 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -325,7 +325,7 @@ 
def construct_array_type(cls, array): Parameters ---------- - string : str + array : value array Returns ------- From 72e10101ca5a4414b121a3b8f5d7d533443d508d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 28 May 2018 20:11:02 -0400 Subject: [PATCH 07/20] add repr tests --- pandas/tests/extension/base/interface.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 8ef8debbdc666..69de0e1900831 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -40,6 +40,16 @@ def test_repr(self, data): df = pd.DataFrame({"A": data}) repr(df) + def test_repr_array(self, data): + # some arrays may be able to assert + # attributes in the repr + repr(data) + + def test_repr_array_long(self, data): + # some arrays may be able to assert a ... in the repr + with pd.option_context('display.max_seq_items', 1): + repr(data) + def test_dtype_name_in_info(self, data): buf = StringIO() pd.DataFrame({"A": data}).info(buf=buf) From 59b35106fc432e6b84eb64cccf904c9189991e50 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 29 May 2018 06:34:04 -0400 Subject: [PATCH 08/20] make construct_array_type arg optional --- pandas/core/dtypes/base.py | 8 +++++--- pandas/core/dtypes/dtypes.py | 4 ++-- pandas/tests/extension/base/dtype.py | 8 ++++++++ pandas/tests/extension/category/test_categorical.py | 4 +++- pandas/tests/extension/decimal/array.py | 4 ++-- pandas/tests/extension/decimal/test_decimal.py | 4 +++- pandas/tests/extension/json/array.py | 4 ++-- pandas/tests/extension/json/test_json.py | 4 +++- 8 files changed, 28 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index f09151f88c2c9..c0c9a8d22ce4f 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -162,18 +162,20 @@ def name(self): raise AbstractMethodError(self) @classmethod - def construct_array_type(cls, 
array): + def construct_array_type(cls, array=None): """Return the array type associated with this dtype Parameters ---------- - string : str + array : array-like, optional Returns ------- type """ - return type(array) + if array is None: + return cls + raise NotImplementedError @classmethod def construct_from_string(cls, string): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1c8e169d3baa5..108e1849798e0 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -320,12 +320,12 @@ def _hash_categories(categories, ordered=True): return np.bitwise_xor.reduce(hashed) @classmethod - def construct_array_type(cls, array): + def construct_array_type(cls, array=None): """Return the array type associated with this dtype Parameters ---------- - array : value array + array : array-like, optional Returns ------- diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 63d3d807c270c..52a12816c8722 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,3 +1,4 @@ +import pytest import numpy as np import pandas as pd @@ -46,3 +47,10 @@ def test_eq_with_str(self, dtype): def test_eq_with_numpy_object(self, dtype): assert dtype != np.dtype('object') + + def test_array_type(self, data, dtype): + assert dtype.construct_array_type() is type(data) + + def test_array_type_with_arg(self, data, dtype): + with pytest.raises(NotImplementedError): + dtype.construct_array_type('foo') diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 61fdb8454b542..d1dd9fe8a893d 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -62,7 +62,9 @@ def data_for_grouping(): class TestDtype(base.BaseDtypeTests): - pass + + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type('foo') is Categorical class 
TestInterface(base.BaseInterfaceTests): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 39e5a75913f12..4ccc2f8aec711 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -16,12 +16,12 @@ class DecimalDtype(ExtensionDtype): na_value = decimal.Decimal('NaN') @classmethod - def construct_array_type(cls, array): + def construct_array_type(cls, array=None): """Return the array type associated with this dtype Parameters ---------- - string : str + array : array-like, optional Returns ------- diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 3ab728f7dc1aa..691ce958dec8c 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -100,7 +100,9 @@ def assert_frame_equal(self, left, right, *args, **kwargs): class TestDtype(BaseDecimal, base.BaseDtypeTests): - pass + + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type('foo') is DecimalArray class TestInterface(BaseDecimal, base.BaseInterfaceTests): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 9dee52c043750..f5d7d58277cc5 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -33,12 +33,12 @@ class JSONDtype(ExtensionDtype): na_value = {} @classmethod - def construct_array_type(cls, array): + def construct_array_type(cls, array=None): """Return the array type associated with this dtype Parameters ---------- - string : str + array : array-like, optional Returns ------- diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index e6880ca0d160c..1d921d8aa611c 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -107,7 +107,9 @@ def assert_frame_equal(self, left, right, *args, **kwargs): class 
TestDtype(BaseJSON, base.BaseDtypeTests): - pass + + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type('foo') is JSONArray class TestInterface(BaseJSON, base.BaseInterfaceTests): From 07d10382a71e63b1f4d183936ce7211a45b922d4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 3 Jun 2018 18:34:22 -0400 Subject: [PATCH 09/20] small doc enhancements --- doc/source/extending.rst | 18 ++++++++++++++---- pandas/core/dtypes/dtypes.py | 4 ++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/doc/source/extending.rst b/doc/source/extending.rst index 8018d35770924..ad5b08afe972a 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -91,6 +91,15 @@ extension array for IP Address data, this might be ``ipaddress.IPv4Address``. See the `extension dtype source`_ for interface definition. +.. versionadded:: 0.24.0 + +:class:`pandas.api.extension.ExtensionDtype` can be registered to pandas to allow creation via a string dtype name. +This allows one to instantiate ``Series`` and ``.astype()`` with a registered string name, for +example ``'category'`` is a registered string accessor for the ``CategoricalDtype``. + +See the `extension dtype dtypes`_ for more on how to register dtypes. + + :class:`~pandas.api.extension.ExtensionArray` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -131,6 +140,7 @@ To use a test, subclass it: See https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/base/__init__.py for a list of all the tests available. +.. _extension dtype dtypes: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/dtypes.py .. _extension dtype source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py .. 
_extension array source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py @@ -174,11 +184,11 @@ There are 3 constructor properties to be defined: Following table shows how ``pandas`` data structures define constructor properties by default. =========================== ======================= ============= -Property Attributes ``Series`` ``DataFrame`` +Property Attributes ``Series`` ``DataFrame`` =========================== ======================= ============= -``_constructor`` ``Series`` ``DataFrame`` -``_constructor_sliced`` ``NotImplementedError`` ``Series`` -``_constructor_expanddim`` ``DataFrame`` ``Panel`` +``_constructor`` ``Series`` ``DataFrame`` +``_constructor_sliced`` ``NotImplementedError`` ``Series`` +``_constructor_expanddim`` ``DataFrame`` ``Panel`` =========================== ======================= ============= Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 108e1849798e0..7c5560e7ba342 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -16,6 +16,10 @@ class Registry(object): to try inference from a string or a dtype class These are tried in order for inference. 
+ + Examples + -------- + registry.register(MyExtensionDtype) """ dtypes = [] From 7dbd2f356c2937e53b41e3e08898c18756837d43 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 19 Jun 2018 08:19:23 -0400 Subject: [PATCH 10/20] review comments --- doc/source/whatsnew/v0.24.0.txt | 9 +++------ pandas/core/arrays/base.py | 2 +- pandas/core/dtypes/base.py | 2 -- pandas/core/dtypes/dtypes.py | 11 ++++++----- pandas/tests/dtypes/test_dtypes.py | 1 + pandas/tests/extension/base/constructors.py | 4 ++-- 6 files changed, 13 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 0efe023b205d8..ad168dd11e389 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -128,6 +128,9 @@ ExtensionType Changes - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore the dtype has gained the ``construct_array_type`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) +- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) +- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) +- .. 
_whatsnew_0240.api.other: @@ -324,12 +327,6 @@ Reshaping - - -ExtensionArray -^^^^^^^^^^^^^^ - -- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) -- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) -- - Other diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index dd36b5d5cdf43..1af499b99e079 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -92,7 +92,7 @@ def _from_sequence(cls, scalars, copy=False): scalars : Sequence Each element will be an instance of the scalar type for this array, ``cls.dtype.type``. - copy : boolean, default True + copy : boolean, default False if True, copy the underlying data Returns ------- diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index c0c9a8d22ce4f..6689e9995a571 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -173,8 +173,6 @@ def construct_array_type(cls, array=None): ------- type """ - if array is None: - return cls raise NotImplementedError @classmethod diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 7c5560e7ba342..7692f9708fd08 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -9,13 +9,14 @@ class Registry(object): - """ Registry for dtype inference + """ + Registry for dtype inference - We can directly construct dtypes in pandas_dtypes if they are - a type; the registry allows us to register an extension dtype - to try inference from a string or a dtype class + The registry allows one to map a string repr of a extension + dtype to an extenstion dtype. - These are tried in order for inference. + Multiple extension types can be registered. + These are tried in order. 
Examples -------- diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index e969e30cfa4d2..62e0f1cb717f0 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -788,6 +788,7 @@ def test_registry(dtype): [('int64', None), ('interval', IntervalDtype()), ('interval[int64]', IntervalDtype()), + ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), ('category', CategoricalDtype()), ('period[D]', PeriodDtype('D')), ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 972ef7f37acca..41866be0e5511 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -52,8 +52,8 @@ def test_from_dtype(self, data): dtype = data.dtype expected = pd.Series(data) - result = pd.Series(np.array(data), dtype=dtype) + result = pd.Series(list(data), dtype=dtype) self.assert_series_equal(result, expected) - result = pd.Series(np.array(data), dtype=str(dtype)) + result = pd.Series(list(data), dtype=str(dtype)) self.assert_series_equal(result, expected) From 7a2cb6aee8803c8163689d5e5bddd0c0f69915bc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 19 Jun 2018 08:24:30 -0400 Subject: [PATCH 11/20] remove arg from construct_array_type as no longer needed --- pandas/core/algorithms.py | 2 +- pandas/core/dtypes/dtypes.py | 6 +----- pandas/core/series.py | 2 +- pandas/tests/extension/category/test_categorical.py | 2 +- pandas/tests/extension/decimal/array.py | 6 +----- pandas/tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/json/array.py | 6 +----- pandas/tests/extension/json/test_json.py | 2 +- 8 files changed, 8 insertions(+), 20 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ff12405704006..c4e4f5471c4be 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -154,7 
+154,7 @@ def _reconstruct_data(values, dtype, original): """ from pandas import Index if is_extension_array_dtype(dtype): - values = dtype.construct_array_type(values)._from_sequence(values) + values = dtype.construct_array_type()._from_sequence(values) elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): values = Index(original)._shallow_copy(values, name=None) elif is_bool_dtype(dtype): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 7692f9708fd08..de837efc235a0 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -325,13 +325,9 @@ def _hash_categories(categories, ordered=True): return np.bitwise_xor.reduce(hashed) @classmethod - def construct_array_type(cls, array=None): + def construct_array_type(cls): """Return the array type associated with this dtype - Parameters - ---------- - array : array-like, optional - Returns ------- type diff --git a/pandas/core/series.py b/pandas/core/series.py index fc4669bf83e6f..af3906a3e5c45 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4093,7 +4093,7 @@ def _try_cast(arr, take_fast_path): ordered=dtype.ordered) elif is_extension_array_dtype(dtype): # create an extension array from its dtype - array_type = dtype.construct_array_type(subarr) + array_type = dtype.construct_array_type() subarr = array_type(subarr, copy=copy) elif dtype is not None and raise_cast_failure: diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index d1dd9fe8a893d..fe45b514de4e8 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -64,7 +64,7 @@ def data_for_grouping(): class TestDtype(base.BaseDtypeTests): def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type('foo') is Categorical + assert dtype.construct_array_type() is Categorical class TestInterface(base.BaseInterfaceTests): diff --git 
a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 4ccc2f8aec711..c3b2b3559f2ec 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -16,13 +16,9 @@ class DecimalDtype(ExtensionDtype): na_value = decimal.Decimal('NaN') @classmethod - def construct_array_type(cls, array=None): + def construct_array_type(cls): """Return the array type associated with this dtype - Parameters - ---------- - array : array-like, optional - Returns ------- type diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 691ce958dec8c..8a85a629310f7 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -102,7 +102,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): class TestDtype(BaseDecimal, base.BaseDtypeTests): def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type('foo') is DecimalArray + assert dtype.construct_array_type() is DecimalArray class TestInterface(BaseDecimal, base.BaseInterfaceTests): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index f5d7d58277cc5..a81413c46b5eb 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -33,13 +33,9 @@ class JSONDtype(ExtensionDtype): na_value = {} @classmethod - def construct_array_type(cls, array=None): + def construct_array_type(cls): """Return the array type associated with this dtype - Parameters - ---------- - array : array-like, optional - Returns ------- type diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 1d921d8aa611c..a5fc4afa657ee 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -109,7 +109,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): class TestDtype(BaseJSON, 
base.BaseDtypeTests): def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type('foo') is JSONArray + assert dtype.construct_array_type() is JSONArray class TestInterface(BaseJSON, base.BaseInterfaceTests): From 96d5d09df5592d74751a12783d7a9cfb402f0bc1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 19 Jun 2018 08:27:57 -0400 Subject: [PATCH 12/20] doc comments --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ad168dd11e389..d451a772d116f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -126,7 +126,7 @@ ExtensionType Changes - ``ExtensionArray`` has gained the abstract methods ``.dropna()`` and ``.append()`` (:issue:`21185`) - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore - the dtype has gained the ``construct_array_type`` (:issue:`21185`) + the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) @@ -164,7 +164,7 @@ Current Behavior: OverflowError: Trying to coerce negative values to unsigned integers - :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) -- Invalid consruction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`) +- Invalid construction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid 
(:issue:`21185`) - - From 51196726150c4b2abb8612d343dcc761eb202bbf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 19 Jun 2018 08:35:18 -0400 Subject: [PATCH 13/20] lint --- pandas/tests/extension/base/constructors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 41866be0e5511..fdd2b99d9b3c7 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -1,6 +1,5 @@ import pytest -import numpy as np import pandas as pd import pandas.util.testing as tm from pandas.core.internals import ExtensionBlock From e22d4c744b32b459117b9e01631b4e247c279e96 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 19 Jun 2018 08:37:42 -0400 Subject: [PATCH 14/20] remove doc note --- doc/source/whatsnew/v0.24.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d451a772d116f..c15ee7913f3fe 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -124,7 +124,6 @@ Datetimelike API Changes ExtensionType Changes ^^^^^^^^^^^^^^^^^^^^^ -- ``ExtensionArray`` has gained the abstract methods ``.dropna()`` and ``.append()`` (:issue:`21185`) - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. 
``decimal`` would instantiate a registered ``DecimalDtype``; furthermore the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) From 7cc1a0aa6c43990391d994b9d449a6121086f02a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 20 Jun 2018 19:48:32 -0400 Subject: [PATCH 15/20] remove append --- pandas/core/arrays/base.py | 29 ------------------------ pandas/tests/extension/base/reshaping.py | 8 ------- 2 files changed, 37 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1af499b99e079..43fe904508611 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -36,7 +36,6 @@ class ExtensionArray(object): * isna * take * copy - * append * _concat_same_type An additional method is available to satisfy pandas' internal, @@ -581,34 +580,6 @@ def copy(self, deep=False): """ raise AbstractMethodError(self) - def append(self, other): - """ - Append a collection of Arrays together - - Parameters - ---------- - other : ExtensionArray or list/tuple of ExtensionArrays - - Returns - ------- - appended : ExtensionArray - """ - - to_concat = [self] - cls = self.__class__ - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - for obj in to_concat: - if not isinstance(obj, cls): - raise TypeError('all inputs must be of type {}'.format( - cls.__name__)) - - return cls._concat_same_type(to_concat) - # ------------------------------------------------------------------------ # Block-related methods # ------------------------------------------------------------------------ diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index ff739c97f2785..c83726c5278a5 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -26,14 +26,6 @@ def 
test_concat(self, data, in_frame): assert dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) - def test_append(self, data): - - wrapped = pd.Series(data) - result = wrapped.append(wrapped) - expected = pd.concat([wrapped, wrapped]) - - self.assert_series_equal(result, expected) - @pytest.mark.parametrize('in_frame', [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) From 930cec56c53c54a2f3e26610c4af4ce4e2609378 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 20 Jun 2018 19:57:19 -0400 Subject: [PATCH 16/20] remove dropna text --- pandas/core/arrays/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 43fe904508611..32033010f1931 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -49,7 +49,6 @@ class ExtensionArray(object): methods: * fillna - * dropna * unique * factorize / _values_for_factorize * argsort / _values_for_argsort From bd5dcd3ffbbab5e87673d7ad86d0969c04fd252a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 20 Jun 2018 19:58:55 -0400 Subject: [PATCH 17/20] Revert "remove dropna text" This reverts commit 6d5c67b7baf14fa5ead713b2d1b172c7f7e0de4c. 
--- pandas/core/arrays/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 32033010f1931..43fe904508611 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -49,6 +49,7 @@ class ExtensionArray(object): methods: * fillna + * dropna * unique * factorize / _values_for_factorize * argsort / _values_for_argsort From 407d7b33ad6085fcc6240cd30cfbfd57453425c2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 20 Jun 2018 20:01:00 -0400 Subject: [PATCH 18/20] remove arg from construct_array_type in base class --- pandas/core/dtypes/base.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 6689e9995a571..5f405e0d10657 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -162,13 +162,9 @@ def name(self): raise AbstractMethodError(self) @classmethod - def construct_array_type(cls, array=None): + def construct_array_type(cls): """Return the array type associated with this dtype - Parameters - ---------- - array : array-like, optional - Returns ------- type From 908490653697236057079b07ee284210d41f0f28 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 20 Jun 2018 20:05:40 -0400 Subject: [PATCH 19/20] use simpler dtype for intervals --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 44c56eece4d1e..2fd4e099777bf 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -796,7 +796,7 @@ def astype(self, dtype, copy=True): @cache_readonly def dtype(self): """Return the dtype object of the underlying data""" - return IntervalDtype(str(self.left.dtype)) + return IntervalDtype(self.left.dtype.name) @property def inferred_type(self): From f560ea177e73acf9a94a2dd0aa2b5c10c6f48767 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 25 Jun 
2018 06:28:54 -0400 Subject: [PATCH 20/20] rebase --- pandas/core/dtypes/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a794adfad271a..ef4f36dc6df33 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -7,9 +7,9 @@ from pandas._libs.tslibs import conversion from pandas.core.dtypes.dtypes import ( - CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, + registry, CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, - IntervalDtypeType, ExtensionDtype, PandasExtensionDtype) + IntervalDtypeType, ExtensionDtype) from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass,