From c25e2a4950953d69e789a8094c17ac147fdf686a Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 30 May 2019 13:20:04 +0200 Subject: [PATCH 1/4] Use _range instead of _start etc. in RangeIndex --- pandas/core/dtypes/concat.py | 21 +-- pandas/core/frame.py | 10 +- pandas/core/indexes/range.py | 225 +++++++++++++---------------- pandas/core/series.py | 6 +- pandas/io/packers.py | 7 +- pandas/tests/indexes/test_range.py | 8 +- 6 files changed, 126 insertions(+), 151 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b22ed45642cf6..e2c6fba322be0 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -541,36 +541,37 @@ def _concat_rangeindex_same_dtype(indexes): """ from pandas import Int64Index, RangeIndex - start = step = next = None + start = step = next_ = None # Filter the empty indexes non_empty_indexes = [obj for obj in indexes if len(obj)] for obj in non_empty_indexes: + rng = obj._range # type: range if start is None: # This is set by the first non-empty index - start = obj._start - if step is None and len(obj) > 1: - step = obj._step + start = rng.start + if step is None and len(rng) > 1: + step = rng.step elif step is None: # First non-empty index had only one element - if obj._start == start: + if rng.start == start: return _concat_index_same_dtype(indexes, klass=Int64Index) - step = obj._start - start + step = rng.start - start - non_consecutive = ((step != obj._step and len(obj) > 1) or - (next is not None and obj._start != next)) + non_consecutive = ((step != rng.step and len(rng) > 1) or + (next_ is not None and rng.start != next_)) if non_consecutive: return _concat_index_same_dtype(indexes, klass=Int64Index) if step is not None: - next = obj[-1] + step + next_ = rng[-1] + step if non_empty_indexes: # Get the stop value from "next" or alternatively # from the last non-empty index - stop = non_empty_indexes[-1]._stop if next is None else next + stop = non_empty_indexes[-1].stop if next_ is None else next_ return RangeIndex(start, stop, step) # Here all "indexes" had 0 length, i.e. were empty. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5957b23535350..83fa26081f21c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2282,7 +2282,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, text_col 5 non-null object float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) - memory usage: 200.0+ bytes + memory usage: 312.0+ bytes Prints a summary of columns count and its dtypes but not per column information: @@ -2292,7 +2292,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, RangeIndex: 5 entries, 0 to 4 Columns: 3 entries, int_col to float_col dtypes: float64(1), int64(1), object(1) - memory usage: 200.0+ bytes + memory usage: 312.0+ bytes Pipe output of DataFrame.info to buffer instead of sys.stdout, get buffer content and writes to a text file: @@ -2494,7 +2494,7 @@ def memory_usage(self, index=True, deep=False): 4 1 1.0 1.0+0.0j 1 True >>> df.memory_usage() - Index 80 + Index 192 int64 40000 float64 40000 complex128 80000 @@ -2513,7 +2513,7 @@ def memory_usage(self, index=True, deep=False): The memory footprint of `object` dtype columns is ignored by default: >>> df.memory_usage(deep=True) - Index 80 + Index 192 int64 40000 float64 40000 complex128 80000 @@ -2525,7 +2525,7 @@ def memory_usage(self, index=True, deep=False): many repeated values. >>> df['object'].astype('category').memory_usage(deep=True) - 5168 + 5280 """ result = Series([c.memory_usage(index=False, deep=deep) for col, c in self.iteritems()], index=self.columns) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9401de3346ccd..9c5f8b5909df9 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -65,6 +65,7 @@ class RangeIndex(Int64Index): _typ = 'rangeindex' _engine_type = libindex.Int64Engine + _range = None # type: range # check whether self._data has benn called _cached_data = None # type: np.ndarray @@ -142,7 +143,7 @@ def from_range(cls, data, name=None, dtype=None, **kwargs): 'range, {1} was passed'.format(cls.__name__, repr(data))) start, stop, step = data.start, data.stop, data.step - return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs) + return cls(start, stop, step, dtype=dtype, name=name, **kwargs) @classmethod def _simple_new(cls, start, stop=None, step=None, name=None, @@ -156,14 +157,12 @@ def _simple_new(cls, start, stop=None, step=None, name=None, if start is None or not is_integer(start): try: - - return RangeIndex(start, stop, step, name=name, **kwargs) + return cls(start, stop, step, name=name, **kwargs) except TypeError: return Index(start, stop, step, name=name, **kwargs) - result._start = start - result._stop = stop or 0 - result._step = step or 1 + result._range = range(start, stop or 0, step or 1) # type: range + result.name = name for k, v in kwargs.items(): setattr(result, k, v) @@ -196,7 +195,7 @@ def _data(self): triggering the construction. """ if self._cached_data is None: - self._cached_data = np.arange(self._start, self._stop, self._step, + self._cached_data = np.arange(self.start, self.stop, self.step, dtype=np.int64) return self._cached_data @@ -206,9 +205,10 @@ def _int64index(self): def _get_data_as_items(self): """ return a list of tuples of start, stop, step """ - return [('start', self._start), - ('stop', self._stop), - ('step', self._step)] + rng = self._range + return [('start', rng.start), + ('stop', rng.stop), + ('step', rng.step)] def __reduce__(self): d = self._get_attributes_dict() @@ -235,39 +235,69 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): return header + list(map(pprint_thing, self._range)) # -------------------------------------------------------------------- - @property + @cache_readonly def start(self): """ The value of the `start` parameter (or ``0`` if this was not supplied) """ # GH 25710 - return self._start + return self._range.start @property + def _start(self): + """ + The value of the `start` parameter (or ``0`` if this was not supplied) + + .. deprecated:: 0.25.0 + Use ._range.start or .start instead. + """ + return self._range.start + + @cache_readonly def stop(self): """ The value of the `stop` parameter """ - # GH 25710 - return self._stop + return self._range.stop @property + def _stop(self): + """ + The value of the `stop` parameter + + .. deprecated:: 0.25.0 + Use ._range.stop or .stop instead. + """ + # GH 25710 + return self._range.stop + + @cache_readonly def step(self): """ The value of the `step` parameter (or ``1`` if this was not supplied) """ # GH 25710 - return self._step + return self._range.step + + @property + def _step(self): + """ + The value of the `step` parameter (or ``1`` if this was not supplied) + + .. deprecated:: 0.25.0 + Use ._range.step or .step instead. + """ + # GH 25710 + return self._range.step @cache_readonly def nbytes(self): """ - Return the number of bytes in the underlying data - On implementations where this is undetermined (PyPy) - assume 24 bytes for each value + Return the number of bytes in the underlying data. """ - return sum(getsizeof(getattr(self, v), 24) for v in - ['_start', '_stop', '_step']) + rng = self._range + return getsizeof(rng) + sum(getsizeof(rng, v) + for v in ['start', 'stop', 'step']) def memory_usage(self, deep=False): """ @@ -305,11 +335,11 @@ def is_unique(self): @cache_readonly def is_monotonic_increasing(self): - return self._step > 0 or len(self) <= 1 + return self._range.step > 0 or len(self) <= 1 @cache_readonly def is_monotonic_decreasing(self): - return self._step < 0 or len(self) <= 1 + return self._range.step < 0 or len(self) <= 1 @property def has_duplicates(self): @@ -325,7 +355,7 @@ def get_loc(self, key, method=None, tolerance=None): return super().get_loc(key, method=method, tolerance=tolerance) def tolist(self): - return list(range(self._start, self._stop, self._step)) + return list(self._range) @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, **kwargs): @@ -342,18 +372,17 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): self._validate_dtype(dtype) if name is None: name = self.name - return RangeIndex._simple_new( - name=name, **dict(self._get_data_as_items())) + return RangeIndex.from_range(self._range, name=name) def _minmax(self, meth): no_steps = len(self) - 1 if no_steps == -1: return np.nan - elif ((meth == 'min' and self._step > 0) or - (meth == 'max' and self._step < 0)): - return self._start + elif ((meth == 'min' and self.step > 0) or + (meth == 'max' and self.step < 0)): + return self.start - return self._start + self._step * no_steps + return self.start + self.step * no_steps def min(self, axis=None, skipna=True, *args, **kwargs): """The minimum value of the RangeIndex""" @@ -382,7 +411,7 @@ def argsort(self, *args, **kwargs): """ nv.validate_argsort(args, kwargs) - if self._step > 0: + if self._range.step > 0: return np.arange(len(self)) else: return np.arange(len(self) - 1, -1, -1) @@ -392,15 +421,7 @@ def equals(self, other): Determines if two Index objects contain the same elements. """ if isinstance(other, RangeIndex): - ls = len(self) - lo = len(other) - return (ls == lo == 0 or - ls == lo == 1 and - self._start == other._start or - ls == lo and - self._start == other._start and - self._step == other._step) - + return self._range == other._range return super().equals(other) def intersection(self, other, sort=False): @@ -435,13 +456,13 @@ def intersection(self, other, sort=False): if not len(self) or not len(other): return RangeIndex._simple_new(None) - first = self[::-1] if self._step < 0 else self - second = other[::-1] if other._step < 0 else other + first = self._range[::-1] if self.step < 0 else self._range + second = other._range[::-1] if other.step < 0 else other._range # check whether intervals intersect # deals with in- and decreasing ranges - int_low = max(first._start, second._start) - int_high = min(first._stop, second._stop) + int_low = max(first.start, second.start) + int_high = min(first.stop, second.stop) if int_high <= int_low: return RangeIndex._simple_new(None) @@ -449,23 +470,26 @@ def intersection(self, other, sort=False): # solve intersection problem # performance hint: for identical step sizes, could use # cheaper alternative - gcd, s, t = first._extended_gcd(first._step, second._step) + gcd, s, t = self._extended_gcd(first.step, second.step) # check whether element sets intersect - if (first._start - second._start) % gcd: + if (first.start - second.start) % gcd: return RangeIndex._simple_new(None) # calculate parameters for the RangeIndex describing the # intersection disregarding the lower bounds - tmp_start = first._start + (second._start - first._start) * \ - first._step // gcd * s - new_step = first._step * second._step // gcd + tmp_start = first.start + (second.start - first.start) * \ + first.step // gcd * s + new_step = first.step * second.step // gcd new_index = RangeIndex._simple_new(tmp_start, int_high, new_step) # adjust index to limiting interval - new_index._start = new_index._min_fitting_element(int_low) + new_start = new_index._min_fitting_element(int_low) + new_index = RangeIndex._simple_new(new_start, + new_index.stop, + new_index.step) - if (self._step < 0 and other._step < 0) is not (new_index._step < 0): + if (self.step < 0 and other.step < 0) is not (new_index.step < 0): new_index = new_index[::-1] if sort is None: new_index = new_index.sort_values() @@ -473,13 +497,13 @@ def intersection(self, other, sort=False): def _min_fitting_element(self, lower_limit): """Returns the smallest element greater than or equal to the limit""" - no_steps = -(-(lower_limit - self._start) // abs(self._step)) - return self._start + abs(self._step) * no_steps + no_steps = -(-(lower_limit - self.start) // abs(self.step)) + return self.start + abs(self.step) * no_steps def _max_fitting_element(self, upper_limit): """Returns the largest element smaller than or equal to the limit""" - no_steps = (upper_limit - self._start) // abs(self._step) - return self._start + abs(self._step) * no_steps + no_steps = (upper_limit - self.start) // abs(self.step) + return self.start + abs(self.step) * no_steps def _extended_gcd(self, a, b): """ @@ -522,16 +546,16 @@ def _union(self, other, sort): return super()._union(other, sort=sort) if isinstance(other, RangeIndex) and sort is None: - start_s, step_s = self._start, self._step - end_s = self._start + self._step * (len(self) - 1) - start_o, step_o = other._start, other._step - end_o = other._start + other._step * (len(other) - 1) - if self._step < 0: + start_s, step_s = self.start, self.step + end_s = self.start + self.step * (len(self) - 1) + start_o, step_o = other.start, other.step + end_o = other.start + other.step * (len(other) - 1) + if self.step < 0: start_s, step_s, end_s = end_s, -step_s, start_s - if other._step < 0: + if other.step < 0: start_o, step_o, end_o = end_o, -step_o, start_o if len(self) == 1 and len(other) == 1: - step_s = step_o = abs(self._start - other._start) + step_s = step_o = abs(self.start - other.start) elif len(self) == 1: step_s = step_o elif len(other) == 1: @@ -576,7 +600,7 @@ def __len__(self): """ return the length of the RangeIndex """ - return max(0, -(-(self._stop - self._start) // self._step)) + return len(self._range) @property def size(self): @@ -597,59 +621,15 @@ def __getitem__(self, key): n = com.cast_scalar_indexer(key) if n != key: return super_getitem(key) - if n < 0: - n = len(self) + key - if n < 0 or n > len(self) - 1: + try: + return self._range[key] + except IndexError: raise IndexError("index {key} is out of bounds for axis 0 " "with size {size}".format(key=key, size=len(self))) - return self._start + n * self._step - if isinstance(key, slice): - - # This is basically PySlice_GetIndicesEx, but delegation to our - # super routines if we don't have integers - - length = len(self) - - # complete missing slice information - step = 1 if key.step is None else key.step - if key.start is None: - start = length - 1 if step < 0 else 0 - else: - start = key.start - - if start < 0: - start += length - if start < 0: - start = -1 if step < 0 else 0 - if start >= length: - start = length - 1 if step < 0 else length - - if key.stop is None: - stop = -1 if step < 0 else length - else: - stop = key.stop - - if stop < 0: - stop += length - if stop < 0: - stop = -1 - if stop > length: - stop = length - - # delegate non-integer slices - if (start != int(start) or - stop != int(stop) or - step != int(step)): - return super_getitem(key) - - # convert indexes to values - start = self._start + self._step * start - stop = self._start + self._step * stop - step = self._step * step - - return RangeIndex._simple_new(start, stop, step, name=self.name) + new_range = self._range[key] + return RangeIndex.from_range(new_range, name=self.name) # fall back to Int64Index return super_getitem(key) @@ -660,15 +640,15 @@ def __floordiv__(self, other): if is_integer(other) and other != 0: if (len(self) == 0 or - self._start % other == 0 and - self._step % other == 0): - start = self._start // other - step = self._step // other + self.start % other == 0 and + self.step % other == 0): + start = self.start // other + step = self.step // other stop = start + len(self) * step return RangeIndex._simple_new( start, stop, step, name=self.name) if len(self) == 1: - start = self._start // other + start = self.start // other return RangeIndex._simple_new( start, start + 1, 1, name=self.name) return self._int64index // other @@ -712,7 +692,7 @@ def _evaluate_numeric_binop(self, other): # apply if we have an override if step: with np.errstate(all='ignore'): - rstep = step(left._step, right) + rstep = step(left.step, right) # we don't have a representable op # so return a base index @@ -720,16 +700,13 @@ def _evaluate_numeric_binop(self, other): raise ValueError else: - rstep = left._step + rstep = left.step with np.errstate(all='ignore'): - rstart = op(left._start, right) - rstop = op(left._stop, right) + rstart = op(left.start, right) + rstop = op(left.stop, right) - result = RangeIndex(rstart, - rstop, - rstep, - **attrs) + result = RangeIndex(rstart, rstop, rstep, **attrs) # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return diff --git a/pandas/core/series.py b/pandas/core/series.py index 8fb6ad3e3ccc5..8f3ad365c5c9e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4010,7 +4010,7 @@ def memory_usage(self, index=True, deep=False): -------- >>> s = pd.Series(range(3)) >>> s.memory_usage() - 104 + 216 Not including the index gives the size of the rest of the data, which is necessarily smaller: @@ -4024,9 +4024,9 @@ def memory_usage(self, index=True, deep=False): >>> s.values array(['a', 'b'], dtype=object) >>> s.memory_usage() - 96 + 208 >>> s.memory_usage(deep=True) - 212 + 324 """ v = super().memory_usage(deep=deep) if index: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 1309bd1fef421..ead0fbd263ebf 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -367,9 +367,10 @@ def encode(obj): return {'typ': 'range_index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), - 'start': getattr(obj, '_start', None), - 'stop': getattr(obj, '_stop', None), - 'step': getattr(obj, '_step', None)} + 'start': obj._range.start, + 'stop': obj._range.stop, + 'step': obj._range.step, + } elif isinstance(obj, PeriodIndex): return {'typ': 'period_index', 'klass': obj.__class__.__name__, diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 477a4e527f278..37a953f864ef4 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -51,10 +51,8 @@ def test_constructor(self, args, kwargs, start, stop, step, name): expected = Index(np.arange(start, stop, step, dtype=np.int64), name=name) assert isinstance(result, RangeIndex) - assert result._start == start - assert result._stop == stop - assert result._step == step assert result.name is name + assert result._range == range(start, stop, step) tm.assert_index_equal(result, expected) def test_constructor_invalid_args(self): @@ -174,9 +172,7 @@ def test_copy(self): i_copy = i.copy() assert i_copy is not i assert i_copy.identical(i) - assert i_copy._start == 0 - assert i_copy._stop == 5 - assert i_copy._step == 1 + assert i_copy._range == range(0, 5, 1) assert i_copy.name == 'Foo' def test_repr(self): From a86cd9a915b79ea06a44f49df9e24523cb3e7974 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 2 Jun 2019 01:52:33 +0200 Subject: [PATCH 2/4] Changes. --- doc/source/whatsnew/v0.25.0.rst | 3 ++ pandas/core/frame.py | 10 ++--- pandas/core/indexes/range.py | 62 ++++++++++++++---------------- pandas/core/series.py | 6 +-- pandas/tests/indexes/test_range.py | 7 ++-- 5 files changed, 44 insertions(+), 44 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 461c883f542ab..295452c300931 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -473,6 +473,9 @@ Other Deprecations the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) - The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`). +- The internal attributes ``_start``, ``_stop`` and ``_step`` attributes of :class:`RangeIndex` have been deprecated. + Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`). + .. _whatsnew_0250.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 83fa26081f21c..48dfa57c47bf6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2282,7 +2282,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, text_col 5 non-null object float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) - memory usage: 312.0+ bytes + memory usage: 248.0+ bytes Prints a summary of columns count and its dtypes but not per column information: @@ -2292,7 +2292,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, RangeIndex: 5 entries, 0 to 4 Columns: 3 entries, int_col to float_col dtypes: float64(1), int64(1), object(1) - memory usage: 312.0+ bytes + memory usage: 248.0+ bytes Pipe output of DataFrame.info to buffer instead of sys.stdout, get buffer content and writes to a text file: @@ -2494,7 +2494,7 @@ def memory_usage(self, index=True, deep=False): 4 1 1.0 1.0+0.0j 1 True >>> df.memory_usage() - Index 192 + Index 128 int64 40000 float64 40000 complex128 80000 @@ -2513,7 +2513,7 @@ def memory_usage(self, index=True, deep=False): The memory footprint of `object` dtype columns is ignored by default: >>> df.memory_usage(deep=True) - Index 192 + Index 128 int64 40000 float64 40000 complex128 80000 @@ -2525,7 +2525,7 @@ def memory_usage(self, index=True, deep=False): many repeated values. >>> df['object'].astype('category').memory_usage(deep=True) - 5280 + 5216 """ result = Series([c.memory_usage(index=False, deep=deep) for col, c in self.iteritems()], index=self.columns) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9c5f8b5909df9..f19ccfd3bc75c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -167,8 +167,6 @@ def _simple_new(cls, start, stop=None, step=None, name=None, for k, v in kwargs.items(): setattr(result, k, v) - result._range = range(result._start, result._stop, result._step) - result._reset_identity() return result @@ -238,7 +236,7 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): @cache_readonly def start(self): """ - The value of the `start` parameter (or ``0`` if this was not supplied) + The value of the `start` parameter (``0`` if this was not supplied) """ # GH 25710 return self._range.start @@ -246,12 +244,12 @@ def start(self): @property def _start(self): """ - The value of the `start` parameter (or ``0`` if this was not supplied) + The value of the `start` parameter (``0`` if this was not supplied) .. deprecated:: 0.25.0 - Use ._range.start or .start instead. + Use ``start`` instead. """ - return self._range.start + return self.start @cache_readonly def stop(self): @@ -266,15 +264,15 @@ def _stop(self): The value of the `stop` parameter .. deprecated:: 0.25.0 - Use ._range.stop or .stop instead. + Use ``stop`` instead. """ # GH 25710 - return self._range.stop + return self.stop @cache_readonly def step(self): """ - The value of the `step` parameter (or ``1`` if this was not supplied) + The value of the `step` parameter (``1`` if this was not supplied) """ # GH 25710 return self._range.step @@ -282,13 +280,13 @@ def step(self): @property def _step(self): """ - The value of the `step` parameter (or ``1`` if this was not supplied) + The value of the `step` parameter (``1`` if this was not supplied) .. deprecated:: 0.25.0 - Use ._range.step or .step instead. + Use ``step`` instead. """ # GH 25710 - return self._range.step + return self.step @cache_readonly def nbytes(self): @@ -296,8 +294,8 @@ def nbytes(self): Return the number of bytes in the underlying data. """ rng = self._range - return getsizeof(rng) + sum(getsizeof(rng, v) - for v in ['start', 'stop', 'step']) + return getsizeof(rng) + sum(getsizeof(getattr(rng, attr_name)) + for attr_name in ['start', 'stop', 'step']) def memory_usage(self, deep=False): """ @@ -361,7 +359,7 @@ def tolist(self): def _shallow_copy(self, values=None, **kwargs): if values is None: name = kwargs.get("name", self.name) - return RangeIndex._simple_new( + return self._simple_new( name=name, **dict(self._get_data_as_items())) else: kwargs.setdefault('name', self.name) @@ -372,7 +370,7 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): self._validate_dtype(dtype) if name is None: name = self.name - return RangeIndex.from_range(self._range, name=name) + return self.from_range(self._range, name=name) def _minmax(self, meth): no_steps = len(self) - 1 @@ -454,7 +452,7 @@ def intersection(self, other, sort=False): return super().intersection(other, sort=sort) if not len(self) or not len(other): - return RangeIndex._simple_new(None) + return self._simple_new(None) first = self._range[::-1] if self.step < 0 else self._range second = other._range[::-1] if other.step < 0 else other._range @@ -464,7 +462,7 @@ def intersection(self, other, sort=False): int_low = max(first.start, second.start) int_high = min(first.stop, second.stop) if int_high <= int_low: - return RangeIndex._simple_new(None) + return self._simple_new(None) # Method hint: linear Diophantine equation # solve intersection problem @@ -474,20 +472,18 @@ def intersection(self, other, sort=False): # check whether element sets intersect if (first.start - second.start) % gcd: - return RangeIndex._simple_new(None) + return self._simple_new(None) # calculate parameters for the RangeIndex describing the # intersection disregarding the lower bounds tmp_start = first.start + (second.start - first.start) * \ first.step // gcd * s new_step = first.step * second.step // gcd - new_index = RangeIndex._simple_new(tmp_start, int_high, new_step) + new_index = self._simple_new(tmp_start, int_high, new_step) # adjust index to limiting interval new_start = new_index._min_fitting_element(int_low) - new_index = RangeIndex._simple_new(new_start, - new_index.stop, - new_index.step) + new_index = self._simple_new(new_start, new_index.stop, new_index.step) if (self.step < 0 and other.step < 0) is not (new_index.step < 0): new_index = new_index[::-1] @@ -566,21 +562,23 @@ def _union(self, other, sort): if ((start_s - start_o) % step_s == 0 and (start_s - end_o) <= step_s and (start_o - end_s) <= step_s): - return RangeIndex(start_r, end_r + step_s, step_s) + return self.__class__(start_r, end_r + step_s, step_s) if ((step_s % 2 == 0) and (abs(start_s - start_o) <= step_s / 2) and (abs(end_s - end_o) <= step_s / 2)): - return RangeIndex(start_r, end_r + step_s / 2, step_s / 2) + return self.__class__(start_r, + end_r + step_s / 2, + step_s / 2) elif step_o % step_s == 0: if ((start_o - start_s) % step_s == 0 and (start_o + step_s >= start_s) and (end_o - step_s <= end_s)): - return RangeIndex(start_r, end_r + step_s, step_s) + return self.__class__(start_r, end_r + step_s, step_s) elif step_s % step_o == 0: if ((start_s - start_o) % step_o == 0 and (start_s + step_o >= start_o) and (end_s - step_o <= end_o)): - return RangeIndex(start_r, end_r + step_o, step_o) + return self.__class__(start_r, end_r + step_o, step_o) return self._int64index._union(other, sort=sort) @Appender(_index_shared_docs['join']) @@ -629,7 +627,7 @@ def __getitem__(self, key): size=len(self))) if isinstance(key, slice): new_range = self._range[key] - return RangeIndex.from_range(new_range, name=self.name) + return self.from_range(new_range, name=self.name) # fall back to Int64Index return super_getitem(key) @@ -645,12 +643,10 @@ def __floordiv__(self, other): start = self.start // other step = self.step // other stop = start + len(self) * step - return RangeIndex._simple_new( - start, stop, step, name=self.name) + return self._simple_new(start, stop, step, name=self.name) if len(self) == 1: start = self.start // other - return RangeIndex._simple_new( - start, start + 1, 1, name=self.name) + return self._simple_new(start, start + 1, 1, name=self.name) return self._int64index // other @classmethod @@ -706,7 +702,7 @@ def _evaluate_numeric_binop(self, other): rstart = op(left.start, right) rstop = op(left.stop, right) - result = RangeIndex(rstart, rstop, rstep, **attrs) + result = self.__class__(rstart, rstop, rstep, **attrs) # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return diff --git a/pandas/core/series.py b/pandas/core/series.py index 8f3ad365c5c9e..472d984234275 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4010,7 +4010,7 @@ def memory_usage(self, index=True, deep=False): -------- >>> s = pd.Series(range(3)) >>> s.memory_usage() - 216 + 152 Not including the index gives the size of the rest of the data, which is necessarily smaller: @@ -4024,9 +4024,9 @@ def memory_usage(self, index=True, deep=False): >>> s.values array(['a', 'b'], dtype=object) >>> s.memory_usage() - 208 + 144 >>> s.memory_usage(deep=True) - 324 + 260 """ v = super().memory_usage(deep=deep) if index: diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 37a953f864ef4..53c846e83f9c1 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -239,8 +239,9 @@ def test_dtype(self): def test_cached_data(self): # GH 26565 - # Calling RangeIndex._data caches an int64 array of the same length at - # self._cached_data. This tests whether _cached_data has been set. + # Calling RangeIndex._data caches an int64 array of the same length as + # self at self._cached_data. + # This tests whether _cached_data is being set by various operations. idx = RangeIndex(0, 100, 10) assert idx._cached_data is None @@ -269,7 +270,7 @@ def test_cached_data(self): df.iloc[5:10] assert idx._cached_data is None - # actually calling data._data + # actually calling idx._data assert isinstance(idx._data, np.ndarray) assert isinstance(idx._cached_data, np.ndarray) From 233aad00c96398816fb0cc0e144a2db99d369535 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 3 Jun 2019 06:45:26 +0200 Subject: [PATCH 3/4] add deprecation code --- pandas/core/indexes/range.py | 10 ++++++++++ pandas/tests/indexes/test_range.py | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f19ccfd3bc75c..14f6d73914471 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -233,6 +233,10 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): return header + list(map(pprint_thing, self._range)) # -------------------------------------------------------------------- + _deprecation_message = ("RangeIndex.{} is deprecated and will be " + "removed in a future version. Use RangeIndex.{} " + "instead") + @cache_readonly def start(self): """ @@ -249,6 +253,8 @@ def _start(self): .. deprecated:: 0.25.0 Use ``start`` instead. """ + warnings.warn(self._deprecation_message.format("_start", "start"), + DeprecationWarning, stacklevel=2) return self.start @cache_readonly @@ -267,6 +273,8 @@ def _stop(self): Use ``stop`` instead. """ # GH 25710 + warnings.warn(self._deprecation_message.format("_stop", "stop"), + DeprecationWarning, stacklevel=2) return self.stop @cache_readonly @@ -286,6 +294,8 @@ def _step(self): Use ``step`` instead. """ # GH 25710 + warnings.warn(self._deprecation_message.format("_step", "step"), + DeprecationWarning, stacklevel=2) return self.step @cache_readonly diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 53c846e83f9c1..bca50186827de 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -167,6 +167,13 @@ def test_start_stop_step_attrs(self, index, start, stop, step): assert index.stop == stop assert index.step == step + def test_deprecated_start_stop_step_attrs(self): + # GH 26581 + idx = self.create_index() + for attr_name in ['_start', '_stop', '_step']: + with tm.assert_produces_warning(DeprecationWarning): + getattr(idx, attr_name) + def test_copy(self): i = RangeIndex(5, name='Foo') i_copy = i.copy() From 228b5605b996987aa1729f3982d7ba8231fac761 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 4 Jun 2019 10:43:45 +0200 Subject: [PATCH 4/4] collect ensure_int into ensure_python_int --- pandas/core/dtypes/common.py | 29 +++++++++++++++++++++++++ pandas/core/indexes/range.py | 41 ++++++++++-------------------------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b5cd73a81962b..4029e6f4bfdb5 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1,4 +1,5 @@ """ common type operations """ +from typing import Union import warnings import numpy as np @@ -125,6 +126,34 @@ def ensure_int_or_float(arr: ArrayLike, copy=False) -> np.array: return arr.astype('float64', copy=copy) +def ensure_python_int(value: Union[int, np.integer]) -> int: + """ + Ensure that a value is a python int. + + Parameters + ---------- + value: int or numpy.integer + + Returns + ------- + int + + Raises + ------ + TypeError: if the value isn't an int or can't be converted to one. + """ + if not is_scalar(value): + raise TypeError("Value needs to be a scalar value, was type {}" + .format(type(value))) + msg = "Wrong type {} for value {}" + try: + new_value = int(value) + assert (new_value == value) + except (TypeError, ValueError, AssertionError): + raise TypeError(msg.format(type(value), value)) + return new_value + + def classes(*klasses): """ evaluate if the tipo is a subclass of the klasses """ return lambda tipo: issubclass(tipo, klasses) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 14f6d73914471..82fd7342c027c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -12,7 +12,8 @@ from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( - is_int64_dtype, is_integer, is_scalar, is_timedelta64_dtype) + ensure_python_int, is_int64_dtype, is_integer, is_scalar, + is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, ABCTimedeltaIndex) @@ -92,39 +93,19 @@ def __new__(cls, start=None, stop=None, step=None, **dict(start._get_data_as_items())) # validate the arguments - def ensure_int(value, field): - msg = ("RangeIndex(...) must be called with integers," - " {value} was passed for {field}") - if not is_scalar(value): - raise TypeError(msg.format(value=type(value).__name__, - field=field)) - try: - new_value = int(value) - assert(new_value == value) - except (TypeError, ValueError, AssertionError): - raise TypeError(msg.format(value=type(value).__name__, - field=field)) + if com._all_none(start, stop, step): + raise TypeError("RangeIndex(...) must be called with integers") - return new_value + start = ensure_python_int(start) if start is not None else 0 - if com._all_none(start, stop, step): - msg = "RangeIndex(...) must be called with integers" - raise TypeError(msg) - elif start is None: - start = 0 - else: - start = ensure_int(start, 'start') if stop is None: - stop = start - start = 0 + start, stop = 0, start else: - stop = ensure_int(stop, 'stop') - if step is None: - step = 1 - elif step == 0: + stop = ensure_python_int(stop) + + step = ensure_python_int(step) if step is not None else 1 + if step == 0: raise ValueError("Step must not be zero") - else: - step = ensure_int(step, 'step') return cls._simple_new(start, stop, step, name) @@ -161,7 +142,7 @@ def _simple_new(cls, start, stop=None, step=None, name=None, except TypeError: return Index(start, stop, step, name=name, **kwargs) - result._range = range(start, stop or 0, step or 1) # type: range + result._range = range(start, stop or 0, step or 1) result.name = name for k, v in kwargs.items():