From 3c23dc91146620f53f88eb456943da4440268323 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 1 Nov 2015 10:44:48 -0500 Subject: [PATCH 1/8] API: provide Rolling/Expanding/EWM objects for deferred rolling type calculations, xref #10702 --- pandas/core/base.py | 261 ++++ pandas/core/frame.py | 1 + pandas/core/generic.py | 33 +- pandas/core/groupby.py | 240 +--- pandas/core/series.py | 1 + pandas/core/window.py | 1077 +++++++++++++++++ pandas/stats/moments.py | 700 ++++------- .../test_moments.py => tests/test_window.py} | 393 ++++-- 8 files changed, 1945 insertions(+), 761 deletions(-) create mode 100644 pandas/core/window.py rename pandas/{stats/tests/test_moments.py => tests/test_window.py} (85%) diff --git a/pandas/core/base.py b/pandas/core/base.py index 6996bb06065af..855d89411b8a7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -2,6 +2,7 @@ Base and utility classes for pandas objects. """ from pandas import compat +from pandas.compat import builtins import numpy as np from pandas.core import common as com import pandas.core.nanops as nanops @@ -218,6 +219,266 @@ def __delete__(self, instance): raise AttributeError("can't delete attribute") +class GroupByError(Exception): + pass + + +class DataError(GroupByError): + pass + + +class SpecificationError(GroupByError): + pass + + +class SelectionMixin(object): + """ + mixin implementing the selection & aggregation interface on a group-like object + sub-classes need to define: obj, exclusions + """ + _selection = None + _internal_names = ['_cache'] + _internal_names_set = set(_internal_names) + _builtin_table = { + builtins.sum: np.sum, + builtins.max: np.max, + builtins.min: np.min, + } + _cython_table = { + builtins.sum: 'sum', + builtins.max: 'max', + builtins.min: 'min', + np.sum: 'sum', + np.mean: 'mean', + np.prod: 'prod', + np.std: 'std', + np.var: 'var', + np.median: 'median', + np.max: 'max', + np.min: 'min', + np.cumprod: 'cumprod', + np.cumsum: 'cumsum' + } + + @property + def name(self): + if self._selection is None: + return None # 'result' + else: + return self._selection + + @property + def _selection_list(self): + if not isinstance(self._selection, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)): + return [self._selection] + return self._selection + + @cache_readonly + def _selected_obj(self): + + if self._selection is None or isinstance(self.obj, com.ABCSeries): + return self.obj + else: + return self.obj[self._selection] + + @cache_readonly + def _obj_with_exclusions(self): + if self._selection is not None and isinstance(self.obj, com.ABCDataFrame): + return self.obj.reindex(columns=self._selection_list) + + if len(self.exclusions) > 0: + return self.obj.drop(self.exclusions, axis=1) + else: + return self.obj + + def __getitem__(self, key): + if self._selection is not None: + raise Exception('Column(s) %s already selected' % self._selection) + + if isinstance(key, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)): + if len(self.obj.columns.intersection(key)) != len(key): + bad_keys = list(set(key).difference(self.obj.columns)) + raise KeyError("Columns not found: %s" + % str(bad_keys)[1:-1]) + return self._gotitem(list(key), ndim=2) + + elif not getattr(self,'as_index',False): + if key not in self.obj.columns: + raise KeyError("Column not found: %s" % key) + return self._gotitem(key, ndim=2) + + else: + if key not in self.obj: + raise KeyError("Column not found: %s" % key) + return self._gotitem(key, ndim=1) + + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + 
return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + + """ + raise AbstractMethodError(self) + + _agg_doc = """Aggregate using input function or dict of {column -> function} + +Parameters +---------- +arg : function or dict + Function to use for aggregating groups. If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. If + passed a dict, the keys must be DataFrame column names. + + Accepted Combinations are: + - string cythonized function name + - function + - list of functions + - dict of columns -> functions + - nested dict of names -> dicts of functions + +Notes +----- +Numpy functions mean/median/prod/sum/std/var are special cased so the +default behavior is applying the function along axis=0 +(e.g., np.mean(arr_2d, axis=0)) as opposed to +mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). + +Returns +------- +aggregated : DataFrame +""" + + @Appender(_agg_doc) + def agg(self, func, *args, **kwargs): + return self.aggregate(func, *args, **kwargs) + + @Appender(_agg_doc) + def aggregate(self, func, *args, **kwargs): + raise AbstractMethodError(self) + + def _aggregate(self, arg, *args, **kwargs): + """ + provide an implementation for the aggregators + + Returns + ------- + tuple of result, how + + Notes + ----- + how can be a string describe the required post-processing, or + None if not required + """ + + if isinstance(arg, compat.string_types): + return getattr(self, arg)(*args, **kwargs), None + + result = compat.OrderedDict() + if isinstance(arg, dict): + if self.axis != 0: # pragma: no cover + raise ValueError('Can only pass dict with axis=0') + + obj = self._selected_obj + + if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): + new_arg = compat.OrderedDict() + for k, v in compat.iteritems(arg): + if not isinstance(v, (tuple, list, dict)): + new_arg[k] = [v] + else: + new_arg[k] = v + arg = new_arg + + keys = [] + if self._selection is not None: + subset = obj + + for fname, agg_how in compat.iteritems(arg): + colg = self._gotitem(self._selection, ndim=1, subset=subset) + result[fname] = colg.aggregate(agg_how) + keys.append(fname) + else: + for col, agg_how in compat.iteritems(arg): + colg = self._gotitem(col, ndim=1) + result[col] = colg.aggregate(agg_how) + keys.append(col) + + if isinstance(list(result.values())[0], com.ABCDataFrame): + from pandas.tools.merge import concat + result = concat([result[k] for k in keys], keys=keys, axis=1) + else: + from pandas import DataFrame + result = DataFrame(result) + + return result, True + elif hasattr(arg, '__iter__'): + return self._aggregate_multiple_funcs(arg), None + else: + result = None + + cy_func = self._is_cython_func(arg) + if cy_func and not args and not kwargs: + return getattr(self, cy_func)(), None + + # caller can react + return result, True + + def _aggregate_multiple_funcs(self, arg): + from pandas.tools.merge import concat + + if self.axis != 0: + raise NotImplementedError("axis other than 0 is not supported") + + obj = self._obj_with_exclusions + results = [] + keys = [] + + # degenerate case + if obj.ndim == 1: + for a in arg: + try: + colg = self._gotitem(obj.name, ndim=1, subset=obj) + results.append(colg.aggregate(a)) + keys.append(getattr(a,'name',a)) + except (TypeError, DataError): + pass + except SpecificationError: + raise + + # multiples + else: + for col in obj: + try: + colg = self._gotitem(col, ndim=1, subset=obj[col]) 
+ results.append(colg.aggregate(arg)) + keys.append(col) + except (TypeError, DataError): + pass + except SpecificationError: + raise + result = concat(results, keys=keys, axis=1) + + return result + + def _is_cython_func(self, arg): + """ if we define an internal function for this argument, return it """ + return self._cython_table.get(arg) + + def _is_builtin_func(self, arg): + """ + if we define an builtin function for this argument, return it, + otherwise return the arg + """ + return self._builtin_table.get(arg, arg) + class FrozenList(PandasObject, list): """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff110880d34ba..2fc0786aa1e09 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5149,6 +5149,7 @@ def combineMult(self, other): DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True, aliases={'rows': 0}) DataFrame._add_numeric_operations() +DataFrame._add_series_or_dataframe_operations() _EMPTY_SERIES = Series([]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b75573edc7157..e8abc96aab858 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -29,7 +29,6 @@ from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config - # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = dict() @@ -4734,6 +4733,36 @@ def nanptp(values, axis=0, skipna=True): method ``ptp``.""", nanptp) + @classmethod + def _add_series_or_dataframe_operations(cls): + """ add the series or dataframe only operations to the cls; evaluate the doc strings again """ + + from pandas.core import window as rwindow + + @Appender(rwindow.rolling.__doc__) + def rolling(self, window, min_periods=None, freq=None, center=False, + how=None, win_type=None, axis=0): + axis = self._get_axis_number(axis) + return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, + how=how, win_type=win_type, axis=axis) + cls.rolling = rolling + + @Appender(rwindow.expanding.__doc__) + def expanding(self, min_periods=None, freq=None, center=False, + how=None, axis=0): + axis = self._get_axis_number(axis) + return rwindow.expanding(self, min_periods=min_periods, freq=freq, center=center, + how=how, axis=axis) + cls.expanding = expanding + + @Appender(rwindow.ewm.__doc__) + def ewm(self, com=None, span=None, halflife=None, min_periods=0, freq=None, + adjust=True, how=None, ignore_na=False, axis=0): + axis = self._get_axis_number(axis) + return rwindow.ewm(self, com=com, span=span, halflife=halflife, min_periods=min_periods, + freq=freq, adjust=adjust, how=how, ignore_na=ignore_na, axis=axis) + cls.ewm = ewm + def _doc_parms(cls): """ return a tuple of the doc parms """ axis_descr = "{%s}" % ', '.join([ @@ -4916,6 +4945,6 @@ def logical_func(self, axis=None, bool_only=None, skipna=None, logical_func.__name__ = name return logical_func -# install the indexerse +# install the indexes for _name, _indexer in indexing.get_indexers_list(): NDFrame._create_indexer(_name, _indexer) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 28d95c40c7294..b156f4afa2711 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -12,7 +12,7 @@ ) from pandas import compat -from pandas.core.base import PandasObject +from pandas.core.base import PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame from 
pandas.core.generic import NDFrame @@ -37,28 +37,6 @@ import pandas.algos as _algos import pandas.hashtable as _hash -_agg_doc = """Aggregate using input function or dict of {column -> function} - -Parameters ----------- -arg : function or dict - Function to use for aggregating groups. If a function, must either - work when passed a DataFrame or when passed to DataFrame.apply. If - passed a dict, the keys must be DataFrame column names. - -Notes ------ -Numpy functions mean/median/prod/sum/std/var are special cased so the -default behavior is applying the function along axis=0 -(e.g., np.mean(arr_2d, axis=0)) as opposed to -mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). - -Returns -------- -aggregated : DataFrame -""" - - # special case to prevent duplicate plots when catching exceptions when # forwarding methods from NDFrames _plotting_methods = frozenset(['plot', 'boxplot', 'hist']) @@ -91,18 +69,6 @@ _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift']) -class GroupByError(Exception): - pass - - -class DataError(GroupByError): - pass - - -class SpecificationError(GroupByError): - pass - - def _groupby_function(name, alias, npfunc, numeric_only=True, _convert=False): def f(self): @@ -319,7 +285,7 @@ def f(self): return attr -class GroupBy(PandasObject): +class GroupBy(PandasObject, SelectionMixin): """ Class for grouping and aggregating relational data. See aggregate, @@ -387,8 +353,6 @@ class GroupBy(PandasObject): Number of groups """ _apply_whitelist = _common_apply_whitelist - _internal_names = ['_cache'] - _internal_names_set = set(_internal_names) _group_selection = None def __init__(self, obj, keys=None, axis=0, level=None, @@ -493,19 +457,6 @@ def _get_index(self, name): """ safe get index, translate keys for datelike to underlying repr """ return self._get_indices([name])[0] - @property - def name(self): - if self._selection is None: - return None # 'result' - else: - return self._selection - - @property - def _selection_list(self): - if not isinstance(self._selection, (list, tuple, Series, Index, np.ndarray)): - return [self._selection] - return self._selection - @cache_readonly def _selected_obj(self): @@ -558,9 +509,6 @@ def __getattr__(self, attr): raise AttributeError("%r object has no attribute %r" % (type(self).__name__, attr)) - def __getitem__(self, key): - raise NotImplementedError('Not implemented: %s' % key) - plot = property(GroupByPlot) def _make_wrapper(self, name): @@ -704,7 +652,7 @@ def apply(self, func, *args, **kwargs): ------- applied : type depending on grouped object and function """ - func = _intercept_function(func) + func = self._is_builtin_func(func) @wraps(func) def f(g): @@ -721,13 +669,6 @@ def _python_apply_general(self, f): return self._wrap_applied_output(keys, values, not_indexed_same=mutated) - def aggregate(self, func, *args, **kwargs): - raise AbstractMethodError(self) - - @Appender(_agg_doc) - def agg(self, func, *args, **kwargs): - return self.aggregate(func, *args, **kwargs) - def _iterate_slices(self): yield self.name, self._selected_obj @@ -1217,7 +1158,7 @@ def _cython_agg_general(self, how, numeric_only=True): return self._wrap_aggregated_output(output, names) def _python_agg_general(self, func, *args, **kwargs): - func = _intercept_function(func) + func = self._is_builtin_func(func) f = lambda x: func(x, *args, **kwargs) # iterate through "columns" ex exclusions to populate output dict @@ -1733,7 +1674,7 @@ def agg_series(self, obj, func): return self._aggregate_series_pure_python(obj, func) def 
_aggregate_series_fast(self, obj, func): - func = _intercept_function(func) + func = self._is_builtin_func(func) if obj.index._has_complex_internals: raise TypeError('Incompatible index for Cython grouper') @@ -2427,7 +2368,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): if hasattr(func_or_funcs, '__iter__'): ret = self._aggregate_multiple_funcs(func_or_funcs) else: - cyfunc = _intercept_cython(func_or_funcs) + cyfunc = self._is_cython_func(func_or_funcs) if cyfunc and not args and not kwargs: return getattr(self, cyfunc)() @@ -2559,7 +2500,7 @@ def transform(self, func, *args, **kwargs): transformed : Series """ - func = _intercept_cython(func) or func + func = self._is_cython_func(func) or func # if string function if isinstance(func, compat.string_types): @@ -2912,68 +2853,16 @@ def _post_process_cython_aggregate(self, obj): obj = obj.swapaxes(0, 1) return obj - @cache_readonly - def _obj_with_exclusions(self): - if self._selection is not None: - return self.obj.reindex(columns=self._selection_list) - - if len(self.exclusions) > 0: - return self.obj.drop(self.exclusions, axis=1) - else: - return self.obj - - @Appender(_agg_doc) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): - if isinstance(arg, compat.string_types): - return getattr(self, arg)(*args, **kwargs) - - result = OrderedDict() - if isinstance(arg, dict): - if self.axis != 0: # pragma: no cover - raise ValueError('Can only pass dict with axis=0') - obj = self._selected_obj + result, how = self._aggregate(arg, *args, **kwargs) + if how is None: + return result - if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): - new_arg = OrderedDict() - for k, v in compat.iteritems(arg): - if not isinstance(v, (tuple, list, dict)): - new_arg[k] = [v] - else: - new_arg[k] = v - arg = new_arg - - keys = [] - if self._selection is not None: - subset = obj - if isinstance(subset, DataFrame): - raise NotImplementedError("Aggregating on a DataFrame is " - "not supported") - - for fname, agg_how in compat.iteritems(arg): - colg = SeriesGroupBy(subset, selection=self._selection, - grouper=self.grouper) - result[fname] = colg.aggregate(agg_how) - keys.append(fname) - else: - for col, agg_how in compat.iteritems(arg): - colg = SeriesGroupBy(obj[col], selection=col, - grouper=self.grouper) - result[col] = colg.aggregate(agg_how) - keys.append(col) - - if isinstance(list(result.values())[0], DataFrame): - from pandas.tools.merge import concat - result = concat([result[k] for k in keys], keys=keys, axis=1) - else: - result = DataFrame(result) - elif isinstance(arg, list): - return self._aggregate_multiple_funcs(arg) - else: - cyfunc = _intercept_cython(arg) - if cyfunc and not args and not kwargs: - return getattr(self, cyfunc)() + if result is None: + # grouper specific aggregations if self.grouper.nkeys > 1: return self._python_agg_general(arg, *args, **kwargs) else: @@ -2993,30 +2882,6 @@ def aggregate(self, arg, *args, **kwargs): return result._convert(datetime=True) - def _aggregate_multiple_funcs(self, arg): - from pandas.tools.merge import concat - - if self.axis != 0: - raise NotImplementedError("axis other than 0 is not supported") - - obj = self._obj_with_exclusions - - results = [] - keys = [] - for col in obj: - try: - colg = SeriesGroupBy(obj[col], selection=col, - grouper=self.grouper) - results.append(colg.aggregate(arg)) - keys.append(col) - except (TypeError, DataError): - pass - except SpecificationError: - raise - result = concat(results, keys=keys, axis=1) - - return result - 
def _aggregate_generic(self, func, *args, **kwargs): if self.grouper.nkeys != 1: raise AssertionError('Number of keys must be 1') @@ -3318,7 +3183,7 @@ def transform(self, func, *args, **kwargs): """ # optimized transforms - func = _intercept_cython(func) or func + func = self._is_cython_func(func) or func if isinstance(func, compat.string_types): if func in _cython_transforms: # cythonized transform @@ -3463,35 +3328,34 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 - def __getitem__(self, key): - if self._selection is not None: - raise Exception('Column(s) %s already selected' % self._selection) + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object - if isinstance(key, (list, tuple, Series, Index, np.ndarray)): - if len(self.obj.columns.intersection(key)) != len(key): - bad_keys = list(set(key).difference(self.obj.columns)) - raise KeyError("Columns not found: %s" - % str(bad_keys)[1:-1]) - return DataFrameGroupBy(self.obj, self.grouper, selection=key, - grouper=self.grouper, - exclusions=self.exclusions, - as_index=self.as_index) + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ - elif not self.as_index: - if key not in self.obj.columns: - raise KeyError("Column not found: %s" % key) - return DataFrameGroupBy(self.obj, self.grouper, selection=key, + if ndim == 2: + if subset is None: + subset = self.obj + return DataFrameGroupBy(subset, self.grouper, selection=key, grouper=self.grouper, exclusions=self.exclusions, as_index=self.as_index) + elif ndim == 1: + if subset is None: + subset = self.obj[key] + return SeriesGroupBy(subset, selection=key, + grouper=self.grouper) - else: - if key not in self.obj: - raise KeyError("Column not found: %s" % key) - # kind of a kludge - return SeriesGroupBy(self.obj[key], selection=key, - grouper=self.grouper, - exclusions=self.exclusions) + raise AssertionError("invalid ndim for _gotitem") def _wrap_generic_output(self, result, obj): result_index = self.grouper.levels[0] @@ -4162,38 +4026,6 @@ def _reorder_by_uniques(uniques, labels): return uniques, labels -_func_table = { - builtins.sum: np.sum, - builtins.max: np.max, - builtins.min: np.min -} - - -_cython_table = { - builtins.sum: 'sum', - builtins.max: 'max', - builtins.min: 'min', - np.sum: 'sum', - np.mean: 'mean', - np.prod: 'prod', - np.std: 'std', - np.var: 'var', - np.median: 'median', - np.max: 'max', - np.min: 'min', - np.cumprod: 'cumprod', - np.cumsum: 'cumsum' -} - - -def _intercept_function(func): - return _func_table.get(func, func) - - -def _intercept_cython(func): - return _cython_table.get(func) - - def _groupby_indices(values): return _algos.groupby_indices(_values_from_object(com._ensure_object(values))) diff --git a/pandas/core/series.py b/pandas/core/series.py index ca55a834a33d2..d6eb18396e14c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2765,6 +2765,7 @@ def _dir_additions(self): aliases={'rows': 0}) Series._add_numeric_operations() Series._add_series_only_operations() +Series._add_series_or_dataframe_operations() _INDEX_TYPES = ndarray, Index, list, tuple #------------------------------------------------------------------------------ diff --git a/pandas/core/window.py b/pandas/core/window.py new file mode 100644 index 0000000000000..5467f7f41fb96 --- /dev/null +++ b/pandas/core/window.py @@ -0,0 +1,1077 @@ +""" + +provide a generic structure to support window functions, +similar to 
how we have a Groupby object + + +""" +from __future__ import division + +import numpy as np +from functools import wraps +from collections import defaultdict + +import pandas as pd +from pandas.core.base import PandasObject, SelectionMixin, AbstractMethodError +import pandas.core.common as com +import pandas.algos as algos +from pandas import compat +from pandas.util.decorators import Substitution, Appender + +class _Window(PandasObject, SelectionMixin): + _attributes = ['window','min_periods','freq','center','how','win_type','axis'] + exclusions = set() + + def __init__(self, obj, window=None, min_periods=None, freq=None, center=False, + how=None, win_type=None, axis=0): + self.blocks = [] + self.obj = obj + self.window = window + self.min_periods = min_periods + self.freq = freq + self.center = center + self.how = how + self.win_type = win_type + self.axis = axis + self._convert_freq() + self._setup() + + @property + def _constructor(self): + return Window + + def _setup(self): + pass + + def _create_blocks(self): + """ split data into blocks """ + return self._selected_obj.as_blocks(copy=False).values() + + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + + # create a new object to prevent aliasing + if subset is None: + subset = self.obj + new_self = self._shallow_copy(subset) + if ndim==2 and key in subset: + new_self._selection = key + new_self._reset_cache() + return new_self + + def __getattr__(self, attr): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError("%r object has no attribute %r" % + (type(self).__name__, attr)) + + def _dir_additions(self): + return self.obj._dir_additions() + + def _get_window(self, other=None): + return self.window + + def __unicode__(self): + """ provide a nice str repr of our rolling object """ + + attrs = [ "{k}->{v}".format(k=k,v=getattr(self,k)) \ + for k in self._attributes if getattr(self,k,None) is not None ] + return "{klass} [{attrs}]".format(klass=self.__class__.__name__, + attrs=','.join(attrs)) + + def _shallow_copy(self, obj=None, **kwargs): + """ return a new object with the replacement attributes """ + if obj is None: + obj = self._selected_obj.copy() + if isinstance(obj, self.__class__): + obj = obj.obj + for attr in self._attributes: + if attr not in kwargs: + kwargs[attr] = getattr(self,attr) + return self._constructor(obj, **kwargs) + + def _prep_values(self, values=None, kill_inf=True): + + if values is None: + values = getattr(self._selected_obj,'values',self._selected_obj) + + # coerce dtypes as appropriate + if com.is_float_dtype(values.dtype): + pass + elif com.is_integer_dtype(values.dtype): + values = values.astype(float) + elif com.is_timedelta64_dtype(values.dtype): + values = values.view('i8').astype(float) + else: + try: + values = values.astype(float) + except (ValueError, TypeError): + raise TypeError("cannot handle this type -> {0}".format(values.dtype)) + + if kill_inf: + values = values.copy() + values[np.isinf(values)] = np.NaN + + return values + + def _wrap_result(self, result, block=None): + """ wrap a single result """ + + obj = self._selected_obj + if isinstance(result, np.ndarray): + + # coerce if necessary + if block is not None: + if com.is_timedelta64_dtype(block.values.dtype): + result = 
pd.to_timedelta(result.ravel(),unit='ns').values.reshape(result.shape)
+
+            if result.ndim == 1:
+                from pandas import Series
+                return Series(result, obj.index, name=obj.name)
+
+            return type(obj)(result,
+                             index=obj.index,
+                             columns=block.columns)
+        return result
+
+    def _wrap_results(self, results, blocks):
+        """ wrap lists of results, blocks """
+
+        obj = self._selected_obj
+        final = []
+        for result, block in zip(results, blocks):
+
+            result = self._wrap_result(result, block)
+            if result.ndim == 1:
+                return result
+            final.append(result)
+
+        if not len(final):
+            return obj.astype('float64')
+        return pd.concat(final,axis=1).reindex(columns=obj.columns)
+
+    def _center_window(self, result, window):
+        """ center the result in the window """
+        if self.axis > result.ndim-1:
+            raise ValueError("Requested axis is larger than no. of argument "
+                             "dimensions")
+
+        from pandas import Series, DataFrame
+        offset = _offset(window, True)
+        if offset > 0:
+            if isinstance(result, (Series, DataFrame)):
+                result = result.slice_shift(-offset, axis=self.axis)
+            else:
+                lead_indexer = [slice(None)] * result.ndim
+                lead_indexer[self.axis] = slice(offset, None)
+                result = np.copy(result[tuple(lead_indexer)])
+        return result
+
+    def _convert_freq(self):
+        """ conform to our freq """
+
+        from pandas import Series, DataFrame
+        if self.freq is not None and isinstance(self.obj, (Series, DataFrame)):
+            self.obj = self.obj.resample(self.freq, how=self.how)
+
+    @Appender(SelectionMixin._agg_doc)
+    def aggregate(self, arg, *args, **kwargs):
+        result, how = self._aggregate(arg, *args, **kwargs)
+        if result is None:
+            raise TypeError("cannot aggregate with arg of type %s" % type(arg))
+        return result
+
+class Window(_Window):
+
+    def _prep_window(self, **kwargs):
+        """ provide validation for our window type, return the window """
+        window = self._get_window()
+
+        if isinstance(window, (list, tuple, np.ndarray)):
+            return com._asarray_tuplesafe(window).astype(float)
+        elif com.is_integer(window):
+            try:
+                import scipy.signal as sig
+            except ImportError:
+                raise ImportError('Please install scipy to generate window weights')
+            win_type = _validate_win_type(self.win_type, kwargs)  # may pop from kwargs
+            return sig.get_window(win_type, window).astype(float)
+
+        raise ValueError('Invalid window %s' % str(window))
+
+    def _apply_window(self, mean=True, **kwargs):
+        """
+        Applies a moving window of type ``window_type`` on the data.
+ + Parameters + ---------- + mean : boolean, default True + If True computes weighted mean, else weighted sum + + Returns + ------- + y : type of input argument + + """ + window = self._prep_window(**kwargs) + center = self.center + + results, blocks = [], self._create_blocks() + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + offset = _offset(window, center) + additional_nans = np.array([np.NaN] * offset) + def f(arg, *args, **kwargs): + minp = _use_window(self.min_periods, len(window)) + return algos.roll_window(np.concatenate((arg, additional_nans)) if center else arg, + window, minp, avg=mean) + + result = np.apply_along_axis(f, self.axis, values) + + if center: + result = self._center_window(result, window) + results.append(result) + + return self._wrap_results(results, blocks) + + def sum(self, **kwargs): + return self._apply_window(mean=False, **kwargs) + + def mean(self, **kwargs): + return self._apply_window(mean=True, **kwargs) + +class _Rolling(_Window): + + @property + def _constructor(self): + return Rolling + + def _apply(self, func, window=None, center=None, check_minp=None, how=None, **kwargs): + """ + Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. + + Parameters + ---------- + func : string/callable to apply + window : int/array, default to _get_window() + center : boolean, default to self.center + check_minp : function, default to _use_window + how : string, default to None + + Returns + ------- + y : type of input + """ + + if center is None: + center = self.center + if window is None: + window = self._get_window() + + if check_minp is None: + check_minp = _use_window + + results, blocks = [], self._create_blocks() + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, compat.string_types): + if not hasattr(algos, func): + raise ValueError("we do not support this function algos.{0}".format(func)) + + cfunc = getattr(algos, func) + def func(arg, window, min_periods=None): + minp = check_minp(min_periods, window) + return cfunc(arg, window, minp, **kwargs) + + # calculation function + if center: + offset = _offset(window, center) + additional_nans = np.array([np.NaN] * offset) + def calc(x): + return func(np.concatenate((x, additional_nans)), + window, min_periods=self.min_periods) + else: + def calc(x): + return func(x,window, min_periods=self.min_periods) + + if values.ndim > 1: + result = np.apply_along_axis(calc, self.axis, values) + else: + result = calc(values) + + if center: + result = self._center_window(result, window) + + results.append(result) + + return self._wrap_results(results, blocks) + +class Rolling(_Rolling): + + def count(self): + """ + Rolling count of number of non-NaN observations inside provided window. 
+ + Returns + ------- + same type as input + """ + + obj = self._selected_obj + window = self._get_window() + window = min(window, len(obj)) if not self.center else window + try: + converted = np.isfinite(obj).astype(float) + except TypeError: + converted = np.isfinite(obj.astype(float)).astype(float) + result = self._constructor(converted, + window=window, + min_periods=0, + center=self.center).sum() + + result[result.isnull()] = 0 + return result + + def apply(self, func, args=(), kwargs={}): + """ + Moving function apply + + Parameters + ---------- + func : function + Must produce a single value from an ndarray input + *args and **kwargs are passed to the function + """ + window = self._get_window() + offset = _offset(window, self.center) + def f(arg, window, min_periods): + minp = _use_window(min_periods, window) + return algos.roll_generic(arg, window, minp, offset, func, args, kwargs) + + return self._apply(f, center=False) + + def sum(self): + """ + Moving sum + """ + return self._apply('roll_sum') + + def max(self, how='max'): + """ + Moving max + + Parameters + ---------- + how : string, default max + Method for down- or re-sampling + """ + return self._apply('roll_max', how=how) + + def min(self, how='min'): + """ + Moving min + + Parameters + ---------- + how : string, default min + Method for down- or re-sampling + """ + return self._apply('roll_min', how=how) + + def mean(self): + """ + Moving mean + """ + return self._apply('roll_mean') + + def median(self, how='median'): + """ + Moving median + + Parameters + ---------- + how : string, default median + Method for down- or re-sampling + """ + + return self._apply('roll_median_c', how=how) + + def std(self, ddof=1): + """ + Moving standard deviation + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + window = self._get_window() + def f(arg, *args, **kwargs): + minp = _require_min_periods(1)(self.min_periods, window) + return _zsqrt(algos.roll_var(arg, window, minp, ddof)) + + return self._apply(f, check_minp=_require_min_periods(1)) + + def var(self, ddof=1): + """ + Moving variance + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + return self._apply('roll_var', + check_minp=_require_min_periods(1), + ddof=ddof) + + def skew(self): + """ + Unbiased moving skewness + """ + return self._apply('roll_skew', + check_minp=_require_min_periods(3)) + + def kurt(self): + """ + Unbiased moving kurtosis + """ + return self._apply('roll_kurt', + check_minp=_require_min_periods(4)) + + def quantile(self, quantile): + """ + Rolling quantile + + Parameters + ---------- + quantile : float + 0 <= quantile <= 1 + """ + window = self._get_window() + def f(arg, *args, **kwargs): + minp = _use_window(self.min_periods, window) + return algos.roll_quantile(arg, window, minp, quantile) + + return self._apply(f) + + def cov(self, other=None, pairwise=False, ddof=1): + """ + Moving sample covariance + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. 
+ If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + window = self._get_window(other) + + def _get_cov(X, Y): + mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean() + count = (X+Y).rolling(window=window, center=self.center).count() + bias_adj = count / (count - ddof) + return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)) + + def corr(self, other=None, pairwise=False): + """ + Moving sample correlation + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + """ + + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + window = self._get_window(other) + + def _get_corr(a, b): + a = a.rolling(window=window, + min_periods=self.min_periods, + freq=self.freq, + center=self.center) + b = b.rolling(window=window, + min_periods=self.min_periods, + freq=self.freq, + center=self.center) + + return a.cov(b) / (a.std() * b.std()) + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) + +class Expanding(Rolling): + _attributes = ['min_periods','freq','center','how','axis'] + + @property + def _constructor(self): + return Expanding + + def _get_window(self, other=None): + obj = self._selected_obj + if other is None: + return max(len(obj), self.min_periods) if self.min_periods else len(obj) + return max((len(obj) + len(obj)), self.min_periods) if self.min_periods else (len(obj) + len(obj)) + +class EWM(_Rolling): + _attributes = ['com','min_periods','freq','adjust','how','ignore_na','axis'] + + def __init__(self, obj, com=None, span=None, halflife=None, min_periods=0, freq=None, + adjust=True, how=None, ignore_na=False, axis=0): + self.obj = obj + self.com = _get_center_of_mass(com, span, halflife) + self.min_periods = min_periods + self.freq = freq + self.adjust = adjust + self.how = how + self.ignore_na = ignore_na + self.axis = axis + self._convert_freq() + + @property + def _constructor(self): + return EWM + + def _apply(self, func, **kwargs): + """ + Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. 
+ + Parameters + ---------- + func : string/callable to apply + + Returns + ------- + y : type of input argument + + """ + results, blocks = [], self._create_blocks() + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, compat.string_types): + if not hasattr(algos, func): + raise ValueError("we do not support this function algos.{0}".format(func)) + + cfunc = getattr(algos, func) + def func(arg): + return cfunc(arg, self.com, int(self.adjust), int(self.ignore_na), int(self.min_periods)) + + results.append(np.apply_along_axis(func, self.axis, values)) + + return self._wrap_results(results, blocks) + + def mean(self): + """ + exponential weighted moving average + """ + return self._apply('ewma') + + def std(self, bias=False): + """ + exponential weighted moving stddev + + Parameters + ---------- + bias : boolean, default False + Use a standard estimation bias correction + """ + return _zsqrt(self.var(bias=bias)) + vol=std + + def var(self, bias=False): + """ + exponential weighted moving average + + Parameters + ---------- + bias : boolean, default False + Use a standard estimation bias correction + """ + def f(arg): + return algos.ewmcov(arg, + arg, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias)) + + return self._apply(f) + + def cov(self, other=None, pairwise=False, bias=False): + """ + exponential weighted sample covariance + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + bias : boolean, default False + Use a standard estimation bias correction + """ + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + + def _get_cov(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + cov = algos.ewmcov(X._prep_values(), + Y._prep_values(), + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias)) + return X._wrap_result(cov) + + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)) + + def corr(self, other=None, pairwise=False): + """ + exponential weighted sample correlation + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. 
+ """ + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + + def _get_corr(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + def _cov(x, y): + return algos.ewmcov(x, y, self.com, int(self.adjust), int(self.ignore_na), int(self.min_periods), 1) + + x_values = X._prep_values() + y_values = Y._prep_values() + cov = _cov(x_values, y_values) + x_var = _cov(x_values, x_values) + y_var = _cov(y_values, y_values) + corr = cov / _zsqrt(x_var * y_var) + return X._wrap_result(corr) + + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) + +######################## +##### Helper Funcs ##### +######################## + +def _flex_binary_moment(arg1, arg2, f, pairwise=False): + from pandas import Series, DataFrame, Panel + if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and + isinstance(arg2,(np.ndarray, Series, DataFrame))): + raise TypeError("arguments to moment function must be of type " + "np.ndarray/Series/DataFrame") + + if isinstance(arg1, (np.ndarray, Series)) and \ + isinstance(arg2, (np.ndarray,Series)): + X, Y = _prep_binary(arg1, arg2) + return f(X, Y) + + elif isinstance(arg1, DataFrame): + def dataframe_from_int_dict(data, frame_template): + result = DataFrame(data, index=frame_template.index) + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] + return result + + results = {} + if isinstance(arg2, DataFrame): + if pairwise is False: + if arg1 is arg2: + # special case in order to handle duplicate column names + for i, col in enumerate(arg1.columns): + results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) + return dataframe_from_int_dict(results, arg1) + else: + if not arg1.columns.is_unique: + raise ValueError("'arg1' columns are not unique") + if not arg2.columns.is_unique: + raise ValueError("'arg2' columns are not unique") + X, Y = arg1.align(arg2, join='outer') + X = X + 0 * Y + Y = Y + 0 * X + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + return DataFrame(results, index=X.index, columns=res_columns) + elif pairwise is True: + results = defaultdict(dict) + for i, k1 in enumerate(arg1.columns): + for j, k2 in enumerate(arg2.columns): + if j 0: + p.major_axis = arg1.columns[p.major_axis] + if len(p.minor_axis) > 0: + p.minor_axis = arg2.columns[p.minor_axis] + return p + else: + raise ValueError("'pairwise' is not True/False") + else: + results = {} + for i, col in enumerate(arg1.columns): + results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) + return dataframe_from_int_dict(results, arg1) + + else: + return _flex_binary_moment(arg2, arg1, f) + +def _get_center_of_mass(com, span, halflife): + valid_count = len([x for x in [com, span, halflife] if x is not None]) + if valid_count > 1: + raise Exception("com, span, and halflife are mutually exclusive") + + if span is not None: + # convert span to center of mass + com = (span - 1) / 2. + elif halflife is not None: + # convert halflife to center of mass + decay = 1 - np.exp(np.log(0.5) / halflife) + com = 1 / decay - 1 + elif com is None: + raise Exception("Must pass one of com, span, or halflife") + + return float(com) + +def _offset(window, center): + if not com.is_integer(window): + window = len(window) + offset = (window - 1) / 2. 
if center else 0 + try: + return int(offset) + except: + return offset.astype(int) + +def _require_min_periods(p): + def _check_func(minp, window): + if minp is None: + return window + else: + return max(p, minp) + return _check_func + +def _use_window(minp, window): + if minp is None: + return window + else: + return minp + +def _zsqrt(x): + result = np.sqrt(x) + mask = x < 0 + + from pandas import DataFrame + if isinstance(x, DataFrame): + if mask.values.any(): + result[mask] = 0 + else: + if mask.any(): + result[mask] = 0 + + return result + +def _prep_binary(arg1, arg2): + if not isinstance(arg2, type(arg1)): + raise Exception('Input arrays must be of the same type!') + + # mask out values, this also makes a common index... + X = arg1 + 0 * arg2 + Y = arg2 + 0 * arg1 + + return X, Y + +def _validate_win_type(win_type, kwargs): + # may pop from kwargs + arg_map = {'kaiser': ['beta'], + 'gaussian': ['std'], + 'general_gaussian': ['power', 'width'], + 'slepian': ['width']} + if win_type in arg_map: + return tuple([win_type] + + _pop_args(win_type, arg_map[win_type], kwargs)) + return win_type + + +def _pop_args(win_type, arg_names, kwargs): + msg = '%s window requires %%s' % win_type + all_args = [] + for n in arg_names: + if n not in kwargs: + raise ValueError(msg % n) + all_args.append(kwargs.pop(n)) + return all_args + +############################# +##### top-level exports ##### +############################# + +def rolling(obj, win_type=None, **kwds): + """ + Provides rolling transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + window : int + Size of the moving window. This is the number of observations used for + calculating the statistic. + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + how : string, default None + Method for down- or re-sampling + win_type : string, default None + prove a window type, see the notes below + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + The recognized window types are: + + * ``boxcar`` + * ``triang`` + * ``blackman`` + * ``hamming`` + * ``bartlett`` + * ``parzen`` + * ``bohman`` + * ``blackmanharris`` + * ``nuttall`` + * ``barthann`` + * ``kaiser`` (needs beta) + * ``gaussian`` (needs std) + * ``general_gaussian`` (needs power, width) + * ``slepian`` (needs width). + """ + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + if win_type is not None: + return Window(obj, win_type=win_type, **kwds) + + return Rolling(obj, **kwds) + +def expanding(obj, **kwds): + """ + Provides expanding transformations. + + .. 
versionadded:: 0.18.0 + + Parameters + ---------- + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + how : string, default None + Method for down- or re-sampling + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + """ + + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + return Expanding(obj, **kwds) + +def ewm(obj, **kwds): + """ + .. versionadded:: 0.18.0 + + Provides exponential weighted functions + + Parameters + ---------- + com : float. optional + Center of mass: :math:`\alpha = 1 / (1 + com)`, + span : float, optional + Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` + halflife : float, optional + Specify decay in terms of halflife, :math:`\alpha = 1 - exp(log(0.5) / halflife)` + min_periods : int, default 0 + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : None or string alias / date offset object, default=None + Frequency to conform to before computing statistic + adjust : boolean, default True + Divide by decaying adjustment factor in beginning periods to account for + imbalance in relative weightings (viewing EWMA as a moving average) + how : string, default 'mean' + Method for down- or re-sampling + ignore_na : boolean, default False + Ignore missing values when calculating weights; + specify True to reproduce pre-0.15.0 behavior + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + Either center of mass, span or halflife must be specified + + EWMA is sometimes specified using a "span" parameter `s`, we have that the + decay parameter :math:`\alpha` is related to the span as + :math:`\alpha = 2 / (s + 1) = 1 / (1 + c)` + + where `c` is the center of mass. Given a span, the associated center of mass is + :math:`c = (s - 1) / 2` + + So a "20-day EWMA" would have center 9.5. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + When adjust is True (default), weighted averages are calculated using weights + (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. + + When adjust is False, weighted averages are calculated recursively as: + weighted_average[0] = arg[0]; + weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. + + When ignore_na is False (default), weights are based on absolute positions. + For example, the weights of x and y used in calculating the final weighted + average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and + (1-alpha)**2 and alpha (if adjust is False). 
+ + When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on + relative positions. For example, the weights of x and y used in calculating + the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is + True), and 1-alpha and alpha (if adjust is False). + + More details can be found at + http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions + """ + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + return EWM(obj, **kwds) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 3cddae45e7516..c6cff614de9b1 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -4,16 +4,8 @@ """ from __future__ import division -from functools import wraps -from collections import defaultdict - -from numpy import NaN import numpy as np - -from pandas.core.api import DataFrame, Series, Panel, notnull -import pandas.algos as algos -import pandas.core.common as pdcom - +from pandas.core.api import DataFrame, Series from pandas.util.decorators import Substitution, Appender __all__ = ['rolling_count', 'rolling_max', 'rolling_min', @@ -179,8 +171,38 @@ Use a standard estimation bias correction """ +def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): + """ + wrapper function to dispatch to the appropriate window functions + wraps/unwraps ndarrays for compat + + can be removed when ndarray support is removed + """ + is_ndarray = isinstance(arg, np.ndarray) + if is_ndarray: + if arg.ndim == 1: + arg = Series(arg) + elif arg.ndim == 2: + arg = DataFrame(arg) + else: + raise AssertionError("cannot support ndim > 2 for ndarray compat") + + # get the functional keywords here + if func_kw is None: + func_kw = [] + kwds = {} + for k in func_kw: + value = kwargs.pop(k,None) + if value is not None: + kwds[k] = value + r = getattr(arg,dispatch)(**kwargs) + result = getattr(r,name)(*args, **kwds) + + if is_ndarray: + result = result.values + return result -def rolling_count(arg, window, freq=None, center=False, how=None): +def rolling_count(arg, window, **kwargs): """ Rolling count of number of non-NaN observations inside provided window. @@ -208,26 +230,12 @@ def rolling_count(arg, window, freq=None, center=False, how=None): frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - arg = _conv_timerule(arg, freq, how) - if not center: - window = min(window, len(arg)) - - return_hook, values = _process_data_structure(arg, kill_inf=False) - - converted = np.isfinite(values).astype(float) - result = rolling_sum(converted, window, min_periods=0, - center=center) # already converted - - # putmask here? 
- result[np.isnan(result)] = 0 - return return_hook(result) - + return ensure_compat('rolling', 'count', arg, window=window, **kwargs) @Substitution("Unbiased moving covariance.", _binary_arg_flex, _roll_kw%'None'+_pairwise_kw+_ddof_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, - center=False, pairwise=None, how=None, ddof=1): +def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): window = arg2 arg2 = arg1 @@ -235,23 +243,19 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, elif arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise # only default unset - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - - def _get_cov(X, Y): - mean = lambda x: rolling_mean(x, window, min_periods, center=center) - count = rolling_count(X + Y, window, center=center) - bias_adj = count / (count - ddof) - return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj - rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise)) - return rs - + return ensure_compat('rolling', + 'cov', + arg1, + other=arg2, + window=window, + pairwise=pairwise, + func_kw=['other','pairwise','ddof'], + **kwargs) @Substitution("Moving sample correlation.", _binary_arg_flex, _roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, - center=False, pairwise=None, how=None): +def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): window = arg2 arg2 = arg1 @@ -259,86 +263,14 @@ def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, elif arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise # only default unset - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - - def _get_corr(a, b): - num = rolling_cov(a, b, window, min_periods, freq=freq, - center=center) - den = (rolling_std(a, window, min_periods, freq=freq, - center=center) * - rolling_std(b, window, min_periods, freq=freq, - center=center)) - return num / den - - return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise)) - - -def _flex_binary_moment(arg1, arg2, f, pairwise=False): - if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and - isinstance(arg2,(np.ndarray, Series, DataFrame))): - raise TypeError("arguments to moment function must be of type " - "np.ndarray/Series/DataFrame") - - if isinstance(arg1, (np.ndarray, Series)) and \ - isinstance(arg2, (np.ndarray,Series)): - X, Y = _prep_binary(arg1, arg2) - return f(X, Y) - - elif isinstance(arg1, DataFrame): - def dataframe_from_int_dict(data, frame_template): - result = DataFrame(data, index=frame_template.index) - if len(result.columns) > 0: - result.columns = frame_template.columns[result.columns] - return result - - results = {} - if isinstance(arg2, DataFrame): - if pairwise is False: - if arg1 is arg2: - # special case in order to handle duplicate column names - for i, col in enumerate(arg1.columns): - results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) - return dataframe_from_int_dict(results, arg1) - else: - if not arg1.columns.is_unique: - raise ValueError("'arg1' columns are not unique") - if not arg2.columns.is_unique: - raise ValueError("'arg2' columns are not unique") - X, Y = arg1.align(arg2, join='outer') - X 
= X + 0 * Y
-                    Y = Y + 0 * X
-                    res_columns = arg1.columns.union(arg2.columns)
-                    for col in res_columns:
-                        if col in X and col in Y:
-                            results[col] = f(X[col], Y[col])
-                    return DataFrame(results, index=X.index, columns=res_columns)
-            elif pairwise is True:
-                results = defaultdict(dict)
-                for i, k1 in enumerate(arg1.columns):
-                    for j, k2 in enumerate(arg2.columns):
-                        if j < i and arg2 is arg1:
-                            # Symmetric case
-                            results[i][j] = results[j][i]
-                        else:
-                            results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]))
-                p = Panel.from_dict(results).swapaxes(0, 1)
-                if len(p.major_axis) > 0:
-                    p.major_axis = arg1.columns[p.major_axis]
-                if len(p.minor_axis) > 0:
-                    p.minor_axis = arg2.columns[p.minor_axis]
-                return p
-            else:
-                raise ValueError("'pairwise' is not True/False")
-        else:
-            results = {}
-            for i, col in enumerate(arg1.columns):
-                results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2))
-            return dataframe_from_int_dict(results, arg1)
-
-    else:
-        return _flex_binary_moment(arg2, arg1, f)
-
 
 @Substitution("Deprecated. Use rolling_corr(..., pairwise=True) instead.\n\n"
               "Pairwise moving sample correlation", _pairwise_arg,
@@ -354,164 +286,65 @@ def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,
                            pairwise=True)
 
 
-def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
-                    how=None, args=(), kwargs={}, **kwds):
-    """
-    Rolling statistical measure using supplied function. Designed to be
-    used with passed-in Cython array-based functions.
-
-    Parameters
-    ----------
-    arg : DataFrame or numpy ndarray-like
-    window : Number of observations used for calculating statistic
-    func : Cython function to compute rolling statistic on raw series
-    minp : int
-        Minimum number of observations required to have a value
-    axis : int, default 0
-    freq : None or string alias / date offset object, default=None
-        Frequency to conform to before computing statistic
-    center : boolean, default False
-        Whether the label should correspond with center of window
-    how : string, default 'mean'
-        Method for down- or re-sampling
-    args : tuple
-        Passed on to func
-    kwargs : dict
-        Passed on to func
-
-    Returns
-    -------
-    y : type of input
-    """
-    arg = _conv_timerule(arg, freq, how)
-
-    return_hook, values = _process_data_structure(arg)
-
-    if values.size == 0:
-        result = values.copy()
-    else:
-        # actually calculate the moment. Faster way to do this?
-        offset = int((window - 1) / 2.) if center else 0
-        additional_nans = np.array([np.NaN] * offset)
-        calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
-                              window, minp=minp, args=args, kwargs=kwargs,
-                              **kwds)
-        if values.ndim > 1:
-            result = np.apply_along_axis(calc, axis, values)
-        else:
-            result = calc(values)
-
-    if center:
-        result = _center_window(result, window, axis)
-
-    return return_hook(result)
-
-
-def _center_window(rs, window, axis):
-    if axis > rs.ndim-1:
-        raise ValueError("Requested axis is larger then no. of argument "
-                         "dimensions")
-
-    offset = int((window - 1) / 2.)
- if offset > 0: - if isinstance(rs, (Series, DataFrame, Panel)): - rs = rs.slice_shift(-offset, axis=axis) - else: - lead_indexer = [slice(None)] * rs.ndim - lead_indexer[axis] = slice(offset, None) - rs = np.copy(rs[tuple(lead_indexer)]) - return rs - - -def _process_data_structure(arg, kill_inf=True): - if isinstance(arg, DataFrame): - return_hook = lambda v: type(arg)(v, index=arg.index, - columns=arg.columns) - values = arg.values - elif isinstance(arg, Series): - values = arg.values - return_hook = lambda v: Series(v, arg.index, name=arg.name) - else: - return_hook = lambda v: v - values = arg - - if not issubclass(values.dtype.type, float): - values = values.astype(float) - - if kill_inf: - values = values.copy() - values[np.isinf(values)] = np.NaN - - return return_hook, values #------------------------------------------------------------------------------ # Exponential moving moments -def _get_center_of_mass(com, span, halflife): - valid_count = len([x for x in [com, span, halflife] if x is not None]) - if valid_count > 1: - raise Exception("com, span, and halflife are mutually exclusive") - - if span is not None: - # convert span to center of mass - com = (span - 1) / 2. - elif halflife is not None: - # convert halflife to center of mass - decay = 1 - np.exp(np.log(0.5) / halflife) - com = 1 / decay - 1 - elif com is None: - raise Exception("Must pass one of com, span, or halflife") - - return float(com) - - @Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, adjust=True, how=None, ignore_na=False): - arg = _conv_timerule(arg, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _ewma(v): - return algos.ewma(v, com, int(adjust), int(ignore_na), int(min_periods)) - - return_hook, values = _process_data_structure(arg) - if values.size == 0: - output = values.copy() - else: - output = np.apply_along_axis(_ewma, 0, values) - return return_hook(output) - + return ensure_compat('ewm', + 'mean', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na) @Substitution("Exponentially-weighted moving variance", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, freq=None, how=None, ignore_na=False, adjust=True): - arg = _conv_timerule(arg, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _ewmvar(v): - return algos.ewmcov(v, v, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) - - return_hook, values = _process_data_structure(arg) - if values.size == 0: - output = values.copy() - else: - output = np.apply_along_axis(_ewmvar, 0, values) - return return_hook(output) - + return ensure_compat('ewm', + 'var', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na, + bias=bias, + func_kw=['bias']) @Substitution("Exponentially-weighted moving std", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - ignore_na=False, adjust=True): - result = ewmvar(arg, com=com, span=span, halflife=halflife, - min_periods=min_periods, bias=bias, adjust=adjust, ignore_na=ignore_na) - return _zsqrt(result) + 
freq=None, how=None, ignore_na=False, adjust=True): + return ensure_compat('ewm', + 'std', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na, + bias=bias, + func_kw=['bias']) ewmvol = ewmstd @@ -528,21 +361,22 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _get_ewmcov(X, Y): - # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() - return_hook, x_values = _process_data_structure(X) - return_hook, y_values = _process_data_structure(Y) - cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) - return return_hook(cov) - - result = _flex_binary_moment(arg1, arg2, _get_ewmcov, - pairwise=bool(pairwise)) - return result + return ensure_compat('ewm', + 'cov', + arg1, + other=arg2, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + bias=bias, + freq=freq, + how=how, + ignore_na=ignore_na, + adjust=adjust, + pairwise=pairwise, + func_kw=['other','pairwise','bias']) @Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) @@ -556,80 +390,26 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _get_ewmcorr(X, Y): - # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() - return_hook, x_values = _process_data_structure(X) - return_hook, y_values = _process_data_structure(Y) - cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - x_var = algos.ewmcov(x_values, x_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - y_var = algos.ewmcov(y_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - corr = cov / _zsqrt(x_var * y_var) - return return_hook(corr) - - result = _flex_binary_moment(arg1, arg2, _get_ewmcorr, - pairwise=bool(pairwise)) - return result - - -def _zsqrt(x): - result = np.sqrt(x) - mask = x < 0 - - if isinstance(x, DataFrame): - if mask.values.any(): - result[mask] = 0 - else: - if mask.any(): - result[mask] = 0 - - return result - - -def _prep_binary(arg1, arg2): - if not isinstance(arg2, type(arg1)): - raise Exception('Input arrays must be of the same type!') - - # mask out values, this also makes a common index... - X = arg1 + 0 * arg2 - Y = arg2 + 0 * arg1 - - return X, Y + return ensure_compat('ewm', + 'corr', + arg1, + other=arg2, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + how=how, + ignore_na=ignore_na, + adjust=adjust, + pairwise=pairwise, + func_kw=['other','pairwise']) #---------------------------------------------------------------------- # Python interface to Cython functions -def _conv_timerule(arg, freq, how): - - types = (DataFrame, Series) - if freq is not None and isinstance(arg, types): - # Conform to whatever frequency needed. 
- arg = arg.resample(freq, how=how) - - return arg - - -def _require_min_periods(p): - def _check_func(minp, window): - if minp is None: - return window - else: - return max(p, minp) - return _check_func - - -def _use_window(minp, window): - if minp is None: - return window - else: - return minp - - -def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw=''): +def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): if how is None: how_arg_str = 'None' else: @@ -638,36 +418,33 @@ def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw='' @Substitution(desc, _unary_arg, _roll_kw%how_arg_str + additional_kw, _type_of_input_retval, _roll_notes) @Appender(_doc_template) - @wraps(func) def f(arg, window, min_periods=None, freq=None, center=False, how=how, **kwargs): - def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): - minp = check_minp(minp, window) - return func(arg, window, minp, **kwds) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=center, how=how, **kwargs) - + return ensure_compat('rolling', + name, + arg, + window=window, + min_periods=min_periods, + freq=freq, + center=center, + how=how, + func_kw=func_kw, + **kwargs) return f -rolling_max = _rolling_func(algos.roll_max, 'Moving maximum.', how='max') -rolling_min = _rolling_func(algos.roll_min, 'Moving minimum.', how='min') -rolling_sum = _rolling_func(algos.roll_sum, 'Moving sum.') -rolling_mean = _rolling_func(algos.roll_mean, 'Moving mean.') -rolling_median = _rolling_func(algos.roll_median_c, 'Moving median.', - how='median') - -_ts_std = lambda *a, **kw: _zsqrt(algos.roll_var(*a, **kw)) -rolling_std = _rolling_func(_ts_std, 'Moving standard deviation.', - check_minp=_require_min_periods(1), +rolling_max = _rolling_func('max', 'Moving maximum.', how='max') +rolling_min = _rolling_func('min', 'Moving minimum.', how='min') +rolling_sum = _rolling_func('sum', 'Moving sum.') +rolling_mean = _rolling_func('mean', 'Moving mean.') +rolling_median = _rolling_func('median', 'Moving median.', how='median') +rolling_std = _rolling_func('std', 'Moving standard deviation.', + func_kw=['ddof'], additional_kw=_ddof_kw) -rolling_var = _rolling_func(algos.roll_var, 'Moving variance.', - check_minp=_require_min_periods(1), +rolling_var = _rolling_func('var', 'Moving variance.', + func_kw=['ddof'], additional_kw=_ddof_kw) -rolling_skew = _rolling_func(algos.roll_skew, 'Unbiased moving skewness.', - check_minp=_require_min_periods(3)) -rolling_kurt = _rolling_func(algos.roll_kurt, 'Unbiased moving kurtosis.', - check_minp=_require_min_periods(4)) - +rolling_skew = _rolling_func('skew', 'Unbiased moving skewness.') +rolling_kurt = _rolling_func('kurt', 'Unbiased moving kurtosis.') def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, center=False): @@ -703,12 +480,15 @@ def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
""" - - def call_cython(arg, window, minp, args=(), kwargs={}): - minp = _use_window(minp, window) - return algos.roll_quantile(arg, window, minp, quantile) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=center) + return ensure_compat('rolling', + 'quantile', + arg, + window=window, + freq=freq, + center=center, + min_periods=min_periods, + func_kw=['quantile'], + quantile=quantile) def rolling_apply(arg, window, func, min_periods=None, freq=None, @@ -749,12 +529,17 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - offset = int((window - 1) / 2.) if center else 0 - def call_cython(arg, window, minp, args, kwargs): - minp = _use_window(minp, window) - return algos.roll_generic(arg, window, minp, offset, func, args, kwargs) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=False, args=args, kwargs=kwargs) + return ensure_compat('rolling', + 'apply', + arg, + window=window, + freq=freq, + center=center, + min_periods=min_periods, + func_kw=['func','args','kwargs'], + func=func, + args=args, + kwargs=kwargs) def rolling_window(arg, window=None, win_type=None, min_periods=None, @@ -816,97 +601,48 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - if isinstance(window, (list, tuple, np.ndarray)): - if win_type is not None: - raise ValueError(('Do not specify window type if using custom ' - 'weights')) - window = pdcom._asarray_tuplesafe(window).astype(float) - elif pdcom.is_integer(window): # window size - if win_type is None: - raise ValueError('Must specify window type') - try: - import scipy.signal as sig - except ImportError: - raise ImportError('Please install scipy to generate window weight') - win_type = _validate_win_type(win_type, kwargs) # may pop from kwargs - window = sig.get_window(win_type, window).astype(float) - else: - raise ValueError('Invalid window %s' % str(window)) - - minp = _use_window(min_periods, len(window)) - - arg = _conv_timerule(arg, freq, how) - return_hook, values = _process_data_structure(arg) - - if values.size == 0: - result = values.copy() - else: - offset = int((len(window) - 1) / 2.) 
if center else 0 - additional_nans = np.array([np.NaN] * offset) - f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x, - window, minp, avg=mean) - result = np.apply_along_axis(f, axis, values) - - if center: - result = _center_window(result, len(window), axis) - - return return_hook(result) - - -def _validate_win_type(win_type, kwargs): - # may pop from kwargs - arg_map = {'kaiser': ['beta'], - 'gaussian': ['std'], - 'general_gaussian': ['power', 'width'], - 'slepian': ['width']} - if win_type in arg_map: - return tuple([win_type] + - _pop_args(win_type, arg_map[win_type], kwargs)) - return win_type - - -def _pop_args(win_type, arg_names, kwargs): - msg = '%s window requires %%s' % win_type - all_args = [] - for n in arg_names: - if n not in kwargs: - raise ValueError(msg % n) - all_args.append(kwargs.pop(n)) - return all_args - - -def _expanding_func(func, desc, check_minp=_use_window, additional_kw=''): + func = 'mean' if mean else 'sum' + return ensure_compat('rolling', + func, + arg, + window=window, + win_type=win_type, + freq=freq, + center=center, + min_periods=min_periods, + axis=axis, + how=how, + func_kw=kwargs.keys(), + **kwargs) + +def _expanding_func(name, desc, func_kw=None, additional_kw=''): @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, _type_of_input_retval, "") @Appender(_doc_template) - @wraps(func) def f(arg, min_periods=1, freq=None, **kwargs): - window = max(len(arg), min_periods) if min_periods else len(arg) - - def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): - minp = check_minp(minp, window) - return func(arg, window, minp, **kwds) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - **kwargs) - + return ensure_compat('expanding', + name, + arg, + min_periods=min_periods, + freq=freq, + func_kw=func_kw, + **kwargs) return f -expanding_max = _expanding_func(algos.roll_max, 'Expanding maximum.') -expanding_min = _expanding_func(algos.roll_min, 'Expanding minimum.') -expanding_sum = _expanding_func(algos.roll_sum, 'Expanding sum.') -expanding_mean = _expanding_func(algos.roll_mean, 'Expanding mean.') -expanding_median = _expanding_func(algos.roll_median_c, 'Expanding median.') +expanding_max = _expanding_func('max', 'Expanding maximum.') +expanding_min = _expanding_func('min', 'Expanding minimum.') +expanding_sum = _expanding_func('sum', 'Expanding sum.') +expanding_mean = _expanding_func('mean', 'Expanding mean.') +expanding_median = _expanding_func('median', 'Expanding median.') -expanding_std = _expanding_func(_ts_std, 'Expanding standard deviation.', - check_minp=_require_min_periods(1), +expanding_std = _expanding_func('std', 'Expanding standard deviation.', + func_kw=['ddof'], additional_kw=_ddof_kw) -expanding_var = _expanding_func(algos.roll_var, 'Expanding variance.', - check_minp=_require_min_periods(1), +expanding_var = _expanding_func('var', 'Expanding variance.', + func_kw=['ddof'], additional_kw=_ddof_kw) -expanding_skew = _expanding_func(algos.roll_skew, 'Unbiased expanding skewness.', - check_minp=_require_min_periods(3)) -expanding_kurt = _expanding_func(algos.roll_kurt, 'Unbiased expanding kurtosis.', - check_minp=_require_min_periods(4)) +expanding_skew = _expanding_func('skew', 'Unbiased expanding skewness.') +expanding_kurt = _expanding_func('kurt', 'Unbiased expanding kurtosis.') def expanding_count(arg, freq=None): @@ -930,7 +666,7 @@ def expanding_count(arg, freq=None): frequency by resampling the data. 
This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - return rolling_count(arg, len(arg), freq=freq) + return ensure_compat('expanding', 'count', arg, freq=freq) def expanding_quantile(arg, quantile, min_periods=1, freq=None): @@ -958,9 +694,13 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None): frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - return rolling_quantile(arg, len(arg), quantile, min_periods=min_periods, - freq=freq) - + return ensure_compat('expanding', + 'quantile', + arg, + freq=freq, + min_periods=min_periods, + func_kw=['quantile'], + quantile=quantile) @Substitution("Unbiased expanding covariance.", _binary_arg_flex, _expanding_kw+_pairwise_kw+_ddof_kw, _flex_retval, "") @@ -973,10 +713,15 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None, ddof min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) - return rolling_cov(arg1, arg2, window, - min_periods=min_periods, freq=freq, - pairwise=pairwise, ddof=ddof) + return ensure_compat('expanding', + 'cov', + arg1, + other=arg2, + min_periods=min_periods, + pairwise=pairwise, + freq=freq, + ddof=ddof, + func_kw=['other','pairwise','ddof']) @Substitution("Expanding sample correlation.", _binary_arg_flex, @@ -990,11 +735,14 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) - return rolling_corr(arg1, arg2, window, - min_periods=min_periods, - freq=freq, pairwise=pairwise) - + return ensure_compat('expanding', + 'corr', + arg1, + other=arg2, + min_periods=min_periods, + pairwise=pairwise, + freq=freq, + func_kw=['other','pairwise','ddof']) @Substitution("Deprecated. Use expanding_corr(..., pairwise=True) instead.\n\n" "Pairwise expanding sample correlation", _pairwise_arg, @@ -1038,6 +786,12 @@ def expanding_apply(arg, func, min_periods=1, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
""" - window = max(len(arg), min_periods) if min_periods else len(arg) - return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq, - args=args, kwargs=kwargs) + return ensure_compat('expanding', + 'apply', + arg, + freq=freq, + min_periods=min_periods, + func_kw=['func','args','kwargs'], + func=func, + args=args, + kwargs=kwargs) diff --git a/pandas/stats/tests/test_moments.py b/pandas/tests/test_window.py similarity index 85% rename from pandas/stats/tests/test_moments.py rename to pandas/tests/test_window.py index b9efa875735d2..116236ae7e422 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/tests/test_window.py @@ -9,12 +9,14 @@ import numpy as np from distutils.version import LooseVersion +import pandas as pd from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull, concat from pandas.util.testing import ( assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_index_equal ) import pandas.core.datetools as datetools import pandas.stats.moments as mom +import pandas.core.window as rwindow import pandas.util.testing as tm from pandas.compat import range, zip, PY3, StringIO @@ -33,12 +35,87 @@ def _create_data(self): self.arr = arr self.rng = bdate_range(datetime(2009, 1, 1), periods=N) - self.series = Series(arr.copy(), index=self.rng) - self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) +class TestApi(Base): + + def setUp(self): + self._create_data() + + def test_getitem(self): + + r = self.frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns,self.frame.columns) + + r = self.frame.rolling(window=5)[1] + self.assertEqual(r._selected_obj.name,self.frame.columns[1]) + + r = self.frame.rolling(window=5)[1,3] + tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=['A', 'B']) + g = df.rolling(window=5) + self.assertRaises(KeyError, g.__getitem__, ['C']) # g[['C']] + + self.assertRaises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] + with tm.assertRaisesRegexp(KeyError, '^[^A]+$'): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! 
+ g[['A', 'C']] + + def test_attribute_access(self): + + df = DataFrame([[1, 2]], columns=['A', 'B']) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(),r['A'].sum()) + self.assertRaises(AttributeError, lambda : r.F) + + def tests_skip_nuiscance(self): + + df = DataFrame({'A' : range(5), 'B' : range(5,10), 'C' : 'foo'}) + + r = df.rolling(window=3) + result = r[['A','B']].sum() + expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], + 'B' : [np.nan,np.nan,18,21,24]}, + columns=list('AB')) + assert_frame_equal(result, expected) + + expected = pd.concat([r[['A','B']].sum(),df[['C']]],axis=1) + result = r.sum() + assert_frame_equal(result, expected) + + def test_timedeltas(self): + + df = DataFrame({'A' : range(5), 'B' : pd.timedelta_range('1 day',periods=5)}) + r = df.rolling(window=3) + result = r.sum() + expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], + 'B' : pd.to_timedelta([pd.NaT,pd.NaT,'6 days','9 days','12 days'])}, + columns=list('AB')) + assert_frame_equal(result, expected) + + def test_agg(self): + df = DataFrame({'A' : range(5), + 'B' : range(0,10,2)}) + + r = df.rolling(window=3) + + import pdb; pdb.set_trace() + agged = r.aggregate([np.mean, np.std]) + agged = r.aggregate({'A': np.mean, + 'B': np.std}) + agged = r.aggregate({'A': ['mean','sum']}) + agged = r['A'].aggregate(['mean','sum']) + agged = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } }) + agged = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' }, + 'B': { 'mean2' : 'mean', 'sum2' : 'sum' }}) + agged = r.aggregate({'r1': { 'A' : ['mean','sum'] }, + 'r2' : { 'B' : ['mean','sum'] }}) + class TestMoments(Base): def setUp(self): @@ -57,17 +134,18 @@ def test_centered_axis_validation(self): self.assertRaises(ValueError, mom.rolling_mean,DataFrame(np.ones((10,10))),3,center=True ,axis=2) def test_rolling_sum(self): - self._check_moment_func(mom.rolling_sum, np.sum) + self._check_moment_func(mom.rolling_sum, np.sum, name='sum') def test_rolling_count(self): counter = lambda x: np.isfinite(x).astype(float).sum() self._check_moment_func(mom.rolling_count, counter, + name='count', has_min_periods=False, preserve_nan=False, fill_value=0) def test_rolling_mean(self): - self._check_moment_func(mom.rolling_mean, np.mean) + self._check_moment_func(mom.rolling_mean, np.mean, name='mean') def test_cmov_mean(self): # GH 8238 @@ -101,6 +179,9 @@ def test_cmov_window(self): rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type='boxcar', center=True).mean() + assert_series_equal(xp, rs) + def test_cmov_window_corner(self): # GH 8238 tm._skip_if_no_scipy() @@ -152,6 +233,27 @@ def test_cmov_window_frame(self): rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True) assert_frame_equal(DataFrame(xp), rs) + rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).mean() + assert_frame_equal(DataFrame(xp), rs) + + # invalid method + self.assertRaises(AttributeError, lambda : DataFrame(vals).rolling(5, win_type='boxcar', center=True).std()) + + # sum + xp = np.array([[ np.nan, np.nan], + [ np.nan, np.nan], + [ 46.26, 46.96], + [ 43.22, 49.53], + [ 44.35, 51.04], + [ 34.05, 42.94], + [ 38.96, 43.22], + [ 45.25, 39.12], + [ np.nan, np.nan], + [ np.nan, np.nan]]) + + rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).sum() + assert_frame_equal(DataFrame(xp), rs) + def test_cmov_window_na_min_periods(self): tm._skip_if_no_scipy() @@ -162,7 +264,9 @@ def test_cmov_window_na_min_periods(self): xp = mom.rolling_mean(vals, 5, 
min_periods=4, center=True) rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True) + assert_series_equal(xp, rs) + rs = vals.rolling(5, win_type='boxcar', min_periods=4, center=True).mean() assert_series_equal(xp, rs) def test_cmov_window_regular(self): @@ -197,6 +301,9 @@ def test_cmov_window_regular(self): rs = mom.rolling_window(Series(vals), 5, wt, center=True) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean() + assert_series_equal(xp, rs) + def test_cmov_window_regular_linear_range(self): # GH 8238 tm._skip_if_no_scipy() @@ -214,6 +321,9 @@ def test_cmov_window_regular_linear_range(self): rs = mom.rolling_window(Series(vals), 5, wt, center=True) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean() + assert_series_equal(xp, rs) + def test_cmov_window_regular_missing_data(self): # GH 8238 tm._skip_if_no_scipy() @@ -248,6 +358,9 @@ def test_cmov_window_regular_missing_data(self): rs = mom.rolling_window(Series(vals), 5, wt, min_periods=3) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, min_periods=3).mean() + assert_series_equal(xp, rs) + def test_cmov_window_special(self): # GH 8238 tm._skip_if_no_scipy() @@ -278,6 +391,9 @@ def test_cmov_window_special(self): **k) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) + assert_series_equal(xp, rs) + def test_cmov_window_special_linear_range(self): # GH 8238 tm._skip_if_no_scipy() @@ -297,11 +413,14 @@ def test_cmov_window_special_linear_range(self): **k) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) + assert_series_equal(xp, rs) + def test_rolling_median(self): - self._check_moment_func(mom.rolling_median, np.median) + self._check_moment_func(mom.rolling_median, np.median, name='median') def test_rolling_min(self): - self._check_moment_func(mom.rolling_min, np.min) + self._check_moment_func(mom.rolling_min, np.min, name='min') a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_min(a, window=100, min_periods=1) @@ -311,7 +430,7 @@ def test_rolling_min(self): 2, 3]), window=3, min_periods=5) def test_rolling_max(self): - self._check_moment_func(mom.rolling_max, np.max) + self._check_moment_func(mom.rolling_max, np.max, name='max') a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_max(a, window=100, min_periods=1) @@ -330,8 +449,8 @@ def scoreatpercentile(a, per): return values[int(idx)] for q in qs: - def f(x, window, min_periods=None, freq=None, center=False): - return mom.rolling_quantile(x, window, q, + def f(x, window, quantile, min_periods=None, freq=None, center=False): + return mom.rolling_quantile(x, window, quantile, min_periods=min_periods, freq=freq, center=center) @@ -339,7 +458,7 @@ def f(x, window, min_periods=None, freq=None, center=False): def alt(x): return scoreatpercentile(x, q) - self._check_moment_func(f, alt) + self._check_moment_func(f, alt, name='quantile', quantile=q) def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing with a 0-length Series @@ -349,13 +468,14 @@ def test_rolling_apply(self): ser = Series([]) assert_series_equal(ser, mom.rolling_apply(ser, 10, lambda x: x.mean())) - def roll_mean(x, window, min_periods=None, freq=None, center=False): + f = lambda x: x[np.isfinite(x)].mean() + def roll_mean(x, window, min_periods=None, freq=None, center=False, **kwargs): return mom.rolling_apply(x, window, - lambda x: x[np.isfinite(x)].mean(), + func=f, 
min_periods=min_periods, freq=freq, center=center) - self._check_moment_func(roll_mean, np.mean) + self._check_moment_func(roll_mean, np.mean, name='apply', func=f) # GH 8080 s = Series([None, None, None]) @@ -363,6 +483,9 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False): expected = Series([1., 2., 2.]) assert_series_equal(result, expected) + result = s.rolling(2, min_periods=0).apply(len) + assert_series_equal(result, expected) + def test_rolling_apply_out_of_bounds(self): # #1850 arr = np.arange(4) @@ -376,9 +499,12 @@ def test_rolling_apply_out_of_bounds(self): def test_rolling_std(self): self._check_moment_func(mom.rolling_std, - lambda x: np.std(x, ddof=1)) - self._check_moment_func(functools.partial(mom.rolling_std, ddof=0), - lambda x: np.std(x, ddof=0)) + lambda x: np.std(x, ddof=1), + name='std') + self._check_moment_func(mom.rolling_std, + lambda x: np.std(x, ddof=0), + name='std', + ddof=0) def test_rolling_std_1obs(self): result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), @@ -414,9 +540,12 @@ def test_rolling_std_neg_sqrt(self): def test_rolling_var(self): self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), - test_stable=True) - self._check_moment_func(functools.partial(mom.rolling_var, ddof=0), - lambda x: np.var(x, ddof=0)) + test_stable=True, + name='var') + self._check_moment_func(mom.rolling_var, + lambda x: np.var(x, ddof=0), + name='var', + ddof=0) def test_rolling_skew(self): try: @@ -424,7 +553,8 @@ def test_rolling_skew(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_skew, - lambda x: skew(x, bias=False)) + lambda x: skew(x, bias=False), + name='skew') def test_rolling_kurt(self): try: @@ -432,7 +562,8 @@ def test_rolling_kurt(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_kurt, - lambda x: kurtosis(x, bias=False)) + lambda x: kurtosis(x, bias=False), + name='kurt') def test_fperr_robustness(self): # TODO: remove this once python 2.5 out of picture @@ -463,36 +594,55 @@ def test_fperr_robustness(self): result = mom.rolling_mean(-arr, 1) self.assertTrue(result[-1] <= 0) - def _check_moment_func(self, func, static_comp, window=50, + def _check_moment_func(self, f, static_comp, + name=None, + window=50, has_min_periods=True, has_center=True, has_time_rule=True, preserve_nan=True, fill_value=None, - test_stable=False): + test_stable=False, + **kwargs): - self._check_ndarray(func, static_comp, window=window, + self._check_ndarray(f, static_comp, window=window, has_min_periods=has_min_periods, preserve_nan=preserve_nan, has_center=has_center, fill_value=fill_value, - test_stable=test_stable) + test_stable=test_stable, + **kwargs) - self._check_structures(func, static_comp, + self._check_structures(f, static_comp, has_min_periods=has_min_periods, has_time_rule=has_time_rule, fill_value=fill_value, - has_center=has_center) - - def _check_ndarray(self, func, static_comp, window=50, + has_center=has_center, + **kwargs) + + # new API + if name is not None: + self._check_structures(f, static_comp, + name=name, + has_min_periods=has_min_periods, + has_time_rule=has_time_rule, + fill_value=fill_value, + has_center=has_center, + **kwargs) + + def _check_ndarray(self, f, static_comp, window=50, has_min_periods=True, preserve_nan=True, has_center=True, fill_value=None, test_stable=False, - test_window=True): + test_window=True, + **kwargs): + + def get_result(arr, window, min_periods=None, center=False): + return f(arr, window, 
min_periods=min_periods, center=center, **kwargs) - result = func(self.arr, window) + result = get_result(self.arr, window) assert_almost_equal(result[-1], static_comp(self.arr[-50:])) @@ -505,11 +655,11 @@ def _check_ndarray(self, func, static_comp, window=50, arr[-10:] = np.NaN if has_min_periods: - result = func(arr, 50, min_periods=30) + result = get_result(arr, 50, min_periods=30) assert_almost_equal(result[-1], static_comp(arr[10:-10])) # min_periods is working correctly - result = func(arr, 20, min_periods=15) + result = get_result(arr, 20, min_periods=15) self.assertTrue(np.isnan(result[23])) self.assertFalse(np.isnan(result[24])) @@ -517,31 +667,31 @@ def _check_ndarray(self, func, static_comp, window=50, self.assertTrue(np.isnan(result[-5])) arr2 = randn(20) - result = func(arr2, 10, min_periods=5) + result = get_result(arr2, 10, min_periods=5) self.assertTrue(isnull(result[3])) self.assertTrue(notnull(result[4])) # min_periods=0 - result0 = func(arr, 20, min_periods=0) - result1 = func(arr, 20, min_periods=1) + result0 = get_result(arr, 20, min_periods=0) + result1 = get_result(arr, 20, min_periods=1) assert_almost_equal(result0, result1) else: - result = func(arr, 50) + result = get_result(arr, 50) assert_almost_equal(result[-1], static_comp(arr[10:-10])) # GH 7925 if has_center: if has_min_periods: - result = func(arr, 20, min_periods=15, center=True) - expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:] + result = get_result(arr, 20, min_periods=15, center=True) + expected = get_result(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:] else: - result = func(arr, 20, center=True) - expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] + result = get_result(arr, 20, center=True) + expected = get_result(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] self.assert_numpy_array_equal(result, expected) if test_stable: - result = func(self.arr + 1e9, window) + result = get_result(self.arr + 1e9, window) assert_almost_equal(result[-1], static_comp(self.arr[-50:] + 1e9)) @@ -549,16 +699,16 @@ def _check_ndarray(self, func, static_comp, window=50, if test_window: if has_min_periods: for minp in (0, len(self.arr)-1, len(self.arr)): - result = func(self.arr, len(self.arr)+1, min_periods=minp) - expected = func(self.arr, len(self.arr), min_periods=minp) + result = get_result(self.arr, len(self.arr)+1, min_periods=minp) + expected = get_result(self.arr, len(self.arr), min_periods=minp) nan_mask = np.isnan(result) self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) nan_mask = ~nan_mask assert_almost_equal(result[nan_mask], expected[nan_mask]) else: - result = func(self.arr, len(self.arr)+1) - expected = func(self.arr, len(self.arr)) + result = get_result(self.arr, len(self.arr)+1) + expected = get_result(self.arr, len(self.arr)) nan_mask = np.isnan(result) self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) nan_mask = ~nan_mask @@ -567,15 +717,34 @@ def _check_ndarray(self, func, static_comp, window=50, - def _check_structures(self, func, static_comp, + def _check_structures(self, f, static_comp, + name=None, has_min_periods=True, has_time_rule=True, has_center=True, - fill_value=None): + fill_value=None, + **kwargs): - series_result = func(self.series, 50) - tm.assertIsInstance(series_result, Series) + def get_result(obj, window, min_periods=None, freq=None, center=False): + + # check via the API calls if name is provided + if name is not None: + return getattr(obj.rolling(window=window, + 
min_periods=min_periods, + freq=freq, + center=center),name)(**kwargs) - frame_result = func(self.frame, 50) + # check via the moments API + return f(obj, + window=window, + min_periods=min_periods, + freq=freq, + center=center, + **kwargs) + + series_result = get_result(self.series, window=50) + frame_result = get_result(self.frame, window=50) + + tm.assertIsInstance(series_result, Series) self.assertEqual(type(frame_result), DataFrame) # check time_rule works @@ -584,13 +753,11 @@ def _check_structures(self, func, static_comp, minp = 10 if has_min_periods: - series_result = func(self.series[::2], win, min_periods=minp, - freq='B') - frame_result = func(self.frame[::2], win, min_periods=minp, - freq='B') + series_result = get_result(self.series[::2], window=win, min_periods=minp, freq='B') + frame_result = get_result(self.frame[::2], window=win, min_periods=minp, freq='B') else: - series_result = func(self.series[::2], win, freq='B') - frame_result = func(self.frame[::2], win, freq='B') + series_result = get_result(self.series[::2], window=win, freq='B') + frame_result = get_result(self.frame[::2], window=win, freq='B') last_date = series_result.index[-1] prev_date = last_date - 24 * datetools.bday @@ -605,22 +772,41 @@ def _check_structures(self, func, static_comp, # GH 7925 if has_center: + + # shifter index + s = ['x%d'%x for x in range(12)] + if has_min_periods: minp = 10 - series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.series.index) - frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.frame.index) - series_rs = func(self.series, 25, min_periods=minp, - center=True) - frame_rs = func(self.frame, 25, min_periods=minp, - center=True) + series_xp = get_result(self.series.reindex(list(self.series.index)+s), + window=25, + min_periods=minp).shift(-12).reindex(self.series.index) + frame_xp = get_result(self.frame.reindex(list(self.frame.index)+s), + window=25, + min_periods=minp).shift(-12).reindex(self.frame.index) + + series_rs = get_result(self.series, + window=25, + min_periods=minp, + center=True) + frame_rs = get_result(self.frame, + window=25, + min_periods=minp, + center=True) else: - series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.series.index) - frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.frame.index) - - series_rs = func(self.series, 25, center=True) - frame_rs = func(self.frame, 25, center=True) + series_xp = get_result(self.series.reindex(list(self.series.index)+s), + window=25).shift(-12).reindex(self.series.index) + frame_xp = get_result(self.frame.reindex(list(self.frame.index)+s), + window=25).shift(-12).reindex(self.frame.index) + + series_rs = get_result(self.series, + window=25, + center=True) + frame_rs = get_result(self.frame, + window=25, + center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) @@ -642,7 +828,10 @@ def test_ewma(self): for f in [lambda s: mom.ewma(s, com=2.0, adjust=True), lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False), lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=True), - ]: + lambda s: s.ewm(com=2.0, adjust=True).mean(), + lambda s: s.ewm(com=2.0, adjust=True, ignore_na=False).mean(), + lambda s: s.ewm(com=2.0, adjust=True, ignore_na=True).mean(), + ]: result = f(s) 
assert_series_equal(result, expected) @@ -650,6 +839,9 @@ def test_ewma(self): for f in [lambda s: mom.ewma(s, com=2.0, adjust=False), lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=False), lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=True), + lambda s: s.ewm(com=2.0, adjust=False).mean(), + lambda s: s.ewm(com=2.0, adjust=False, ignore_na=False).mean(), + lambda s: s.ewm(com=2.0, adjust=False, ignore_na=True).mean(), ]: result = f(s) assert_series_equal(result, expected) @@ -695,16 +887,20 @@ def simple_wma(s, w): expected = simple_wma(s, Series(w)) result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na) assert_series_equal(result, expected) + result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() + assert_series_equal(result, expected) if ignore_na is False: # check that ignore_na defaults to False result = mom.ewma(s, com=com, adjust=adjust) assert_series_equal(result, expected) + result = s.ewm(com=com, adjust=adjust).mean() + assert_series_equal(result, expected) def test_ewmvar(self): - self._check_ew(mom.ewmvar) + self._check_ew(mom.ewmvar, name='var') def test_ewmvol(self): - self._check_ew(mom.ewmvol) + self._check_ew(mom.ewmvol, name='vol') def test_ewma_span_com_args(self): A = mom.ewma(self.arr, com=9.5) @@ -727,11 +923,17 @@ def test_ewma_halflife_arg(self): def test_moment_preserve_series_name(self): # GH 10565 s = Series(np.arange(100), name='foo') + s2 = mom.rolling_mean(s, 30) s3 = mom.rolling_sum(s, 20) self.assertEqual(s2.name, 'foo') self.assertEqual(s3.name, 'foo') + s2 = s.rolling(30).mean() + s3 = s.rolling(20).sum() + self.assertEqual(s2.name, 'foo') + self.assertEqual(s3.name, 'foo') + def test_ew_empty_arrays(self): arr = np.array([], dtype=np.float64) @@ -740,11 +942,11 @@ def test_ew_empty_arrays(self): result = f(arr, 3) assert_almost_equal(result, arr) - def _check_ew(self, func): - self._check_ew_ndarray(func) - self._check_ew_structures(func) + def _check_ew(self, func, name=None): + self._check_ew_ndarray(func, name=name) + self._check_ew_structures(func, name=name) - def _check_ew_ndarray(self, func, preserve_nan=False): + def _check_ew_ndarray(self, func, preserve_nan=False, name=None): result = func(self.arr, com=10) if preserve_nan: assert(np.isnan(result[self._nan_locs]).all()) @@ -787,11 +989,18 @@ def _check_ew_ndarray(self, func, preserve_nan=False): result2 = func(np.arange(50), span=10) self.assertEqual(result2.dtype, np.float_) - def _check_ew_structures(self, func): + def _check_ew_structures(self, func, name=None): series_result = func(self.series, com=10) tm.assertIsInstance(series_result, Series) + if name is not None: + series_result = getattr(self.series.ewm(com=10),name)() + tm.assertIsInstance(series_result, Series) + frame_result = func(self.frame, com=10) self.assertEqual(type(frame_result), DataFrame) + if name is not None: + frame_result = getattr(self.frame.ewm(com=10),name)() + self.assertEqual(type(frame_result), DataFrame) # create the data only once as we are not setting it def _create_consistency_data(): @@ -1204,8 +1413,11 @@ def test_rolling_cov(self): result = mom.rolling_cov(A, B, 50, min_periods=25) assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + result = A.rolling(window=50, min_periods=25).cov(B) + assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + def test_rolling_cov_pairwise(self): - self._check_pairwise_moment(mom.rolling_cov, 10, min_periods=5) + self._check_pairwise_moment(mom.rolling_cov, window=10, min_periods=5, name='cov') def 
test_rolling_corr(self):
         A = self.series
@@ -1214,6 +1426,9 @@ def test_rolling_corr(self):
         result = mom.rolling_corr(A, B, 50, min_periods=25)
         assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
 
+        result = A.rolling(window=50, min_periods=25).corr(B)
+        assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
+
         # test for correct bias correction
         a = tm.makeTimeSeries()
         b = tm.makeTimeSeries()
@@ -1223,21 +1438,32 @@ def test_rolling_corr(self):
         result = mom.rolling_corr(a, b, len(a), min_periods=1)
         assert_almost_equal(result[-1], a.corr(b))
 
+        result = a.rolling(window=len(a), min_periods=1).corr(b)
+        assert_almost_equal(result[-1], a.corr(b))
+
     def test_rolling_corr_pairwise(self):
-        self._check_pairwise_moment(mom.rolling_corr, 10, min_periods=5)
+        self._check_pairwise_moment(mom.rolling_corr, window=10, min_periods=5, name='corr')
 
-    def _check_pairwise_moment(self, func, *args, **kwargs):
-        panel = func(self.frame, *args, **kwargs)
+    def _check_pairwise_moment(self, func, name=None, **kwargs):
+        def get_result(obj, obj2=None):
+            return func(obj, obj2, **kwargs)
 
+        panel = get_result(self.frame)
         actual = panel.ix[:, 1, 5]
-        expected = func(self.frame[1], self.frame[5], *args, **kwargs)
+        expected = get_result(self.frame[1], self.frame[5])
         tm.assert_series_equal(actual, expected, check_names=False)
         self.assertEqual(actual.name, 5)
 
+        if name is not None:
+            panel = getattr(self.frame.rolling(**kwargs),name)()
+            actual = panel.ix[:, 1, 5]
+            tm.assert_series_equal(actual, expected, check_names=False)
+            self.assertEqual(actual.name, 5)
+
     def test_flex_binary_moment(self):
         # GH3155
         # don't blow the stack
-        self.assertRaises(TypeError, mom._flex_binary_moment,5,6,None)
+        self.assertRaises(TypeError, rwindow._flex_binary_moment,5,6,None)
 
     def test_corr_sanity(self):
         #GH 3155
@@ -1373,6 +1599,9 @@ def test_expanding_count(self):
         result = mom.expanding_count(self.series)
         assert_almost_equal(result, mom.rolling_count(self.series,
                                                       len(self.series)))
+        result = self.series.expanding().count()
+        assert_almost_equal(result, mom.rolling_count(self.series,
+                                                      len(self.series)))
 
     def test_expanding_quantile(self):
         result = mom.expanding_quantile(self.series, 0.5)

From 36fb83540523c53c91198f2ad94d3dbc3a964ac1 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 20 Nov 2015 22:19:08 -0500
Subject: [PATCH 2/8] BUG/API: consistency in .agg with nested dicts #9052

---
 doc/source/whatsnew/v0.18.0.txt | 46 +++++++++++++++++++++++
 pandas/core/base.py             | 33 +++++++++++++----
 pandas/core/groupby.py          | 15 ++++++--
 pandas/core/window.py           | 14 +++----
 pandas/tests/test_groupby.py    | 42 +++++++++++++++++++++
 pandas/tests/test_window.py     | 66 +++++++++++++++++++++++++++------
 6 files changed, 188 insertions(+), 28 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 86a7be5857035..a5c1e9b1d2057 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -25,10 +25,55 @@ New features
 ~~~~~~~~~~~~
 
+.. _whatsnew_0180.enhancements.moments:
+
+Computation moments are now methods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Computational moments have been refactored to be methods on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions to have an API similar to that of ``.groupby``. See the full documentation :ref:`here <stats.moments>` (:issue:`11603`).
+
+.. ipython:: python
+
+   np.random.seed(1234)
+   df = DataFrame({'A' : range(10), 'B' : np.random.randn(10)})
+   df
+
+Previous Behavior:
+
+.. code-block:: python
+
+   In [8]: pd.rolling_mean(df,window=3)
+   Out[8]:
+        A         B
+   0  NaN       NaN
+   1  NaN       NaN
+   2    1  0.237722
+   3    2 -0.023640
+   4    3  0.133155
+   5    4 -0.048693
+   6    5  0.342054
+   7    6  0.370076
+   8    7  0.079587
+   9    8 -0.954504
+
+New Behavior:
+
+.. ipython:: python
+
+   r = df.rolling(window=3)
+
+   # descriptive repr
+   r
+
+   # operate on this Rolling object itself
+   r.mean()
+
+   # getitem access
+   r['A'].mean()
+
+   # aggregates
+   r.agg({'A' : {'ra' : ['mean','std']},
+          'B' : {'rb' : ['mean','std']}})
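+
+The deprecated top-level functions now simply dispatch to these methods, so the
+two spellings below should be equivalent (an illustrative sketch, not executed
+output from this build):
+
+.. code-block:: python
+
+   pd.rolling_mean(df, window=3)   # deprecated top-level function
+   df.rolling(window=3).mean()     # the equivalent method call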
 
 .. _whatsnew_0180.enhancements.other:
 
@@ -195,6 +240,7 @@ Bug Fixes
 - Bug in ``Period.end_time`` when a multiple of time period is requested (:issue:`11738`)
 - Regression in ``.clip`` with tz-aware datetimes (:issue:`11838`)
 - Bug in ``date_range`` when the boundaries fell on the frequency (:issue:`11804`)
+- Bug in consistency of passing nested dicts to ``.groupby(...).agg(...)`` (:issue:`9052`)
 
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 855d89411b8a7..e5a825599f4aa 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -237,7 +237,7 @@ class SelectionMixin(object):
     sub-classes need to define: obj, exclusions
     """
     _selection = None
-    _internal_names = ['_cache']
+    _internal_names = ['_cache','__setstate__']
     _internal_names_set = set(_internal_names)
     _builtin_table = {
         builtins.sum: np.sum,
@@ -368,6 +368,13 @@ def _aggregate(self, arg, *args, **kwargs):
         """
         provide an implementation for the aggregators
 
+        Parameters
+        ----------
+        arg : string, dict, function
+        *args : args to pass on to the function
+        **kwargs : kwargs to pass on to the function
+
+
         Returns
         -------
         tuple of result, how
@@ -378,6 +385,7 @@ def _aggregate(self, arg, *args, **kwargs):
         None if not required
         """
 
+        _level = kwargs.pop('_level',None)
         if isinstance(arg, compat.string_types):
             return getattr(self, arg)(*args, **kwargs), None
 
@@ -403,24 +411,24 @@ def _aggregate(self, arg, *args, **kwargs):
 
                 for fname, agg_how in compat.iteritems(arg):
                     colg = self._gotitem(self._selection, ndim=1, subset=subset)
-                    result[fname] = colg.aggregate(agg_how)
+                    result[fname] = colg.aggregate(agg_how, _level=None)
                     keys.append(fname)
             else:
                 for col, agg_how in compat.iteritems(arg):
                     colg = self._gotitem(col, ndim=1)
-                    result[col] = colg.aggregate(agg_how)
+                    result[col] = colg.aggregate(agg_how, _level=(_level or 0) + 1)
                     keys.append(col)
 
             if isinstance(list(result.values())[0], com.ABCDataFrame):
                 from pandas.tools.merge import concat
-                result = concat([result[k] for k in keys], keys=keys, axis=1)
+                result = concat([ result[k] for k in keys ], keys=keys, axis=1)
             else:
                 from pandas import DataFrame
                 result = DataFrame(result)
 
             return result, True
         elif hasattr(arg, '__iter__'):
-            return self._aggregate_multiple_funcs(arg), None
+            return self._aggregate_multiple_funcs(arg, _level=_level), None
         else:
             result = None
 
@@ -431,7 +439,7 @@ def _aggregate(self, arg, *args, **kwargs):
         # caller can react
         return result, True
 
-    def _aggregate_multiple_funcs(self, arg):
+    def _aggregate_multiple_funcs(self, arg, _level):
         from pandas.tools.merge import concat
 
         if self.axis != 0:
@@ -447,7 +455,15 @@ def _aggregate_multiple_funcs(self, arg):
                 try:
                     colg = self._gotitem(obj.name, ndim=1, subset=obj)
                     results.append(colg.aggregate(a))
-                    keys.append(getattr(a,'name',a))
+
+                    # find a good name, this could be a function that we don't recognize
+                    name = self._is_cython_func(a) or a
+                    if not isinstance(name, 
compat.string_types): + name = getattr(a,name,a) + if not isinstance(name, compat.string_types): + name = getattr(a,func_name,a) + + keys.append(name) except (TypeError, DataError): pass except SpecificationError: @@ -464,6 +480,9 @@ def _aggregate_multiple_funcs(self, arg): pass except SpecificationError: raise + + if _level: + keys = None result = concat(results, keys=keys, axis=1) return result diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index b156f4afa2711..38c0a0b147618 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2362,6 +2362,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): ------- Series or DataFrame """ + _level = kwargs.pop('_level',None) if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) @@ -2411,11 +2412,18 @@ def _aggregate_multiple_funcs(self, arg): results = {} for name, func in arg: + obj = self if name in results: raise SpecificationError('Function names must be unique, ' 'found multiple named %s' % name) - results[name] = self.aggregate(func) + # reset the cache so that we + # only include the named selection + if name in self._selected_obj: + obj = copy.copy(obj) + obj._reset_cache() + obj._selection = name + results[name] = obj.aggregate(func) return DataFrame(results, columns=columns) @@ -2856,7 +2864,8 @@ def _post_process_cython_aggregate(self, obj): @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): - result, how = self._aggregate(arg, *args, **kwargs) + _level = kwargs.pop('_level',None) + result, how = self._aggregate(arg, _level=_level, *args, **kwargs) if how is None: return result @@ -2870,7 +2879,7 @@ def aggregate(self, arg, *args, **kwargs): # try to treat as if we are passing a list try: assert not args and not kwargs - result = self._aggregate_multiple_funcs([arg]) + result = self._aggregate_multiple_funcs([arg], _level=_level) result.columns = Index(result.columns.levels[0], name=self._selected_obj.columns.name) except: diff --git a/pandas/core/window.py b/pandas/core/window.py index 5467f7f41fb96..208a9d862927a 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -12,6 +12,7 @@ from collections import defaultdict import pandas as pd +from pandas.lib import isscalar from pandas.core.base import PandasObject, SelectionMixin, AbstractMethodError import pandas.core.common as com import pandas.algos as algos @@ -64,11 +65,12 @@ def _gotitem(self, key, ndim, subset=None): # create a new object to prevent aliasing if subset is None: subset = self.obj - new_self = self._shallow_copy(subset) - if ndim==2 and key in subset: - new_self._selection = key - new_self._reset_cache() - return new_self + self = self._shallow_copy(subset) + self._reset_cache() + if subset.ndim==2: + if isscalar(key) and key in subset or com.is_list_like(key): + self._selection = key + return self def __getattr__(self, attr): if attr in self._internal_names_set: @@ -191,8 +193,6 @@ def _convert_freq(self): @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) - if result is None: - import pdb; pdb.set_trace() return result class Window(_Window): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index bd21053f37568..d067b2fd7b969 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1443,6 +1443,48 @@ def test_frame_set_name_single(self): result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) 
        self.assertEqual(result.index.name, 'A')
 
+    def test_aggregate_api_consistency(self):
+        # GH 9052
+        # make sure that the aggregates via dict
+        # are consistent
+
+
+        def compare(result, expected):
+            # if we are passing dicts then ordering is not guaranteed for output columns
+            assert_frame_equal(result.reindex_like(expected), expected)
+
+
+        df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
+                               'foo', 'bar', 'foo', 'foo'],
+                        'B' : ['one', 'one', 'two', 'three',
+                               'two', 'two', 'one', 'three'],
+                        'C' : np.random.randn(8),
+                        'D' : np.random.randn(8)})
+
+        grouped = df.groupby(['A', 'B'])
+        result = grouped[['D','C']].agg({'r':np.sum, 'r2':np.mean})
+        expected = pd.concat([grouped[['D','C']].sum(),
+                              grouped[['D','C']].mean()],
+                             keys=['r','r2'],
+                             axis=1).stack(level=1)
+        compare(result, expected)
+
+        result = grouped[['D','C']].agg({'r': { 'C' : np.sum }, 'r2' : { 'D' : np.mean }})
+        expected = pd.concat([grouped[['C']].sum(),
+                              grouped[['D']].mean()],
+                             axis=1)
+        expected.columns = MultiIndex.from_tuples([('r','C'),('r2','D')])
+        compare(result, expected)
+
+        result = grouped[['D','C']].agg([np.sum, np.mean])
+        expected = pd.concat([grouped['D'].sum(),
+                              grouped['D'].mean(),
+                              grouped['C'].sum(),
+                              grouped['C'].mean()],
+                             axis=1)
+        expected.columns = MultiIndex.from_product([['D','C'],['sum','mean']])
+        compare(result, expected)
+
     def test_multi_iter(self):
         s = Series(np.arange(6))
         k1 = np.array(['a', 'a', 'a', 'b', 'b', 'b'])
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 116236ae7e422..1aa9ccf4b457d 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -103,18 +103,62 @@ def test_agg(self):
                          'B' : range(0,10,2)})
 
         r = df.rolling(window=3)
+        a_mean = r['A'].mean()
+        a_std = r['A'].std()
+        a_sum = r['A'].sum()
+        b_mean = r['B'].mean()
+        b_std = r['B'].std()
+        b_sum = r['B'].sum()
+
+        def compare(result, expected):
+            # if we are using dicts, the ordering is not guaranteed
+            assert_frame_equal(result.reindex_like(expected), expected)
+
+        result = r.aggregate([np.mean, np.std])
+        expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1)
+        expected.columns = pd.MultiIndex.from_product([['A','B'],['mean','std']])
+        assert_frame_equal(result, expected)
+
+        result = r.aggregate({'A': np.mean,
+                              'B': np.std})
+        expected = pd.concat([a_mean,b_std],axis=1)
+        compare(result, expected)
+
+        result = r.aggregate({'A': ['mean','std']})
+        expected = pd.concat([a_mean,a_std],axis=1)
+        expected.columns = pd.MultiIndex.from_product([['A'],['mean','std']])
+        assert_frame_equal(result, expected)
+
+        result = r['A'].aggregate(['mean','sum'])
+        expected = pd.concat([a_mean,a_sum],axis=1)
+        expected.columns = pd.MultiIndex.from_product([['A'],['mean','sum']])
+        assert_frame_equal(result, expected)
 
-        import pdb; pdb.set_trace()
-        agged = r.aggregate([np.mean, np.std])
-        agged = r.aggregate({'A': np.mean,
-                             'B': np.std})
-        agged = r.aggregate({'A': ['mean','sum']})
-        agged = r['A'].aggregate(['mean','sum'])
-        agged = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } })
-        agged = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' },
-                             'B': { 'mean2' : 'mean', 'sum2' : 'sum' }})
-        agged = r.aggregate({'r1': { 'A' : ['mean','sum'] },
-                             'r2' : { 'B' : ['mean','sum'] }})
+        result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } })
+        expected = pd.concat([a_mean,a_sum],axis=1)
+        expected.columns = pd.MultiIndex.from_product([['A'],['mean','sum']])
+        compare(result, expected)
+
+        result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' },
+                              'B': { 'mean2' : 
'mean', 'sum2' : 'sum' }}) + expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','sum'), + ('B','mean2'),('B','sum2')]) + compare(result, expected) + + result = r.aggregate({'r1' : { 'A' : ['mean','sum'] }, + 'r2' : { 'B' : ['mean','sum'] }}) + expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('r1','A','mean'),('r1','A','sum'), + ('r2','B','mean'),('r2','B','sum')]) + compare(result, expected) + + result = r.agg({'A' : {'ra' : ['mean','std']}, + 'B' : {'rb' : ['mean','std']}}) + expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','ra','mean'),('A','ra','std'), + ('B','rb','mean'),('B','rb','std')]) + compare(result, expected) class TestMoments(Base): From 9587d463949af56e312225deb7ceca2635f0650e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 21 Nov 2015 10:38:28 -0500 Subject: [PATCH 3/8] DOC: update docs for back-refs to groupby & window functions --- doc/source/api.rst | 149 +++++----- doc/source/computation.rst | 415 +++++++++++++++++---------- doc/source/whatsnew/v0.18.0.txt | 42 ++- pandas/core/base.py | 16 +- pandas/core/groupby.py | 477 ++++++++++++++++++-------------- pandas/core/window.py | 449 +++++++++++++++++++++--------- pandas/tests/test_window.py | 8 + 7 files changed, 986 insertions(+), 570 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 12dc0b0cb50b9..eb683fff3ac47 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -194,65 +194,6 @@ Top-level evaluation eval -Standard moving window functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: generated/ - - rolling_count - rolling_sum - rolling_mean - rolling_median - rolling_var - rolling_std - rolling_min - rolling_max - rolling_corr - rolling_corr_pairwise - rolling_cov - rolling_skew - rolling_kurt - rolling_apply - rolling_quantile - rolling_window - -.. _api.functions_expanding: - -Standard expanding window functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: generated/ - - expanding_count - expanding_sum - expanding_mean - expanding_median - expanding_var - expanding_std - expanding_min - expanding_max - expanding_corr - expanding_corr_pairwise - expanding_cov - expanding_skew - expanding_kurt - expanding_apply - expanding_quantile - -Exponentially-weighted moving window functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: generated/ - - ewma - ewmstd - ewmvar - ewmcorr - ewmcov - .. _api.series: Series @@ -260,6 +201,9 @@ Series Constructor ~~~~~~~~~~~ + +.. currentmodule:: pandas + .. autosummary:: :toctree: generated/ @@ -344,14 +288,17 @@ Binary operator functions Series.ne Series.eq -Function application, GroupBy -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Function application, GroupBy & Window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ Series.apply Series.map Series.groupby + Series.rolling + Series.expanding + Series.ewm .. _api.series.stats: @@ -846,14 +793,17 @@ Binary operator functions DataFrame.combine DataFrame.combine_first -Function application, GroupBy -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Function application, GroupBy & Window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ DataFrame.apply DataFrame.applymap DataFrame.groupby + DataFrame.rolling + DataFrame.expanding + DataFrame.ewm .. 
_api.dataframe.stats:
@@ -1551,6 +1501,79 @@ Conversion
    TimedeltaIndex.to_series
    TimedeltaIndex.round
 
+Window
+------
+.. currentmodule:: pandas.core.window
+
+Rolling objects are returned by rolling calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.
+Expanding objects are returned by rolling calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc.
+EWM objects are returned by rolling calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc.
+
+Standard moving window functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. currentmodule:: pandas.core.window
+
+.. autosummary::
+   :toctree: generated/
+
+   Rolling.count
+   Rolling.sum
+   Rolling.mean
+   Rolling.median
+   Rolling.var
+   Rolling.std
+   Rolling.min
+   Rolling.max
+   Rolling.corr
+   Rolling.corr_pairwise
+   Rolling.cov
+   Rolling.skew
+   Rolling.kurt
+   Rolling.apply
+   Rolling.quantile
+   Rolling.window
+
+.. _api.functions_expanding:
+
+Standard expanding window functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. currentmodule:: pandas.core.window
+
+.. autosummary::
+   :toctree: generated/
+
+   Expanding.count
+   Expanding.sum
+   Expanding.mean
+   Expanding.median
+   Expanding.var
+   Expanding.std
+   Expanding.min
+   Expanding.max
+   Expanding.corr
+   Expanding.corr_pairwise
+   Expanding.cov
+   Expanding.skew
+   Expanding.kurt
+   Expanding.apply
+   Expanding.quantile
+
+Exponentially-weighted moving window functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. currentmodule:: pandas.core.window
+
+.. autosummary::
+   :toctree: generated/
+
+   EWM.mean
+   EWM.std
+   EWM.var
+   EWM.corr
+   EWM.cov
+
 GroupBy
 -------
 .. currentmodule:: pandas.core.groupby
diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index b2fa7f6749379..bf593acd73537 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -21,7 +21,7 @@ Computational tools
 ===================
 
-Statistical functions
+Statistical Functions
 ---------------------
 
 .. _computation.pct_change:
@@ -196,90 +196,118 @@ parameter:
 - ``max`` : highest rank in the group
 - ``first`` : ranks assigned in the order they appear in the array
 
+.. _stats.moments:
 
-.. currentmodule:: pandas
-
-.. currentmodule:: pandas.stats.api
+Window Functions
+----------------
 
-.. _stats.moments:
+.. warning::
 
-Moving (rolling) statistics / moments
--------------------------------------
+   Prior to version 0.18.0, these were module level functions that have been deprecated.
+   You can see the previous documentation
+   `here `__
 
-For working with time series data, a number of functions are provided for
-computing common *moving* or *rolling* statistics. Among these are count, sum,
+For working with data, a number of window functions are provided for
+computing common *window* or *rolling* statistics. Among these are count, sum,
 mean, median, correlation, variance, covariance, standard deviation, skewness,
-and kurtosis. All of these methods are in the :mod:`pandas` namespace, but
-otherwise they can be found in :mod:`pandas.stats.moments`.
+and kurtosis.
 
-.. currentmodule:: pandas
+.. currentmodule:: pandas.core.window
 
-.. csv-table::
-   :header: "Function", "Description"
-   :widths: 20, 80
+.. note::
 
-   :func:`rolling_count`, Number of non-null observations
-   :func:`rolling_sum`, Sum of values
-   :func:`rolling_mean`, Mean of values
-   :func:`rolling_median`, Arithmetic median of values
-   :func:`rolling_min`, Minimum
-   :func:`rolling_max`, Maximum
-   :func:`rolling_std`, Unbiased standard deviation
-   :func:`rolling_var`, Unbiased variance
-   :func:`rolling_skew`, Unbiased skewness (3rd moment)
-   :func:`rolling_kurt`, Unbiased kurtosis (4th moment)
-   :func:`rolling_quantile`, Sample quantile (value at %)
-   :func:`rolling_apply`, Generic apply
-   :func:`rolling_cov`, Unbiased covariance (binary)
-   :func:`rolling_corr`, Correlation (binary)
-   :func:`rolling_window`, Moving window function
-
-Generally these methods all have the same interface. The binary operators
-(e.g. :func:`rolling_corr`) take two Series or DataFrames. Otherwise, they all
+   The API for window statistics is quite similar to the way one works with ``GroupBy`` objects; see the documentation :ref:`here `
+We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding
+objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.EWM`.
+
+.. ipython:: python
+
+   s = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
+   s = s.cumsum()
+   s
+
+These are created from methods on ``Series`` and ``DataFrames``.
+
+.. ipython:: python
+
+   r = s.rolling(window=60)
+   r
+
+Generally these methods all have the same interface. They all
 accept the following arguments:
 
-  - ``window``: size of moving window
-  - ``min_periods``: threshold of non-null data points to require (otherwise
-    result is NA)
-  - ``freq``: optionally specify a :ref:`frequency string `
    or :ref:`DateOffset ` to pre-conform the data to.
-    Note that prior to pandas v0.8.0, a keyword argument ``time_rule`` was used
-    instead of ``freq`` that referred to the legacy time rule constants
-  - ``how``: optionally specify method for down or re-sampling. Default is
-    is min for :func:`rolling_min`, max for :func:`rolling_max`, median for
-    :func:`rolling_median`, and mean for all other rolling functions. See
-    :meth:`DataFrame.resample`'s how argument for more information.
+- ``window``: size of moving window
+- ``min_periods``: threshold of non-null data points to require (otherwise
+  result is NA)
+- ``freq``: optionally specify a :ref:`frequency string `
+  or :ref:`DateOffset ` to pre-conform the data to.
+- ``how``: optionally specify method for down or re-sampling. Default is
+  ``min`` for :meth:`~Rolling.min`, ``max`` for :meth:`~Rolling.max`, ``median`` for
+  :meth:`~Rolling.median`, and ``mean`` for all other rolling functions. See
+  :meth:`DataFrame.resample`'s how argument for more information.
 
-These functions can be applied to ndarrays or Series objects:
+We can then call functions on these ``rolling`` objects. These return like-indexed objects:
 
 .. ipython:: python
 
-    ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
-    ts = ts.cumsum()
+   r.mean()
 
-    ts.plot(style='k--')
+.. ipython:: python
 
-    @savefig rolling_mean_ex.png
-    pd.rolling_mean(ts, 60).plot(style='k')
+   s.plot(style='k--')
 
-They can also be applied to DataFrame objects. This is really just syntactic
-sugar for applying the moving window operator to all of the DataFrame's columns:
+   @savefig rolling_mean_ex.png
+   r.mean().plot(style='k')
 
 .. ipython:: python
    :suppress:
 
    plt.close('all')
 
+They can also be applied to DataFrame objects. This is really just syntactic
+sugar for applying the moving window operator to all of the DataFrame's columns:
+
 .. ipython:: python
 
-   df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
-                     columns=['A', 'B', 'C', 'D'])
+   df = pd.DataFrame(np.random.randn(1000, 4), index=s.index,
+                     columns=['A', 'B', 'C', 'D'])
    df = df.cumsum()
 
    @savefig rolling_mean_frame.png
-   pd.rolling_sum(df, 60).plot(subplots=True)
+   df.rolling(window=60).sum().plot(subplots=True)
+
+.. _stats.summary:
+
+Method Summary
+~~~~~~~~~~~~~~
 
-The :func:`rolling_apply` function takes an extra ``func`` argument and performs
+We provide a number of the common statistical functions:
+
+.. currentmodule:: pandas.core.window
+
+.. csv-table::
+   :header: "Method", "Description"
+   :widths: 20, 80
+
+   :meth:`~Rolling.count`, Number of non-null observations
+   :meth:`~Rolling.sum`, Sum of values
+   :meth:`~Rolling.mean`, Mean of values
+   :meth:`~Rolling.median`, Arithmetic median of values
+   :meth:`~Rolling.min`, Minimum
+   :meth:`~Rolling.max`, Maximum
+   :meth:`~Rolling.std`, Unbiased standard deviation
+   :meth:`~Rolling.var`, Unbiased variance
+   :meth:`~Rolling.skew`, Unbiased skewness (3rd moment)
+   :meth:`~Rolling.kurt`, Unbiased kurtosis (4th moment)
+   :meth:`~Rolling.quantile`, Sample quantile (value at %)
+   :meth:`~Rolling.apply`, Generic apply
+   :meth:`~Rolling.cov`, Unbiased covariance (binary)
+   :meth:`~Rolling.corr`, Correlation (binary)
+   :meth:`~Window.mean`, Moving window mean function
+   :meth:`~Window.sum`, Moving window sum function
+
+The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs
 generic rolling computations. The ``func`` argument should be a single function
 that produces a single value from an ndarray input. Suppose we wanted to
 compute the mean absolute deviation on a rolling basis:
@@ -288,46 +316,50 @@ compute the mean absolute deviation on a rolling basis:
    mad = lambda x: np.fabs(x - x.mean()).mean()
 
    @savefig rolling_apply_ex.png
-   pd.rolling_apply(ts, 60, mad).plot(style='k')
+   s.rolling(window=60).apply(mad).plot(style='k')
+
+.. _stats.rolling_window:
 
-The :func:`rolling_window` function performs a generic rolling window computation
+Rolling Windows
+~~~~~~~~~~~~~~~
+
+The :meth:`~Window.mean` and :meth:`~Window.sum` functions perform a generic rolling window computation
 on the input data. The weights used in the window are specified by the ``win_type``
 keyword. The list of recognized types are:
 
-  - ``boxcar``
-  - ``triang``
-  - ``blackman``
-  - ``hamming``
-  - ``bartlett``
-  - ``parzen``
-  - ``bohman``
-  - ``blackmanharris``
-  - ``nuttall``
-  - ``barthann``
-  - ``kaiser`` (needs beta)
-  - ``gaussian`` (needs std)
-  - ``general_gaussian`` (needs power, width)
-  - ``slepian`` (needs width).
+- ``boxcar``
+- ``triang``
+- ``blackman``
+- ``hamming``
+- ``bartlett``
+- ``parzen``
+- ``bohman``
+- ``blackmanharris``
+- ``nuttall``
+- ``barthann``
+- ``kaiser`` (needs beta)
+- ``gaussian`` (needs std)
+- ``general_gaussian`` (needs power, width)
+- ``slepian`` (needs width).
 
 .. ipython:: python
 
    ser = pd.Series(np.random.randn(10), index=pd.date_range('1/1/2000', periods=10))
 
-   pd.rolling_window(ser, 5, 'triang')
+   ser.rolling(window=5, win_type='triang').mean()
 
-Note that the ``boxcar`` window is equivalent to :func:`rolling_mean`.
+Note that the ``boxcar`` window is equivalent to :meth:`~Rolling.mean`.
 
 .. 
ipython:: python - pd.rolling_window(ser, 5, 'boxcar') - - pd.rolling_mean(ser, 5) + ser.rolling(window=5, win_type='boxcar').mean() + ser.rolling(window=5).mean() For some windowing functions, additional parameters must be specified: .. ipython:: python - pd.rolling_window(ser, 5, 'gaussian', std=0.1) + ser.rolling(window=5, win_type='gaussian').mean(std=0.1) By default the labels are set to the right edge of the window, but a ``center`` keyword is available so the labels can be set at the center. @@ -335,32 +367,32 @@ This keyword is available in other rolling functions as well. .. ipython:: python - pd.rolling_window(ser, 5, 'boxcar') + ser.rolling(window=5, win_type='boxcar').mean() - pd.rolling_window(ser, 5, 'boxcar', center=True) + ser.rolling(window=5, win_type='boxcar', center=True).mean() - pd.rolling_mean(ser, 5, center=True) + ser.rolling(window=5, center=True).mean() .. _stats.moments.normalization: .. note:: - In rolling sum mode (``mean=False``) there is no normalization done to the + For ``.sum()`` with a ``win_type``, there is no normalization done to the weights. Passing custom weights of ``[1, 1, 1]`` will yield a different result than passing weights of ``[2, 2, 2]``, for example. When passing a ``win_type`` instead of explicitly specifying the weights, the weights are already normalized so that the largest weight is 1. - In contrast, the nature of the rolling mean calculation (``mean=True``)is + In contrast, the nature of the ``.mean()`` calculation is such that the weights are normalized with respect to each other. Weights of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result. .. _stats.moments.binary: -Binary rolling moments -~~~~~~~~~~~~~~~~~~~~~~ +Binary Window Functions +~~~~~~~~~~~~~~~~~~~~~~~ -:func:`rolling_cov` and :func:`rolling_corr` can compute moving window statistics about +:meth:`~Rolling.cov` and :meth:`~Rolling.corr` can compute moving window statistics about two ``Series`` or any combination of ``DataFrame/Series`` or ``DataFrame/DataFrame``. Here is the behavior in each case: @@ -378,7 +410,7 @@ For example: .. ipython:: python df2 = df[:20] - pd.rolling_corr(df2, df2['B'], window=5) + df2.rolling(window=5).corr(df2['B']) .. _stats.moments.corr_pairwise: @@ -403,23 +435,16 @@ can even be omitted: .. ipython:: python - covs = pd.rolling_cov(df[['B','C','D']], df[['A','B','C']], 50, pairwise=True) + covs = df[['B','C','D']].rolling(window=50).cov(df[['A','B','C']], pairwise=True) covs[df.index[-50]] .. ipython:: python - correls = pd.rolling_corr(df, 50) + correls = df.rolling(window=50).corr() correls[df.index[-50]] -.. note:: - - Prior to version 0.14 this was available through ``rolling_corr_pairwise`` - which is now simply syntactic sugar for calling ``rolling_corr(..., - pairwise=True)`` and deprecated. This is likely to be removed in a future - release. - You can efficiently retrieve the time series of correlations between two -columns using ``ix`` indexing: +columns using ``.loc`` indexing: .. ipython:: python :suppress: @@ -429,62 +454,153 @@ columns using ``ix`` indexing: .. ipython:: python @savefig rolling_corr_pairwise_ex.png - correls.ix[:, 'A', 'C'].plot() + correls.loc[:, 'A', 'C'].plot() + +.. _stats.aggregate: + +Aggregation +----------- + +Once the ``Rolling``, ``Expanding`` or ``EWM`` objects have been created, several methods are available to +perform multiple computations on the data. This is very similar to a ``.groupby.agg`` seen :ref:`here `. 
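A minimal, self-contained sketch of the deferred aggregation API documented above (the DataFrame, its column names, and the window size are illustrative assumptions, not taken from this patch):

    import numpy as np
    import pandas as pd

    # any numeric DataFrame works; the shape and labels here are arbitrary
    df = pd.DataFrame(np.random.randn(100, 2), columns=['A', 'B'])

    r = df.rolling(window=10)            # deferred Rolling object; computes nothing yet
    total = r.aggregate(np.sum)          # one function applied to every column
    stats = r['A'].agg(['mean', 'std'])  # several functions on a selected column

The deferral is the design point: selecting columns and choosing functions are separate, cheap steps, and computation only happens when an aggregation method is finally invoked.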
+
+An obvious one is aggregation via the ``aggregate`` or equivalently ``agg`` method:
+
+.. ipython:: python
+
+   dfa = pd.DataFrame(np.random.randn(1000, 3), index=s.index,
+                      columns=['A', 'B', 'C'])
+   r = dfa.rolling(window=60,min_periods=1)
+   r
+
+We can aggregate by passing a function to the entire DataFrame, or select a Series (or multiple Series) via standard getitem.
+
+.. ipython:: python
+
+   r.aggregate(np.sum)
+
+   r['A'].aggregate(np.sum)
+
+   r['A','B'].aggregate(np.sum)
+
+As you can see, the result of the aggregation will have the selection columns, or all
+columns if none are selected.
+
+.. _stats.aggregate.multifunc:
+
+Applying multiple functions at once
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+With windowed Series you can also pass a list or dict of functions to do
+aggregation with, outputting a DataFrame:
+
+.. ipython:: python
+
+   r['A'].agg([np.sum, np.mean, np.std])
+
+If a dict is passed, the keys will be used to name the columns. Otherwise the
+function's name (stored in the function object) will be used.
+
+.. ipython:: python
+
+   r['A'].agg({'result1' : np.sum,
+               'result2' : np.mean})
+
+On a windowed DataFrame, you can pass a list of functions to apply to each
+column, which produces an aggregated result with a hierarchical index:
+
+.. ipython:: python
+
+   r.agg([np.sum, np.mean])
+
+Passing a dict of functions has different behavior by default, see the next
+section.
+
+Applying different functions to DataFrame columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By passing a dict to ``aggregate`` you can apply a different aggregation to the
+columns of a DataFrame:
+
+.. ipython:: python
+
+   r.agg({'A' : np.sum,
+          'B' : lambda x: np.std(x, ddof=1)})
+
+The function names can also be strings. In order for a string to be valid it
+must be implemented on the windowed object:
+
+.. ipython:: python
+
+   r.agg({'A' : 'sum', 'B' : 'std'})
+
+Furthermore you can pass a nested dict to indicate different aggregations on different columns.
+
+.. ipython:: python
+
+   r.agg({'A' : {'ra' : 'sum'}, 'B' : {'rb' : 'std' }})
+
 
 .. _stats.moments.expanding:
 
-Expanding window moment functions
----------------------------------
+Expanding Windows
+-----------------
+
 A common alternative to rolling statistics is to use an *expanding* window,
 which yields the value of the statistic with all the data available up to that
-point in time. As these calculations are a special case of rolling statistics,
+point in time.
+
+These follow a similar interface to ``.rolling``, with the ``.expanding`` method
+returning an :class:`~pandas.core.window.Expanding` object.
+
+As these calculations are a special case of rolling statistics,
 they are implemented in pandas such that the following two calls are equivalent:
 
 .. ipython:: python
 
-   pd.rolling_mean(df, window=len(df), min_periods=1)[:5]
+   df.rolling(window=len(df), min_periods=1).mean()[:5]
 
-   pd.expanding_mean(df)[:5]
+   df.expanding(min_periods=1).mean()[:5]
 
-Like the ``rolling_`` functions, the following methods are included in the
-``pandas`` namespace or can be located in ``pandas.stats.moments``.
+These have a similar set of methods to ``.rolling`` methods.
 
-.. currentmodule:: pandas
+Method Summary
+~~~~~~~~~~~~~~
+
+.. currentmodule:: pandas.core.window
 
 .. csv-table::
    :header: "Function", "Description"
    :widths: 20, 80
 
-   :func:`expanding_count`, Number of non-null observations
-   :func:`expanding_sum`, Sum of values
-   :func:`expanding_mean`, Mean of values
-   :func:`expanding_median`, Arithmetic median of values
-   :func:`expanding_min`, Minimum
-   :func:`expanding_max`, Maximum
-   :func:`expanding_std`, Unbiased standard deviation
-   :func:`expanding_var`, Unbiased variance
-   :func:`expanding_skew`, Unbiased skewness (3rd moment)
-   :func:`expanding_kurt`, Unbiased kurtosis (4th moment)
-   :func:`expanding_quantile`, Sample quantile (value at %)
-   :func:`expanding_apply`, Generic apply
-   :func:`expanding_cov`, Unbiased covariance (binary)
-   :func:`expanding_corr`, Correlation (binary)
+   :meth:`~Expanding.count`, Number of non-null observations
+   :meth:`~Expanding.sum`, Sum of values
+   :meth:`~Expanding.mean`, Mean of values
+   :meth:`~Expanding.median`, Arithmetic median of values
+   :meth:`~Expanding.min`, Minimum
+   :meth:`~Expanding.max`, Maximum
+   :meth:`~Expanding.std`, Unbiased standard deviation
+   :meth:`~Expanding.var`, Unbiased variance
+   :meth:`~Expanding.skew`, Unbiased skewness (3rd moment)
+   :meth:`~Expanding.kurt`, Unbiased kurtosis (4th moment)
+   :meth:`~Expanding.quantile`, Sample quantile (value at %)
+   :meth:`~Expanding.apply`, Generic apply
+   :meth:`~Expanding.cov`, Unbiased covariance (binary)
+   :meth:`~Expanding.corr`, Correlation (binary)
 
 Aside from not having a ``window`` parameter, these functions have the same
-interfaces as their ``rolling_`` counterpart. Like above, the parameters they
+interfaces as their ``.rolling`` counterparts. Like above, the parameters they
 all accept are:
 
-  - ``min_periods``: threshold of non-null data points to require. Defaults to
-    minimum needed to compute statistic. No ``NaNs`` will be output once
-    ``min_periods`` non-null data points have been seen.
-  - ``freq``: optionally specify a :ref:`frequency string `
-    or :ref:`DateOffset ` to pre-conform the data to.
-    Note that prior to pandas v0.8.0, a keyword argument ``time_rule`` was used
-    instead of ``freq`` that referred to the legacy time rule constants
+- ``min_periods``: threshold of non-null data points to require. Defaults to
+  minimum needed to compute statistic. No ``NaNs`` will be output once
+  ``min_periods`` non-null data points have been seen.
+- ``freq``: optionally specify a :ref:`frequency string `
+  or :ref:`DateOffset ` to pre-conform the data to.
 
 .. note::
 
-   The output of the ``rolling_`` and ``expanding_`` functions do not return a
+   The ``.rolling`` and ``.expanding`` methods do not return a
    ``NaN`` if there are at least ``min_periods`` non-null values in the current
    window. This differs from ``cumsum``, ``cumprod``, ``cummax``, and
    ``cummin``, which return ``NaN`` in the output wherever a ``NaN`` is
@@ -493,7 +609,7 @@ all accept are:
 An expanding window statistic will be more stable (and less responsive) than
 its rolling window counterpart as the increasing window size decreases the
 relative impact of an individual data point. As an example, here is the
-:func:`expanding_mean` output for the previous time series dataset:
+:meth:`~Expanding.mean` output for the previous time series dataset:
 
 .. ipython:: python
    :suppress:
 
@@ -502,31 +618,34 @@ relative impact of an individual data point. As an example, here is the
 
 .. ipython:: python
 
-   ts.plot(style='k--')
+   s.plot(style='k--')
 
    @savefig expanding_mean_frame.png
-   pd.expanding_mean(ts).plot(style='k')
+   s.expanding().mean().plot(style='k')
+
 .. _stats.moments.exponentially_weighted:
 
-Exponentially weighted moment functions
----------------------------------------
+Exponentially Weighted Windows
+------------------------------
 
 A related set of functions are exponentially weighted versions of several of
-the above statistics. A number of expanding EW (exponentially weighted)
-functions are provided:
+the above statistics. A similar interface to ``.rolling`` and ``.expanding`` is accessed
+through the ``.ewm`` method to receive a :class:`~pandas.core.window.EWM` object.
+A number of expanding EW (exponentially weighted)
+methods are provided:
 
-.. currentmodule:: pandas
+.. currentmodule:: pandas.core.window
 
 .. csv-table::
    :header: "Function", "Description"
    :widths: 20, 80
 
-   :func:`ewma`, EW moving average
-   :func:`ewmvar`, EW moving variance
-   :func:`ewmstd`, EW moving standard deviation
-   :func:`ewmcorr`, EW moving correlation
-   :func:`ewmcov`, EW moving covariance
 
+   :meth:`~EWM.mean`, EW moving average
+   :meth:`~EWM.var`, EW moving variance
+   :meth:`~EWM.std`, EW moving standard deviation
+   :meth:`~EWM.corr`, EW moving correlation
+   :meth:`~EWM.cov`, EW moving covariance
 
 In general, a weighted moving average is calculated as
 
@@ -621,20 +740,20 @@ Here is an example for a univariate time series:
 
 .. ipython:: python
 
-   ts.plot(style='k--')
+   s.plot(style='k--')
 
    @savefig ewma_ex.png
-   pd.ewma(ts, span=20).plot(style='k')
+   s.ewm(span=20).mean().plot(style='k')
 
-All the EW functions have a ``min_periods`` argument, which has the same
-meaning it does for all the ``expanding_`` and ``rolling_`` functions:
+EWM has a ``min_periods`` argument, which has the same
+meaning it does for all the ``.expanding`` and ``.rolling`` methods:
 no output values will be set until at least ``min_periods`` non-null values
 are encountered in the (expanding) window.
 (This is a change from versions prior to 0.15.0, in which the ``min_periods``
 argument affected only the ``min_periods`` consecutive entries starting at the
 first non-null value.)
 
-All the EW functions also have an ``ignore_na`` argument, which deterines how
+EWM also has an ``ignore_na`` argument, which determines how
 intermediate null values affect the calculation of the weights.
 When ``ignore_na=False`` (the default), weights are calculated based on absolute
 positions, so that intermediate null values affect the result.
@@ -653,7 +772,7 @@ Whereas if ``ignore_na=True``, the weighted average would be calculated as
 
 	\frac{(1-\alpha) \cdot 3 + 1 \cdot 5}{(1-\alpha) + 1}.
 
-The :func:`ewmvar`, :func:`ewmstd`, and :func:`ewmcov` functions have a ``bias`` argument,
+The :meth:`~EWM.var`, :meth:`~EWM.std`, and :meth:`~EWM.cov` functions have a ``bias`` argument,
 specifying whether the result should contain biased or unbiased statistics.
 For example, if ``bias=True``, ``ewmvar(x)`` is calculated as
 ``ewmvar(x) = ewma(x**2) - ewma(x)**2``;
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index a5c1e9b1d2057..b8fc0fff7d984 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -27,10 +27,10 @@ New features
 
 .. _whatsnew_0180.enhancements.moments:
 
-Computation moments are now methods
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Window functions are now methods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Computational moments have been refactored to be method on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions, to have a similar API to that of ``.groupby``. See the full documentation :ref:`here ` (:issue:`11603`)
+Window functions have been refactored to be methods on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions to have a similar API to that of ``.groupby``. See the full documentation :ref:`here ` (:issue:`11603`)
 
 .. ipython:: python
 
@@ -56,24 +56,36 @@ Previous Behavior:
    8  7  0.079587
    9  8 -0.954504
 
- New Behavior:
+New Behavior:
 
-   .. ipython:: python
+.. ipython:: python
+
+   r = df.rolling(window=3)
 
-      r = df.rolling(window=3)
+These show a descriptive repr, with tab-completion of available methods:
+
+.. ipython:: python
 
-      # descriptive repr
-      r
+   r
+
+The methods operate on this ``Rolling`` object itself:
+
+.. ipython:: python
 
-      # operate on this Rolling object itself
-      r.mean()
+   r.mean()
 
-      # getitem access
-      r['A'].mean()
+They provide getitem accessors:
+
+.. ipython:: python
+
+   r['A'].mean()
+
+And multiple aggregations:
+
+.. ipython:: python
 
-      # aggregates
-      r.agg({'A' : {'ra' : ['mean','std']},
-             'B' : {'rb' : ['mean','std']}})
+   r.agg({'A' : {'ra' : ['mean','std']},
+          'B' : {'rb' : ['mean','std']}})
 
 .. _whatsnew_0180.enhancements.other:
diff --git a/pandas/core/base.py b/pandas/core/base.py
index e5a825599f4aa..fafd6b7821dfe 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -7,7 +7,8 @@
 from pandas.core import common as com
 import pandas.core.nanops as nanops
 import pandas.lib as lib
-from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
+from pandas.util.decorators import (Appender, Substitution,
+                                    cache_readonly, deprecate_kwarg)
 from pandas.core.common import AbstractMethodError
 
 _shared_docs = dict()
@@ -356,14 +357,19 @@ def _gotitem(self, key, ndim, subset=None):
 aggregated : DataFrame
 """
 
-    @Appender(_agg_doc)
-    def agg(self, func, *args, **kwargs):
-        return self.aggregate(func, *args, **kwargs)
+    _see_also_template = """
+
+See also
+--------
+:func:`pandas.Series.%(name)s`
+:func:`pandas.DataFrame.%(name)s`
+"""
 
-    @Appender(_agg_doc)
     def aggregate(self, func, *args, **kwargs):
         raise AbstractMethodError(self)
 
+    agg = aggregate
+
     def _aggregate(self, arg, *args, **kwargs):
         """
         provide an implementation for the aggregators
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 38c0a0b147618..61fcf55af6d6a 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -20,7 +20,7 @@
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
 from pandas.core.panel import Panel
-from pandas.util.decorators import (cache_readonly, Appender, make_signature,
+from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature,
                                     deprecate_kwarg)
 import pandas.core.algorithms as algos
 import pandas.core.common as com
@@ -37,6 +37,19 @@
 import pandas.algos as _algos
 import pandas.hashtable as _hash
 
+_doc_template = """
+
+Returns
+-------
+same type as input
+
+See also
+--------
+:func:`pandas.Series.%(name)s`
+:func:`pandas.DataFrame.%(name)s`
+:func:`pandas.Panel.%(name)s`
+"""
+
 # special case to prevent duplicate plots when catching exceptions when
 # forwarding methods from NDFrames
 _plotting_methods = frozenset(['plot', 'boxplot', 'hist'])
@@ -71,6 +84,12 @@
 
 def _groupby_function(name, alias, npfunc, numeric_only=True,
                       _convert=False):
+
+    _local_template = "Compute %(f)s of group values"
+
+    @Substitution(name='groupby',f=name)
+    @Appender(_doc_template)
+    @Appender(_local_template)
     def f(self):
         self._set_selection_from_grouper()
         try:
@@ 
-83,8 +102,7 @@ def f(self): result = result._convert(datetime=True) return result - f.__doc__ = "Compute %s of group values" % name - f.__name__ = name + f.__name__ = name return f @@ -608,50 +626,46 @@ def __iter__(self): """ return self.grouper.get_iterator(self.obj, axis=self.axis) + @Substitution(name='groupby') + @Appender(_doc_template) def apply(self, func, *args, **kwargs): - """ - Apply function and combine results together in an intelligent way. The - split-apply-combine combination rules attempt to be as common sense - based as possible. For example: - - case 1: - group DataFrame - apply aggregation function (f(chunk) -> Series) - yield DataFrame, with group axis having group labels - - case 2: - group DataFrame - apply transform function ((f(chunk) -> DataFrame with same indexes) - yield DataFrame with resulting chunks glued together - - case 3: - group Series - apply function with f(chunk) -> DataFrame - yield DataFrame with result of chunks glued together - - Parameters - ---------- - func : function - - Notes - ----- - See online documentation for full exposition on how to use apply. - - In the current implementation apply calls func twice on the - first group to decide whether it can take a fast or slow code - path. This can lead to unexpected behavior if func has - side-effects, as they will take effect twice for the first - group. - - - See also - -------- - aggregate, transform - - Returns - ------- - applied : type depending on grouped object and function - """ + """Apply function and combine results together in an intelligent way. The +split-apply-combine combination rules attempt to be as common sense +based as possible. For example: + +case 1: +group DataFrame +apply aggregation function (f(chunk) -> Series) +yield DataFrame, with group axis having group labels + +case 2: +group DataFrame +apply transform function ((f(chunk) -> DataFrame with same indexes) +yield DataFrame with resulting chunks glued together + +case 3: +group Series +apply function with f(chunk) -> DataFrame +yield DataFrame with result of chunks glued together + +Parameters +---------- +func : function + +Notes +----- +See online documentation for full exposition on how to use apply. + +In the current implementation apply calls func twice on the +first group to decide whether it can take a fast or slow code +path. This can lead to unexpected behavior if func has +side-effects, as they will take effect twice for the first +group. 
+
+
+See also
+--------
+aggregate, transform"""
 
         func = self._is_builtin_func(func)
 
         @wraps(func)
@@ -685,15 +699,18 @@ def irow(self, i):
                       FutureWarning, stacklevel=2)
         return self.nth(i)
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def count(self):
-        """ Compute count of group, excluding missing values """
+        """Compute count of group, excluding missing values"""
 
         # defined here for API doc
         raise NotImplementedError
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def mean(self):
-        """
-        Compute mean of groups, excluding missing values
+        """Compute mean of groups, excluding missing values
 
         For multiple groupings, the result index will be a MultiIndex
         """
@@ -706,9 +723,10 @@ def mean(self):
             f = lambda x: x.mean(axis=self.axis)
             return self._python_agg_general(f)
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def median(self):
-        """
-        Compute median of groups, excluding missing values
+        """Compute median of groups, excluding missing values
 
         For multiple groupings, the result index will be a MultiIndex
         """
@@ -725,21 +743,33 @@ def f(x):
                 return x.median(axis=self.axis)
             return self._python_agg_general(f)
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def std(self, ddof=1):
-        """
-        Compute standard deviation of groups, excluding missing values
+        """Compute standard deviation of groups, excluding missing values
+
+For multiple groupings, the result index will be a MultiIndex
+
+Parameters
+----------
+ddof : integer, default 1
+degrees of freedom"""
 
-        For multiple groupings, the result index will be a MultiIndex
-        """
         # todo, implement at cython level?
         return np.sqrt(self.var(ddof=ddof))
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def var(self, ddof=1):
-        """
-        Compute variance of groups, excluding missing values
+        """Compute variance of groups, excluding missing values
+
+For multiple groupings, the result index will be a MultiIndex
+
+Parameters
+----------
+ddof : integer, default 1
+degrees of freedom"""
 
-        For multiple groupings, the result index will be a MultiIndex
-        """
         if ddof == 1:
             return self._cython_agg_general('var')
         else:
@@ -747,19 +777,24 @@ def var(self, ddof=1):
             f = lambda x: x.var(ddof=ddof)
             return self._python_agg_general(f)
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def sem(self, ddof=1):
-        """
-        Compute standard error of the mean of groups, excluding missing values
+        """Compute standard error of the mean of groups, excluding missing values
+
+For multiple groupings, the result index will be a MultiIndex
+
+Parameters
+----------
+ddof : integer, default 1
+degrees of freedom"""
 
-        For multiple groupings, the result index will be a MultiIndex
-        """
         return self.std(ddof=ddof)/np.sqrt(self.count())
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def size(self):
-        """
-        Compute group sizes
-
-        """
+        """Compute group sizes"""
         return self.grouper.size()
 
     sum = _groupby_function('sum', 'add', np.sum)
@@ -771,58 +806,59 @@ def size(self):
     last = _groupby_function('last', 'last', _last_compat, numeric_only=False,
                              _convert=True)
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def ohlc(self):
-        """
-        Compute sum of values, excluding missing values
-        For multiple groupings, the result index will be a MultiIndex
-        """
+        """Compute open, high, low and close values within each group, excluding missing values
+For multiple groupings, the result index will be a MultiIndex"""
+
         return self._apply_to_column_groupbys(
             lambda x: x._cython_agg_general('ohlc'))
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
     def nth(self, n, 
dropna=None): - """ - Take the nth row from each group if n is an int, or a subset of rows - if n is a list of ints. - - If dropna, will take the nth non-null row, dropna is either - Truthy (if a Series) or 'all', 'any' (if a DataFrame); this is equivalent - to calling dropna(how=dropna) before the groupby. - - Parameters - ---------- - n : int or list of ints - a single nth value for the row or a list of nth values - dropna : None or str, optional - apply the specified dropna operation before counting which row is - the nth row. Needs to be None, 'any' or 'all' - - Examples - -------- - >>> df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - >>> g = df.groupby('A') - >>> g.nth(0) - A B - 0 1 NaN - 2 5 6 - >>> g.nth(1) - A B - 1 1 4 - >>> g.nth(-1) - A B - 1 1 4 - 2 5 6 - >>> g.nth(0, dropna='any') - B + """Take the nth row from each group if n is an int, or a subset of rows +if n is a list of ints. + +If dropna, will take the nth non-null row, dropna is either +Truthy (if a Series) or 'all', 'any' (if a DataFrame); this is equivalent +to calling dropna(how=dropna) before the groupby. + +Parameters +---------- +n : int or list of ints + a single nth value for the row or a list of nth values +dropna : None or str, optional + apply the specified dropna operation before counting which row is + the nth row. Needs to be None, 'any' or 'all' + +Examples +-------- +>>> df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) +>>> g = df.groupby('A') +>>> g.nth(0) + A B + 0 1 NaN + 2 5 6 +>>> g.nth(1) + A B + 1 1 4 +>>> g.nth(-1) + A B + 1 1 4 + 2 5 6 +>>> g.nth(0, dropna='any') + B + A + 1 4 + 5 6 +>>> g.nth(1, dropna='any') # NaNs denote group exhausted when using dropna + B A - 1 4 - 5 6 - >>> g.nth(1, dropna='any') # NaNs denote group exhausted when using dropna - B - A - 1 NaN - 5 NaN + 1 NaN + 5 NaN""" - """ if isinstance(n, int): nth_values = [n] elif isinstance(n, (set, list, tuple)): @@ -914,80 +950,85 @@ def nth(self, n, dropna=None): return result + @Substitution(name='groupby') + @Appender(_doc_template) def cumcount(self, ascending=True): - """ - Number each item in each group from 0 to the length of that group - 1. + """Number each item in each group from 0 to the length of that group - 1. + +Essentially this is equivalent to + +>>> self.apply(lambda x: Series(np.arange(len(x)), x.index)) + +Parameters +---------- +ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + +Examples +-------- + +>>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], + ... columns=['A']) +>>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a +>>> df.groupby('A').cumcount() + 0 0 + 1 1 + 2 2 + 3 0 + 4 1 + 5 3 + dtype: int64 +>>> df.groupby('A').cumcount(ascending=False) + 0 3 + 1 2 + 2 1 + 3 1 + 4 0 + 5 0 + dtype: int64""" - Essentially this is equivalent to - - >>> self.apply(lambda x: Series(np.arange(len(x)), x.index)) - - Parameters - ---------- - ascending : bool, default True - If False, number in reverse, from length of group - 1 to 0. - - Examples - -------- - - >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], - ... 
columns=['A']) - >>> df - A - 0 a - 1 a - 2 a - 3 b - 4 b - 5 a - >>> df.groupby('A').cumcount() - 0 0 - 1 1 - 2 2 - 3 0 - 4 1 - 5 3 - dtype: int64 - >>> df.groupby('A').cumcount(ascending=False) - 0 3 - 1 2 - 2 1 - 3 1 - 4 0 - 5 0 - dtype: int64 - - """ self._set_selection_from_grouper() index = self._selected_obj.index cumcounts = self._cumcount_array(ascending=ascending) return Series(cumcounts, index) + @Substitution(name='groupby') + @Appender(_doc_template) def cumprod(self, axis=0): - """ - Cumulative product for each group - - """ + """Cumulative product for each group""" if axis != 0: return self.apply(lambda x: x.cumprod(axis=axis)) return self._cython_transform('cumprod') + @Substitution(name='groupby') + @Appender(_doc_template) def cumsum(self, axis=0): - """ - Cumulative sum for each group - - """ + """Cumulative sum for each group""" if axis != 0: return self.apply(lambda x: x.cumprod(axis=axis)) return self._cython_transform('cumsum') + @Substitution(name='groupby') + @Appender(_doc_template) def shift(self, periods=1, freq=None, axis=0): - """ - Shift each group by periods observations - """ + """Shift each group by periods observations + +Parameters +---------- +periods : integer, default 1 + number of periods to shift +freq : frequency string +axis : axis to shift, default 0""" if freq is not None or axis != 0: return self.apply(lambda x: x.shift(periods, freq, axis)) @@ -1003,55 +1044,53 @@ def shift(self, periods=1, freq=None, axis=0): return self._wrap_transformed_output(output) + @Substitution(name='groupby') + @Appender(_doc_template) def head(self, n=5): - """ - Returns first n rows of each group. - - Essentially equivalent to ``.apply(lambda x: x.head(n))``, - except ignores as_index flag. - - Examples - -------- - - >>> df = DataFrame([[1, 2], [1, 4], [5, 6]], - columns=['A', 'B']) - >>> df.groupby('A', as_index=False).head(1) - A B - 0 1 2 - 2 5 6 - >>> df.groupby('A').head(1) - A B - 0 1 2 - 2 5 6 - - """ + """Returns first n rows of each group. + +Essentially equivalent to ``.apply(lambda x: x.head(n))``, +except ignores as_index flag. + +Examples +-------- + +>>> df = DataFrame([[1, 2], [1, 4], [5, 6]], + columns=['A', 'B']) +>>> df.groupby('A', as_index=False).head(1) + A B + 0 1 2 + 2 5 6 +>>> df.groupby('A').head(1) + A B + 0 1 2 + 2 5 6""" obj = self._selected_obj in_head = self._cumcount_array() < n head = obj[in_head] return head + @Substitution(name='groupby') + @Appender(_doc_template) def tail(self, n=5): - """ - Returns last n rows of each group - - Essentially equivalent to ``.apply(lambda x: x.tail(n))``, - except ignores as_index flag. - - Examples - -------- - - >>> df = DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], - columns=['A', 'B']) - >>> df.groupby('A').tail(1) - A B - 1 a 2 - 3 b 2 - >>> df.groupby('A').head(1) - A B - 0 a 1 - 2 b 1 - - """ + """Returns last n rows of each group + +Essentially equivalent to ``.apply(lambda x: x.tail(n))``, +except ignores as_index flag. 
+ +Examples +-------- + +>>> df = DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], + columns=['A', 'B']) +>>> df.groupby('A').tail(1) + A B + 1 a 2 + 3 b 2 +>>> df.groupby('A').head(1) + A B + 0 a 1 + 2 b 1""" obj = self._selected_obj rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64') in_tail = self._cumcount_array(rng, ascending=False) > -n @@ -1059,8 +1098,7 @@ def tail(self, n=5): return tail def _cumcount_array(self, arr=None, ascending=True): - """ - arr is where cumcount gets its values from + """arr is where cumcount gets its values from note: this is currently implementing sort=False (though the default is sort=True) for groupby in general @@ -2389,6 +2427,8 @@ def aggregate(self, func_or_funcs, *args, **kwargs): return ret + agg = aggregate + def _aggregate_multiple_funcs(self, arg): if isinstance(arg, dict): columns = list(arg.keys()) @@ -2861,7 +2901,6 @@ def _post_process_cython_aggregate(self, obj): obj = obj.swapaxes(0, 1) return obj - @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): _level = kwargs.pop('_level',None) @@ -2891,6 +2930,8 @@ def aggregate(self, arg, *args, **kwargs): return result._convert(datetime=True) + agg = aggregate + def _aggregate_generic(self, func, *args, **kwargs): if self.grouper.nkeys != 1: raise AssertionError('Number of keys must be 1') @@ -3337,6 +3378,14 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 + @Substitution(name='groupby') + @Appender(SelectionMixin._agg_doc) + @Appender(SelectionMixin._see_also_template) + def aggregate(self, arg, *args, **kwargs): + return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) + + agg = aggregate + def _gotitem(self, key, ndim, subset=None): """ sub-classes to define @@ -3500,6 +3549,14 @@ def count(self): class PanelGroupBy(NDFrameGroupBy): + @Substitution(name='groupby') + @Appender(SelectionMixin._agg_doc) + @Appender(SelectionMixin._see_also_template) + def aggregate(self, arg, *args, **kwargs): + return super(PanelGroupBy, self).aggregate(arg, *args, **kwargs) + + agg = aggregate + def _iterate_slices(self): if self.axis == 0: # kludge diff --git a/pandas/core/window.py b/pandas/core/window.py index 208a9d862927a..2c311a05fe571 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -19,6 +19,19 @@ from pandas import compat from pandas.util.decorators import Substitution, Appender +_shared_docs = dict() +_doc_template = """ + +Returns +------- +same type as input + +See also +-------- +:func:`pandas.Series.%(name)s` +:func:`pandas.DataFrame.%(name)s` +""" + class _Window(PandasObject, SelectionMixin): _attributes = ['window','min_periods','freq','center','how','win_type','axis'] exclusions = set() @@ -190,11 +203,14 @@ def _convert_freq(self): if self.freq is not None and isinstance(self.obj, (Series, DataFrame)): self.obj = self.obj.resample(self.freq, how=self.how) - @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) + if result is None: + return self.apply(arg, args=args, kwargs=kwargs) return result + agg = aggregate + class Window(_Window): def _prep_window(self, **kwargs): @@ -257,9 +273,21 @@ def f(arg, *args, **kwargs): return self._wrap_results(results, blocks) + @Substitution(name='rolling') + @Appender(SelectionMixin._agg_doc) + @Appender(SelectionMixin._see_also_template) + def aggregate(self, arg, *args, **kwargs): + return super(Window, self).aggregate(arg, *args, **kwargs) + + agg = aggregate + + 
@Substitution(name='rolling') + @Appender(_doc_template) def sum(self, **kwargs): return self._apply_window(mean=False, **kwargs) + @Substitution(name='rolling') + @Appender(_doc_template) def mean(self, **kwargs): return self._apply_window(mean=True, **kwargs) @@ -286,7 +314,6 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None, **kw ------- y : type of input """ - if center is None: center = self.center if window is None: @@ -340,17 +367,10 @@ def calc(x): return self._wrap_results(results, blocks) -class Rolling(_Rolling): +class _Rolling_and_Expanding(_Rolling): + _shared_docs['count'] = """%(name)s count of number of non-NaN observations inside provided window.""" def count(self): - """ - Rolling count of number of non-NaN observations inside provided window. - - Returns - ------- - same type as input - """ - obj = self._selected_obj window = self._get_window() window = min(window, len(obj)) if not self.center else window @@ -366,16 +386,15 @@ def count(self): result[result.isnull()] = 0 return result - def apply(self, func, args=(), kwargs={}): - """ - Moving function apply + _shared_docs['apply'] = """%(name)s function apply - Parameters - ---------- - func : function - Must produce a single value from an ndarray input - *args and **kwargs are passed to the function - """ +Parameters +---------- +func : function + Must produce a single value from an ndarray input +*args and **kwargs are passed to the function""" + def apply(self, func, args=(), kwargs={}): + _level = kwargs.pop('_level',None) window = self._get_window() offset = _offset(window, self.center) def f(arg, window, min_periods): @@ -384,62 +403,49 @@ def f(arg, window, min_periods): return self._apply(f, center=False) + _shared_docs['sum'] = """%(name)s sum""" def sum(self): - """ - Moving sum - """ return self._apply('roll_sum') - def max(self, how='max'): - """ - Moving max + _shared_docs['max'] = """%(name)s maximum - Parameters - ---------- - how : string, default max - Method for down- or re-sampling - """ +Parameters +---------- +how : string, default max + Method for down- or re-sampling""" + def max(self, how='max'): return self._apply('roll_max', how=how) - def min(self, how='min'): - """ - Moving min + _shared_docs['min'] = """%(name)s minimum - Parameters - ---------- - how : string, default min - Method for down- or re-sampling - """ +Parameters +---------- +how : string, default min + Method for down- or re-sampling""" + def min(self, how='min'): return self._apply('roll_min', how=how) + _shared_docs['mean'] = """%(name)s mean""" def mean(self): - """ - Moving mean - """ return self._apply('roll_mean') - def median(self, how='median'): - """ - Moving median - - Parameters - ---------- - how : string, default median - Method for down- or re-sampling - """ + _shared_docs['median'] = """%(name)s median +Parameters +---------- +how : string, default median + Method for down- or re-sampling""" + def median(self, how='median'): return self._apply('roll_median_c', how=how) - def std(self, ddof=1): - """ - Moving standard deviation + _shared_docs['std'] = """%(name)s standard deviation - Parameters - ---------- - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - """ +Parameters +---------- +ddof : int, default 1 + Delta Degrees of Freedom. 
The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""" + def std(self, ddof=1): window = self._get_window() def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) @@ -447,43 +453,35 @@ def f(arg, *args, **kwargs): return self._apply(f, check_minp=_require_min_periods(1)) - def var(self, ddof=1): - """ - Moving variance + _shared_docs['var'] = """%(name)s variance - Parameters - ---------- - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - """ +Parameters +---------- +ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""" + def var(self, ddof=1): return self._apply('roll_var', check_minp=_require_min_periods(1), ddof=ddof) + _shared_docs['skew'] = """Unbiased %(name)s skewness""" def skew(self): - """ - Unbiased moving skewness - """ return self._apply('roll_skew', check_minp=_require_min_periods(3)) + _shared_docs['kurt'] = """Unbiased %(name)s kurtosis""" def kurt(self): - """ - Unbiased moving kurtosis - """ return self._apply('roll_kurt', check_minp=_require_min_periods(4)) - def quantile(self, quantile): - """ - Rolling quantile + _shared_docs['quantile'] = """%(name)s quantile - Parameters - ---------- - quantile : float - 0 <= quantile <= 1 - """ +Parameters +---------- +quantile : float +0 <= quantile <= 1""" + def quantile(self, quantile): window = self._get_window() def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) @@ -491,24 +489,22 @@ def f(arg, *args, **kwargs): return self._apply(f) + _shared_docs['cov'] = """%(name)s sample covariance + +Parameters +---------- +other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output +pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. +ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""" def cov(self, other=None, pairwise=False, ddof=1): - """ - Moving sample covariance - - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - if not supplied then will default to self and produce pairwise output - pairwise : bool, default False - If False then only matching columns between self and other will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. 
-        """
         if other is None:
             other = self._selected_obj
             pairwise = True
@@ -522,22 +518,20 @@ def _get_cov(X, Y):
             return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
         return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise))
 
+    _shared_docs['corr'] = """
+%(name)s sample correlation
+
+Parameters
+----------
+other : Series, DataFrame, or ndarray, optional
+    if not supplied then will default to self and produce pairwise output
+pairwise : bool, default False
+    If False then only matching columns between self and other will be used and
+    the output will be a DataFrame.
+    If True then all pairwise combinations will be calculated and the output
+    will be a Panel in the case of DataFrame inputs. In the case of missing
+    elements, only complete pairwise observations will be used."""
     def corr(self, other=None, pairwise=False):
-        """
-        Moving sample correlation
-
-        Parameters
-        ----------
-        other : Series, DataFrame, or ndarray, optional
-            if not supplied then will default to self and produce pairwise output
-        pairwise : bool, default False
-            If False then only matching columns between self and other will be used and
-            the output will be a DataFrame.
-            If True then all pairwise combinations will be calculated and the output
-            will be a Panel in the case of DataFrame inputs. In the case of missing
-            elements, only complete pairwise observations will be used.
-        """
-
         if other is None:
             other = self._selected_obj
             pairwise = True
@@ -557,7 +551,101 @@ def _get_corr(a, b):
                 return a.cov(b) / (a.std() * b.std())
 
         return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise))
 
-class Expanding(Rolling):
+class Rolling(_Rolling_and_Expanding):
+
+    @Substitution(name='rolling')
+    @Appender(SelectionMixin._agg_doc)
+    @Appender(SelectionMixin._see_also_template)
+    def aggregate(self, arg, *args, **kwargs):
+        return super(Rolling, self).aggregate(arg, *args, **kwargs)
+
+    agg = aggregate
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['count'])
+    def count(self):
+        return super(Rolling, self).count()
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['apply'])
+    def apply(self, func, args=(), kwargs={}):
+        return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['sum'])
+    def sum(self):
+        return super(Rolling, self).sum()
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['max'])
+    def max(self, how='max'):
+        return super(Rolling, self).max(how=how)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['min'])
+    def min(self, how='min'):
+        return super(Rolling, self).min(how=how)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['mean'])
+    def mean(self):
+        return super(Rolling, self).mean()
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['median'])
+    def median(self, how='median'):
+        return super(Rolling, self).median(how=how)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['std'])
+    def std(self, ddof=1):
+        return super(Rolling, self).std(ddof=ddof)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['var'])
+    def var(self, ddof=1):
+        return super(Rolling, self).var(ddof=ddof)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['skew'])
+    def skew(self):
+        return super(Rolling, self).skew()
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['kurt'])
+    def kurt(self):
+        return super(Rolling, self).kurt()
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['quantile'])
+    def quantile(self, quantile):
+        return super(Rolling, self).quantile(quantile=quantile)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['cov'])
+    def cov(self, other=None, pairwise=False, ddof=1):
+        return super(Rolling, self).cov(other=other, pairwise=pairwise, ddof=ddof)
+
+    @Substitution(name='rolling')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['corr'])
+    def corr(self, other=None, pairwise=False):
+        return super(Rolling, self).corr(other=other, pairwise=pairwise)
+
+class Expanding(_Rolling_and_Expanding):
     _attributes = ['min_periods','freq','center','how','axis']
 
     @property
@@ -570,6 +658,98 @@ def _get_window(self, other=None):
         return max(len(obj), self.min_periods) if self.min_periods else len(obj)
         return max((len(obj) + len(obj)), self.min_periods) if self.min_periods else (len(obj) + len(obj))
 
+    @Substitution(name='expanding')
+    @Appender(SelectionMixin._agg_doc)
+    @Appender(SelectionMixin._see_also_template)
+    def aggregate(self, arg, *args, **kwargs):
+        return super(Expanding, self).aggregate(arg, *args, **kwargs)
+
+    agg = aggregate
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['count'])
+    def count(self):
+        return super(Expanding, self).count()
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['apply'])
+    def apply(self, func, args=(), kwargs={}):
+        return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['sum'])
+    def sum(self):
+        return super(Expanding, self).sum()
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['max'])
+    def max(self, how='max'):
+        return super(Expanding, self).max(how=how)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['min'])
+    def min(self, how='min'):
+        return super(Expanding, self).min(how=how)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['mean'])
+    def mean(self):
+        return super(Expanding, self).mean()
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['median'])
+    def median(self, how='median'):
+        return super(Expanding, self).median(how=how)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['std'])
+    def std(self, ddof=1):
+        return super(Expanding, self).std(ddof=ddof)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['var'])
+    def var(self, ddof=1):
+        return super(Expanding, self).var(ddof=ddof)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['skew'])
+    def skew(self):
+        return super(Expanding, self).skew()
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['kurt'])
+    def kurt(self):
+        return super(Expanding, self).kurt()
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['quantile'])
+    def quantile(self, quantile):
+        return super(Expanding, self).quantile(quantile=quantile)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['cov'])
+    def cov(self, other=None, pairwise=False, ddof=1):
+        return super(Expanding, self).cov(other=other, pairwise=pairwise, ddof=ddof)
+
+    @Substitution(name='expanding')
+    @Appender(_doc_template)
+    @Appender(_shared_docs['corr'])
+    def corr(self, other=None, pairwise=False):
+        return super(Expanding, self).corr(other=other, pairwise=pairwise)
+
 class EWM(_Rolling):
     _attributes = ['com','min_periods','freq','adjust','how','ignore_na','axis']
 
@@ -589,9 +769,16 @@ def __init__(self, obj, com=None, span=None, halflife=None, min_periods=0, freq=
     def _constructor(self):
         return EWM
 
+    @Substitution(name='ewm')
+    @Appender(SelectionMixin._agg_doc)
+    @Appender(SelectionMixin._see_also_template)
+    def aggregate(self, arg, *args, **kwargs):
+        return super(EWM, self).aggregate(arg, *args, **kwargs)
+
+    agg = aggregate
+
     def _apply(self, func, **kwargs):
-        """
-        Rolling statistical measure using supplied function. Designed to be
+        """Rolling statistical measure using supplied function. Designed to be
         used with passed-in Cython array-based functions.
 
         Parameters
@@ -628,15 +815,16 @@ def func(arg):
 
         return self._wrap_results(results, blocks)
 
+    @Substitution(name='ewm')
+    @Appender(_doc_template)
     def mean(self):
-        """
-        exponential weighted moving average
-        """
+        """exponential weighted moving average"""
        return self._apply('ewma')
 
+    @Substitution(name='ewm')
+    @Appender(_doc_template)
     def std(self, bias=False):
-        """
-        exponential weighted moving stddev
+        """exponential weighted moving stddev
 
         Parameters
         ----------
@@ -646,9 +834,10 @@ def std(self, bias=False):
         return _zsqrt(self.var(bias=bias))
     vol=std
 
+    @Substitution(name='ewm')
+    @Appender(_doc_template)
     def var(self, bias=False):
-        """
-        exponential weighted moving average
+        """exponential weighted moving variance
 
         Parameters
         ----------
@@ -666,9 +855,10 @@ def f(arg):
 
         return self._apply(f)
 
+    @Substitution(name='ewm')
+    @Appender(_doc_template)
     def cov(self, other=None, pairwise=False, bias=False):
-        """
-        exponential weighted sample covariance
+        """exponential weighted sample covariance
 
         Parameters
         ----------
@@ -702,9 +892,10 @@ def _get_cov(X, Y):
         return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise))
 
+    @Substitution(name='ewm')
+    @Appender(_doc_template)
     def corr(self, other=None, pairwise=False):
-        """
-        exponential weighted sample correlation
+        """exponential weighted sample correlation
 
         Parameters
         ----------
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 1aa9ccf4b457d..41d2c8fa88aa1 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -160,6 +160,14 @@ def compare(result, expected):
                                                       ('B','rb','mean'),('B','rb','std')])
         compare(result, expected)
 
+
+        # passed lambda
+        result = r.agg({'A' : np.sum,
+                        'B' : lambda x: np.std(x, ddof=1)})
+        rcustom = r['B'].apply(lambda x: np.std(x,ddof=1))
+        expected = pd.concat([a_sum,rcustom],axis=1)
+        compare(result, expected)
+
 class TestMoments(Base):
 
     def setUp(self):
From e47bd995435ad56c5cdc3df26857847e55a88a07 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sun, 22 Nov 2015 10:40:05 -0500
Subject: [PATCH 4/8] DEPR: removal of
 expanding_corr_pairwise/rolling_corr_pairwise, xref #4950

---
 doc/source/whatsnew/v0.18.0.txt |  4 ++--
 pandas/stats/moments.py         | 30 ++----------------------------
 2 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index b8fc0fff7d984..7d5c2d4a90959 100644
--- a/doc/source/whatsnew/v0.18.0.txt
a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -220,8 +220,8 @@ Deprecations Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - +Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`) +Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index c6cff614de9b1..71c1ae3002e5c 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -12,13 +12,13 @@ 'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov', 'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt', 'rolling_quantile', 'rolling_median', 'rolling_apply', - 'rolling_corr_pairwise', 'rolling_window', + 'rolling_window', 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov', 'expanding_count', 'expanding_max', 'expanding_min', 'expanding_sum', 'expanding_mean', 'expanding_std', 'expanding_cov', 'expanding_corr', 'expanding_var', 'expanding_skew', 'expanding_kurt', 'expanding_quantile', - 'expanding_median', 'expanding_apply', 'expanding_corr_pairwise'] + 'expanding_median', 'expanding_apply'] #------------------------------------------------------------------------------ # Docs @@ -272,20 +272,6 @@ def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): func_kw=['other','pairwise'], **kwargs) -@Substitution("Deprecated. Use rolling_corr(..., pairwise=True) instead.\n\n" - "Pairwise moving sample correlation", _pairwise_arg, - _roll_kw%'None', _pairwise_retval, _roll_notes) -@Appender(_doc_template) -def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None, - freq=None, center=False): - import warnings - msg = "rolling_corr_pairwise is deprecated, use rolling_corr(..., pairwise=True)" - warnings.warn(msg, FutureWarning, stacklevel=2) - return rolling_corr(df1, df2, window=window, min_periods=min_periods, - freq=freq, center=center, - pairwise=True) - - #------------------------------------------------------------------------------ # Exponential moving moments @@ -744,18 +730,6 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): freq=freq, func_kw=['other','pairwise','ddof']) -@Substitution("Deprecated. Use expanding_corr(..., pairwise=True) instead.\n\n" - "Pairwise expanding sample correlation", _pairwise_arg, - _expanding_kw, _pairwise_retval, "") -@Appender(_doc_template) -def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None): - import warnings - msg = "expanding_corr_pairwise is deprecated, use expanding_corr(..., pairwise=True)" - warnings.warn(msg, FutureWarning, stacklevel=2) - return expanding_corr(df1, df2, min_periods=min_periods, - freq=freq, pairwise=True) - - def expanding_apply(arg, func, min_periods=1, freq=None, args=(), kwargs={}): """Generic expanding function application.
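Both removals share one replacement pattern: build the deferred object with ``.rolling()`` or ``.expanding()``, then call ``.corr`` with ``pairwise=True``. A minimal sketch of the new calls (the frame and column names below are illustrative assumptions, not taken from the patch; per the ``corr``/``cov`` docstrings in this series, DataFrame input with ``pairwise=True`` yields a Panel in this version):

.. code-block:: python

    import numpy as np
    import pandas as pd

    # any two-column numeric DataFrame works here
    df = pd.DataFrame(np.random.randn(50, 2), columns=['A', 'B'])

    # previously: pd.rolling_corr_pairwise(df, window=5)
    rolling_pairwise = df.rolling(window=5).corr(pairwise=True)

    # previously: pd.expanding_corr_pairwise(df)
    expanding_pairwise = df.expanding().corr(pairwise=True)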
From 3156395c4a2fbc82372b8ce43566dbe97bc6ba66 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 22 Nov 2015 10:37:31 -0500 Subject: [PATCH 5/8] DEPR: deprecate pd.rolling_*, pd.expanding_*, pd.ewm* --- doc/source/api.rst | 11 +- doc/source/computation.rst | 17 +- doc/source/conf.py | 11 +- doc/source/cookbook.rst | 4 +- doc/source/groupby.rst | 2 +- doc/source/whatsnew/v0.18.0.txt | 26 +- pandas/core/generic.py | 13 +- pandas/core/window.py | 463 ++++++++++--------- pandas/stats/moments.py | 41 +- pandas/tests/test_window.py | 794 ++++++++++++++++---------------- 10 files changed, 732 insertions(+), 650 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index eb683fff3ac47..3c7ca6d5c2326 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1505,9 +1505,9 @@ Window ------ .. currentmodule:: pandas.core.window -Rolling objects are returned by rolling calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. -Expanding objects are returned by rolling calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. -EWM objects are returned by rolling calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. +Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. +Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. +EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. Standard moving window functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1526,13 +1526,13 @@ Standard moving window functions Rolling.min Rolling.max Rolling.corr - Rolling.corr_pairwise Rolling.cov Rolling.skew Rolling.kurt Rolling.apply Rolling.quantile - Rolling.window + Window.mean + Window.sum .. _api.functions_expanding: @@ -1553,7 +1553,6 @@ Standard expanding window functions Expanding.min Expanding.max Expanding.corr - Expanding.corr_pairwise Expanding.cov Expanding.skew Expanding.kurt diff --git a/doc/source/computation.rst b/doc/source/computation.rst index bf593acd73537..d07257bd2e588 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -203,8 +203,12 @@ Window Functions .. warning:: - Prior to version 0.18.0, these were module level functions that have been deprecated. - You can see the previous documentation + Prior to version 0.18.0, ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` were module level + functions and are now deprecated and replaced by the corresponding method call. + + The deprecation warning will show the new syntax, see an example :ref:`here <whatsnew_0180.window_deprecations>` + + You can view the previous documentation `here `__ For working with data, a number of windows functions are provided for @@ -242,10 +246,6 @@ accept the following arguments: result is NA) - ``freq``: optionally specify a :ref:`frequency string ` or :ref:`DateOffset ` to pre-conform the data to. -- ``how``: optionally specify method for down or re-sampling. Default is - is ``min`` for :meth:`~Rolling.min`, ``max`` for :meth:`~Rolling.max`, ``median`` for - :meth:`~Rolling.median`, and ``mean`` for all other rolling functions. See - :meth:`DataFrame.resample`'s how argument for more information. We can then call functions on these ``rolling`` objects.
Which return like-indexed objects: @@ -323,7 +323,7 @@ compute the mean absolute deviation on a rolling basis: Rolling Windows ~~~~~~~~~~~~~~~ -The :meth:`~Window.mean`, and :meth:`~Window.sum` functions performs a generic rolling window computation +The :meth:`~Window.mean`, and :meth:`~Window.sum` functions perform a generic rolling window computation on the input data. The weights used in the window are specified by the ``win_type`` keyword. The list of recognized types are: @@ -361,6 +361,9 @@ For some windowing functions, additional parameters must be specified: ser.rolling(window=5, win_type='gaussian').mean(std=0.1) +Centering Windows +~~~~~~~~~~~~~~~~~ + By default the labels are set to the right edge of the window, but a ``center`` keyword is available so the labels can be set at the center. This keyword is available in other rolling functions as well. diff --git a/doc/source/conf.py b/doc/source/conf.py index 23095b7f4d24b..709d9b32984c0 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -224,16 +224,7 @@ 'pandas.io.pickle.read_pickle', 'pandas.io.pytables.HDFStore.append', 'pandas.io.pytables.HDFStore.get', 'pandas.io.pytables.HDFStore.put', 'pandas.io.pytables.HDFStore.select', 'pandas.io.pytables.read_hdf', 'pandas.io.sql.read_sql', 'pandas.io.sql.read_frame', 'pandas.io.sql.write_frame', - 'pandas.io.stata.read_stata', 'pandas.stats.moments.ewma', 'pandas.stats.moments.ewmcorr', - 'pandas.stats.moments.ewmcov', 'pandas.stats.moments.ewmstd', 'pandas.stats.moments.ewmvar', - 'pandas.stats.moments.expanding_apply', 'pandas.stats.moments.expanding_corr', 'pandas.stats.moments.expanding_count', - 'pandas.stats.moments.expanding_cov', 'pandas.stats.moments.expanding_kurt', 'pandas.stats.moments.expanding_mean', - 'pandas.stats.moments.expanding_median', 'pandas.stats.moments.expanding_quantile', 'pandas.stats.moments.expanding_skew', - 'pandas.stats.moments.expanding_std', 'pandas.stats.moments.expanding_sum', 'pandas.stats.moments.expanding_var', - 'pandas.stats.moments.rolling_apply', 'pandas.stats.moments.rolling_corr', 'pandas.stats.moments.rolling_count', - 'pandas.stats.moments.rolling_cov', 'pandas.stats.moments.rolling_kurt', 'pandas.stats.moments.rolling_mean', - 'pandas.stats.moments.rolling_median', 'pandas.stats.moments.rolling_quantile', 'pandas.stats.moments.rolling_skew', - 'pandas.stats.moments.rolling_std', 'pandas.stats.moments.rolling_sum', 'pandas.stats.moments.rolling_var'] + 'pandas.io.stata.read_stata'] html_additional_pages = {'generated/' + page: 'api_redirect.html' for page in moved_api_pages} diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 92ed85071ecb8..4d6a7457bcf90 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -517,7 +517,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to def Red(x): return functools.reduce(CumRet,x,1.0) - pd.expanding_apply(S, Red) + S.expanding().apply(Red) `Replacing some values with mean of the rest of a group @@ -639,7 +639,7 @@ Create a list of dataframes, split using a delineation based on logic included i df = pd.DataFrame(data={'Case' : ['A','A','A','B','A','A','B','A','A'], 'Data' : np.random.randn(9)}) - dfs = list(zip(*df.groupby(pd.rolling_median((1*(df['Case']=='B')).cumsum(),3,True))))[-1] + dfs = list(zip(*df.groupby((1*(df['Case']=='B')).cumsum().rolling(window=3,min_periods=1).median())))[-1] dfs[0] dfs[1] diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 4ae2ee1927d1a..61f87ebe0db1b 100644 --- 
a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -519,7 +519,7 @@ to standardize the data within each group: index = pd.date_range('10/1/1999', periods=1100) ts = pd.Series(np.random.normal(0.5, 2, 1100), index) - ts = pd.rolling_mean(ts, 100, 100).dropna() + ts = ts.rolling(window=100,min_periods=100).mean().dropna() ts.head() ts.tail() diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 7d5c2d4a90959..2a568582dc7c3 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -210,18 +210,40 @@ Other API Changes Deprecations ^^^^^^^^^^^^ +.. _whatsnew_0180.window_deprecations: +- Functions ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` are deprecated and replaced by the corresponding method call. Note that + the new suggested syntax includes all of the arguments (even if default) (:issue:`11603`) + .. code-block:: python + In [1]: s = Series(range(3)) + In [2]: pd.rolling_mean(s,window=2,min_periods=1) + FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with + Series.rolling(min_periods=1,window=2,center=False).mean() + Out[2]: + 0 0.0 + 1 0.5 + 2 1.5 + dtype: float64 + + In [3]: pd.rolling_cov(s, s, window=2) + FutureWarning: pd.rolling_cov is deprecated for Series and will be removed in a future version, replace with + Series.rolling(window=2).cov(other=<Series>) + Out[3]: + 0 NaN + 1 0.5 + 2 0.5 + dtype: float64 .. _whatsnew_0180.prior_deprecations: Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`) -Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e8abc96aab858..d3cd0840782b4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4741,26 +4741,25 @@ def _add_series_or_dataframe_operations(cls): @Appender(rwindow.rolling.__doc__) def rolling(self, window, min_periods=None, freq=None, center=False, - how=None, win_type=None, axis=0): + win_type=None, axis=0): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, - how=how, win_type=win_type, axis=axis) + win_type=win_type, axis=axis) cls.rolling = rolling @Appender(rwindow.expanding.__doc__) - def expanding(self, min_periods=None, freq=None, center=False, - how=None, axis=0): + def expanding(self, min_periods=1, freq=None, center=False, axis=0): axis = self._get_axis_number(axis) return rwindow.expanding(self, min_periods=min_periods, freq=freq, center=center, - how=how, axis=axis) + axis=axis) cls.expanding = expanding @Appender(rwindow.ewm.__doc__) def ewm(self, com=None, span=None, halflife=None, min_periods=0, freq=None, - adjust=True, how=None, ignore_na=False, axis=0): + adjust=True, ignore_na=False, axis=0): axis = self._get_axis_number(axis) return rwindow.ewm(self, com=com, span=span, halflife=halflife, min_periods=min_periods, - freq=freq, adjust=adjust, how=how, ignore_na=ignore_na, axis=axis) + freq=freq, adjust=adjust, ignore_na=ignore_na, axis=axis) cls.ewm = ewm def _doc_parms(cls): diff --git a/pandas/core/window.py b/pandas/core/window.py index
2c311a05fe571..09dc528f64cbe 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -33,21 +33,19 @@ """ class _Window(PandasObject, SelectionMixin): - _attributes = ['window','min_periods','freq','center','how','win_type','axis'] + _attributes = ['window','min_periods','freq','center','win_type','axis'] exclusions = set() def __init__(self, obj, window=None, min_periods=None, freq=None, center=False, - how=None, win_type=None, axis=0): + win_type=None, axis=0): self.blocks = [] self.obj = obj self.window = window self.min_periods = min_periods self.freq = freq self.center = center - self.how = how self.win_type = win_type self.axis = axis - self._convert_freq() self._setup() @property @@ -57,9 +55,18 @@ def _constructor(self): def _setup(self): pass - def _create_blocks(self): - """ split data into blocks """ - return self._selected_obj.as_blocks(copy=False).values() + def _convert_freq(self, how=None): + """ resample according to the how, return a new object """ + obj = self._selected_obj + if self.freq is not None and isinstance(obj, (com.ABCSeries, com.ABCDataFrame)): + obj = obj.resample(self.freq, how=how) + return obj + + def _create_blocks(self, how): + """ split data into blocks & return conformed data """ + + obj = self._convert_freq(how) + return obj.as_blocks(copy=False).values(), obj def _gotitem(self, key, ndim, subset=None): """ @@ -119,7 +126,7 @@ def _shallow_copy(self, obj=None, **kwargs): kwargs[attr] = getattr(self,attr) return self._constructor(obj, **kwargs) - def _prep_values(self, values=None, kill_inf=True): + def _prep_values(self, values=None, kill_inf=True, how=None): if values is None: values = getattr(self._selected_obj,'values',self._selected_obj) @@ -143,10 +150,11 @@ return values - def _wrap_result(self, result, block=None): + def _wrap_result(self, result, block=None, obj=None): """ wrap a single result """ - obj = self._selected_obj + if obj is None: + obj = self._selected_obj if isinstance(result, np.ndarray): # coerce if necessary @@ -163,14 +171,21 @@ columns=block.columns) return result - def _wrap_results(self, results, blocks): - """ wrap lists of results, blocks """ + def _wrap_results(self, results, blocks, obj): + """ + wrap the results + + Parameters + ---------- + results : list of ndarrays + blocks : list of blocks + obj : conformed data (may be resampled) + """ - obj = self._selected_obj final = [] for result, block in zip(results, blocks): - result = self._wrap_result(result, block) + result = self._wrap_result(result, block=block, obj=obj) if result.ndim == 1: return result final.append(result) @@ -196,13 +211,6 @@ def _center_window(self, result, window): result = np.copy(result[tuple(lead_indexer)]) return result - def _convert_freq(self): - """ conform to our freq """ - - from pandas import Series, DataFrame - if self.freq is not None and isinstance(self.obj, (Series, DataFrame)): - self.obj = self.obj.resample(self.freq, how=self.how) - def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: @@ -212,6 +220,58 @@ agg = aggregate class Window(_Window): + """ + Provides rolling transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + window : int + Size of the moving window. This is the number of observations used for + calculating the statistic.
+ min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + win_type : string, default None + provide a window type, see the notes below + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + The recognized window types are: + + * ``boxcar`` + * ``triang`` + * ``blackman`` + * ``hamming`` + * ``bartlett`` + * ``parzen`` + * ``bohman`` + * ``blackmanharris`` + * ``nuttall`` + * ``barthann`` + * ``kaiser`` (needs beta) + * ``gaussian`` (needs std) + * ``general_gaussian`` (needs power, width) + * ``slepian`` (needs width). + """ def _prep_window(self, **kwargs): """ provide validation for our window type, return the window """ @@ -229,7 +289,7 @@ raise ValueError('Invalid window %s' % str(window)) - def _apply_window(self, mean=True, **kwargs): + def _apply_window(self, mean=True, how=None, **kwargs): """ Applies a moving window of type ``window_type`` on the data. @@ -237,6 +297,8 @@ ---------- mean : boolean, default True If True computes weighted mean, else weighted sum + how : string, default to None + how to resample Returns ------- @@ -246,7 +308,8 @@ window = self._prep_window(**kwargs) center = self.center - results, blocks = [], self._create_blocks() + blocks, obj = self._create_blocks(how=how) + results = [] for b in blocks: try: values = self._prep_values(b.values) @@ -271,7 +334,7 @@ result = self._center_window(result, window) results.append(result) - return self._wrap_results(results, blocks) + return self._wrap_results(results, blocks, obj) @Substitution(name='rolling') @Appender(SelectionMixin._agg_doc) @@ -309,6 +372,7 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None, **kw center : boolean, default to self.center check_minp : function, default to _use_window how : string, default to None + how to resample Returns ------- @@ -322,7 +386,8 @@ if check_minp is None: check_minp = _use_window - results, blocks = [], self._create_blocks() + blocks, obj = self._create_blocks(how=how) + results = [] for b in blocks: try: values = self._prep_values(b.values) @@ -365,13 +430,13 @@ def calc(x): results.append(result) - return self._wrap_results(results, blocks) + return self._wrap_results(results, blocks, obj) class _Rolling_and_Expanding(_Rolling): _shared_docs['count'] = """%(name)s count of number of non-NaN observations inside provided window.""" def count(self): - obj = self._selected_obj + obj = self._convert_freq() window = self._get_window() window = min(window, len(obj)) if not self.center else window try: @@ -495,7 +560,7 @@ def f(arg, *args, **kwargs): ---------- other : 
Series, DataFrame, or ndarray, optional if not supplied then will default to self and produce pairwise output -pairwise : bool, default False +pairwise : bool, default None If False then only matching columns between self and other will be used and the output will be a DataFrame. If True then all pairwise combinations will be calculated and the output @@ -504,10 +569,10 @@ def f(arg, *args, **kwargs): ddof : int, default 1 Delta Degrees of Freedom. The divisor used in calculations is ``N - ddof``, where ``N`` represents the number of elements.""" - def cov(self, other=None, pairwise=False, ddof=1): + def cov(self, other=None, pairwise=None, ddof=1): if other is None: other = self._selected_obj - pairwise = True + pairwise = True if pairwise is None else pairwise # only default unset other = self._shallow_copy(other) window = self._get_window(other) @@ -525,16 +590,16 @@ def _get_cov(X, Y): ---------- other : Series, DataFrame, or ndarray, optional if not supplied then will default to self and produce pairwise output -pairwise : bool, default False +pairwise : bool, default None If False then only matching columns between self and other will be used and the output will be a DataFrame. If True then all pairwise combinations will be calculated and the output will be a Panel in the case of DataFrame inputs. In the case of missing elements, only complete pairwise observations will be used.""" - def corr(self, other=None, pairwise=False): + def corr(self, other=None, pairwise=None): if other is None: other = self._selected_obj - pairwise = True + pairwise = True if pairwise is None else pairwise # only default unset other = self._shallow_copy(other) window = self._get_window(other) @@ -552,6 +617,39 @@ def _get_corr(a, b): return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) class Rolling(_Rolling_and_Expanding): + """ + Provides rolling transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + window : int + Size of the moving window. This is the number of observations used for + calculating the statistic. + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
+ """ @Substitution(name='rolling') @Appender(SelectionMixin._agg_doc) @@ -589,7 +687,7 @@ def max(self, how='max'): @Appender(_doc_template) @Appender(_shared_docs['min']) def min(self, how='min'): - return super(Rolling, self).min() + return super(Rolling, self).min(how=how) @Substitution(name='rolling') @Appender(_doc_template) @@ -636,17 +734,51 @@ def quantile(self, quantile): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['cov']) - def cov(self, other=None, pairwise=False, ddof=1): + def cov(self, other=None, pairwise=None, ddof=1): return super(Rolling, self).cov(other=other, pairwise=pairwise, ddof=ddof) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['corr']) - def corr(self, other=None, pairwise=False): + def corr(self, other=None, pairwise=None): return super(Rolling, self).corr(other=other, pairwise=pairwise) class Expanding(_Rolling_and_Expanding): - _attributes = ['min_periods','freq','center','how','axis'] + """ + Provides expanding transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + """ + + _attributes = ['min_periods','freq','center','axis'] + + def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0, **kwargs): + return super(Expanding, self).__init__(obj=obj, min_periods=min_periods, freq=freq, center=center, axis=axis) @property def _constructor(self): @@ -694,7 +826,7 @@ def max(self, how='max'): @Appender(_doc_template) @Appender(_shared_docs['min']) def min(self, how='min'): - return super(Expanding, self).min() + return super(Expanding, self).min(how=how) @Substitution(name='expanding') @Appender(_doc_template) @@ -741,29 +873,93 @@ def quantile(self, quantile): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['cov']) - def cov(self, other=None, pairwise=False, ddof=1): + def cov(self, other=None, pairwise=None, ddof=1): return super(Expanding, self).cov(other=other, pairwise=pairwise, ddof=ddof) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['corr']) - def corr(self, other=None, pairwise=False): + def corr(self, other=None, pairwise=None): return super(Expanding, self).corr(other=other, pairwise=pairwise) class EWM(_Rolling): - _attributes = ['com','min_periods','freq','adjust','how','ignore_na','axis'] + """ + .. versionadded:: 0.18.0 + + Provides exponential weighted functions + + Parameters + ---------- + com : float. 
optional + Center of mass: :math:`\alpha = 1 / (1 + com)`, + span : float, optional + Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` + halflife : float, optional + Specify decay in terms of halflife, :math:`\alpha = 1 - exp(log(0.5) / halflife)` + min_periods : int, default 0 + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : None or string alias / date offset object, default=None + Frequency to conform to before computing statistic + adjust : boolean, default True + Divide by decaying adjustment factor in beginning periods to account for + imbalance in relative weightings (viewing EWMA as a moving average) + ignore_na : boolean, default False + Ignore missing values when calculating weights; + specify True to reproduce pre-0.15.0 behavior + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + Either center of mass, span or halflife must be specified + + EWMA is sometimes specified using a "span" parameter `s`, we have that the + decay parameter :math:`\alpha` is related to the span as + :math:`\alpha = 2 / (s + 1) = 1 / (1 + c)` + + where `c` is the center of mass. Given a span, the associated center of mass is + :math:`c = (s - 1) / 2` + + So a "20-day EWMA" would have center 9.5. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + When adjust is True (default), weighted averages are calculated using weights + (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. + + When adjust is False, weighted averages are calculated recursively as: + weighted_average[0] = arg[0]; + weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. + + When ignore_na is False (default), weights are based on absolute positions. + For example, the weights of x and y used in calculating the final weighted + average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and + (1-alpha)**2 and alpha (if adjust is False). + + When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on + relative positions. For example, the weights of x and y used in calculating + the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is + True), and 1-alpha and alpha (if adjust is False). + + More details can be found at + http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions + """ + _attributes = ['com','min_periods','freq','adjust','ignore_na','axis'] def __init__(self, obj, com=None, span=None, halflife=None, min_periods=0, freq=None, - adjust=True, how=None, ignore_na=False, axis=0): + adjust=True, ignore_na=False, axis=0): self.obj = obj self.com = _get_center_of_mass(com, span, halflife) self.min_periods = min_periods self.freq = freq self.adjust = adjust - self.how = how self.ignore_na = ignore_na self.axis = axis - self._convert_freq() @property def _constructor(self): @@ -777,20 +973,23 @@ def aggregate(self, arg, *args, **kwargs): agg = aggregate - def _apply(self, func, **kwargs): + def _apply(self, func, how=None, **kwargs): """Rolling statistical measure using supplied function. Designed to be used with passed-in Cython array-based functions. 
Parameters ---------- func : string/callable to apply + how : string, default to None + how to resample Returns ------- y : type of input argument """ - results, blocks = [], self._create_blocks() + blocks, obj = self._create_blocks(how=how) + results = [] for b in blocks: try: values = self._prep_values(b.values) @@ -813,7 +1012,7 @@ def func(arg): results.append(np.apply_along_axis(func, self.axis, values)) - return self._wrap_results(results, blocks) + return self._wrap_results(results, blocks, obj) @Substitution(name='ewm') @Appender(_doc_template) @@ -857,14 +1056,14 @@ def f(arg): @Substitution(name='ewm') @Appender(_doc_template) - def cov(self, other=None, pairwise=False, bias=False): + def cov(self, other=None, pairwise=None, bias=False): """exponential weighted sample covariance Parameters ---------- other : Series, DataFrame, or ndarray, optional if not supplied then will default to self and produce pairwise output - pairwise : bool, default False + pairwise : bool, default None If False then only matching columns between self and other will be used and the output will be a DataFrame. If True then all pairwise combinations will be calculated and the output @@ -875,7 +1074,7 @@ def cov(self, other=None, pairwise=False, bias=False): """ if other is None: other = self._selected_obj - pairwise = True + pairwise = True if pairwise is None else pairwise # only default unset other = self._shallow_copy(other) def _get_cov(X, Y): @@ -894,14 +1093,14 @@ def _get_cov(X, Y): @Substitution(name='ewm') @Appender(_doc_template) - def corr(self, other=None, pairwise=False): + def corr(self, other=None, pairwise=None): """exponential weighted sample correlation Parameters ---------- other : Series, DataFrame, or ndarray, optional if not supplied then will default to self and produce pairwise output - pairwise : bool, default False + pairwise : bool, default None If False then only matching columns between self and other will be used and the output will be a DataFrame. If True then all pairwise combinations will be calculated and the output @@ -910,7 +1109,7 @@ def corr(self, other=None, pairwise=False): """ if other is None: other = self._selected_obj - pairwise = True + pairwise = True if pairwise is None else pairwise # only default unset other = self._shallow_copy(other) def _get_corr(X, Y): @@ -1089,60 +1288,6 @@ def _pop_args(win_type, arg_names, kwargs): ############################# def rolling(obj, win_type=None, **kwds): - """ - Provides rolling transformations. - - .. versionadded:: 0.18.0 - - Parameters - ---------- - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. - center : boolean, default False - Set the labels at the center of the window. - how : string, default None - Method for down- or re-sampling - win_type : string, default None - prove a window type, see the notes below - axis : int, default 0 - - Returns - ------- - a Window sub-classed for the particular operation - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. 
- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - The recognized window types are: - - * ``boxcar`` - * ``triang`` - * ``blackman`` - * ``hamming`` - * ``bartlett`` - * ``parzen`` - * ``bohman`` - * ``blackmanharris`` - * ``nuttall`` - * ``barthann`` - * ``kaiser`` (needs beta) - * ``gaussian`` (needs std) - * ``general_gaussian`` (needs power, width) - * ``slepian`` (needs width). - """ from pandas import Series, DataFrame if not isinstance(obj, (Series, DataFrame)): raise TypeError('invalid type: %s' % type(obj)) @@ -1151,118 +1296,20 @@ def rolling(obj, win_type=None, **kwds): return Window(obj, win_type=win_type, **kwds) return Rolling(obj, **kwds) +rolling.__doc__ = Window.__doc__ def expanding(obj, **kwds): - """ - Provides expanding transformations. - - .. versionadded:: 0.18.0 - - Parameters - ---------- - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. - center : boolean, default False - Set the labels at the center of the window. - how : string, default None - Method for down- or re-sampling - axis : int, default 0 - - Returns - ------- - a Window sub-classed for the particular operation - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - """ - from pandas import Series, DataFrame if not isinstance(obj, (Series, DataFrame)): raise TypeError('invalid type: %s' % type(obj)) return Expanding(obj, **kwds) +expanding.__doc__ = Expanding.__doc__ def ewm(obj, **kwds): - """ - .. versionadded:: 0.18.0 - - Provides exponential weighted functions - - Parameters - ---------- - com : float. optional - Center of mass: :math:`\alpha = 1 / (1 + com)`, - span : float, optional - Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` - halflife : float, optional - Specify decay in terms of halflife, :math:`\alpha = 1 - exp(log(0.5) / halflife)` - min_periods : int, default 0 - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : None or string alias / date offset object, default=None - Frequency to conform to before computing statistic - adjust : boolean, default True - Divide by decaying adjustment factor in beginning periods to account for - imbalance in relative weightings (viewing EWMA as a moving average) - how : string, default 'mean' - Method for down- or re-sampling - ignore_na : boolean, default False - Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior - - Returns - ------- - a Window sub-classed for the particular operation - - Notes - ----- - Either center of mass, span or halflife must be specified - - EWMA is sometimes specified using a "span" parameter `s`, we have that the - decay parameter :math:`\alpha` is related to the span as - :math:`\alpha = 2 / (s + 1) = 1 / (1 + c)` - - where `c` is the center of mass. 
Given a span, the associated center of mass is - :math:`c = (s - 1) / 2` - - So a "20-day EWMA" would have center 9.5. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - When adjust is True (default), weighted averages are calculated using weights - (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. - - When adjust is False, weighted averages are calculated recursively as: - weighted_average[0] = arg[0]; - weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. - - When ignore_na is False (default), weights are based on absolute positions. - For example, the weights of x and y used in calculating the final weighted - average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and - (1-alpha)**2 and alpha (if adjust is False). - - When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on - relative positions. For example, the weights of x and y used in calculating - the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is - True), and 1-alpha and alpha (if adjust is False). - - More details can be found at - http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions - """ from pandas import Series, DataFrame if not isinstance(obj, (Series, DataFrame)): raise TypeError('invalid type: %s' % type(obj)) return EWM(obj, **kwds) +ewm.__doc__ = EWM.__doc__ diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 71c1ae3002e5c..28f35cf26e582 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -4,7 +4,9 @@ """ from __future__ import division +import warnings import numpy as np +from pandas import lib from pandas.core.api import DataFrame, Series from pandas.util.decorators import Substitution, Appender @@ -187,6 +189,10 @@ def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): else: raise AssertionError("cannot support ndim > 2 for ndarray compat") + warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and will be removed " + "in a future version".format(dispatch=dispatch,name=name), + FutureWarning, stacklevel=3) + # get the functional keywords here if func_kw is None: func_kw = [] @@ -195,7 +201,37 @@ value = kwargs.pop(k,None) if value is not None: kwds[k] = value + + # how is a keyword that if not-None should be in kwds + how = kwargs.pop('how',None) + if how is not None: + kwds['how'] = how + r = getattr(arg,dispatch)(**kwargs) + + if not is_ndarray: + + # give a helpful deprecation message + # with copy-pastable arguments + pargs = ','.join([ "{a}={b}".format(a=a,b=b) for a,b in kwargs.items() if b is not None ]) + aargs = ','.join(args) + if len(aargs): + aargs += ',' + + def f(a,b): + if lib.isscalar(b): + return "{a}={b}".format(a=a,b=b) + return "{a}=<{b}>".format(a=a,b=type(b).__name__) + aargs += ','.join([ f(a,b) for a,b in kwds.items() if b is not None ]) + warnings.warn("pd.{dispatch}_{name} is deprecated for {klass} " + "and will be removed in a future version, replace with " + "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})".format(klass=type(arg).__name__, + pargs=pargs, + aargs=aargs, + dispatch=dispatch, + name=name), + FutureWarning, stacklevel=3) + result = getattr(r,name)(*args, **kwds) if is_ndarray: @@ -404,8 +440,9 @@ def _rolling_func(name, desc, how=None, func_kw=None,
additional_kw=''): @Substitution(desc, _unary_arg, _roll_kw%how_arg_str + additional_kw, _type_of_input_retval, _roll_notes) @Appender(_doc_template) - def f(arg, window, min_periods=None, freq=None, center=False, how=how, + def f(arg, window, min_periods=None, freq=None, center=False, **kwargs): + return ensure_compat('rolling', name, arg, @@ -413,7 +450,6 @@ def f(arg, window, min_periods=None, freq=None, center=False, how=how, min_periods=min_periods, freq=freq, center=center, - how=how, func_kw=func_kw, **kwargs) return f @@ -597,7 +633,6 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, center=center, min_periods=min_periods, axis=axis, - how=how, func_kw=kwargs.keys(), **kwargs) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 41d2c8fa88aa1..5517ce967b864 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -168,22 +168,38 @@ def compare(result, expected): expected = pd.concat([a_sum,rcustom],axis=1) compare(result, expected) +class TestDeprecations(Base): + """ test that we are catching deprecation warnings """ + + def setUp(self): + self._create_data() + + + def test_deprecations(self): + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + mom.rolling_mean(np.ones(10),3,center=True ,axis=0) + mom.rolling_mean(Series(np.ones(10)),3,center=True ,axis=0) + class TestMoments(Base): def setUp(self): self._create_data() def test_centered_axis_validation(self): + # ok - mom.rolling_mean(Series(np.ones(10)),3,center=True ,axis=0) + Series(np.ones(10)).rolling(window=3,center=True ,axis=0).mean() + # bad axis - self.assertRaises(ValueError, mom.rolling_mean,Series(np.ones(10)),3,center=True ,axis=1) + self.assertRaises(ValueError, lambda : Series(np.ones(10)).rolling(window=3,center=True ,axis=1).mean()) # ok ok - mom.rolling_mean(DataFrame(np.ones((10,10))),3,center=True ,axis=0) - mom.rolling_mean(DataFrame(np.ones((10,10))),3,center=True ,axis=1) + DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=0).mean() + DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=1).mean() + # bad axis - self.assertRaises(ValueError, mom.rolling_mean,DataFrame(np.ones((10,10))),3,center=True ,axis=2) + self.assertRaises(ValueError, lambda : DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=2).mean()) def test_rolling_sum(self): self._check_moment_func(mom.rolling_sum, np.sum, name='sum') @@ -208,11 +224,12 @@ def test_cmov_mean(self): xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - rs = mom.rolling_mean(vals, 5, center=True) - assert_almost_equal(xp, rs) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_mean(vals, 5, center=True) + assert_almost_equal(xp, rs) xp = Series(rs) - rs = mom.rolling_mean(Series(vals), 5, center=True) + rs = Series(vals).rolling(5, center=True).mean() assert_series_equal(xp, rs) def test_cmov_window(self): @@ -224,13 +241,11 @@ def test_cmov_window(self): xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - assert_almost_equal(xp, rs) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 5, 'boxcar', center=True) + assert_almost_equal(xp, rs) xp = Series(rs) - rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True) - assert_series_equal(xp, rs) - rs = Series(vals).rolling(5, win_type='boxcar', 
center=True).mean() assert_series_equal(xp, rs) @@ -241,19 +256,22 @@ def test_cmov_window_corner(self): # all nan vals = np.empty(10, dtype=float) vals.fill(np.nan) - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - self.assertTrue(np.isnan(rs).all()) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 5, 'boxcar', center=True) + self.assertTrue(np.isnan(rs).all()) # empty vals = np.array([]) - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - self.assertEqual(len(rs), 0) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 5, 'boxcar', center=True) + self.assertEqual(len(rs), 0) # shorter than window vals = np.random.randn(5) - rs = mom.rolling_window(vals, 10, 'boxcar') - self.assertTrue(np.isnan(rs).all()) - self.assertEqual(len(rs), 5) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 10, 'boxcar') + self.assertTrue(np.isnan(rs).all()) + self.assertEqual(len(rs), 5) def test_cmov_window_frame(self): # Gh 8238 @@ -282,9 +300,6 @@ def test_cmov_window_frame(self): [ np.nan, np.nan]]) # DataFrame - rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True) - assert_frame_equal(DataFrame(xp), rs) - rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).mean() assert_frame_equal(DataFrame(xp), rs) @@ -314,10 +329,7 @@ def test_cmov_window_na_min_periods(self): vals[4] = np.nan vals[8] = np.nan - xp = mom.rolling_mean(vals, 5, min_periods=4, center=True) - rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True) - assert_series_equal(xp, rs) - + xp = vals.rolling(5, min_periods=4, center=True).mean() rs = vals.rolling(5, win_type='boxcar', min_periods=4, center=True).mean() assert_series_equal(xp, rs) @@ -350,9 +362,6 @@ def test_cmov_window_regular(self): for wt in win_types: xp = Series(xps[wt]) - rs = mom.rolling_window(Series(vals), 5, wt, center=True) - assert_series_equal(xp, rs) - rs = Series(vals).rolling(5, win_type=wt, center=True).mean() assert_series_equal(xp, rs) @@ -370,9 +379,6 @@ def test_cmov_window_regular_linear_range(self): xp = Series(xp) for wt in win_types: - rs = mom.rolling_window(Series(vals), 5, wt, center=True) - assert_series_equal(xp, rs) - rs = Series(vals).rolling(5, win_type=wt, center=True).mean() assert_series_equal(xp, rs) @@ -407,9 +413,6 @@ def test_cmov_window_regular_missing_data(self): for wt in win_types: xp = Series(xps[wt]) - rs = mom.rolling_window(Series(vals), 5, wt, min_periods=3) - assert_series_equal(xp, rs) - rs = Series(vals).rolling(5, win_type=wt, min_periods=3).mean() assert_series_equal(xp, rs) @@ -438,11 +441,6 @@ def test_cmov_window_special(self): for wt, k in zip(win_types, kwds): xp = Series(xps[wt]) - - rs = mom.rolling_window(Series(vals), 5, wt, center=True, - **k) - assert_series_equal(xp, rs) - rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) assert_series_equal(xp, rs) @@ -461,10 +459,6 @@ def test_cmov_window_special_linear_range(self): xp = Series(xp) for wt, k in zip(win_types, kwds): - rs = mom.rolling_window(Series(vals), 5, wt, center=True, - **k) - assert_series_equal(xp, rs) - rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) assert_series_equal(xp, rs) @@ -474,22 +468,25 @@ def test_rolling_median(self): def test_rolling_min(self): self._check_moment_func(mom.rolling_min, np.min, name='min') - a = np.array([1, 2, 3, 4, 5]) - b = mom.rolling_min(a, window=100, min_periods=1) - 
assert_almost_equal(b, np.ones(len(a))) - self.assertRaises(ValueError, mom.rolling_min, np.array([1, - 2, 3]), window=3, min_periods=5) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + a = np.array([1, 2, 3, 4, 5]) + b = mom.rolling_min(a, window=100, min_periods=1) + assert_almost_equal(b, np.ones(len(a))) + + self.assertRaises(ValueError, mom.rolling_min, + np.array([1,2, 3]), window=3, min_periods=5) def test_rolling_max(self): self._check_moment_func(mom.rolling_max, np.max, name='max') - a = np.array([1, 2, 3, 4, 5]) - b = mom.rolling_max(a, window=100, min_periods=1) - assert_almost_equal(a, b) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + a = np.array([1, 2, 3, 4, 5]) + b = mom.rolling_max(a, window=100, min_periods=1) + assert_almost_equal(a, b) - self.assertRaises(ValueError, mom.rolling_max, np.array([1, - 2, 3]), window=3, min_periods=5) + self.assertRaises(ValueError, mom.rolling_max, np.array([1,2, 3]), + window=3, min_periods=5) def test_rolling_quantile(self): qs = [.1, .5, .9] @@ -518,11 +515,12 @@ def test_rolling_apply(self): warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) ser = Series([]) - assert_series_equal(ser, mom.rolling_apply(ser, 10, lambda x: x.mean())) + assert_series_equal(ser, ser.rolling(10).apply(lambda x: x.mean())) f = lambda x: x[np.isfinite(x)].mean() def roll_mean(x, window, min_periods=None, freq=None, center=False, **kwargs): - return mom.rolling_apply(x, window, + return mom.rolling_apply(x, + window, func=f, min_periods=min_periods, freq=freq, @@ -531,7 +529,7 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False, **kwargs): # GH 8080 s = Series([None, None, None]) - result = mom.rolling_apply(s, 2, lambda x: len(x), min_periods=0) + result = s.rolling(2,min_periods=0).apply(lambda x: len(x)) expected = Series([1., 2., 2.]) assert_series_equal(result, expected) @@ -543,10 +541,12 @@ def test_rolling_apply_out_of_bounds(self): arr = np.arange(4) # it works! 
- result = mom.rolling_apply(arr, 10, np.sum) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_apply(arr, 10, np.sum) self.assertTrue(isnull(result).all()) - result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) assert_almost_equal(result, result) def test_rolling_std(self): @@ -559,18 +559,21 @@ def test_rolling_std(self): ddof=0) def test_rolling_std_1obs(self): - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), + 1, min_periods=1) expected = np.array([np.nan] * 5) assert_almost_equal(result, expected) - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1, ddof=0) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), + 1, min_periods=1, ddof=0) expected = np.zeros(5) assert_almost_equal(result, expected) - result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), - 3, min_periods=2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), + 3, min_periods=2) self.assertTrue(np.isnan(result[2])) def test_rolling_std_neg_sqrt(self): @@ -583,10 +586,12 @@ def test_rolling_std_neg_sqrt(self): 0.00028718669878572767, 0.00028718669878572767, 0.00028718669878572767]) - b = mom.rolling_std(a, window=3) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + b = mom.rolling_std(a, window=3) self.assertTrue(np.isfinite(b[2:]).all()) - b = mom.ewmstd(a, span=3) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + b = mom.ewmstd(a, span=3) self.assertTrue(np.isfinite(b[2:]).all()) def test_rolling_var(self): @@ -629,21 +634,26 @@ def test_fperr_robustness(self): if sys.byteorder != "little": arr = arr.byteswap().newbyteorder() - result = mom.rolling_sum(arr, 2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_sum(arr, 2) self.assertTrue((result[1:] >= 0).all()) - result = mom.rolling_mean(arr, 2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_mean(arr, 2) self.assertTrue((result[1:] >= 0).all()) - result = mom.rolling_var(arr, 2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_var(arr, 2) self.assertTrue((result[1:] >= 0).all()) # #2527, ugh arr = np.array([0.00012456, 0.0003, 0]) - result = mom.rolling_mean(arr, 1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_mean(arr, 1) self.assertTrue(result[-1] >= 0) - result = mom.rolling_mean(-arr, 1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_mean(-arr, 1) self.assertTrue(result[-1] <= 0) def _check_moment_func(self, f, static_comp, @@ -657,20 +667,22 @@ def _check_moment_func(self, f, static_comp, test_stable=False, **kwargs): - self._check_ndarray(f, static_comp, window=window, - has_min_periods=has_min_periods, - preserve_nan=preserve_nan, - has_center=has_center, - fill_value=fill_value, - test_stable=test_stable, - **kwargs) - - self._check_structures(f, static_comp, - has_min_periods=has_min_periods, - 
has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center, - **kwargs) + with warnings.catch_warnings(record=True): + self._check_ndarray(f, static_comp, window=window, + has_min_periods=has_min_periods, + preserve_nan=preserve_nan, + has_center=has_center, + fill_value=fill_value, + test_stable=test_stable, + **kwargs) + + with warnings.catch_warnings(record=True): + self._check_structures(f, static_comp, + has_min_periods=has_min_periods, + has_time_rule=has_time_rule, + fill_value=fill_value, + has_center=has_center, + **kwargs) # new API if name is not None: @@ -786,12 +798,13 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): center=center),name)(**kwargs) # check via the moments API - return f(obj, - window=window, - min_periods=min_periods, - freq=freq, - center=center, - **kwargs) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + return f(obj, + window=window, + min_periods=min_periods, + freq=freq, + center=center, + **kwargs) series_result = get_result(self.series, window=50) frame_result = get_result(self.frame, window=50) @@ -867,20 +880,18 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): assert_frame_equal(frame_xp, frame_rs) def test_ewma(self): - self._check_ew(mom.ewma) + self._check_ew(mom.ewma,name='mean') arr = np.zeros(1000) arr[5] = 1 - result = mom.ewma(arr, span=100, adjust=False).sum() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.ewma(arr, span=100, adjust=False).sum() self.assertTrue(np.abs(result - 1) < 1e-2) s = Series([1.0, 2.0, 4.0, 8.0]) expected = Series([1.0, 1.6, 2.736842, 4.923077]) - for f in [lambda s: mom.ewma(s, com=2.0, adjust=True), - lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False), - lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=True), - lambda s: s.ewm(com=2.0, adjust=True).mean(), + for f in [lambda s: s.ewm(com=2.0, adjust=True).mean(), lambda s: s.ewm(com=2.0, adjust=True, ignore_na=False).mean(), lambda s: s.ewm(com=2.0, adjust=True, ignore_na=True).mean(), ]: @@ -888,10 +899,7 @@ def test_ewma(self): assert_series_equal(result, expected) expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - for f in [lambda s: mom.ewma(s, com=2.0, adjust=False), - lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=False), - lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=True), - lambda s: s.ewm(com=2.0, adjust=False).mean(), + for f in [lambda s: s.ewm(com=2.0, adjust=False).mean(), lambda s: s.ewm(com=2.0, adjust=False, ignore_na=False).mean(), lambda s: s.ewm(com=2.0, adjust=False, ignore_na=True).mean(), ]: @@ -900,11 +908,11 @@ def test_ewma(self): def test_ewma_nan_handling(self): s = Series([1.] + [np.nan] * 5 + [1.]) - result = mom.ewma(s, com=5) + result = s.ewm(com=5).mean() assert_almost_equal(result, [1.] * len(s)) s = Series([np.nan] * 2 + [1.] + [np.nan] * 2 + [1.]) - result = mom.ewma(s, com=5) + result = s.ewm(com=5).mean() assert_almost_equal(result, [np.nan] * 2 + [1.] * 4) # GH 7603 @@ -937,14 +945,11 @@ def simple_wma(s, w): (s3, False, True, [(1. - alpha)**2, np.nan, (1. 
- alpha) * alpha, alpha]), ]: expected = simple_wma(s, Series(w)) - result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na) - assert_series_equal(result, expected) result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() + assert_series_equal(result, expected) if ignore_na is False: # check that ignore_na defaults to False - result = mom.ewma(s, com=com, adjust=adjust) - assert_series_equal(result, expected) result = s.ewm(com=com, adjust=adjust).mean() assert_series_equal(result, expected) @@ -955,33 +960,30 @@ def test_ewmvol(self): self._check_ew(mom.ewmvol, name='vol') def test_ewma_span_com_args(self): - A = mom.ewma(self.arr, com=9.5) - B = mom.ewma(self.arr, span=20) - assert_almost_equal(A, B) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + A = mom.ewma(self.arr, com=9.5) + B = mom.ewma(self.arr, span=20) + assert_almost_equal(A, B) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20) - self.assertRaises(Exception, mom.ewma, self.arr) + self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20) + self.assertRaises(Exception, mom.ewma, self.arr) def test_ewma_halflife_arg(self): - A = mom.ewma(self.arr, com=13.932726172912965) - B = mom.ewma(self.arr, halflife=10.0) - assert_almost_equal(A, B) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + A = mom.ewma(self.arr, com=13.932726172912965) + B = mom.ewma(self.arr, halflife=10.0) + assert_almost_equal(A, B) - self.assertRaises(Exception, mom.ewma, self.arr, span=20, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr) + self.assertRaises(Exception, mom.ewma, self.arr, span=20, halflife=50) + self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, halflife=50) + self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, halflife=50) + self.assertRaises(Exception, mom.ewma, self.arr) def test_moment_preserve_series_name(self): # GH 10565 s = Series(np.arange(100), name='foo') - s2 = mom.rolling_mean(s, 30) - s3 = mom.rolling_sum(s, 20) - self.assertEqual(s2.name, 'foo') - self.assertEqual(s3.name, 'foo') - - s2 = s.rolling(30).mean() + s2 = s.rolling(30).sum() s3 = s.rolling(20).sum() self.assertEqual(s2.name, 'foo') self.assertEqual(s3.name, 'foo') @@ -991,11 +993,13 @@ def test_ew_empty_arrays(self): funcs = [mom.ewma, mom.ewmvol, mom.ewmvar] for f in funcs: - result = f(arr, 3) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = f(arr, 3) assert_almost_equal(result, arr) def _check_ew(self, func, name=None): - self._check_ew_ndarray(func, name=name) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_ew_ndarray(func, name=name) self._check_ew_structures(func, name=name) def _check_ew_ndarray(self, func, preserve_nan=False, name=None): @@ -1041,18 +1045,12 @@ def _check_ew_ndarray(self, func, preserve_nan=False, name=None): result2 = func(np.arange(50), span=10) self.assertEqual(result2.dtype, np.float_) - def _check_ew_structures(self, func, name=None): - series_result = func(self.series, com=10) + def _check_ew_structures(self, func, name): + series_result = getattr(self.series.ewm(com=10),name)() tm.assertIsInstance(series_result, Series) - if name is not None: - series_result = getattr(self.series.ewm(com=10),name)() - tm.assertIsInstance(series_result, Series) - frame_result = func(self.frame, 
com=10) + frame_result = getattr(self.frame.ewm(com=10),name)() self.assertEqual(type(frame_result), DataFrame) - if name is not None: - frame_result = getattr(self.frame.ewm(com=10),name)() - self.assertEqual(type(frame_result), DataFrame) # create the data only once as we are not setting it def _create_consistency_data(): @@ -1305,7 +1303,7 @@ def _variance_debiasing_factors(s, com, adjust, ignore_na): def _ewma(s, com, min_periods, adjust, ignore_na): weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) result = s.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method='ffill') - result[mom.expanding_count(s) < (max(min_periods, 1) if min_periods else 1)] = np.nan + result[s.expanding().count() < (max(min_periods, 1) if min_periods else 1)] = np.nan return result com = 3. @@ -1315,16 +1313,16 @@ def _ewma(s, com, min_periods, adjust, ignore_na): # test consistency between different ewm* moments self._test_moments_consistency( min_periods=min_periods, - count=mom.expanding_count, - mean=lambda x: mom.ewma(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).mean(), mock_mean=lambda x: _ewma(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), - corr=lambda x, y: mom.ewmcorr(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), - var_unbiased=lambda x: mom.ewmvar(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), - std_unbiased=lambda x: mom.ewmstd(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), - cov_unbiased=lambda x, y: mom.ewmcov(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), - var_biased=lambda x: mom.ewmvar(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), - std_biased=lambda x: mom.ewmstd(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), - cov_biased=lambda x, y: mom.ewmcov(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).corr(y), + var_unbiased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).var(bias=False), + std_unbiased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).std(bias=False), + cov_unbiased=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).cov(y, bias=False), + var_biased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).var(bias=True), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).cov(y, bias=True), var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na)) @slow @@ -1339,17 +1337,17 @@ def test_expanding_consistency(self): # test consistency between different expanding_* moments self._test_moments_consistency( min_periods=min_periods, - count=mom.expanding_count, - mean=lambda x: mom.expanding_mean(x, min_periods=min_periods), - mock_mean=lambda x: mom.expanding_sum(x, min_periods=min_periods) / mom.expanding_count(x), - corr=lambda x, y: mom.expanding_corr(x, y, 
min_periods=min_periods), - var_unbiased=lambda x: mom.expanding_var(x, min_periods=min_periods), - std_unbiased=lambda x: mom.expanding_std(x, min_periods=min_periods), - cov_unbiased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods), - var_biased=lambda x: mom.expanding_var(x, min_periods=min_periods, ddof=0), - std_biased=lambda x: mom.expanding_std(x, min_periods=min_periods, ddof=0), - cov_biased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods, ddof=0), - var_debiasing_factors=lambda x: mom.expanding_count(x) / (mom.expanding_count(x) - 1.).replace(0., np.nan) + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding(min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding(min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: x.expanding().count() / (x.expanding().count() - 1.).replace(0., np.nan) ) # test consistency between expanding_xyz() and either (a) expanding_apply of Series.xyz(), @@ -1362,122 +1360,119 @@ def test_expanding_consistency(self): if no_nans: functions = self.base_functions + self.no_nan_functions for (f, require_min_periods, name) in functions: - expanding_f = getattr(mom,'expanding_{0}'.format(name)) + expanding_f = getattr(x.expanding(min_periods=min_periods),name) if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): continue - if expanding_f is mom.expanding_count: - expanding_f_result = expanding_f(x) - expanding_apply_f_result = mom.expanding_apply(x, func=f, min_periods=0) + if name == 'count': + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding(min_periods=0).apply(func=f) else: - if expanding_f in [mom.expanding_cov, mom.expanding_corr]: - expanding_f_result = expanding_f(x, min_periods=min_periods, pairwise=False) + if name in ['cov','corr']: + expanding_f_result = expanding_f(pairwise=False) else: - expanding_f_result = expanding_f(x, min_periods=min_periods) - expanding_apply_f_result = mom.expanding_apply(x, func=f, min_periods=min_periods) + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(func=f) if not tm._incompat_bottleneck_version(name): assert_equal(expanding_f_result, expanding_apply_f_result) - if (expanding_f in [mom.expanding_cov, mom.expanding_corr]) and isinstance(x, DataFrame): + if (name in ['cov','corr']) and isinstance(x, DataFrame): # test pairwise=True - expanding_f_result = expanding_f(x, x, min_periods=min_periods, pairwise=True) + expanding_f_result = expanding_f(x, pairwise=True) expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) for i, _ in enumerate(x.columns): for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = expanding_f(x.iloc[:, i], x.iloc[:, j], min_periods=min_periods) + expected.iloc[:, i, j] = getattr(x.iloc[:, i].expanding(min_periods=min_periods),name)(x.iloc[:, j]) assert_panel_equal(expanding_f_result, expected) @slow def test_rolling_consistency(self): - for window in [1, 2, 3, 10, 
20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: + # suppress warnings about empty slices, as we are deliberately testing with empty/0-length Series/DataFrames + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: mom.rolling_count(x, window=window, center=center), - mean=lambda x: mom.rolling_mean(x, window=window, min_periods=min_periods, center=center), - mock_mean=lambda x: mom.rolling_sum(x, window=window, min_periods=min_periods, center=center).divide( - mom.rolling_count(x, window=window, center=center)), - corr=lambda x, y: mom.rolling_corr(x, y, window=window, min_periods=min_periods, center=center), - var_unbiased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center), - std_unbiased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center), - cov_unbiased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center), - var_biased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center, ddof=0), - std_biased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center, ddof=0), - cov_biased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center, ddof=0), - var_debiasing_factors=lambda x: mom.rolling_count(x, window=window, center=center).divide( - (mom.rolling_count(x, window=window, center=center) - 1.).replace(0., np.nan)), - ) - - # test consistency between rolling_xyz() and either (a) rolling_apply of Series.xyz(), - # or (b) rolling_apply of np.nanxyz() - for (x, is_constant, no_nans) in self.data: - - assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr(mom,'rolling_{0}'.format(name)) - - if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): - continue - - if rolling_f is mom.rolling_count: - rolling_f_result = rolling_f(x, window=window, center=center) - rolling_apply_f_result = mom.rolling_apply(x, window=window, func=f, - min_periods=0, center=center) - else: - if rolling_f in [mom.rolling_cov, mom.rolling_corr]: - rolling_f_result = rolling_f(x, window=window, min_periods=min_periods, center=center, pairwise=False) + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.rolling(window=window, center=center).count(), + mean=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).mean(), + mock_mean=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).sum().divide( + x.rolling(window=window, min_periods=min_periods, center=center).count()), + corr=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).corr(y), + var_unbiased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).var(), + std_unbiased=lambda x: 
x.rolling(window=window, min_periods=min_periods, center=center).std(), + cov_unbiased=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).cov(y), + var_biased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).var(ddof=0), + std_biased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).std(ddof=0), + cov_biased=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).cov(y, ddof=0), + var_debiasing_factors=lambda x: x.rolling(window=window, center=center).count().divide( + (x.rolling(window=window, center=center).count() - 1.).replace(0., np.nan)), + ) + + # test consistency between rolling_xyz() and either (a) rolling_apply of Series.xyz(), + # or (b) rolling_apply of np.nanxyz() + for (x, is_constant, no_nans) in self.data: + + assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr(x.rolling(window=window, center=center, min_periods=min_periods),name) + + if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling(window=window, + min_periods=0, center=center).apply(func=f) else: - rolling_f_result = rolling_f(x, window=window, min_periods=min_periods, center=center) - rolling_apply_f_result = mom.rolling_apply(x, window=window, func=f, - min_periods=min_periods, center=center) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, rolling_apply_f_result) - - if (rolling_f in [mom.rolling_cov, mom.rolling_corr]) and isinstance(x, DataFrame): - # test pairwise=True - rolling_f_result = rolling_f(x, x, window=window, min_periods=min_periods, - center=center, pairwise=True) - expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = rolling_f(x.iloc[:, i], x.iloc[:, j], - window=window, min_periods=min_periods, center=center) - assert_panel_equal(rolling_f_result, expected) + if name in ['cov','corr']: + rolling_f_result = rolling_f(pairwise=False) + else: + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling(window=window, + min_periods=min_periods, center=center).apply(func=f) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, rolling_apply_f_result) + + if (name in ['cov','corr']) and isinstance(x, DataFrame): + # test pairwise=True + rolling_f_result = rolling_f(x, pairwise=True) + expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) + for i, _ in enumerate(x.columns): + for j, _ in enumerate(x.columns): + expected.iloc[:, i, j] = getattr(x.iloc[:, i].rolling( + window=window, min_periods=min_periods, center=center),name)(x.iloc[:, j]) + assert_panel_equal(rolling_f_result, expected) # binary moments def test_rolling_cov(self): A = self.series B = A + randn(len(A)) - result = mom.rolling_cov(A, B, 50, min_periods=25) - assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) - result = A.rolling(window=50, min_periods=25).cov(B) assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) def test_rolling_cov_pairwise(self): - self._check_pairwise_moment(mom.rolling_cov, window=10, min_periods=5, name='cov') + 
self._check_pairwise_moment('rolling','cov', window=10, min_periods=5) def test_rolling_corr(self): A = self.series B = A + randn(len(A)) - result = mom.rolling_corr(A, B, 50, min_periods=25) - assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) - result = A.rolling(window=50, min_periods=25).corr(B) assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) @@ -1487,18 +1482,16 @@ def test_rolling_corr(self): a[:5] = np.nan b[:10] = np.nan - result = mom.rolling_corr(a, b, len(a), min_periods=1) - assert_almost_equal(result[-1], a.corr(b)) - result = a.rolling(window=len(a), min_periods=1).corr(b) assert_almost_equal(result[-1], a.corr(b)) def test_rolling_corr_pairwise(self): - self._check_pairwise_moment(mom.rolling_corr, window=10, min_periods=5, name='corr') + self._check_pairwise_moment('rolling', 'corr', window=10, min_periods=5) + + def _check_pairwise_moment(self, dispatch, name, **kwargs): - def _check_pairwise_moment(self, func, name=None, **kwargs): def get_result(obj, obj2=None): - return func(obj, obj2, **kwargs) + return getattr(getattr(obj,dispatch)(**kwargs),name)(obj2) panel = get_result(self.frame) actual = panel.ix[:, 1, 5] @@ -1506,12 +1499,6 @@ def get_result(obj, obj2=None): tm.assert_series_equal(actual, expected, check_names=False) self.assertEqual(actual.name, 5) - if name is not None: - panel = getattr(self.frame.rolling(**kwargs),name)() - actual = panel.ix[:, 1, 5] - tm.assert_series_equal(actual, expected, check_names=False) - self.assertEqual(actual.name, 5) - def test_flex_binary_moment(self): # GH3155 # don't blow the stack @@ -1529,13 +1516,13 @@ def test_corr_sanity(self): [ 0.78369152, 0.63919667]]) ) - res = mom.rolling_corr(df[0],df[1],5,center=True) + res = df[0].rolling(5,center=True).corr(df[1]) self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res])) # and some fuzzing for i in range(10): df = DataFrame(np.random.rand(30,2)) - res = mom.rolling_corr(df[0],df[1],5,center=True) + res = df[0].rolling(5,center=True).corr(df[1]) try: self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res])) except: @@ -1546,9 +1533,9 @@ def test_flex_binary_frame(self): def _check(method): series = self.frame[1] - res = method(series, self.frame, 10) - res2 = method(self.frame, series, 10) - exp = self.frame.apply(lambda x: method(series, x, 10)) + res = getattr(series.rolling(window=10),method)(self.frame) + res2 = getattr(self.frame.rolling(window=10),method)(series) + exp = self.frame.apply(lambda x: getattr(series.rolling(window=10),method)(x)) tm.assert_frame_equal(res, exp) tm.assert_frame_equal(res2, exp) @@ -1556,28 +1543,32 @@ def _check(method): frame2 = self.frame.copy() frame2.values[:] = np.random.randn(*frame2.shape) - res3 = method(self.frame, frame2, 10) - exp = DataFrame(dict((k, method(self.frame[k], frame2[k], 10)) + res3 = getattr(self.frame.rolling(window=10),method)(frame2) + exp = DataFrame(dict((k, getattr(self.frame[k].rolling(window=10),method)(frame2[k])) for k in self.frame)) tm.assert_frame_equal(res3, exp) - methods = [mom.rolling_corr, mom.rolling_cov] + methods = ['corr','cov'] for meth in methods: _check(meth) def test_ewmcov(self): - self._check_binary_ew(mom.ewmcov) + self._check_binary_ew('cov') def test_ewmcov_pairwise(self): - self._check_pairwise_moment(mom.ewmcov, span=10, min_periods=5) + self._check_pairwise_moment('ewm','cov', span=10, min_periods=5) def test_ewmcorr(self): - self._check_binary_ew(mom.ewmcorr) + self._check_binary_ew('corr') def test_ewmcorr_pairwise(self): - 
self._check_pairwise_moment(mom.ewmcorr, span=10, min_periods=5)
+        self._check_pairwise_moment('ewm', 'corr', span=10, min_periods=5)
+
+    def _check_binary_ew(self, name):
+
+        def func(A, B, com, **kwargs):
+            return getattr(A.ewm(com, **kwargs), name)(B)

-    def _check_binary_ew(self, func):
         A = Series(randn(50), index=np.arange(50))
         B = A[2:] + randn(48)
@@ -1607,7 +1598,7 @@ def _check_binary_ew(self, func):

     def test_expanding_apply(self):
         ser = Series([])
-        assert_series_equal(ser, mom.expanding_apply(ser, lambda x: x.mean()))
+        assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean()))

         def expanding_mean(x, min_periods=1, freq=None):
             return mom.expanding_apply(x,
@@ -1618,7 +1609,7 @@ def expanding_mean(x, min_periods=1, freq=None):

         # GH 8080
         s = Series([None, None, None])
-        result = mom.expanding_apply(s, lambda x: len(x), min_periods=0)
+        result = s.expanding(min_periods=0).apply(lambda x: len(x))
         expected = Series([1., 2., 3.])
         assert_series_equal(result, expected)

@@ -1628,39 +1619,34 @@ def mean_w_arg(x, const):

         df = DataFrame(np.random.rand(20, 3))

-        expected = mom.expanding_apply(df, np.mean) + 20.
+        expected = df.expanding().apply(np.mean) + 20.

-        assert_frame_equal(mom.expanding_apply(df, mean_w_arg, args=(20,)),
-                           expected)
-        assert_frame_equal(mom.expanding_apply(df, mean_w_arg,
-                                               kwargs={'const' : 20}),
+        assert_frame_equal(df.expanding().apply(mean_w_arg, args=(20,)),
                            expected)
+        assert_frame_equal(df.expanding().apply(mean_w_arg,
+                                                kwargs={'const' : 20}),
+                           expected)

     def test_expanding_corr(self):
         A = self.series.dropna()
         B = (A + randn(len(A)))[:-5]

-        result = mom.expanding_corr(A, B)
+        result = A.expanding().corr(B)

-        rolling_result = mom.rolling_corr(A, B, len(A), min_periods=1)
+        rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)

         assert_almost_equal(rolling_result, result)

     def test_expanding_count(self):
-        result = mom.expanding_count(self.series)
-        assert_almost_equal(result, mom.rolling_count(self.series,
-                                                      len(self.series)))
         result = self.series.expanding().count()
-        assert_almost_equal(result, mom.rolling_count(self.series,
-                                                      len(self.series)))
+        assert_almost_equal(result, self.series.rolling(window=len(self.series)).count())

     def test_expanding_quantile(self):
-        result = mom.expanding_quantile(self.series, 0.5)
+        result = self.series.expanding().quantile(0.5)

-        rolling_result = mom.rolling_quantile(self.series,
-                                              len(self.series),
-                                              0.5, min_periods=1)
+        rolling_result = self.series.rolling(
+            window=len(self.series), min_periods=1).quantile(0.5)

         assert_almost_equal(result, rolling_result)

@@ -1668,9 +1654,9 @@ def test_expanding_cov(self):
         A = self.series
         B = (A + randn(len(A)))[:-5]

-        result = mom.expanding_cov(A, B)
+        result = A.expanding().cov(B)

-        rolling_result = mom.rolling_cov(A, B, len(A), min_periods=1)
+        rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)

         assert_almost_equal(rolling_result, result)

@@ -1678,19 +1664,17 @@ def test_expanding_max(self):
         self._check_expanding(mom.expanding_max, np.max, preserve_nan=False)

     def test_expanding_cov_pairwise(self):
-        result = mom.expanding_cov(self.frame)
+        result = self.frame.expanding().cov()

-        rolling_result = mom.rolling_cov(self.frame, len(self.frame),
-                                         min_periods=1)
+        rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).cov()

         for i in result.items:
             assert_almost_equal(result[i], rolling_result[i])

     def test_expanding_corr_pairwise(self):
-        result = mom.expanding_corr(self.frame)
+        result = self.frame.expanding().corr()

-        rolling_result =
mom.rolling_corr(self.frame, len(self.frame), - min_periods=1) + rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() for i in result.items: assert_almost_equal(result[i], rolling_result[i]) @@ -1699,17 +1683,17 @@ def test_expanding_cov_diff_index(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.expanding_cov(s1, s2) + result = s1.expanding().cov(s2) expected = Series([None, None, 2.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.expanding_cov(s1, s2a) + result = s1.expanding().cov(s2a) assert_series_equal(result, expected) s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) - result = mom.expanding_cov(s1, s2) + result = s1.expanding().cov(s2) expected = Series([None, None, None, 4.5]) assert_series_equal(result, expected) @@ -1717,17 +1701,17 @@ def test_expanding_corr_diff_index(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.expanding_corr(s1, s2) + result = s1.expanding().corr(s2) expected = Series([None, None, 1.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.expanding_corr(s1, s2a) + result = s1.expanding().corr(s2a) assert_series_equal(result, expected) s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) - result = mom.expanding_corr(s1, s2) + result = s1.expanding().corr(s2) expected = Series([None, None, None, 1.]) assert_series_equal(result, expected) @@ -1735,24 +1719,24 @@ def test_rolling_cov_diff_length(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.rolling_cov(s1, s2, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).cov(s2) expected = Series([None, None, 2.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.rolling_cov(s1, s2a, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).cov(s2a) assert_series_equal(result, expected) def test_rolling_corr_diff_length(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.rolling_corr(s1, s2, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).corr(s2) expected = Series([None, None, 1.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.rolling_corr(s1, s2a, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).corr(s2a) assert_series_equal(result, expected) def test_rolling_functions_window_non_shrinkage(self): @@ -1763,20 +1747,20 @@ def test_rolling_functions_window_non_shrinkage(self): df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) df_expected_panel = Panel(items=df.index, major_axis=df.columns, minor_axis=df.columns) - functions = [lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_max(x, window=10, min_periods=5), - lambda x: mom.rolling_min(x, window=10, min_periods=5), - lambda x: mom.rolling_sum(x, window=10, min_periods=5), - lambda x: mom.rolling_mean(x, window=10, min_periods=5), - lambda x: mom.rolling_std(x, window=10, min_periods=5), - lambda x: mom.rolling_var(x, window=10, min_periods=5), - lambda x: mom.rolling_skew(x, window=10, min_periods=5), - lambda x: mom.rolling_kurt(x, 
window=10, min_periods=5), - lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5), - lambda x: mom.rolling_median(x, window=10, min_periods=5), - lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5), - lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5), + functions = [lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).max(), + lambda x: x.rolling(window=10, min_periods=5).min(), + lambda x: x.rolling(window=10, min_periods=5).sum(), + lambda x: x.rolling(window=10, min_periods=5).mean(), + lambda x: x.rolling(window=10, min_periods=5).std(), + lambda x: x.rolling(window=10, min_periods=5).var(), + lambda x: x.rolling(window=10, min_periods=5).skew(), + lambda x: x.rolling(window=10, min_periods=5).kurt(), + lambda x: x.rolling(window=10, min_periods=5).quantile(quantile=0.5), + lambda x: x.rolling(window=10, min_periods=5).median(), + lambda x: x.rolling(window=10, min_periods=5).apply(sum), + lambda x: x.rolling(win_type='boxcar', window=10, min_periods=5).mean(), ] for f in functions: try: @@ -1790,8 +1774,8 @@ def test_rolling_functions_window_non_shrinkage(self): # scipy needed for rolling_window continue - functions = [lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5), - lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5), + functions = [lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=True), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=True), ] for f in functions: df_result_panel = f(df) @@ -1809,35 +1793,35 @@ def test_moment_functions_zero_length(self): df2_expected = df2 df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns) - functions = [lambda x: mom.expanding_count(x), - lambda x: mom.expanding_cov(x, x, pairwise=False, min_periods=5), - lambda x: mom.expanding_corr(x, x, pairwise=False, min_periods=5), - lambda x: mom.expanding_max(x, min_periods=5), - lambda x: mom.expanding_min(x, min_periods=5), - lambda x: mom.expanding_sum(x, min_periods=5), - lambda x: mom.expanding_mean(x, min_periods=5), - lambda x: mom.expanding_std(x, min_periods=5), - lambda x: mom.expanding_var(x, min_periods=5), - lambda x: mom.expanding_skew(x, min_periods=5), - lambda x: mom.expanding_kurt(x, min_periods=5), - lambda x: mom.expanding_quantile(x, quantile=0.5, min_periods=5), - lambda x: mom.expanding_median(x, min_periods=5), - lambda x: mom.expanding_apply(x, func=sum, min_periods=5), - lambda x: mom.rolling_count(x, window=10), - lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_max(x, window=10, min_periods=5), - lambda x: mom.rolling_min(x, window=10, min_periods=5), - lambda x: mom.rolling_sum(x, window=10, min_periods=5), - lambda x: mom.rolling_mean(x, window=10, min_periods=5), - lambda x: mom.rolling_std(x, window=10, min_periods=5), - lambda x: mom.rolling_var(x, window=10, min_periods=5), - lambda x: mom.rolling_skew(x, window=10, min_periods=5), - lambda x: mom.rolling_kurt(x, window=10, min_periods=5), - lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5), - lambda x: mom.rolling_median(x, window=10, min_periods=5), - lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5), - lambda x: mom.rolling_window(x, 
win_type='boxcar', window=10, min_periods=5),
+        functions = [lambda x: x.expanding().count(),
+                     lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
+                     lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
+                     lambda x: x.expanding(min_periods=5).max(),
+                     lambda x: x.expanding(min_periods=5).min(),
+                     lambda x: x.expanding(min_periods=5).sum(),
+                     lambda x: x.expanding(min_periods=5).mean(),
+                     lambda x: x.expanding(min_periods=5).std(),
+                     lambda x: x.expanding(min_periods=5).var(),
+                     lambda x: x.expanding(min_periods=5).skew(),
+                     lambda x: x.expanding(min_periods=5).kurt(),
+                     lambda x: x.expanding(min_periods=5).quantile(0.5),
+                     lambda x: x.expanding(min_periods=5).median(),
+                     lambda x: x.expanding(min_periods=5).apply(sum),
+                     lambda x: x.rolling(window=10).count(),
+                     lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
+                     lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
+                     lambda x: x.rolling(window=10, min_periods=5).max(),
+                     lambda x: x.rolling(window=10, min_periods=5).min(),
+                     lambda x: x.rolling(window=10, min_periods=5).sum(),
+                     lambda x: x.rolling(window=10, min_periods=5).mean(),
+                     lambda x: x.rolling(window=10, min_periods=5).std(),
+                     lambda x: x.rolling(window=10, min_periods=5).var(),
+                     lambda x: x.rolling(window=10, min_periods=5).skew(),
+                     lambda x: x.rolling(window=10, min_periods=5).kurt(),
+                     lambda x: x.rolling(window=10, min_periods=5).quantile(0.5),
+                     lambda x: x.rolling(window=10, min_periods=5).median(),
+                     lambda x: x.rolling(window=10, min_periods=5).apply(sum),
+                     lambda x: x.rolling(win_type='boxcar', window=10, min_periods=5).mean(),
                      ]
         for f in functions:
             try:
@@ -1854,10 +1838,10 @@ def test_moment_functions_zero_length(self):
                 # scipy needed for rolling_window
                 continue

-        functions = [lambda x: mom.expanding_cov(x, x, pairwise=True, min_periods=5),
-                     lambda x: mom.expanding_corr(x, x, pairwise=True, min_periods=5),
-                     lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5),
-                     lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5),
+        functions = [lambda x: x.expanding(min_periods=5).cov(x, pairwise=True),
+                     lambda x: x.expanding(min_periods=5).corr(x, pairwise=True),
+                     lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=True),
+                     lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=True),
                      ]
         for f in functions:
             df1_result_panel = f(df1)
@@ -1872,10 +1856,10 @@ def test_expanding_cov_pairwise_diff_length(self):
         df1a = DataFrame([[1,5], [3,9]], index=[0,2], columns=['A','B'])
         df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
         df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])
-        result1 = mom.expanding_cov(df1, df2, pairwise=True)[2]
-        result2 = mom.expanding_cov(df1, df2a, pairwise=True)[2]
-        result3 = mom.expanding_cov(df1a, df2, pairwise=True)[2]
-        result4 = mom.expanding_cov(df1a, df2a, pairwise=True)[2]
+        result1 = df1.expanding().cov(df2, pairwise=True)[2]
+        result2 = df1.expanding().cov(df2a, pairwise=True)[2]
+        result3 = df1a.expanding().cov(df2, pairwise=True)[2]
+        result4 = df1a.expanding().cov(df2a, pairwise=True)[2]
         expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A','B'], columns=['X','Y'])
         assert_frame_equal(result1, expected)
         assert_frame_equal(result2, expected)
@@ -1888,10 +1872,10 @@ def test_expanding_corr_pairwise_diff_length(self):
         df1a = DataFrame([[1,2], [3,4]], index=[0,2], columns=['A','B'])
         df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
         df2a = DataFrame([[5,6], [2,1]], index=[0,2],
columns=['X','Y']) - result1 = mom.expanding_corr(df1, df2, pairwise=True)[2] - result2 = mom.expanding_corr(df1, df2a, pairwise=True)[2] - result3 = mom.expanding_corr(df1a, df2, pairwise=True)[2] - result4 = mom.expanding_corr(df1a, df2a, pairwise=True)[2] + result1 = df1.expanding().corr(df2, pairwise=True)[2] + result2 = df1.expanding().corr(df2a, pairwise=True)[2] + result3 = df1a.expanding().corr(df2, pairwise=True)[2] + result4 = df1a.expanding().corr(df2a, pairwise=True)[2] expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A','B'], columns=['X','Y']) assert_frame_equal(result1, expected) assert_frame_equal(result2, expected) @@ -1931,12 +1915,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with itself, pairwise=True - for f in [lambda x: mom.expanding_cov(x, pairwise=True), - lambda x: mom.expanding_corr(x, pairwise=True), - lambda x: mom.rolling_cov(x, window=3, pairwise=True), - lambda x: mom.rolling_corr(x, window=3, pairwise=True), - lambda x: mom.ewmcov(x, com=3, pairwise=True), - lambda x: mom.ewmcorr(x, com=3, pairwise=True), + for f in [lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: x.ewm(com=3).corr(pairwise=True), ]: results = [f(df) for df in df1s] for (df, result) in zip(df1s, results): @@ -1948,12 +1932,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with itself, pairwise=False - for f in [lambda x: mom.expanding_cov(x, pairwise=False), - lambda x: mom.expanding_corr(x, pairwise=False), - lambda x: mom.rolling_cov(x, window=3, pairwise=False), - lambda x: mom.rolling_corr(x, window=3, pairwise=False), - lambda x: mom.ewmcov(x, com=3, pairwise=False), - lambda x: mom.ewmcorr(x, com=3, pairwise=False), + for f in [lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), ]: results = [f(df) for df in df1s] for (df, result) in zip(df1s, results): @@ -1964,12 +1948,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with another DataFrame, pairwise=True - for f in [lambda x, y: mom.expanding_cov(x, y, pairwise=True), - lambda x, y: mom.expanding_corr(x, y, pairwise=True), - lambda x, y: mom.rolling_cov(x, y, window=3, pairwise=True), - lambda x, y: mom.rolling_corr(x, y, window=3, pairwise=True), - lambda x, y: mom.ewmcov(x, y, com=3, pairwise=True), - lambda x, y: mom.ewmcorr(x, y, com=3, pairwise=True), + for f in [lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]: results = [f(df, df2) for df in df1s] for (df, result) in zip(df1s, results): @@ -1981,12 +1965,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with another DataFrame, pairwise=False - for f in [lambda x, y: mom.expanding_cov(x, 
y, pairwise=False), - lambda x, y: mom.expanding_corr(x, y, pairwise=False), - lambda x, y: mom.rolling_cov(x, y, window=3, pairwise=False), - lambda x, y: mom.rolling_corr(x, y, window=3, pairwise=False), - lambda x, y: mom.ewmcov(x, y, com=3, pairwise=False), - lambda x, y: mom.ewmcorr(x, y, com=3, pairwise=False), + for f in [lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]: results = [f(df, df2) if df.columns.is_unique else None for df in df1s] for (df, result) in zip(df1s, results): @@ -2000,12 +1984,12 @@ def test_pairwise_stats_column_names_order(self): tm.assertRaisesRegexp(ValueError, "'arg2' columns are not unique", f, df2, df) # DataFrame with a Series - for f in [lambda x, y: mom.expanding_cov(x, y), - lambda x, y: mom.expanding_corr(x, y), - lambda x, y: mom.rolling_cov(x, y, window=3), - lambda x, y: mom.rolling_corr(x, y, window=3), - lambda x, y: mom.ewmcov(x, y, com=3), - lambda x, y: mom.ewmcorr(x, y, com=3), + for f in [lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), ]: results = [f(df, s) for df in df1s] + [f(s, df) for df in df1s] for (df, result) in zip(df1s, results): @@ -2021,12 +2005,12 @@ def test_rolling_skew_edge_cases(self): # yields all NaN (0 variance) d = Series([1] * 5) - x = mom.rolling_skew(d, window=5) + x = d.rolling(window=5).skew() assert_series_equal(all_nan, x) # yields all NaN (window too small) d = Series(np.random.randn(5)) - x = mom.rolling_skew(d, window=2) + x = d.rolling(window=2).skew() assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 0.177994, 1.548824] @@ -2034,7 +2018,7 @@ def test_rolling_skew_edge_cases(self): 1.73508164, 0.41941401]) expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824]) - x = mom.rolling_skew(d, window=4) + x = d.rolling(window=4).skew() assert_series_equal(expected, x) def test_rolling_kurt_edge_cases(self): @@ -2043,12 +2027,12 @@ def test_rolling_kurt_edge_cases(self): # yields all NaN (0 variance) d = Series([1] * 5) - x = mom.rolling_kurt(d, window=5) + x = d.rolling(window=5).kurt() assert_series_equal(all_nan, x) # yields all NaN (window too small) d = Series(np.random.randn(5)) - x = mom.rolling_kurt(d, window=3) + x = d.rolling(window=3).kurt() assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 1.224307, 2.671499] @@ -2056,7 +2040,7 @@ def test_rolling_kurt_edge_cases(self): 1.73508164, 0.41941401]) expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499]) - x = mom.rolling_kurt(d, window=4) + x = d.rolling(window=4).kurt() assert_series_equal(expected, x) def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, @@ -2103,11 +2087,13 @@ def _check_expanding_structures(self, func): def _check_expanding(self, func, static_comp, has_min_periods=True, has_time_rule=True, preserve_nan=True): - self._check_expanding_ndarray(func, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - preserve_nan=preserve_nan) - self._check_expanding_structures(func) + with warnings.catch_warnings(record=True): + self._check_expanding_ndarray(func, static_comp, + has_min_periods=has_min_periods, + 
has_time_rule=has_time_rule, + preserve_nan=preserve_nan) + with warnings.catch_warnings(record=True): + self._check_expanding_structures(func) def test_rolling_max_gh6297(self): """Replicate result expected in GH #6297""" @@ -2124,7 +2110,7 @@ def test_rolling_max_gh6297(self): expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D') + x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) def test_rolling_max_how_resample(self): @@ -2143,14 +2129,14 @@ def test_rolling_max_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D') + x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D', how='median') + x = series.rolling(window=1, freq='D').max(how='median') assert_series_equal(expected, x) # Now specify mean (4+10+20)/3 @@ -2158,7 +2144,7 @@ def test_rolling_max_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, v], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D', how='mean') + x = series.rolling(window=1, freq='D').max(how='mean') assert_series_equal(expected, x) @@ -2178,7 +2164,7 @@ def test_rolling_min_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_min(series, window=1, freq='D') + x = series.rolling(window=1, freq='D').min() assert_series_equal(expected, x) def test_rolling_median_how_resample(self): @@ -2197,7 +2183,7 @@ def test_rolling_median_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_median(series, window=1, freq='D') + x = series.rolling(window=1, freq='D').median() assert_series_equal(expected, x) def test_rolling_median_memory_error(self): From 0bbe1103ae2546ff3006c196211679fac089c5bf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 25 Nov 2015 18:06:23 -0500 Subject: [PATCH 6/8] DOC: minor doc corrections --- doc/source/computation.rst | 28 ++- pandas/core/base.py | 8 +- pandas/core/groupby.py | 418 +++++++++++++++++++----------------- pandas/core/window.py | 194 +++++++++-------- pandas/tests/test_window.py | 25 ++- pandas/util/decorators.py | 3 +- 6 files changed, 379 insertions(+), 297 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index d07257bd2e588..c85ed267556de 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -220,7 +220,7 @@ and kurtosis. .. note:: - The API for window statistics is quite similar to the way one works with ``Groupby`` objects, see the documentation :ref:`here ` + The API for window statistics is quite similar to the way one works with ``GroupBy`` objects, see the documentation :ref:`here ` We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.EWM`. @@ -231,7 +231,7 @@ objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expan s = s.cumsum() s -These are created from methods on ``Series`` and ``DataFrames``. 
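As a minimal sketch of the deferred pattern described here (using the cumsum ``s`` built above; ``window=5`` and ``com=3`` are arbitrary illustrations), the method call returns a window object and nothing is computed until a statistic is requested::

    r = s.rolling(window=5)   # a Rolling object; no computation happens yet
    r.mean()                  # the rolling mean is evaluated here
    s.expanding().sum()       # Expanding and EWM objects follow the same pattern
    s.ewm(com=3).mean()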
+These are created from methods on ``Series`` and ``DataFrame``.

 .. ipython:: python

@@ -247,7 +247,7 @@ accept the following arguments:
 - ``freq``: optionally specify a :ref:`frequency string <timeseries.offset_aliases>`
   or :ref:`DateOffset <timeseries.offsets>` to pre-conform the data to.

-We can then call functions on these ``rolling`` objects. Which return like-indexed objects:
+We can then call methods on these ``rolling`` objects. These return like-indexed objects:

 .. ipython:: python

@@ -304,8 +304,6 @@ We provide a number of the common statistical functions:
     :meth:`~Rolling.apply`, Generic apply
     :meth:`~Rolling.cov`, Unbiased covariance (binary)
     :meth:`~Rolling.corr`, Correlation (binary)
-    :meth:`~Window.mean`, Moving window mean function
-    :meth:`~Window.sum`, Moving window sum function

 The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs
 generic rolling computations. The ``func`` argument should be a single function
@@ -323,9 +321,17 @@ compute the mean absolute deviation on a rolling basis:
 Rolling Windows
 ~~~~~~~~~~~~~~~

-The :meth:`~Window.mean`, and :meth:`~Window.sum` functions perform a generic rolling window computation
-on the input data. The weights used in the window are specified by the ``win_type``
-keyword. The list of recognized types are:
+Passing ``win_type`` to ``.rolling`` generates a generic rolling window computation that is weighted according to the ``win_type``.
+The following methods are available:
+
+.. csv-table::
+    :header: "Method", "Description"
+    :widths: 20, 80
+
+    :meth:`~Window.sum`, Sum of values
+    :meth:`~Window.mean`, Mean of values
+
+The weights used in the window are specified by the ``win_type`` keyword. The list of recognized types is:

 - ``boxcar``
 - ``triang``
@@ -484,9 +490,9 @@ We can aggregate by passing a function to the entire DataFrame, or select a Seri

     r['A'].aggregate(np.sum)

-    r['A','B'].aggregate(np.sum)
+    r[['A','B']].aggregate(np.sum)

-As you can see, the result of the aggregation will have the selection columns, or all
+As you can see, the result of the aggregation will have the selected columns, or all
 columns if none are selected.

 .. _stats.aggregate.multifunc:

@@ -531,7 +537,7 @@ columns of a DataFrame:
                                'B' : lambda x: np.std(x, ddof=1)})

 The function names can also be strings. In order for a string to be valid it
-must be either implemented on the Windowed object
+must be implemented on the Windowed object

..
ipython:: python diff --git a/pandas/core/base.py b/pandas/core/base.py index fafd6b7821dfe..84a127a46424c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -361,8 +361,8 @@ def _gotitem(self, key, ndim, subset=None): See also -------- -:func:`pandas.Series.%(name)s` -:func:`pandas.DataFrame.%(name)s` +`pandas.Series.%(name)s` +`pandas.DataFrame.%(name)s` """ def aggregate(self, func, *args, **kwargs): @@ -465,9 +465,9 @@ def _aggregate_multiple_funcs(self, arg, _level): # find a good name, this could be a function that we don't recognize name = self._is_cython_func(a) or a if not isinstance(name, compat.string_types): - name = getattr(a,name,a) + name = getattr(a,'name',a) if not isinstance(name, compat.string_types): - name = getattr(a,func_name,a) + name = getattr(a,'__name__',a) keys.append(name) except (TypeError, DataError): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 61fcf55af6d6a..ac07c9487fc15 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -22,6 +22,7 @@ from pandas.core.panel import Panel from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature, deprecate_kwarg) +from textwrap import dedent import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.common import(_possibly_downcast_to_dtype, isnull, @@ -39,15 +40,15 @@ _doc_template = """ -Returns -------- -same type as input + Returns + ------- + same type as input -See also --------- -:func:`pandas.Series.%(name)s` -:func:`pandas.DataFrame.%(name)s` -:func:`pandas.Panel.%(name)s` + See also + -------- + `pandas.Series.%(name)s` + `pandas.DataFrame.%(name)s` + `pandas.Panel.%(name)s` """ # special case to prevent duplicate plots when catching exceptions when @@ -629,43 +630,45 @@ def __iter__(self): @Substitution(name='groupby') @Appender(_doc_template) def apply(self, func, *args, **kwargs): - """Apply function and combine results together in an intelligent way. The -split-apply-combine combination rules attempt to be as common sense -based as possible. For example: - -case 1: -group DataFrame -apply aggregation function (f(chunk) -> Series) -yield DataFrame, with group axis having group labels - -case 2: -group DataFrame -apply transform function ((f(chunk) -> DataFrame with same indexes) -yield DataFrame with resulting chunks glued together - -case 3: -group Series -apply function with f(chunk) -> DataFrame -yield DataFrame with result of chunks glued together - -Parameters ----------- -func : function - -Notes ------ -See online documentation for full exposition on how to use apply. - -In the current implementation apply calls func twice on the -first group to decide whether it can take a fast or slow code -path. This can lead to unexpected behavior if func has -side-effects, as they will take effect twice for the first -group. - - -See also --------- -aggregate, transform""" + """ + Apply function and combine results together in an intelligent way. The + split-apply-combine combination rules attempt to be as common sense + based as possible. 
For example: + + case 1: + group DataFrame + apply aggregation function (f(chunk) -> Series) + yield DataFrame, with group axis having group labels + + case 2: + group DataFrame + apply transform function ((f(chunk) -> DataFrame with same indexes) + yield DataFrame with resulting chunks glued together + + case 3: + group Series + apply function with f(chunk) -> DataFrame + yield DataFrame with result of chunks glued together + + Parameters + ---------- + func : function + + Notes + ----- + See online documentation for full exposition on how to use apply. + + In the current implementation apply calls func twice on the + first group to decide whether it can take a fast or slow code + path. This can lead to unexpected behavior if func has + side-effects, as they will take effect twice for the first + group. + + + See also + -------- + aggregate, transform""" + func = self._is_builtin_func(func) @wraps(func) @@ -710,7 +713,8 @@ def count(self): @Substitution(name='groupby') @Appender(_doc_template) def mean(self): - """Compute mean of groups, excluding missing values + """ + Compute mean of groups, excluding missing values For multiple groupings, the result index will be a MultiIndex """ @@ -726,7 +730,8 @@ def mean(self): @Substitution(name='groupby') @Appender(_doc_template) def median(self): - """Compute median of groups, excluding missing values + """ + Compute median of groups, excluding missing values For multiple groupings, the result index will be a MultiIndex """ @@ -746,14 +751,16 @@ def f(x): @Substitution(name='groupby') @Appender(_doc_template) def std(self, ddof=1): - """Compute standard deviation of groups, excluding missing values + """ + Compute standard deviation of groups, excluding missing values -For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex -Parameters ----------- -ddof : integer, default 1 -degrees of freedom""" + Parameters + ---------- + ddof : integer, default 1 + degrees of freedom + """ # todo, implement at cython level? 
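        # a sketch of the identity this relies on: the group standard deviation
        # is the square root of the ddof-adjusted group variance, so for a
        # hypothetical DataFrameGroupBy ``g``, g.std(ddof=1) matches
        # np.sqrt(g.var(ddof=1)) group by group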
return np.sqrt(self.var(ddof=ddof)) @@ -761,14 +768,16 @@ def std(self, ddof=1): @Substitution(name='groupby') @Appender(_doc_template) def var(self, ddof=1): - """Compute variance of groups, excluding missing values + """ + Compute variance of groups, excluding missing values -For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex -Parameters ----------- -ddof : integer, default 1 -degrees of freedom""" + Parameters + ---------- + ddof : integer, default 1 + degrees of freedom + """ if ddof == 1: return self._cython_agg_general('var') @@ -780,14 +789,16 @@ def var(self, ddof=1): @Substitution(name='groupby') @Appender(_doc_template) def sem(self, ddof=1): - """Compute standard error of the mean of groups, excluding missing values + """ + Compute standard error of the mean of groups, excluding missing values -For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex -Parameters ----------- -ddof : integer, default 1 -degrees of freedom""" + Parameters + ---------- + ddof : integer, default 1 + degrees of freedom + """ return self.std(ddof=ddof)/np.sqrt(self.count()) @@ -809,8 +820,10 @@ def size(self): @Substitution(name='groupby') @Appender(_doc_template) def ohlc(self): - """Compute sum of values, excluding missing values -For multiple groupings, the result index will be a MultiIndex""" + """ + Compute sum of values, excluding missing values + For multiple groupings, the result index will be a MultiIndex + """ return self._apply_to_column_groupbys( lambda x: x._cython_agg_general('ohlc')) @@ -818,46 +831,48 @@ def ohlc(self): @Substitution(name='groupby') @Appender(_doc_template) def nth(self, n, dropna=None): - """Take the nth row from each group if n is an int, or a subset of rows -if n is a list of ints. - -If dropna, will take the nth non-null row, dropna is either -Truthy (if a Series) or 'all', 'any' (if a DataFrame); this is equivalent -to calling dropna(how=dropna) before the groupby. - -Parameters ----------- -n : int or list of ints - a single nth value for the row or a list of nth values -dropna : None or str, optional - apply the specified dropna operation before counting which row is - the nth row. Needs to be None, 'any' or 'all' - -Examples --------- ->>> df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) ->>> g = df.groupby('A') ->>> g.nth(0) - A B - 0 1 NaN - 2 5 6 ->>> g.nth(1) - A B - 1 1 4 ->>> g.nth(-1) - A B - 1 1 4 - 2 5 6 ->>> g.nth(0, dropna='any') - B - A - 1 4 - 5 6 ->>> g.nth(1, dropna='any') # NaNs denote group exhausted when using dropna - B - A - 1 NaN - 5 NaN""" + """ + Take the nth row from each group if n is an int, or a subset of rows + if n is a list of ints. + + If dropna, will take the nth non-null row, dropna is either + Truthy (if a Series) or 'all', 'any' (if a DataFrame); this is equivalent + to calling dropna(how=dropna) before the groupby. + + Parameters + ---------- + n : int or list of ints + a single nth value for the row or a list of nth values + dropna : None or str, optional + apply the specified dropna operation before counting which row is + the nth row. 
Needs to be None, 'any' or 'all' + + Examples + -------- + >>> df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + >>> g = df.groupby('A') + >>> g.nth(0) + A B + 0 1 NaN + 2 5 6 + >>> g.nth(1) + A B + 1 1 4 + >>> g.nth(-1) + A B + 1 1 4 + 2 5 6 + >>> g.nth(0, dropna='any') + B + A + 1 4 + 5 6 + >>> g.nth(1, dropna='any') # NaNs denote group exhausted when using dropna + B + A + 1 NaN + 5 NaN + """ if isinstance(n, int): nth_values = [n] @@ -953,46 +968,48 @@ def nth(self, n, dropna=None): @Substitution(name='groupby') @Appender(_doc_template) def cumcount(self, ascending=True): - """Number each item in each group from 0 to the length of that group - 1. - -Essentially this is equivalent to - ->>> self.apply(lambda x: Series(np.arange(len(x)), x.index)) - -Parameters ----------- -ascending : bool, default True - If False, number in reverse, from length of group - 1 to 0. - -Examples --------- - ->>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], - ... columns=['A']) ->>> df - A - 0 a - 1 a - 2 a - 3 b - 4 b - 5 a ->>> df.groupby('A').cumcount() - 0 0 - 1 1 - 2 2 - 3 0 - 4 1 - 5 3 - dtype: int64 ->>> df.groupby('A').cumcount(ascending=False) - 0 3 - 1 2 - 2 1 - 3 1 - 4 0 - 5 0 - dtype: int64""" + """ + Number each item in each group from 0 to the length of that group - 1. + + Essentially this is equivalent to + + >>> self.apply(lambda x: Series(np.arange(len(x)), x.index)) + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + + Examples + -------- + + >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], + ... columns=['A']) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').cumcount() + 0 0 + 1 1 + 2 2 + 3 0 + 4 1 + 5 3 + dtype: int64 + >>> df.groupby('A').cumcount(ascending=False) + 0 3 + 1 2 + 2 1 + 3 1 + 4 0 + 5 0 + dtype: int64 + """ self._set_selection_from_grouper() @@ -1021,14 +1038,16 @@ def cumsum(self, axis=0): @Substitution(name='groupby') @Appender(_doc_template) def shift(self, periods=1, freq=None, axis=0): - """Shift each group by periods observations + """ + Shift each group by periods observations -Parameters ----------- -periods : integer, default 1 - number of periods to shift -freq : frequency string -axis : axis to shift, default 0""" + Parameters + ---------- + periods : integer, default 1 + number of periods to shift + freq : frequency string + axis : axis to shift, default 0 + """ if freq is not None or axis != 0: return self.apply(lambda x: x.shift(periods, freq, axis)) @@ -1047,24 +1066,27 @@ def shift(self, periods=1, freq=None, axis=0): @Substitution(name='groupby') @Appender(_doc_template) def head(self, n=5): - """Returns first n rows of each group. - -Essentially equivalent to ``.apply(lambda x: x.head(n))``, -except ignores as_index flag. - -Examples --------- - ->>> df = DataFrame([[1, 2], [1, 4], [5, 6]], - columns=['A', 'B']) ->>> df.groupby('A', as_index=False).head(1) - A B - 0 1 2 - 2 5 6 ->>> df.groupby('A').head(1) - A B - 0 1 2 - 2 5 6""" + """ + Returns first n rows of each group. + + Essentially equivalent to ``.apply(lambda x: x.head(n))``, + except ignores as_index flag. 
+ + Examples + -------- + + >>> df = DataFrame([[1, 2], [1, 4], [5, 6]], + columns=['A', 'B']) + >>> df.groupby('A', as_index=False).head(1) + A B + 0 1 2 + 2 5 6 + >>> df.groupby('A').head(1) + A B + 0 1 2 + 2 5 6 + """ + obj = self._selected_obj in_head = self._cumcount_array() < n head = obj[in_head] @@ -1073,24 +1095,27 @@ def head(self, n=5): @Substitution(name='groupby') @Appender(_doc_template) def tail(self, n=5): - """Returns last n rows of each group - -Essentially equivalent to ``.apply(lambda x: x.tail(n))``, -except ignores as_index flag. - -Examples --------- - ->>> df = DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], - columns=['A', 'B']) ->>> df.groupby('A').tail(1) - A B - 1 a 2 - 3 b 2 ->>> df.groupby('A').head(1) - A B - 0 a 1 - 2 b 1""" + """ + Returns last n rows of each group + + Essentially equivalent to ``.apply(lambda x: x.tail(n))``, + except ignores as_index flag. + + Examples + -------- + + >>> df = DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], + columns=['A', 'B']) + >>> df.groupby('A').tail(1) + A B + 1 a 2 + 3 b 2 + >>> df.groupby('A').head(1) + A B + 0 a 1 + 2 b 1 + """ + obj = self._selected_obj rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64') in_tail = self._cumcount_array(rng, ascending=False) > -n @@ -1098,10 +1123,13 @@ def tail(self, n=5): return tail def _cumcount_array(self, arr=None, ascending=True): - """arr is where cumcount gets its values from + """ + arr is where cumcount gets its values from - note: this is currently implementing sort=False (though the default is sort=True) - for groupby in general + Note + ---- + this is currently implementing sort=False (though the default is sort=True) + for groupby in general """ if arr is None: arr = np.arange(self.grouper._max_groupsize, dtype='int64') @@ -3379,8 +3407,8 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 @Substitution(name='groupby') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) @@ -3550,8 +3578,8 @@ def count(self): class PanelGroupBy(NDFrameGroupBy): @Substitution(name='groupby') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(PanelGroupBy, self).aggregate(arg, *args, **kwargs) diff --git a/pandas/core/window.py b/pandas/core/window.py index 09dc528f64cbe..003b2d7bd0451 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -18,6 +18,7 @@ import pandas.algos as algos from pandas import compat from pandas.util.decorators import Substitution, Appender +from textwrap import dedent _shared_docs = dict() _doc_template = """ @@ -28,8 +29,8 @@ See also -------- -:func:`pandas.Series.%(name)s` -:func:`pandas.DataFrame.%(name)s` +`pandas.Series.%(name)s` +`pandas.DataFrame.%(name)s` """ class _Window(PandasObject, SelectionMixin): @@ -257,21 +258,21 @@ class Window(_Window): The recognized window types are: - * ``boxcar`` - * ``triang`` - * ``blackman`` - * ``hamming`` - * ``bartlett`` - * ``parzen`` - * ``bohman`` - * ``blackmanharris`` - * ``nuttall`` - * ``barthann`` - * ``kaiser`` (needs beta) - * ``gaussian`` (needs std) - * ``general_gaussian`` (needs power, width) - * ``slepian`` (needs width). 
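A minimal usage sketch for these weighted windows (assuming scipy is installed, since the window functions come from ``scipy.signal``; ``ser`` stands for any numeric ``Series``, and window-specific parameters such as ``std`` or ``beta`` are passed through the aggregation call)::

    ser.rolling(window=5, win_type='gaussian').mean(std=0.5)
    ser.rolling(window=5, win_type='kaiser').sum(beta=14)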
- """ + * ``boxcar`` + * ``triang`` + * ``blackman`` + * ``hamming`` + * ``bartlett`` + * ``parzen`` + * ``bohman`` + * ``blackmanharris`` + * ``nuttall`` + * ``barthann`` + * ``kaiser`` (needs beta) + * ``gaussian`` (needs std) + * ``general_gaussian`` (needs power, width) + * ``slepian`` (needs width). +""" def _prep_window(self, **kwargs): """ provide validation for our window type, return the window """ @@ -340,7 +341,13 @@ def f(arg, *args, **kwargs): @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) def aggregate(self, arg, *args, **kwargs): - return super(Window, self).aggregate(arg, *args, **kwargs) + result, how = self._aggregate(arg, *args, **kwargs) + if result is None: + + # these must apply directly + result = arg(self) + + return result agg = aggregate @@ -451,13 +458,15 @@ def count(self): result[result.isnull()] = 0 return result - _shared_docs['apply'] = """%(name)s function apply + _shared_docs['apply'] = dedent(""" + %(name)s function apply -Parameters ----------- -func : function + Parameters + ---------- + func : function Must produce a single value from an ndarray input -*args and **kwargs are passed to the function""" + *args and **kwargs are passed to the function""") + def apply(self, func, args=(), kwargs={}): _level = kwargs.pop('_level',None) window = self._get_window() @@ -472,21 +481,25 @@ def f(arg, window, min_periods): def sum(self): return self._apply('roll_sum') - _shared_docs['max'] = """%(name)s maximum + _shared_docs['max'] = dedent(""" + %(name)s maximum + + Parameters + ---------- + how : string, default max + Method for down- or re-sampling""") -Parameters ----------- -how : string, default max - Method for down- or re-sampling""" def max(self, how='max'): return self._apply('roll_max', how=how) - _shared_docs['min'] = """%(name)s minimum + _shared_docs['min'] = dedent(""" + %(name)s minimum + + Parameters + ---------- + how : string, default min + Method for down- or re-sampling""") -Parameters ----------- -how : string, default min - Method for down- or re-sampling""" def min(self, how='min'): return self._apply('roll_min', how=how) @@ -494,22 +507,26 @@ def min(self, how='min'): def mean(self): return self._apply('roll_mean') - _shared_docs['median'] = """%(name)s median + _shared_docs['median'] = dedent(""" + %(name)s median + + Parameters + ---------- + how : string, default median + Method for down- or re-sampling""") -Parameters ----------- -how : string, default median - Method for down- or re-sampling""" def median(self, how='median'): return self._apply('roll_median_c', how=how) - _shared_docs['std'] = """%(name)s standard deviation + _shared_docs['std'] = dedent(""" + %(name)s standard deviation -Parameters ----------- -ddof : int, default 1 + Parameters + ---------- + ddof : int, default 1 Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements.""" + is ``N - ddof``, where ``N`` represents the number of elements.""") + def std(self, ddof=1): window = self._get_window() def f(arg, *args, **kwargs): @@ -518,13 +535,15 @@ def f(arg, *args, **kwargs): return self._apply(f, check_minp=_require_min_periods(1)) - _shared_docs['var'] = """%(name)s variance + _shared_docs['var'] = dedent(""" + %(name)s variance -Parameters ----------- -ddof : int, default 1 + Parameters + ---------- + ddof : int, default 1 Delta Degrees of Freedom. 
The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements.""" + is ``N - ddof``, where ``N`` represents the number of elements.""") + def var(self, ddof=1): return self._apply('roll_var', check_minp=_require_min_periods(1), @@ -540,12 +559,14 @@ def kurt(self): return self._apply('roll_kurt', check_minp=_require_min_periods(4)) - _shared_docs['quantile'] = """%(name)s quantile + _shared_docs['quantile'] = dedent(""" + %(name)s quantile + + Parameters + ---------- + quantile : float + 0 <= quantile <= 1""") -Parameters ----------- -quantile : float -0 <= quantile <= 1""" def quantile(self, quantile): window = self._get_window() def f(arg, *args, **kwargs): @@ -554,21 +575,23 @@ def f(arg, *args, **kwargs): return self._apply(f) - _shared_docs['cov'] = """%(name)s sample covariance - -Parameters ----------- -other : Series, DataFrame, or ndarray, optional - if not supplied then will default to self and produce pairwise output -pairwise : bool, default None - If False then only matching columns between self and other will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. -ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements.""" + _shared_docs['cov'] = dedent(""" + %(name)s sample covariance + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default None + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""") + def cov(self, other=None, pairwise=None, ddof=1): if other is None: other = self._selected_obj @@ -583,19 +606,20 @@ def _get_cov(X, Y): return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)) - _shared_docs['corr'] = """ -%(name)s sample correlation - -Parameters ----------- -other : Series, DataFrame, or ndarray, optional - if not supplied then will default to self and produce pairwise output -pairwise : bool, default None - If False then only matching columns between self and other will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used.""" + _shared_docs['corr'] = dedent(""" + %(name)s sample correlation + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default None + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. 
In the case of missing + elements, only complete pairwise observations will be used.""") + def corr(self, other=None, pairwise=None): if other is None: other = self._selected_obj @@ -625,8 +649,8 @@ class Rolling(_Rolling_and_Expanding): Parameters ---------- window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. + Size of the moving window. This is the number of observations used for + calculating the statistic. min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). @@ -884,10 +908,10 @@ def corr(self, other=None, pairwise=None): class EWM(_Rolling): """ - .. versionadded:: 0.18.0 - Provides exponential weighted functions + .. versionadded:: 0.18.0 + Parameters ---------- com : float. optional diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 5517ce967b864..1f1371597c693 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -52,9 +52,13 @@ def test_getitem(self): r = self.frame.rolling(window=5)[1] self.assertEqual(r._selected_obj.name,self.frame.columns[1]) + # technically this is allowed r = self.frame.rolling(window=5)[1,3] tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + r = self.frame.rolling(window=5)[[1,3]] + tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + def test_select_bad_cols(self): df = DataFrame([[1, 2]], columns=['A', 'B']) g = df.rolling(window=5) @@ -73,7 +77,7 @@ def test_attribute_access(self): tm.assert_series_equal(r.A.sum(),r['A'].sum()) self.assertRaises(AttributeError, lambda : r.F) - def tests_skip_nuiscance(self): + def tests_skip_nuisance(self): df = DataFrame({'A' : range(5), 'B' : range(5,10), 'C' : 'foo'}) @@ -168,6 +172,25 @@ def compare(result, expected): expected = pd.concat([a_sum,rcustom],axis=1) compare(result, expected) + def test_window_with_args(self): + + # make sure that we are aggregating window functions correctly with arg + + r = Series(np.random.randn(100)).rolling(window=10,min_periods=1,win_type='gaussian') + expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1) + expected.columns = ['',''] + result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=.01)]) + assert_frame_equal(result, expected) + + def a(x): + return x.mean(std=10) + def b(x): + return x.mean(std=0.01) + expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1) + expected.columns = ['a','b'] + result = r.aggregate([a,b]) + assert_frame_equal(result, expected) + class TestDeprecations(Base): """ test that we are catching deprecation warnings """ diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index a6aa5ff66576c..5c3cb573766d7 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -2,6 +2,7 @@ from pandas.lib import cache_readonly import sys import warnings +from textwrap import dedent from functools import wraps @@ -180,7 +181,7 @@ def __call__(self, func): func.__doc__ = func.__doc__ if func.__doc__ else '' self.addendum = self.addendum if self.addendum else '' docitems = [func.__doc__, self.addendum] - func.__doc__ = self.join.join(docitems) + func.__doc__ = dedent(self.join.join(docitems)) return func From 05eb20f3657c72bbf3caa61409e1c243ec76412f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 30 Nov 2015 07:14:07 -0500 Subject: [PATCH 7/8] DEPR: deprecate freq/how arguments to window functions --- doc/source/computation.rst | 11 ++--- doc/source/whatsnew/v0.18.0.txt | 6 ++- 
pandas/core/base.py | 9 +---
 pandas/core/window.py | 76 +++++++++++++++++++++------------
 pandas/tests/test_window.py | 55 ++++++++++++++----------
 5 files changed, 95 insertions(+), 62 deletions(-)

diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index c85ed267556de..a96c14a6e154f 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -207,7 +207,6 @@ Window Functions
    functions and are now deprecated and replaced by the corresponding method call.
    The deprecation warning will show the new syntax, see an example :ref:`here `
-
    You can view the previous documentation
    `here `__

@@ -244,8 +243,12 @@ accept the following arguments:

 - ``window``: size of moving window
 - ``min_periods``: threshold of non-null data points to require (otherwise
   result is NA)
-- ``freq``: optionally specify a :ref:`frequency string `
-  or :ref:`DateOffset ` to pre-conform the data to.
+
+.. warning::
+
+   The ``freq`` and ``how`` arguments were part of the API prior to the 0.18.0 changes; they are deprecated in the new API. You can simply resample the input prior to creating a window function.
+
+   For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value over a rolling 5-day window, one could use ``s.resample('D',how='max').rolling(window=5).max()``, which first resamples the data to daily data and then applies a rolling 5-day window.

 We can then call methods on these ``rolling`` objects. These return like-indexed objects:

@@ -604,8 +607,6 @@ all accept are:

 - ``min_periods``: threshold of non-null data points to require. Defaults to
   minimum needed to compute statistic. No ``NaNs`` will be output once
   ``min_periods`` non-null data points have been seen.
-- ``freq``: optionally specify a :ref:`frequency string `
-  or :ref:`DateOffset ` to pre-conform the data to.

 .. note::

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 2a568582dc7c3..96618ffbc36cb 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -13,6 +13,8 @@ users upgrade to this version.

 Highlights include:

+- Window functions are now methods on ``.groupby``-like objects, see :ref:`here `.
+
 Check the :ref:`API Changes ` and :ref:`deprecations ` before updating.

 .. contents:: What's new in v0.18.0
@@ -212,7 +214,7 @@ Deprecations

 .. _whatsnew_0180.window_deprecations:

-- Function ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` are deprecated and replaced by the corresponding method call. Note that
+- The functions ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` are deprecated and replaced by the corresponding method call. Note that
   the new suggested syntax includes all of the arguments (even if default) (:issue:`11603`)

 .. code-block:: python
@@ -237,6 +239,8 @@ Deprecations
     2    0.5
     dtype: float64

+- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. (:issue:`11603`)
+
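  A minimal sketch of the suggested migration (``s`` is a hypothetical datetime-indexed series):

  .. code-block:: python

     import numpy as np
     import pandas as pd

     s = pd.Series(np.random.randn(10),
                   index=pd.date_range('2000-01-01', periods=10, freq='12H'))

     # deprecated: pre-conforming the data inside the window function
     # s.rolling(window=5, freq='D').max()

     # preferred: resample explicitly, then apply the rolling window
     s.resample('D', how='max').rolling(window=5).max()

 .. 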
_whatsnew_0180.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/base.py b/pandas/core/base.py index 84a127a46424c..42e04d5a03696 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -462,13 +462,8 @@ def _aggregate_multiple_funcs(self, arg, _level): colg = self._gotitem(obj.name, ndim=1, subset=obj) results.append(colg.aggregate(a)) - # find a good name, this could be a function that we don't recognize - name = self._is_cython_func(a) or a - if not isinstance(name, compat.string_types): - name = getattr(a,'name',a) - if not isinstance(name, compat.string_types): - name = getattr(a,'__name__',a) - + # make sure we find a good name + name = com._get_callable_name(a) or a keys.append(name) except (TypeError, DataError): pass diff --git a/pandas/core/window.py b/pandas/core/window.py index 003b2d7bd0451..2e9b4922a039a 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -7,6 +7,7 @@ """ from __future__ import division +import warnings import numpy as np from functools import wraps from collections import defaultdict @@ -39,6 +40,12 @@ class _Window(PandasObject, SelectionMixin): def __init__(self, obj, window=None, min_periods=None, freq=None, center=False, win_type=None, axis=0): + + if freq is not None: + warnings.warn("The freq kw is deprecated and will be removed in a future version. You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=3) + self.blocks = [] self.obj = obj self.window = window @@ -298,7 +305,7 @@ def _apply_window(self, mean=True, how=None, **kwargs): ---------- mean : boolean, default True If True computes weighted mean, else weighted sum - how : string, default to None + how : string, default to None (DEPRECATED) how to resample Returns @@ -378,7 +385,7 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None, **kw window : int/array, default to _get_window() center : boolean, default to self.center check_minp : function, default to _use_window - how : string, default to None + how : string, default to None (DEPRECATED) how to resample Returns @@ -486,10 +493,15 @@ def sum(self): Parameters ---------- - how : string, default max + how : string, default 'max' (DEPRECATED) Method for down- or re-sampling""") - - def max(self, how='max'): + def max(self, how=None): + if how is not None: + warnings.warn("The how kw argument is deprecated and removed in a future version. You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=3) + else: + how = 'max' return self._apply('roll_max', how=how) _shared_docs['min'] = dedent(""" @@ -497,10 +509,15 @@ def max(self, how='max'): Parameters ---------- - how : string, default min + how : string, default 'min' (DEPRECATED) Method for down- or re-sampling""") - - def min(self, how='min'): + def min(self, how=None): + if how is not None: + warnings.warn("The how kw argument is deprecated and removed in a future version. You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=3) + else: + how = 'min' return self._apply('roll_min', how=how) _shared_docs['mean'] = """%(name)s mean""" @@ -512,10 +529,15 @@ def mean(self): Parameters ---------- - how : string, default median + how : string, default 'median' (DEPRECATED) Method for down- or re-sampling""") - - def median(self, how='median'): + def median(self, how=None): + if how is not None: + warnings.warn("The how kw argument is deprecated and removed in a future version. 
You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=3) + else: + how = 'median' return self._apply('roll_median_c', how=how) _shared_docs['std'] = dedent(""" @@ -654,7 +676,7 @@ class Rolling(_Rolling_and_Expanding): min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). - freq : string or DateOffset object, optional (default None) + freq : string or DateOffset object, optional (default None) (DEPRECATED) Frequency to conform the data to before computing the statistic. Specified as a frequency string or DateOffset object. center : boolean, default False @@ -704,14 +726,14 @@ def sum(self): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['max']) - def max(self, how='max'): - return super(Rolling, self).max(how=how) + def max(self, **kwargs): + return super(Rolling, self).max(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['min']) - def min(self, how='min'): - return super(Rolling, self).min(how=how) + def min(self, **kwargs): + return super(Rolling, self).min(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @@ -722,8 +744,8 @@ def mean(self): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['median']) - def median(self, how='median'): - return super(Rolling, self).median(how=how) + def median(self, **kwargs): + return super(Rolling, self).median(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @@ -778,7 +800,7 @@ class Expanding(_Rolling_and_Expanding): min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). - freq : string or DateOffset object, optional (default None) + freq : string or DateOffset object, optional (default None) (DEPRECATED) Frequency to conform the data to before computing the statistic. Specified as a frequency string or DateOffset object. center : boolean, default False @@ -843,14 +865,14 @@ def sum(self): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['max']) - def max(self, how='max'): - return super(Expanding, self).max(how=how) + def max(self, **kwargs): + return super(Expanding, self).max(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['min']) - def min(self, how='min'): - return super(Expanding, self).min(how=how) + def min(self, **kwargs): + return super(Expanding, self).min(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -861,8 +883,8 @@ def mean(self): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['median']) - def median(self, how='median'): - return super(Expanding, self).median(how=how) + def median(self, **kwargs): + return super(Expanding, self).median(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -923,7 +945,7 @@ class EWM(_Rolling): min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). 
- freq : None or string alias / date offset object, default=None + freq : None or string alias / date offset object, default=None (DEPRECATED) Frequency to conform to before computing statistic adjust : boolean, default True Divide by decaying adjustment factor in beginning periods to account for @@ -1004,7 +1026,7 @@ def _apply(self, func, how=None, **kwargs): Parameters ---------- func : string/callable to apply - how : string, default to None + how : string, default to None (DEPRECATED) how to resample Returns diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1f1371597c693..b81b3a87ab5df 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -191,6 +191,15 @@ def b(x): result = r.aggregate([a,b]) assert_frame_equal(result, expected) + def test_preserve_metadata(self): + # GH 10565 + s = Series(np.arange(100), name='foo') + + s2 = s.rolling(30).sum() + s3 = s.rolling(20).sum() + self.assertEqual(s2.name, 'foo') + self.assertEqual(s3.name, 'foo') + class TestDeprecations(Base): """ test that we are catching deprecation warnings """ @@ -815,10 +824,15 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): # check via the API calls if name is provided if name is not None: - return getattr(obj.rolling(window=window, - min_periods=min_periods, - freq=freq, - center=center),name)(**kwargs) + + # catch a freq deprecation warning if freq is provided and not None + w = FutureWarning if freq is not None else None + with tm.assert_produces_warning(w, check_stacklevel=False): + r = obj.rolling(window=window, + min_periods=min_periods, + freq=freq, + center=center) + return getattr(r,name)(**kwargs) # check via the moments API with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -1002,15 +1016,6 @@ def test_ewma_halflife_arg(self): self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, halflife=50) self.assertRaises(Exception, mom.ewma, self.arr) - def test_moment_preserve_series_name(self): - # GH 10565 - s = Series(np.arange(100), name='foo') - - s2 = s.rolling(30).sum() - s3 = s.rolling(20).sum() - self.assertEqual(s2.name, 'foo') - self.assertEqual(s3.name, 'foo') - def test_ew_empty_arrays(self): arr = np.array([], dtype=np.float64) @@ -2133,7 +2138,8 @@ def test_rolling_max_gh6297(self): expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = series.rolling(window=1, freq='D').max() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) def test_rolling_max_how_resample(self): @@ -2152,14 +2158,16 @@ def test_rolling_max_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = series.rolling(window=1, freq='D').max() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = series.rolling(window=1, freq='D').max(how='median') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max(how='median') assert_series_equal(expected, x) # Now specify mean (4+10+20)/3 @@ -2167,7 +2175,8 @@ def test_rolling_max_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, v], index=[datetime(1975, 1, i, 0) 
for i in range(1, 6)]) - x = series.rolling(window=1, freq='D').max(how='mean') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max(how='mean') assert_series_equal(expected, x) @@ -2187,8 +2196,9 @@ def test_rolling_min_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = series.rolling(window=1, freq='D').min() - assert_series_equal(expected, x) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + r = series.rolling(window=1, freq='D') + assert_series_equal(expected, r.min()) def test_rolling_median_how_resample(self): @@ -2206,14 +2216,15 @@ def test_rolling_median_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = series.rolling(window=1, freq='D').median() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').median() assert_series_equal(expected, x) def test_rolling_median_memory_error(self): # GH11722 n = 20000 - mom.rolling_median(Series(np.random.randn(n)), window=2, center=False) - mom.rolling_median(Series(np.random.randn(n)), window=2, center=False) + Series(np.random.randn(n)).rolling(window=2, center=False).median() + Series(np.random.randn(n)).rolling(window=2, center=False).median() if __name__ == '__main__': import nose From 1890a88d4d1b2926e45f631330f6191642bef773 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 15 Dec 2015 07:02:58 -0500 Subject: [PATCH 8/8] cleanup based on comments --- doc/source/computation.rst | 35 ++-- doc/source/whatsnew/v0.14.0.txt | 15 +- doc/source/whatsnew/v0.15.0.txt | 103 ++++++++--- doc/source/whatsnew/v0.18.0.txt | 12 +- pandas/core/base.py | 15 +- pandas/core/groupby.py | 31 ++-- pandas/core/window.py | 305 ++++++++++++++++---------------- pandas/tests/test_window.py | 74 ++++++-- 8 files changed, 355 insertions(+), 235 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index a96c14a6e154f..39587e82731b0 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -243,6 +243,7 @@ accept the following arguments: - ``window``: size of moving window - ``min_periods``: threshold of non-null data points to require (otherwise result is NA) +- ``center``: boolean, whether to set the labels at the center (default is False) .. warning:: @@ -334,7 +335,7 @@ The following methods are available: :meth:`~Window.sum`, Sum of values :meth:`~Window.mean`, Mean of values -The weights used in the window are specified by the ``win_type``keyword. The list of recognized types are: +The weights used in the window are specified by the ``win_type`` keyword. The list of recognized types are: - ``boxcar`` - ``triang`` @@ -370,27 +371,12 @@ For some windowing functions, additional parameters must be specified: ser.rolling(window=5, win_type='gaussian').mean(std=0.1) -Centering Windows -~~~~~~~~~~~~~~~~~ - -By default the labels are set to the right edge of the window, but a -``center`` keyword is available so the labels can be set at the center. -This keyword is available in other rolling functions as well. - -.. ipython:: python - - ser.rolling(window=5, win_type='boxcar').mean() - - ser.rolling(window=5, win_type='boxcar', center=True).mean() - - ser.rolling(window=5, center=True).mean() - .. _stats.moments.normalization: .. note:: For ``.sum()`` with a ``win_type``, there is no normalization done to the - weights. 
Passing custom weights of ``[1, 1, 1]`` will yield a different + weights for the window. Passing custom weights of ``[1, 1, 1]`` will yield a different result than passing weights of ``[2, 2, 2]``, for example. When passing a ``win_type`` instead of explicitly specifying the weights, the weights are already normalized so that the largest weight is 1. @@ -399,6 +385,18 @@ This keyword is available in other rolling functions as well. such that the weights are normalized with respect to each other. Weights of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result. +Centering Windows +~~~~~~~~~~~~~~~~~ + +By default the labels are set to the right edge of the window, but a +``center`` keyword is available so the labels can be set at the center. +This keyword is available in other rolling functions as well. + +.. ipython:: python + + ser.rolling(window=5).mean() + ser.rolling(window=5, center=True).mean() + .. _stats.moments.binary: Binary Window Functions @@ -550,7 +548,7 @@ Furthermore you can pass a nested dict to indicate different aggregations on dif .. ipython:: python - r.agg({'A' : {'ra' : 'sum'}, 'B' : {'rb' : 'std' }}) + r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] }) .. _stats.moments.expanding: @@ -607,6 +605,7 @@ all accept are: - ``min_periods``: threshold of non-null data points to require. Defaults to minimum needed to compute statistic. No ``NaNs`` will be output once ``min_periods`` non-null data points have been seen. +- ``center``: boolean, whether to set the labels at the center (default is False) .. note:: diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.txt index e2f96f204edab..67928af30bead 100644 --- a/doc/source/whatsnew/v0.14.0.txt +++ b/doc/source/whatsnew/v0.14.0.txt @@ -170,11 +170,18 @@ API changes :ref:`Computing rolling pairwise covariances and correlations ` in the docs. - .. ipython:: python + .. code-block:: python + + In [1]: df = DataFrame(np.random.randn(10,4),columns=list('ABCD')) + + In [4]: covs = pd.rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True) - df = DataFrame(np.random.randn(10,4),columns=list('ABCD')) - covs = rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True) - covs[df.index[-1]] + In [5]: covs[df.index[-1]] + Out[5]: + B C D + A 0.035310 0.326593 -0.505430 + B 0.137748 -0.006888 -0.005383 + C -0.006888 0.861040 0.020762 - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index a33e0f19961ab..9651c1efeff4a 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt @@ -68,7 +68,7 @@ For full docs, see the :ref:`categorical introduction ` and the .. ipython:: python :okwarning: - + df = DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) df["grade"] = df["raw_grade"].astype("category") @@ -353,9 +353,15 @@ Rolling/Expanding Moments improvements New behavior - .. ipython:: python + .. 
code-block:: python - rolling_min(s, window=10, min_periods=5) + In [4]: pd.rolling_min(s, window=10, min_periods=5) + Out[4]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + dtype: float64 - :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`, :func:`rolling_median`, :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`, @@ -381,9 +387,15 @@ Rolling/Expanding Moments improvements New behavior (note final value is ``5 = sum([2, 3, NaN])``): - .. ipython:: python + .. code-block:: python - rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) + In [7]: rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) + Out[7]: + 0 1 + 1 3 + 2 6 + 3 5 + dtype: float64 - :func:`rolling_window` now normalizes the weights properly in rolling mean mode (`mean=True`) so that the calculated weighted means (e.g. 'triang', 'gaussian') are distributed about the same means as those @@ -397,20 +409,27 @@ Rolling/Expanding Moments improvements .. code-block:: python - In [39]: rolling_window(s, window=3, win_type='triang', center=True) - Out[39]: - 0 NaN - 1 6.583333 - 2 6.883333 - 3 6.683333 - 4 NaN - dtype: float64 + In [39]: rolling_window(s, window=3, win_type='triang', center=True) + Out[39]: + 0 NaN + 1 6.583333 + 2 6.883333 + 3 6.683333 + 4 NaN + dtype: float64 New behavior .. ipython:: python - rolling_window(s, window=3, win_type='triang', center=True) + In [10]: pd.rolling_window(s, window=3, win_type='triang', center=True) + Out[10]: + 0 NaN + 1 9.875 + 2 10.325 + 3 10.025 + 4 NaN + dtype: float64 - Removed ``center`` argument from all :func:`expanding_ ` functions (see :ref:`list `), as the results produced when ``center=True`` did not make much sense. (:issue:`7925`) @@ -449,9 +468,17 @@ Rolling/Expanding Moments improvements New behavior (note values start at index ``4``, the location of the 2nd (since ``min_periods=2``) non-empty value): - .. ipython:: python + .. code-block:: python - ewma(s, com=3., min_periods=2) + In [2]: pd.ewma(s, com=3., min_periods=2) + Out[2]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + 4 1.759644 + 5 2.383784 + dtype: float64 - :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` now have an optional ``adjust`` argument, just like :func:`ewma` does, @@ -465,11 +492,28 @@ Rolling/Expanding Moments improvements When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation. (:issue:`7543`) - .. ipython:: python + .. code-block:: python + + In [7]: pd.ewma(Series([None, 1., 8.]), com=2.) + Out[7]: + 0 NaN + 1 1.0 + 2 5.2 + dtype: float64 + + In [8]: pd.ewma(Series([1., None, 8.]), com=2., ignore_na=True) # pre-0.15.0 behavior + Out[8]: + 0 1.0 + 1 1.0 + 2 5.2 + dtype: float64 - ewma(Series([None, 1., 8.]), com=2.) - ewma(Series([1., None, 8.]), com=2., ignore_na=True) # pre-0.15.0 behavior - ewma(Series([1., None, 8.]), com=2., ignore_na=False) # new default + In [9]: pd.ewma(Series([1., None, 8.]), com=2., ignore_na=False) # new default + Out[9]: + 0 1.000000 + 1 1.000000 + 2 5.846154 + dtype: float64 .. warning:: @@ -525,10 +569,23 @@ Rolling/Expanding Moments improvements By comparison, the following 0.15.0 results have a ``NaN`` for entry ``0``, and the debiasing factors are decreasing (towards 1.25): - .. ipython:: python + .. 
code-block:: python

-      ewmvar(s, com=2., bias=False)
-      ewmvar(s, com=2., bias=False) / ewmvar(s, com=2., bias=True)
+      In [14]: pd.ewmvar(s, com=2., bias=False)
+      Out[14]:
+      0         NaN
+      1    0.500000
+      2    1.210526
+      3    4.089069
+      dtype: float64
+
+      In [15]: pd.ewmvar(s, com=2., bias=False) / pd.ewmvar(s, com=2., bias=True)
+      Out[15]:
+      0         NaN
+      1    2.083333
+      2    1.583333
+      3    1.425439
+      dtype: float64

 See :ref:`Exponentially weighted moment functions ` for details. (:issue:`7912`)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 96618ffbc36cb..7f63096d7c045 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -86,8 +86,8 @@ And multiple aggregations

 .. ipython:: python

-   r.agg({'A' : {'ra' : ['mean','std']},
-          'B' : {'rb' : ['mean','std']}})
+   r.agg({'A' : ['mean','std'],
+          'B' : ['mean','std']})

 .. _whatsnew_0180.enhancements.other:

@@ -239,15 +239,17 @@ Deprecations
     2    0.5
     dtype: float64

-- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. (:issue:`11603`)
+- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. You can simply resample the input prior to creating a window function. (:issue:`11603`)
+
+  For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value over a rolling 5-day window, one could use ``s.resample('D',how='max').rolling(window=5).max()``, which first resamples the data to daily data and then applies a rolling 5-day window.

 .. _whatsnew_0180.prior_deprecations:

 Removal of prior version deprecations/changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-- Removal of ``rolling_corr_parwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`)
-- Removal of ``expanding_corr_parwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`)
+- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`)
+- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 42e04d5a03696..a1e1c20344ea4 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -358,11 +358,10 @@ def _gotitem(self, key, ndim, subset=None):
     """

 _see_also_template = """
-
 See also
 --------
-`pandas.Series.%(name)s`
-`pandas.DataFrame.%(name)s`
+pandas.Series.%(name)s
+pandas.DataFrame.%(name)s
 """

 def aggregate(self, func, *args, **kwargs):
@@ -422,7 +421,7 @@ def _aggregate(self, arg, *args, **kwargs):
         else:
             for col, agg_how in compat.iteritems(arg):
                 colg = self._gotitem(col, ndim=1)
-                result[col] = colg.aggregate(agg_how, _level=(_level or 0) + 1)
+                result[col] = colg.aggregate(agg_how, _level=None)
                 keys.append(col)

         if isinstance(list(result.values())[0], com.ABCDataFrame):
@@ -451,12 +450,16 @@ def _aggregate_multiple_funcs(self, arg, _level):
         if self.axis != 0:
             raise NotImplementedError("axis other than 0 is not supported")

-        obj = self._obj_with_exclusions
+        if self._selected_obj.ndim == 1:
+            obj = self._selected_obj
+        else:
+            obj = self._obj_with_exclusions
+
         results = []
         keys = []

         # degenerate case
-        if obj.ndim == 1:
+        if obj.ndim==1:
             for a in arg:
                 try:
                     colg = self._gotitem(obj.name, ndim=1, subset=obj)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index ac07c9487fc15..5428ee5484bfa 100644
--- a/pandas/core/groupby.py
+++ 
b/pandas/core/groupby.py @@ -40,15 +40,11 @@ _doc_template = """ - Returns - ------- - same type as input - See also -------- - `pandas.Series.%(name)s` - `pandas.DataFrame.%(name)s` - `pandas.Panel.%(name)s` + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s """ # special case to prevent duplicate plots when catching exceptions when @@ -628,7 +624,6 @@ def __iter__(self): return self.grouper.get_iterator(self.obj, axis=self.axis) @Substitution(name='groupby') - @Appender(_doc_template) def apply(self, func, *args, **kwargs): """ Apply function and combine results together in an intelligent way. The @@ -664,10 +659,12 @@ def apply(self, func, *args, **kwargs): side-effects, as they will take effect twice for the first group. - See also -------- - aggregate, transform""" + aggregate, transform + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s""" func = self._is_builtin_func(func) @@ -759,7 +756,7 @@ def std(self, ddof=1): Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ # todo, implement at cython level? @@ -776,7 +773,7 @@ def var(self, ddof=1): Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ if ddof == 1: @@ -797,7 +794,7 @@ def sem(self, ddof=1): Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ return self.std(ddof=ddof)/np.sqrt(self.count()) @@ -868,8 +865,8 @@ def nth(self, n, dropna=None): 1 4 5 6 >>> g.nth(1, dropna='any') # NaNs denote group exhausted when using dropna - B - A + B + A 1 NaN 5 NaN """ @@ -978,13 +975,13 @@ def cumcount(self, ascending=True): Parameters ---------- ascending : bool, default True - If False, number in reverse, from length of group - 1 to 0. + If False, number in reverse, from length of group - 1 to 0. Examples -------- >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], - ... columns=['A']) + ... columns=['A']) >>> df A 0 a diff --git a/pandas/core/window.py b/pandas/core/window.py index 2e9b4922a039a..4bbdf444ac2a7 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -30,8 +30,8 @@ See also -------- -`pandas.Series.%(name)s` -`pandas.DataFrame.%(name)s` +pandas.Series.%(name)s +pandas.DataFrame.%(name)s """ class _Window(PandasObject, SelectionMixin): @@ -65,8 +65,14 @@ def _setup(self): def _convert_freq(self, how=None): """ resample according to the how, return a new object """ + obj = self._selected_obj if self.freq is not None and isinstance(obj, (com.ABCSeries, com.ABCDataFrame)): + if how is not None: + warnings.warn("The how kw argument is deprecated and removed in a future version. 
You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=6) + obj = obj.resample(self.freq, how=how) return obj @@ -118,7 +124,7 @@ def _get_window(self, other=None): def __unicode__(self): """ provide a nice str repr of our rolling object """ - attrs = [ "{k}->{v}".format(k=k,v=getattr(self,k)) \ + attrs = [ "{k}={v}".format(k=k,v=getattr(self,k)) \ for k in self._attributes if getattr(self,k,None) is not None ] return "{klass} [{attrs}]".format(klass=self.__class__.__name__, attrs=','.join(attrs)) @@ -227,6 +233,22 @@ def aggregate(self, arg, *args, **kwargs): agg = aggregate + _shared_docs['sum'] = dedent(""" + %(name)s sum + + Parameters + ---------- + how : string, default None (DEPRECATED) + Method for down- or re-sampling""") + + _shared_docs['mean'] = dedent(""" + %(name)s mean + + Parameters + ---------- + how : string, default None (DEPRECATED) + Method for down- or re-sampling""") + class Window(_Window): """ Provides rolling transformations. @@ -241,7 +263,7 @@ class Window(_Window): min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). - freq : string or DateOffset object, optional (default None) + freq : string or DateOffset object, optional (default None) (DEPRECATED) Frequency to conform the data to before computing the statistic. Specified as a frequency string or DateOffset object. center : boolean, default False @@ -345,8 +367,8 @@ def f(arg, *args, **kwargs): return self._wrap_results(results, blocks, obj) @Substitution(name='rolling') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: @@ -358,13 +380,15 @@ def aggregate(self, arg, *args, **kwargs): agg = aggregate - @Substitution(name='rolling') + @Substitution(name='window') @Appender(_doc_template) + @Appender(_shared_docs['sum']) def sum(self, **kwargs): return self._apply_window(mean=False, **kwargs) - @Substitution(name='rolling') + @Substitution(name='window') @Appender(_doc_template) + @Appender(_shared_docs['mean']) def mean(self, **kwargs): return self._apply_window(mean=True, **kwargs) @@ -471,8 +495,8 @@ def count(self): Parameters ---------- func : function - Must produce a single value from an ndarray input - *args and **kwargs are passed to the function""") + Must produce a single value from an ndarray input + *args and **kwargs are passed to the function""") def apply(self, func, args=(), kwargs={}): _level = kwargs.pop('_level',None) @@ -484,9 +508,8 @@ def f(arg, window, min_periods): return self._apply(f, center=False) - _shared_docs['sum'] = """%(name)s sum""" - def sum(self): - return self._apply('roll_sum') + def sum(self, **kwargs): + return self._apply('roll_sum', **kwargs) _shared_docs['max'] = dedent(""" %(name)s maximum @@ -494,15 +517,11 @@ def sum(self): Parameters ---------- how : string, default 'max' (DEPRECATED) - Method for down- or re-sampling""") - def max(self, how=None): - if how is not None: - warnings.warn("The how kw argument is deprecated and removed in a future version. 
You can resample prior " - "to passing to a window function", - FutureWarning, stacklevel=3) - else: + Method for down- or re-sampling""") + def max(self, how=None, **kwargs): + if self.freq is not None and how is None: how = 'max' - return self._apply('roll_max', how=how) + return self._apply('roll_max', how=how, **kwargs) _shared_docs['min'] = dedent(""" %(name)s minimum @@ -510,19 +529,14 @@ def max(self, how=None): Parameters ---------- how : string, default 'min' (DEPRECATED) - Method for down- or re-sampling""") - def min(self, how=None): - if how is not None: - warnings.warn("The how kw argument is deprecated and removed in a future version. You can resample prior " - "to passing to a window function", - FutureWarning, stacklevel=3) - else: + Method for down- or re-sampling""") + def min(self, how=None, **kwargs): + if self.freq is not None and how is None: how = 'min' - return self._apply('roll_min', how=how) + return self._apply('roll_min', how=how, **kwargs) - _shared_docs['mean'] = """%(name)s mean""" - def mean(self): - return self._apply('roll_mean') + def mean(self, **kwargs): + return self._apply('roll_mean', **kwargs) _shared_docs['median'] = dedent(""" %(name)s median @@ -530,15 +544,11 @@ def mean(self): Parameters ---------- how : string, default 'median' (DEPRECATED) - Method for down- or re-sampling""") - def median(self, how=None): - if how is not None: - warnings.warn("The how kw argument is deprecated and removed in a future version. You can resample prior " - "to passing to a window function", - FutureWarning, stacklevel=3) - else: + Method for down- or re-sampling""") + def median(self, how=None, **kwargs): + if self.freq is not None and how is None: how = 'median' - return self._apply('roll_median_c', how=how) + return self._apply('roll_median_c', how=how, **kwargs) _shared_docs['std'] = dedent(""" %(name)s standard deviation @@ -546,16 +556,16 @@ def median(self, how=None): Parameters ---------- ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements.""") + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""") - def std(self, ddof=1): + def std(self, ddof=1, **kwargs): window = self._get_window() def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) return _zsqrt(algos.roll_var(arg, window, minp, ddof)) - return self._apply(f, check_minp=_require_min_periods(1)) + return self._apply(f, check_minp=_require_min_periods(1), **kwargs) _shared_docs['var'] = dedent(""" %(name)s variance @@ -563,23 +573,26 @@ def f(arg, *args, **kwargs): Parameters ---------- ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements.""") + Delta Degrees of Freedom. 
The divisor used in calculations
+        is ``N - ddof``, where ``N`` represents the number of elements.""")

-    def var(self, ddof=1):
+    def var(self, ddof=1, **kwargs):
         return self._apply('roll_var',
                            check_minp=_require_min_periods(1),
-                           ddof=ddof)
+                           ddof=ddof,
+                           **kwargs)

     _shared_docs['skew'] = """Unbiased %(name)s skewness"""
-    def skew(self):
+    def skew(self, **kwargs):
         return self._apply('roll_skew',
-                           check_minp=_require_min_periods(3))
+                           check_minp=_require_min_periods(3),
+                           **kwargs)

     _shared_docs['kurt'] = """Unbiased %(name)s kurtosis"""
-    def kurt(self):
+    def kurt(self, **kwargs):
         return self._apply('roll_kurt',
-                           check_minp=_require_min_periods(4))
+                           check_minp=_require_min_periods(4),
+                           **kwargs)

     _shared_docs['quantile'] = dedent("""
     %(name)s quantile
@@ -587,15 +600,15 @@ def kurt(self):
     Parameters
     ----------
     quantile : float
-    0 <= quantile <= 1""")
+        0 <= quantile <= 1""")

-    def quantile(self, quantile):
+    def quantile(self, quantile, **kwargs):
         window = self._get_window()
         def f(arg, *args, **kwargs):
             minp = _use_window(self.min_periods, window)
             return algos.roll_quantile(arg, window, minp, quantile)

-        return self._apply(f)
+        return self._apply(f, **kwargs)

     _shared_docs['cov'] = dedent("""
     %(name)s sample covariance
@@ -614,7 +627,7 @@ def f(arg, *args, **kwargs):
         Delta Degrees of Freedom. The divisor used in calculations
         is ``N - ddof``, where ``N`` represents the number of elements.""")

-    def cov(self, other=None, pairwise=None, ddof=1):
+    def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
         if other is None:
             other = self._selected_obj
             pairwise = True if pairwise is None else pairwise # only default unset
@@ -622,8 +635,8 @@ def _get_cov(X, Y):
         window = self._get_window(other)

         def _get_cov(X, Y):
-            mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean()
-            count = (X+Y).rolling(window=window, center=self.center).count()
+            mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean(**kwargs)
+            count = (X+Y).rolling(window=window, center=self.center).count(**kwargs)
             bias_adj = count / (count - ddof)
             return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
         return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise))
@@ -642,7 +655,7 @@ def _get_cov(X, Y):
         will be a Panel in the case of DataFrame inputs. In the case of missing
         elements, only complete pairwise observations will be used.""")

-    def corr(self, other=None, pairwise=None):
+    def corr(self, other=None, pairwise=None, **kwargs):
         if other is None:
             other = self._selected_obj
             pairwise = True if pairwise is None else pairwise # only default unset
@@ -659,12 +672,12 @@ def _get_corr(a, b):
                          freq=self.freq,
                          center=self.center)

-            return a.cov(b) / (a.std() * b.std())
+            return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs))
         return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise))

 class Rolling(_Rolling_and_Expanding):
     """
-    Provides rolling transformations.
+    Provides rolling window calculations.

     .. 
versionadded:: 0.18.0 @@ -698,8 +711,8 @@ class Rolling(_Rolling_and_Expanding): """ @Substitution(name='rolling') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -720,8 +733,8 @@ def apply(self, func, args=(), kwargs={}): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['sum']) - def sum(self): - return super(Rolling, self).sum() + def sum(self, **kwargs): + return super(Rolling, self).sum(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @@ -738,8 +751,8 @@ def min(self, **kwargs): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['mean']) - def mean(self): - return super(Rolling, self).mean() + def mean(self, **kwargs): + return super(Rolling, self).mean(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @@ -750,44 +763,44 @@ def median(self, **kwargs): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['std']) - def std(self, ddof=1): - return super(Rolling, self).std(ddof=ddof) + def std(self, ddof=1, **kwargs): + return super(Rolling, self).std(ddof=ddof, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['var']) - def var(self, ddof=1): - return super(Rolling, self).var(ddof=ddof) + def var(self, ddof=1, **kwargs): + return super(Rolling, self).var(ddof=ddof, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['skew']) - def skew(self): - return super(Rolling, self).skew() + def skew(self, **kwargs): + return super(Rolling, self).skew(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['kurt']) - def kurt(self): - return super(Rolling, self).kurt() + def kurt(self, **kwargs): + return super(Rolling, self).kurt(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['quantile']) - def quantile(self, quantile): - return super(Rolling, self).quantile(quantile=quantile) + def quantile(self, quantile, **kwargs): + return super(Rolling, self).quantile(quantile=quantile, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['cov']) - def cov(self, other=None, pairwise=None, ddof=1): - return super(Rolling, self).cov(other=other, pairwise=pairwise, ddof=ddof) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super(Rolling, self).cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['corr']) - def corr(self, other=None, pairwise=None): - return super(Rolling, self).corr(other=other, pairwise=pairwise) + def corr(self, other=None, pairwise=None, **kwargs): + return super(Rolling, self).corr(other=other, pairwise=pairwise, **kwargs) class Expanding(_Rolling_and_Expanding): """ @@ -837,8 +850,8 @@ def _get_window(self, other=None): return max((len(obj) + len(obj)), self.min_periods) if self.min_periods else (len(obj) + len(obj)) @Substitution(name='expanding') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) @@ -847,8 +860,8 @@ def aggregate(self, arg, *args, **kwargs): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['count']) - def 
count(self): - return super(Expanding, self).count() + def count(self, **kwargs): + return super(Expanding, self).count(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -859,8 +872,8 @@ def apply(self, func, args=(), kwargs={}): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['sum']) - def sum(self): - return super(Expanding, self).sum() + def sum(self, **kwargs): + return super(Expanding, self).sum(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -877,8 +890,8 @@ def min(self, **kwargs): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['mean']) - def mean(self): - return super(Expanding, self).mean() + def mean(self, **kwargs): + return super(Expanding, self).mean(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -889,44 +902,68 @@ def median(self, **kwargs): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['std']) - def std(self, ddof=1): - return super(Expanding, self).std(ddof=ddof) + def std(self, ddof=1, **kwargs): + return super(Expanding, self).std(ddof=ddof, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['var']) - def var(self, ddof=1): - return super(Expanding, self).var(ddof=ddof) + def var(self, ddof=1, **kwargs): + return super(Expanding, self).var(ddof=ddof, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['skew']) - def skew(self): - return super(Expanding, self).skew() + def skew(self, **kwargs): + return super(Expanding, self).skew(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['kurt']) - def kurt(self): - return super(Expanding, self).kurt() + def kurt(self, **kwargs): + return super(Expanding, self).kurt(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['quantile']) - def quantile(self, quantile): - return super(Expanding, self).quantile(quantile=quantile) + def quantile(self, quantile, **kwargs): + return super(Expanding, self).quantile(quantile=quantile, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['cov']) - def cov(self, other=None, pairwise=None, ddof=1): - return super(Expanding, self).cov(other=other, pairwise=pairwise, ddof=ddof) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super(Expanding, self).cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['corr']) - def corr(self, other=None, pairwise=None): - return super(Expanding, self).corr(other=other, pairwise=pairwise) + def corr(self, other=None, pairwise=None, **kwargs): + return super(Expanding, self).corr(other=other, pairwise=pairwise, **kwargs) + +_bias_template = """ + +Parameters +---------- +bias : boolean, default False + Use a standard estimation bias correction +""" + +_pairwise_template = """ + +Parameters +---------- +other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output +pairwise : bool, default None + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. 
+bias : boolean, default False + Use a standard estimation bias correction +""" class EWM(_Rolling): """ @@ -1012,8 +1049,8 @@ def _constructor(self): return EWM @Substitution(name='ewm') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(EWM, self).aggregate(arg, *args, **kwargs) @@ -1062,33 +1099,23 @@ def func(arg): @Substitution(name='ewm') @Appender(_doc_template) - def mean(self): + def mean(self, **kwargs): """exponential weighted moving average""" - return self._apply('ewma') + return self._apply('ewma', **kwargs) @Substitution(name='ewm') @Appender(_doc_template) - def std(self, bias=False): - """exponential weighted moving stddev - - Parameters - ---------- - bias : boolean, default False - Use a standard estimation bias correction - """ - return _zsqrt(self.var(bias=bias)) + @Appender(_bias_template) + def std(self, bias=False, **kwargs): + """exponential weighted moving stddev""" + return _zsqrt(self.var(bias=bias, **kwargs)) vol=std @Substitution(name='ewm') @Appender(_doc_template) - def var(self, bias=False): - """exponential weighted moving average - - Parameters - ---------- - bias : boolean, default False - Use a standard estimation bias correction - """ + @Appender(_bias_template) + def var(self, bias=False, **kwargs): + """exponential weighted moving variance""" def f(arg): return algos.ewmcov(arg, arg, @@ -1098,26 +1125,13 @@ def f(arg): int(self.min_periods), int(bias)) - return self._apply(f) + return self._apply(f, **kwargs) @Substitution(name='ewm') @Appender(_doc_template) - def cov(self, other=None, pairwise=None, bias=False): - """exponential weighted sample covariance - - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - if not supplied then will default to self and produce pairwise output - pairwise : bool, default None - If False then only matching columns between self and other will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. - bias : boolean, default False - Use a standard estimation bias correction - """ + @Appender(_pairwise_template) + def cov(self, other=None, pairwise=None, bias=False, **kwargs): + """exponential weighted sample covariance""" if other is None: other = self._selected_obj pairwise = True if pairwise is None else pairwise # only default unset @@ -1139,20 +1153,9 @@ def _get_cov(X, Y): @Substitution(name='ewm') @Appender(_doc_template) - def corr(self, other=None, pairwise=None): - """exponential weighted sample correlation - - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - if not supplied then will default to self and produce pairwise output - pairwise : bool, default None - If False then only matching columns between self and other will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. 
-        """
+    @Appender(_pairwise_template)
+    def corr(self, other=None, pairwise=None, **kwargs):
+        """exponential weighted sample correlation"""
         if other is None:
             other = self._selected_obj
             pairwise = True if pairwise is None else pairwise  # only default unset
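The window.py changes above read more naturally as usage. A rough sketch of
the post-patch behavior they implement, assuming the deferred .ewm() API this
PR introduces; shapes and defaults are inferred from the code above, not from
a test run:

    import numpy as np
    import pandas as pd

    s = pd.Series(np.random.randn(100))
    ewm = s.ewm(span=10, min_periods=5)

    # std() is defined above as _zsqrt(var()): equal up to floating point
    assert np.allclose(ewm.std(bias=False).dropna(),
                       np.sqrt(ewm.var(bias=False).dropna()))

    # cov()/corr() with other=None default to pairwise output,
    # per the "only default unset" branches above
    df = pd.DataFrame(np.random.randn(50, 2), columns=['A', 'B'])
    pairwise_cov = df.ewm(span=10).cov()                # other defaulted to df
    matched_corr = df.ewm(span=10).corr(df, pairwise=False)  # matching columns only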
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index b81b3a87ab5df..4d7f9292705ad 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -130,17 +130,17 @@ def compare(result, expected):
 
         result = r.aggregate({'A': ['mean','std']})
         expected = pd.concat([a_mean,a_std],axis=1)
-        expected.columns = pd.MultiIndex.from_product([['A'],['mean','std']])
+        expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std')])
         assert_frame_equal(result, expected)
 
         result = r['A'].aggregate(['mean','sum'])
         expected = pd.concat([a_mean,a_sum],axis=1)
-        expected.columns = pd.MultiIndex.from_product([['A'],['mean','sum']])
+        expected.columns = ['mean','sum']
         assert_frame_equal(result, expected)
 
         result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } })
         expected = pd.concat([a_mean,a_sum],axis=1)
-        expected.columns = pd.MultiIndex.from_product([['A'],['mean','sum']])
+        expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','sum')])
         compare(result, expected)
 
         result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' },
@@ -150,6 +150,13 @@ def compare(result, expected):
                                                       ('B','mean2'),('B','sum2')])
         compare(result, expected)
 
+        result = r.aggregate({'A': ['mean','std'],
+                              'B': ['mean','std']})
+        expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1)
+        expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std'),
+                                                      ('B','mean'),('B','std')])
+        compare(result, expected)
+
         result = r.aggregate({'r1' : { 'A' : ['mean','sum'] },
                               'r2' : { 'B' : ['mean','sum'] }})
         expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1)
@@ -172,10 +179,28 @@ def compare(result, expected):
         expected = pd.concat([a_sum,rcustom],axis=1)
         compare(result, expected)
 
+    def test_agg_consistency(self):
+
+        df = DataFrame({'A' : range(5),
+                        'B' : range(0,10,2)})
+        r = df.rolling(window=3)
+
+        result = r.agg([np.sum, np.mean]).columns
+        expected = pd.MultiIndex.from_product([list('AB'),['sum','mean']])
+        tm.assert_index_equal(result, expected)
+
+        result = r['A'].agg([np.sum, np.mean]).columns
+        expected = pd.Index(['sum','mean'])
+        tm.assert_index_equal(result, expected)
+
+        result = r.agg({'A' : [np.sum, np.mean]}).columns
+        expected = pd.MultiIndex.from_tuples([('A','sum'),('A','mean')])
+        tm.assert_index_equal(result, expected)
+
     def test_window_with_args(self):
+        tm._skip_if_no_scipy()
 
         # make sure that we are aggregating window functions correctly with arg
-
         r = Series(np.random.randn(100)).rolling(window=10,min_periods=1,win_type='gaussian')
         expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1)
         expected.columns = ['','']
@@ -200,6 +225,31 @@ def test_preserve_metadata(self):
         self.assertEqual(s2.name, 'foo')
         self.assertEqual(s3.name, 'foo')
 
+    def test_how_compat(self):
+        # in prior versions, we would allow how to be used in the resample
+        # now that its deprecated, we need to handle this in the actual
+        # aggregation functions
+        s = pd.Series(np.random.randn(20), index=pd.date_range('1/1/2000', periods=20, freq='12H'))
+
+        for how in ['min','max','median']:
+            for op in ['mean','sum','std','var','kurt','skew']:
+                for t in ['rolling','expanding']:
+
+                    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+
+                        dfunc = getattr(pd,"{0}_{1}".format(t,op))
+                        if dfunc is None:
+                            continue
+
+                        if t == 'rolling':
+                            kwargs = {'window' : 5}
+                        else:
+                            kwargs = {}
+                        result = dfunc(s, freq='D', how=how, **kwargs)
+
+                        expected = getattr(getattr(s,t)(freq='D', **kwargs),op)(how=how)
+                        assert_series_equal(result, expected)
+
 class TestDeprecations(Base):
     """ test that we are catching deprecation warnings """
 
@@ -495,11 +545,12 @@ def test_cmov_window_special_linear_range(self):
         assert_series_equal(xp, rs)
 
     def test_rolling_median(self):
-        self._check_moment_func(mom.rolling_median, np.median, name='median')
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            self._check_moment_func(mom.rolling_median, np.median, name='median')
 
     def test_rolling_min(self):
-        self._check_moment_func(mom.rolling_min, np.min, name='min')
-
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            self._check_moment_func(mom.rolling_min, np.min, name='min')
 
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             a = np.array([1, 2, 3, 4, 5])
@@ -510,7 +561,8 @@ def test_rolling_min(self):
                           np.array([1,2, 3]), window=3, min_periods=5)
 
     def test_rolling_max(self):
-        self._check_moment_func(mom.rolling_max, np.max, name='max')
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            self._check_moment_func(mom.rolling_max, np.max, name='max')
 
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             a = np.array([1, 2, 3, 4, 5])
@@ -2177,7 +2229,7 @@ def test_rolling_max_how_resample(self):
                                    for i in range(1, 6)])
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             x = series.rolling(window=1, freq='D').max(how='mean')
-        assert_series_equal(expected, x)
+            assert_series_equal(expected, x)
 
     def test_rolling_min_how_resample(self):
 
@@ -2198,7 +2250,7 @@ def test_rolling_min_how_resample(self):
                                    for i in range(1, 6)])
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             r = series.rolling(window=1, freq='D')
-        assert_series_equal(expected, r.min())
+            assert_series_equal(expected, r.min())
 
     def test_rolling_median_how_resample(self):
 
@@ -2218,7 +2270,7 @@ def test_rolling_median_how_resample(self):
                                    for i in range(1, 6)])
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             x = series.rolling(window=1, freq='D').median()
-        assert_series_equal(expected, x)
+            assert_series_equal(expected, x)
 
     def test_rolling_median_memory_error(self):
         # GH11722
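The column layouts that test_agg_consistency and the aggregate assertions
above pin down, restated as plain usage of the new deferred API. This is a
sketch of the intended semantics, mirroring the expected values asserted in
the tests rather than independently verified output:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': range(5), 'B': range(0, 10, 2)})
    r = df.rolling(window=3)

    # list of functions over all columns -> MultiIndex of (column, function)
    r.agg([np.sum, np.mean]).columns
    #   MultiIndex: ('A','sum'), ('A','mean'), ('B','sum'), ('B','mean')

    # list of functions on a single selected column -> flat Index of names
    r['A'].agg([np.sum, np.mean]).columns
    #   Index(['sum', 'mean'])

    # dict of column -> list of functions -> MultiIndex for that column only
    r.agg({'A': [np.sum, np.mean]}).columns
    #   MultiIndex: ('A','sum'), ('A','mean')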