-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
WIP: Feature/interpolate #1640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: Feature/interpolate #1640
Changes from all commits
1582c1f
ab727e7
95006c4
4a4f6eb
42d63ef
263ec98
19d21b8
f937c07
8717e38
3d5c1b1
1864e8f
f58d464
1b93808
6f83b7b
33df6af
eafe67a
dd9fa8c
88d1569
3fb9261
37882b7
a04e83e
48505a5
20f957d
282bb65
a6fcb7f
2b0d9e1
d3220f3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import pandas as pd | ||
|
||
try: | ||
import dask | ||
except ImportError: | ||
pass | ||
|
||
import xarray as xr | ||
|
||
from . import randn, requires_dask | ||
|
||
|
||
def make_bench_data(shape, frac_nan, chunks):
    """Create the DataArray used as input by the benchmarks below.

    Parameters
    ----------
    shape : sequence of int
        Shape handed to ``randn``; ``shape[0]`` is the length of the daily
        ``time`` coordinate starting at 2000-01-01.
    frac_nan : float
        Second argument handed to ``randn`` (presumably the fraction of
        values set to NaN -- confirm against ``randn``).
    chunks : dict or None
        If not None, the array is chunked with ``DataArray.chunk(chunks)``.

    Returns
    -------
    DataArray
        Array with dimensions ``('time', 'x', 'y')``.
    """
    values = randn(shape, frac_nan)
    time_index = pd.date_range('2000-01-01', freq='D', periods=shape[0])
    arr = xr.DataArray(values,
                       dims=('time', 'x', 'y'),
                       coords={'time': time_index})

    # Unchunked case: hand back the array as constructed.
    if chunks is None:
        return arr
    return arr.chunk(chunks)
|
||
|
||
def time_interpolate_na(shape, chunks, method, limit):
    """Benchmark ``DataArray.interpolate_na`` along the ``time`` dimension.

    Parameters
    ----------
    shape : tuple of int
        Shape of the benchmark array, passed to ``make_bench_data``.
    chunks : dict or None
        Chunking passed to ``make_bench_data``; ``None`` leaves the data
        unchunked.
    method : str
        Interpolation method forwarded to ``interpolate_na``.
    limit : int or None
        Forwarded to ``interpolate_na`` as ``limit``.
    """
    if chunks is not None:
        # Chunked cases need dask; requires_dask() is expected to abort/skip
        # the benchmark when it is unavailable (see its definition).
        requires_dask()
    da = make_bench_data(shape, 0.1, chunks=chunks)
    # Forward ``method`` so every parametrized case actually times the method
    # it is labelled with (it used to be hard-coded to 'linear').
    actual = da.interpolate_na(dim='time', method=method, limit=limit)

    if chunks is not None:
        # Chunked results are computed explicitly so the work is timed.
        actual.compute()


# Parameter grid: one entry per name in ``param_names``, in the same order.
time_interpolate_na.param_names = ['shape', 'chunks', 'method', 'limit']
time_interpolate_na.params = ([(3650, 200, 400), (100, 25, 25)],
                              [None, {'x': 25, 'y': 25}],
                              ['linear', 'spline', 'quadratic', 'cubic'],
                              [None, 3])
|
||
|
||
def time_ffill(shape, chunks, limit):
    """Benchmark ``DataArray.ffill`` along the ``time`` dimension.

    Parameters
    ----------
    shape : tuple of int
        Shape of the benchmark array, passed to ``make_bench_data``.
    chunks : dict or None
        Chunking passed to ``make_bench_data``; ``None`` leaves the data
        unchunked.
    limit : int or None
        Forwarded to ``ffill`` as ``limit``.
    """
    if chunks is not None:
        # Same guard as time_interpolate_na: chunked cases need dask.
        requires_dask()
    da = make_bench_data(shape, 0.1, chunks=chunks)
    actual = da.ffill(dim='time', limit=limit)

    if chunks is not None:
        # Chunked results are computed explicitly so the work is timed.
        actual.compute()


# Parameter grid: one entry per name in ``param_names``, in the same order.
time_ffill.param_names = ['shape', 'chunks', 'limit']
time_ffill.params = ([(3650, 200, 400), (100, 25, 25)],
                     [None, {'x': 25, 'y': 25}],
                     [None, 3])
|
||
|
||
def time_bfill(shape, chunks, limit):
    """Benchmark ``DataArray.bfill`` along the ``time`` dimension.

    Parameters
    ----------
    shape : tuple of int
        Shape of the benchmark array, passed to ``make_bench_data``.
    chunks : dict or None
        Chunking passed to ``make_bench_data``; ``None`` leaves the data
        unchunked.
    limit : int or None
        Forwarded to ``bfill`` as ``limit``.
    """
    if chunks is not None:
        # Same guard as time_interpolate_na: chunked cases need dask.
        requires_dask()
    da = make_bench_data(shape, 0.1, chunks=chunks)
    actual = da.bfill(dim='time', limit=limit)

    if chunks is not None:
        # Chunked results are computed explicitly so the work is timed.
        actual.compute()


# Parameter grid: one entry per name in ``param_names``, in the same order.
time_bfill.param_names = ['shape', 'chunks', 'limit']
time_bfill.params = ([(3650, 200, 400), (100, 25, 25)],
                     [None, {'x': 25, 'y': 25}],
                     [None, 3])
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1228,6 +1228,97 @@ def fillna(self, value): | |
out = ops.fillna(self, value) | ||
return out | ||
|
||
def interpolate_na(self, dim=None, method='linear', limit=None,
                   use_coordinate=True,
                   **kwargs):
    """Interpolate values according to different methods.

    Parameters
    ----------
    dim : str
        Specifies the dimension along which to interpolate.
    method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
              'polynomial', 'barycentric', 'krog', 'pchip',
              'spline', 'akima'}, optional
        String indicating which method to use for interpolation:

        - 'linear': linear interpolation (Default). Additional keyword
          arguments are passed to ``numpy.interp``
        - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
          'polynomial': are passed to ``scipy.interpolate.interp1d``. If
          method=='polynomial', the ``order`` keyword argument must also be
          provided.
        - 'barycentric', 'krog', 'pchip', 'spline', and 'akima': use their
          respective ``scipy.interpolate`` classes.
    use_coordinate : boolean or str, default True
        Specifies which index to use as the x values in the interpolation
        formulated as `y = f(x)`. If False, values are treated as if
        equally-spaced along `dim`. If True, the IndexVariable `dim` is
        used. If use_coordinate is a string, it specifies the name of a
        coordinate variable to use as the index.
    limit : int, default None
        Maximum number of consecutive NaNs to fill. Must be greater than 0
        or None for no limit.
    **kwargs
        Additional keyword arguments, forwarded unchanged to
        ``interp_na``.

    Returns
    -------
    DataArray

    See also
    --------
    numpy.interp
    scipy.interpolate
    """
    # Imported at call time rather than at module import.
    from .missing import interp_na
    return interp_na(self, dim=dim, method=method, limit=limit,
                     use_coordinate=use_coordinate, **kwargs)
|
||
def ffill(self, dim, limit=None):
    """Fill NaN values by propagating values forward.

    *Requires bottleneck.*

    Parameters
    ----------
    dim : str
        Specifies the dimension along which to propagate values when
        filling.
    limit : int, default None
        The maximum number of consecutive NaN values to forward fill. In
        other words, if there is a gap with more than this number of
        consecutive NaNs, it will only be partially filled. Must be greater
        than 0 or None for no limit.

    Returns
    -------
    DataArray
    """
    # Imported at call time rather than at module import.
    from .missing import ffill
    return ffill(self, dim, limit=limit)
|
||
def bfill(self, dim, limit=None):
    """Fill NaN values by propagating values backward.

    *Requires bottleneck.*

    Parameters
    ----------
    dim : str
        Specifies the dimension along which to propagate values when
        filling.
    limit : int, default None
        The maximum number of consecutive NaN values to backward fill. In
        other words, if there is a gap with more than this number of
        consecutive NaNs, it will only be partially filled. Must be greater
        than 0 or None for no limit.

    Returns
    -------
    DataArray
    """
    # Imported at call time rather than at module import.
    from .missing import bfill
    return bfill(self, dim, limit=limit)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need bottleneck installed to use |
||
|
||
def combine_first(self, other): | ||
"""Combine two DataArray objects, with union of coordinates. | ||
|
||
|
@@ -1935,10 +2026,10 @@ def sortby(self, variables, ascending=True): | |
sorted: DataArray | ||
A new dataarray where all the specified dims are sorted by dim | ||
labels. | ||
|
||
Examples | ||
-------- | ||
|
||
>>> da = xr.DataArray(np.random.rand(5), | ||
... coords=[pd.date_range('1/1/2000', periods=5)], | ||
... dims='time') | ||
|
@@ -1952,7 +2043,7 @@ def sortby(self, variables, ascending=True): | |
<xarray.DataArray (time: 5)> | ||
array([ 0.26532 , 0.270962, 0.552878, 0.615637, 0.965471]) | ||
Coordinates: | ||
* time (time) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ... | ||
* time (time) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ... | ||
""" | ||
ds = self._to_temp_dataset().sortby(variables, ascending=ascending) | ||
return self._from_temp_dataset(ds) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably too late to be helpful - but are we sure about the name here? We don't generally add `_na` onto methods (`bfill_na`?), and pandas is `interpolate` only.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See comment from @shoyer above: #1640 (comment)