
Add pointwise indexing via isel_points method #481

Merged: 7 commits, Jul 27, 2015
Changes from 1 commit
doc/api.rst: 2 additions, 0 deletions
@@ -93,6 +93,7 @@ Indexing
Dataset.loc
Dataset.isel
Dataset.sel
Dataset.isel_points
Dataset.squeeze
Dataset.reindex
Dataset.reindex_like
@@ -202,6 +203,7 @@ Indexing
DataArray.loc
DataArray.isel
DataArray.sel
DataArray.isel_points
DataArray.squeeze
DataArray.reindex
DataArray.reindex_like
doc/indexing.rst: 12 additions, 1 deletion
@@ -36,6 +36,8 @@ below and summarized in this table:
| By name | By label | ``arr.sel(space='IA')`` or |br| | ``ds.sel(space='IA')`` or |br| |
| | | ``arr.loc[dict(space='IA')]`` | ``ds.loc[dict(space='IA')]`` |
+------------------+--------------+---------------------------------+--------------------------------+
| By name | By integers | ``arr.isel_points(x=[0, 1])`` | ``ds.isel_points(x=[0, 1])`` |
Comment (Member): I would leave this outside the table for now -- it doesn't quite fit in with the other methods, because it does pointwise indexing, not outer/orthogonal indexing.

+------------------+--------------+---------------------------------+--------------------------------+
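The outer/orthogonal vs. pointwise distinction drawn in the comment above is easy to see in plain NumPy (a minimal sketch; not part of the diff):

```python
import numpy as np

arr = np.arange(12).reshape(3, 4)

# Outer/orthogonal indexing: every combination of rows [0, 1] and
# columns [0, 1] -> a 2x2 block. This matches isel/sel semantics.
outer = arr[np.ix_([0, 1], [0, 1])]

# Pointwise indexing: indices are paired up, selecting only the two
# points (0, 0) and (1, 1) -> a 1-d result. This is what isel_points does.
points = arr[[0, 1], [0, 1]]

print(outer.shape, points.shape)  # (2, 2) (2,)
```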

Positional indexing
-------------------
@@ -57,6 +59,7 @@ DataArray:

Positional indexing deviates from NumPy when indexing with multiple
arrays like ``arr[[0, 1], [0, 1]]``, as described in :ref:`indexing details`.
Use :py:meth:`~xray.Dataset.isel_points` to achieve this functionality.
Comment (Member): I would reference the section on pointwise indexing in this same document, e.g., by using

:ref:`pointwise indexing`

as the link, and just before the "Pointwise indexing" section below add:

.. _pointwise indexing:


xray also supports label-based indexing, just like pandas. Because
we use a :py:class:`pandas.Index` under the hood, label based indexing is very
@@ -108,6 +111,13 @@ use them explicitly to slice data. There are two ways to do this:
# index by dimension coordinate labels
arr.sel(time=slice('2000-01-01', '2000-01-02'))

3. Use the :py:meth:`~xray.DataArray.isel_points` method:
Comment (Member): I would put this in a new top-level section, "Pointwise indexing".


.. ipython:: python

# index by integer array indices
arr.isel_points(space=[0, 1], dim='points')
Comment (Member): A more compelling example would show actual pointwise indexing, instead of only indexing along one dimension.

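The "more compelling" multi-dimension case the reviewer asks for can be sketched in plain NumPy (the shapes, not the xray API, are the point here):

```python
import numpy as np

# A (time, y, x) array; select the two points (y=1, x=3) and (y=3, x=0)
# at every time step -- pointwise across y and x simultaneously.
data = np.arange(10 * 5 * 6).reshape(10, 5, 6)
y = [1, 3]
x = [3, 0]
points = data[:, y, x]

print(points.shape)  # (10, 2): time is untouched, (y, x) pairs collapse to points
```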

The arguments to these methods can be any objects that could index the array
along the dimension given by the keyword, e.g., labels for an individual value,
Python :py:func:`slice` objects or 1-dimensional arrays.
@@ -122,7 +132,7 @@ __ http://legacy.python.org/dev/peps/pep-0472/

.. warning::

Do not try to assign values when using ``isel`` or ``sel``::
Do not try to assign values when using ``isel``, ``isel_points`` or ``sel``::

# DO NOT do this
arr.isel(space=0) = 0
@@ -145,6 +155,7 @@ simultaneously, returning a new dataset:
ds = arr.to_dataset()
ds.isel(space=[0], time=[0])
ds.sel(time='2000-01-01')
ds.isel_points(space=[0, 1], dim='points')

Positional indexing on a dataset is not supported because the ordering of
dimensions in a dataset is somewhat ambiguous (it can vary between different
doc/whats-new.rst: 1 addition, 0 deletions
@@ -14,6 +14,7 @@ v0.5.3 (unreleased)

- Dataset variables are now written to netCDF files in order of appearance
when using the netcdf4 backend (:issue:`479`).
- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.DataArray.isel_points` to support pointwise indexing of Datasets and DataArrays (:issue:`475`).
Comment (Member): A brief example would be nice to add here. You could also link to the new documentation section.

Reply (Member Author): done.


v0.5.2 (16 July 2015)
---------------------
xray/core/dataarray.py: 12 additions, 0 deletions
@@ -551,6 +551,18 @@ def sel(self, method=None, **indexers):
return self.isel(**indexing.remap_label_indexers(self, indexers,
method=method))

def isel_points(self, dim='points', **indexers):
"""Return a new DataArray whose dataset is given by pointwise integer
indexing along the specified dimension(s).

See Also
--------
Dataset.isel_points
DataArray.sel_points
"""
ds = self._dataset.isel_points(dim=dim, **indexers)
return self._with_replaced_dataset(ds)

def reindex_like(self, other, method=None, copy=True):
"""Conform this object onto the indexes of another object, filling
in missing values with NaN.
xray/core/dataset.py: 54 additions, 1 deletion
@@ -1,6 +1,6 @@
import functools
import warnings
from collections import Mapping
from collections import Mapping, Sequence
from numbers import Number

import numpy as np
@@ -21,6 +21,7 @@
from .variable import as_variable, Variable, Coordinate, broadcast_variables
from .pycompat import (iteritems, itervalues, basestring, OrderedDict,
dask_array_type)
from .combine import concat


# list of attributes of pd.DatetimeIndex that are ndarrays of time info
@@ -1028,6 +1029,58 @@ def sel(self, method=None, **indexers):
return self.isel(**indexing.remap_label_indexers(self, indexers,
method=method))

def isel_points(self, dim='points', **indexers):
"""Returns a new dataset with each array indexed pointwise along the
specified dimension(s).

This method selects pointwise values from each array and is akin to
the NumPy indexing behavior of `arr[[0, 1], [0, 1]]`, except this
method does not require knowing the order of each array's dimensions.

Parameters
----------
dim : str, optional
Comment (Member): A neat trick about using concat for the underlying implementation here is that it means we can also support supplying all the valid arguments that work for the dim argument to concat, e.g., including existing DataArray and pandas.Index objects: http://xray.readthedocs.org/en/stable/generated/xray.concat.html#xray.concat

Might be worth mentioning that.
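For illustration, passing a pandas.Index as dim could look like the sketch below. The isel_points call is hypothetical; whether the argument is forwarded to concat unchanged depends on the final implementation:

```python
import pandas as pd

# A named index supplies both the dimension name and the coordinate
# labels for the new points dimension in a single object.
stations = pd.Index(['a', 'b', 'c'], name='station')

# Hypothetical usage, per the concat docs linked above:
# ds.isel_points(x=[0, 1, 2], y=[4, 2, 0], dim=stations)
```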

Dimension name to which the points will be added.
**indexers : {dim: indexer, ...}
Keyword arguments with names matching dimensions and values given
by integers, slice objects or arrays. All indexers must be the same
length.

Returns
-------
obj : Dataset
A new Dataset with the same contents as this dataset, except each
array and dimension is indexed by the appropriate indexers. In
general, each array's data will be a view of the array's data
Comment (Member): with pointwise indexing it's always a copy
in this dataset, unless numpy fancy indexing was triggered by using
an array indexer, in which case the data will be a copy.

See Also
--------
Dataset.sel
DataArray.isel
DataArray.sel
DataArray.isel_points
"""
invalid = [k for k in indexers if k not in self.dims]
if invalid:
raise ValueError("dimensions %r do not exist" % invalid)

# all the indexers should be iterables
keys = indexers.keys()
indexers = [(k, ([v] if not isinstance(v, Sequence) else v))
Comment (Member): Probably better to raise an error if someone tries to pass in something that isn't a sequence? I would probably coerce with np.asarray and then raise if v.dtype.kind != 'i' or v.ndim != 1.

Reply (Member Author): My thought was that you may not know the shape of x and y a priori, but am happy to remove this. After thinking about how numpy treats scalar vs array indices (example below), I think it is best to remove this and raise an error.

In [1]: x = np.arange(12).reshape((3, 4))

In [2]: x[2, 3]
Out[2]: 11

In [3]: x[[2], [3]]
Out[3]: array([11]) 

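The validation suggested above (coerce with np.asarray, then check dtype and ndim) can be sketched as follows. The helper name is hypothetical; the PR's final code may differ:

```python
import numpy as np

def as_point_indexer(v):
    """Hypothetical helper: coerce an indexer to a 1-d integer array.

    Rejects anything that is not 1-d integer data, which also rules out
    scalars (ndim == 0) and slice objects (object dtype after asarray).
    """
    v = np.asarray(v)
    if v.dtype.kind != 'i':
        raise TypeError('indexers must be integers: %r' % (v,))
    if v.ndim != 1:
        raise ValueError('indexers must be 1-dimensional: %r' % (v,))
    return v
```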
Reply (Member Author): I've been trying to implement this and I'm not getting it to work. Specifically, how do we want to handle slice objects? As I think about it more, I'm not sure how my first commit was working with slice objects as indexers.

Any thoughts on the best way to handle this?

Comment (Member): I would raise an error with slice objects, too. 1d arrays of integers is enough functionality.

(Sent by email on Wed, Jul 22, 2015, in reply to Joe Hamman; the quoted text repeated the diff and question above. https://github.com/xray/xray/pull/481/files#r35289799)
Reply (Member Author): Okay. That is done in my last commit so no further change needed here.

for k, v in iteritems(indexers)]

# all the indexers should have the same length
lengths = set([len(v) for k, v in indexers])
Comment (Member): note: creating the intermediate list here is unnecessary -- this works exactly the same (except it's a little faster):

lengths = set(len(v) for k, v in indexers)

if len(lengths) > 1:
raise ValueError('All indexers must be the same length')

Comment (Member): add a TODO note about speeding this up using vectorized indexing?

return concat([self.isel(**d) for d in
[dict(zip(keys, inds)) for inds in
zip(*[v for k, v in indexers])]],
dim=dim)
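The nested comprehension in this return statement is dense. In plain Python (a sketch with concrete values; no xray needed), it transposes the per-dimension indexer lists into one {dim: index} dict per point, each of which is passed to isel before the results are concatenated:

```python
# Per-dimension indexers, as built earlier in isel_points:
keys = ['y', 'x']
indexers = [('y', [1, 3]), ('x', [3, 0])]

# zip(*values) turns per-dimension lists into per-point tuples,
# and each tuple is zipped back with the dimension names.
per_point = [dict(zip(keys, inds))
             for inds in zip(*[v for k, v in indexers])]

print(per_point)  # [{'y': 1, 'x': 3}, {'y': 3, 'x': 0}]
# so the call becomes:
# concat([self.isel(y=1, x=3), self.isel(y=3, x=0)], dim=dim)
```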
Comment (Member): Let's explicitly provide the coords and data_vars arguments to concat:

indexer_dims = set(indexers)

def relevant_keys(mapping):
    return [k for k, v in mapping.items()
            if any(d in indexer_dims for d in v.dims)]

data_vars = relevant_keys(self.data_vars)
coords = relevant_keys(self.coords)

This means concat doesn't need to look at any data to figure out which variables should be concatenated (vs. variables which were not indexed).

A good test case here would be a dataset that has a constant variable, e.g.,

ds = xray.Dataset({'x': range(5), 'y': 0})

If you do ds.isel_points(x=[0, 1, 2]), y should still be a scalar.

Reply (Member Author): test added in test_dataset.py


def reindex_like(self, other, method=None, copy=True):
"""Conform this object onto the indexes of another object, filling
in missing values with NaN.
xray/test/test_dataarray.py: 58 additions, 0 deletions
@@ -382,6 +382,64 @@ def test_sel_method(self):
actual = data.sel(x=[0.9, 1.9], method='backfill')
self.assertDataArrayIdentical(expected, actual)

def test_isel_points_method(self):
Comment (Member): Add a test case for negative indexers?

Reply (Member Author): done.

shape = (10, 5, 6)
np_array = np.random.random(shape)
da = DataArray(np_array, dims=['time', 'y', 'x'])
y = [1, 3]
x = [3, 0]

expected = da.values[:, y, x]

actual = da.isel_points(y=y, x=x, dim='test_coord')
assert 'test_coord' in actual.coords
assert actual.coords['test_coord'].shape == (len(y), )
Comment (Member): I would also verify that x and y are still coordinates, along the test_coord dimension.

Probably easier just to construct the expected data-array and then compare them with self.assertDataArrayIdentical.

Reply (Member Author): done.


actual = da.isel_points(y=y, x=x)
assert 'points' in actual.coords
# not sure why actual needs to be transposed
Comment (Member): xray always concatenates along the first dimension.

NumPy tries to do something very clever with fancy indexing and concatenates along the last dimension in this case:

In [11]: x[:, [1, 3], [3, 0]].shape
Out[11]: (10, 2)

np.testing.assert_equal(actual.T, expected)

# test scalars (should match isel but will have points dim)
y = 1
x = 3
expected = da.values[:, y, x]

actual = da.isel_points(y=y, x=x)
# squeeze to drop "points" dim
assert 'points' in actual.coords
np.testing.assert_allclose(actual.squeeze().values, expected)
self.assertDataArrayIdentical(actual.squeeze().drop(['points']),
da.isel(y=y, x=x))

# a few corner cases
da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4])
np.testing.assert_allclose(
da.isel_points(time=1, x=2, y=4).values.squeeze(),
Comment (Member): Again, let's consider if we really want to support indexing with scalars in isel_points.

np_array[1, 4, 2].squeeze())

da.isel_points(time=1)
da.isel_points(time=[1, 2])

# test that leaving out a dim is the same as slice(None)
self.assertDataArrayIdentical(
da.isel_points(time=slice(None), y=y, x=x),
da.isel_points(time=np.arange(len(da['time'])), y=y, x=x))
self.assertDataArrayIdentical(
da.isel_points(time=slice(None), y=y, x=x),
da.isel_points(y=y, x=x))

# test that the order of the indexers doesn't matter
self.assertDataArrayIdentical(
da.isel_points(y=y, x=x),
da.isel_points(x=x, y=y))

# make sure we're raising errors in the right places
with self.assertRaises(ValueError):
Comment (Member): I like using assertRaisesRegexp so you can verify it's the right error message, too.

da.isel_points(y=[1, 2], x=[1, 2, 3])
with self.assertRaises(ValueError):
da.isel_points(bad_key=[1, 2])

def test_loc(self):
self.ds['x'] = ('x', np.array(list('abcdefghij')))
da = self.ds['foo']
xray/test/test_dataset.py: 41 additions, 0 deletions
@@ -661,6 +661,47 @@ def test_sel(self):
self.assertDatasetEqual(data.isel(td=slice(1, 3)),
data.sel(td=slice('1 days', '2 days')))

def test_isel_points(self):
data = create_test_data()

pdim1 = [1, 2, 3]
pdim2 = [4, 5, 1]
pdim3 = [1, 2, 3]

actual = data.isel_points(dim1=pdim1, dim2=pdim2, dim3=pdim3,
dim='test_coord')
assert 'test_coord' in actual.coords
assert actual.coords['test_coord'].shape == (len(pdim1), )

actual = data.isel_points(dim1=pdim1, dim2=pdim2)
assert 'points' in actual.coords
np.testing.assert_array_equal(pdim1, actual['dim1'])

# test scalars (should match isel but will have points dim)
pdim1 = 1
pdim2 = 3

actual = data.isel_points(dim1=pdim1, dim2=pdim2)
# squeeze to drop "points" dim
assert 'points' in actual.coords
self.assertDatasetEqual(actual.squeeze().drop(['points']),
data.isel(dim1=pdim1, dim2=pdim2))

# test that leaving out a dim is the same as slice(None)
self.assertDatasetIdentical(
data.isel_points(time=slice(None), dim1=pdim1, dim2=pdim2),
data.isel_points(dim1=pdim1, dim2=pdim2))

# test that the order of the indexers doesn't matter
self.assertDatasetIdentical(data.isel_points(dim1=pdim1, dim2=pdim2),
data.isel_points(dim2=pdim2, dim1=pdim1))

# make sure we're raising errors in the right places
with self.assertRaises(ValueError):
data.isel_points(dim1=[1, 2], dim2=[1, 2, 3])
with self.assertRaises(ValueError):
data.isel_points(bad_key=[1, 2])

def test_sel_method(self):
data = create_test_data()
