Skip to content

BUG: PeriodIndex.asof_locs #32310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,27 +388,26 @@ def __array_wrap__(self, result, context=None):
# cannot pass _simple_new as it is
return type(self)(result, freq=self.freq, name=self.name)

# NOTE(review): this span is a rendered diff hunk with indentation stripped.
# The two `def` lines and the paired `_ndarray_values` / `asi8` statements look
# like the removed and added sides of the same change — confirm against the
# merged file before treating this text as runnable code.
def asof_locs(self, where, mask):
def asof_locs(self, where, mask: np.ndarray) -> np.ndarray:
"""
For each label in `where`, find the position in this index of the last
entry selected by `mask` whose ordinal is not greater than the label's.

Parameters
----------
where : DatetimeIndex or PeriodIndex
    Labels to look up. A DatetimeIndex is converted to a PeriodIndex
    using this index's freq; a PeriodIndex must already share this
    index's freq.
mask : np.ndarray
    array of booleans where data is not NA

Returns
-------
np.ndarray
    Positions into this index, one per label in `where`; -1 where the
    label falls before the first entry selected by `mask`.

Raises
------
TypeError
    If `where` is neither a DatetimeIndex nor a PeriodIndex.
Exception built by ``raise_on_incompatible``
    If `where` is a PeriodIndex whose freq differs from this index's.
    (The tests accompanying this change expect IncompatibleFrequency;
    the helper itself is defined elsewhere — verify.)
"""
where_idx = where
if isinstance(where_idx, DatetimeIndex):
where_idx = PeriodIndex(where_idx.values, freq=self.freq)
elif not isinstance(where_idx, PeriodIndex):
raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")
elif where_idx.freq != self.freq:
raise raise_on_incompatible(self, where_idx)

# Insertion points (to the right of equal values) of the labels among the
# entries kept by `mask`; presumably those entries are sorted — searchsorted
# gives meaningful results only in that case.
locs = self._ndarray_values[mask].searchsorted(
where_idx._ndarray_values, side="right"
)
locs = self.asi8[mask].searchsorted(where_idx.asi8, side="right")

# Step back one slot to the entry at or before each label, clamping at 0,
# then map positions within the masked subset back to positions in `self`.
locs = np.where(locs > 0, locs - 1, 0)
result = np.arange(len(self))[mask].take(locs)

# Position of the first True in `mask`. Labels that landed on slot 0 but are
# strictly earlier than that entry have no match and are flagged with -1.
first = mask.argmax()
result[
(locs == 0) & (where_idx._ndarray_values < self._ndarray_values[first])
] = -1
result[(locs == 0) & (where_idx.asi8 < self.asi8[first])] = -1

return result

Expand Down
6 changes: 3 additions & 3 deletions pandas/plotting/_matplotlib/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,13 @@ def _convert_1d(values, units, axis):
if isinstance(values, valid_types) or is_integer(values) or is_float(values):
return get_datevalue(values, axis.freq)
elif isinstance(values, PeriodIndex):
return values.asfreq(axis.freq)._ndarray_values
return values.asfreq(axis.freq).asi8
elif isinstance(values, Index):
return values.map(lambda x: get_datevalue(x, axis.freq))
elif lib.infer_dtype(values, skipna=False) == "period":
# https://github.com/pandas-dev/pandas/issues/24304
# convert ndarray[period] -> PeriodIndex
return PeriodIndex(values, freq=axis.freq)._ndarray_values
return PeriodIndex(values, freq=axis.freq).asi8
elif isinstance(values, (list, tuple, np.ndarray, Index)):
return [get_datevalue(x, axis.freq) for x in values]
return values
Expand Down Expand Up @@ -607,7 +607,7 @@ def _daily_finder(vmin, vmax, freq):
info = np.zeros(
span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")]
)
info["val"][:] = dates_._ndarray_values
info["val"][:] = dates_.asi8
info["fmt"][:] = ""
info["maj"][[0, -1]] = True
# .. and set some shortcuts
Expand Down
22 changes: 21 additions & 1 deletion pandas/tests/frame/methods/test_asof.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
import numpy as np
import pytest

from pandas import DataFrame, Period, Series, Timestamp, date_range, to_datetime
from pandas._libs.tslibs import IncompatibleFrequency

from pandas import (
DataFrame,
Period,
Series,
Timestamp,
date_range,
period_range,
to_datetime,
)
import pandas._testing as tm


Expand Down Expand Up @@ -156,3 +166,13 @@ def test_is_copy(self, date_range_frame):

with tm.assert_produces_warning(None):
result["C"] = 1

def test_asof_periodindex_mismatched_freq(self):
    """``DataFrame.asof`` on an hourly PeriodIndex rejects daily labels."""
    n_rows = 50
    hourly = period_range("1/1/1990", periods=n_rows, freq="H")
    frame = DataFrame(np.random.randn(n_rows), index=hourly)

    # Labels converted to another freq must be refused, not silently aligned.
    expected_msg = "Input has different freq"
    with pytest.raises(IncompatibleFrequency, match=expected_msg):
        frame.asof(hourly.asfreq("D"))
24 changes: 24 additions & 0 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,3 +795,27 @@ def test_period_index_indexer(self):
tm.assert_frame_equal(df, df.loc[list(idx)])
tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
tm.assert_frame_equal(df, df.loc[list(idx)])


class TestAsOfLocs:
    """Input validation of ``PeriodIndex.asof_locs``."""

    def test_asof_locs_mismatched_type(self):
        """Reject `where` of the wrong index type or with a different freq."""
        stamps = pd.date_range("2016-01-01", periods=3)
        daily = stamps.to_period("D")
        hourly = stamps.to_period("H")

        valid = np.array([False, True, False])

        type_msg = "must be DatetimeIndex or PeriodIndex"

        # Integer index holding the period ordinals
        with pytest.raises(TypeError, match=type_msg):
            daily.asof_locs(pd.Int64Index(daily.asi8), valid)

        # Float index holding the period ordinals
        with pytest.raises(TypeError, match=type_msg):
            daily.asof_locs(pd.Float64Index(daily.asi8), valid)

        # TimedeltaIndex
        with pytest.raises(TypeError, match=type_msg):
            daily.asof_locs(stamps - stamps, valid)

        # PeriodIndex, but at hourly rather than daily freq
        freq_msg = "Input has different freq=H"
        with pytest.raises(libperiod.IncompatibleFrequency, match=freq_msg):
            daily.asof_locs(hourly, valid)
7 changes: 7 additions & 0 deletions pandas/tests/series/methods/test_asof.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._libs.tslibs import IncompatibleFrequency

from pandas import Series, Timestamp, date_range, isna, notna, offsets
import pandas._testing as tm

Expand Down Expand Up @@ -132,6 +134,11 @@ def test_periodindex(self):
d = ts.index[0].to_timestamp() - offsets.BDay()
assert isna(ts.asof(d))

# Mismatched freq
msg = "Input has different freq"
with pytest.raises(IncompatibleFrequency, match=msg):
ts.asof(rng.asfreq("D"))

def test_errors(self):

s = Series(
Expand Down