Skip to content

ENH: Add SemiMonthEnd and SemiMonthBegin offsets #1543 #13315

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions asv_bench/benchmarks/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1155,3 +1155,63 @@ def setup(self):

def time_timeseries_year_incr(self):
(self.date + self.year)


class timeseries_semi_month_offset(object):
    """Benchmarks for the SemiMonthEnd and SemiMonthBegin offsets.

    Every ``time_*`` method runs a single operation, either against one
    datetime (``self.date``) or against a whole index (``self.rng``).
    """
    goal_time = 0.2

    def setup(self):
        # minute-frequency index used by the *_apply_index and *_rng cases
        self.N = 100000
        self.rng = date_range(start='1/1/2000', periods=self.N, freq='T')
        # date is not on an offset which will be slowest case
        self.date = dt.datetime(2011, 1, 2)
        self.semi_month_end = pd.offsets.SemiMonthEnd()
        self.semi_month_begin = pd.offsets.SemiMonthBegin()

    # --- SemiMonthEnd, scalar datetime ---

    def time_semi_month_end_apply(self):
        self.semi_month_end.apply(self.date)

    def time_semi_month_end_incr(self):
        self.date + self.semi_month_end

    def time_semi_month_end_incr_n(self):
        self.date + 10 * self.semi_month_end

    def time_semi_month_end_decr(self):
        self.date - self.semi_month_end

    def time_semi_month_end_decr_n(self):
        self.date - 10 * self.semi_month_end

    # --- SemiMonthEnd, whole index ---

    def time_semi_month_end_apply_index(self):
        self.semi_month_end.apply_index(self.rng)

    def time_semi_month_end_incr_rng(self):
        self.rng + self.semi_month_end

    def time_semi_month_end_decr_rng(self):
        self.rng - self.semi_month_end

    # --- SemiMonthBegin, scalar datetime ---

    def time_semi_month_begin_apply(self):
        self.semi_month_begin.apply(self.date)

    def time_semi_month_begin_incr(self):
        self.date + self.semi_month_begin

    def time_semi_month_begin_incr_n(self):
        self.date + 10 * self.semi_month_begin

    def time_semi_month_begin_decr(self):
        self.date - self.semi_month_begin

    def time_semi_month_begin_decr_n(self):
        self.date - 10 * self.semi_month_begin

    # --- SemiMonthBegin, whole index ---

    def time_semi_month_begin_apply_index(self):
        self.semi_month_begin.apply_index(self.rng)

    def time_semi_month_begin_incr_rng(self):
        self.rng + self.semi_month_begin

    def time_semi_month_begin_decr_rng(self):
        self.rng - self.semi_month_begin
4 changes: 4 additions & 0 deletions doc/source/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,8 @@ frequency increment. Specific offset logic like "month", "business day", or
BMonthBegin, "business month begin"
CBMonthEnd, "custom business month end"
CBMonthBegin, "custom business month begin"
SemiMonthEnd, "15th (or other day_of_month) and calendar month end"
SemiMonthBegin, "15th (or other day_of_month) and calendar month begin"
QuarterEnd, "calendar quarter end"
QuarterBegin, "calendar quarter begin"
BQuarterEnd, "business quarter end"
Expand Down Expand Up @@ -967,9 +969,11 @@ frequencies. We will refer to these aliases as *offset aliases*
"D", "calendar day frequency"
"W", "weekly frequency"
"M", "month end frequency"
"SM", "semi-month end frequency (15th and end of month)"
"BM", "business month end frequency"
"CBM", "custom business month end frequency"
"MS", "month start frequency"
"SMS", "semi-month start frequency (1st and 15th)"
"BMS", "business month start frequency"
"CBMS", "custom business month start frequency"
"Q", "quarter end frequency"
Expand Down
37 changes: 37 additions & 0 deletions doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,43 @@ New behaviour:

In [2]: pd.read_csv(StringIO(data), names=names)

.. _whatsnew_0182.enhancements.semi_month_offsets:

Semi-Month Offsets
^^^^^^^^^^^^^^^^^^

Pandas has gained new frequency offsets, ``SemiMonthEnd`` ('SM') and ``SemiMonthBegin`` ('SMS').
These provide date offsets anchored (by default) to the 15th and end of month, and to the 1st and 15th of month, respectively.
(:issue:`1543`)

.. ipython:: python

from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin

SemiMonthEnd:

.. ipython:: python

Timestamp('2016-01-01') + SemiMonthEnd()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe shorten the examples and put 2 (or 3) for each sub-section.

pd.date_range('2015-01-01', freq='SM', periods=4)

SemiMonthBegin:

.. ipython:: python

Timestamp('2016-01-01') + SemiMonthBegin()

pd.date_range('2015-01-01', freq='SMS', periods=4)

Using the anchoring suffix, you can also specify the day of month to use instead of the 15th.

.. ipython:: python

pd.date_range('2015-01-01', freq='SMS-16', periods=4)

pd.date_range('2015-01-01', freq='SM-14', periods=4)

.. _whatsnew_0182.enhancements.other:

Other enhancements
Expand Down
214 changes: 213 additions & 1 deletion pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import numpy as np

from pandas.tseries.tools import to_datetime, normalize_date
from pandas.core.common import ABCSeries, ABCDatetimeIndex, ABCPeriod
from pandas.core.common import (ABCSeries, ABCDatetimeIndex, ABCPeriod,
AbstractMethodError)

# import after tools, dateutil check
from dateutil.relativedelta import relativedelta, weekday
Expand All @@ -18,6 +19,7 @@
__all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay',
'CBMonthEnd', 'CBMonthBegin',
'MonthBegin', 'BMonthBegin', 'MonthEnd', 'BMonthEnd',
'SemiMonthEnd', 'SemiMonthBegin',
'BusinessHour', 'CustomBusinessHour',
'YearBegin', 'BYearBegin', 'YearEnd', 'BYearEnd',
'QuarterBegin', 'BQuarterBegin', 'QuarterEnd', 'BQuarterEnd',
Expand Down Expand Up @@ -1160,6 +1162,214 @@ def onOffset(self, dt):
_prefix = 'MS'


class SemiMonthOffset(DateOffset):
    """
    Base class for offsets that occur twice a month: on ``day_of_month``
    and on a second anchor day decided by the subclass.

    Subclasses provide ``_prefix`` and ``onOffset`` and must implement
    ``_apply``, ``_get_roll`` and ``_apply_index_days``.

    Parameters
    ----------
    n : int, default 1
    day_of_month : int, optional
        Falls back to ``_default_day_of_month`` when omitted. Must satisfy
        ``_min_day_of_month <= day_of_month <= 27``.
    normalize : bool, default False

    Raises
    ------
    ValueError
        If ``day_of_month`` is outside the allowed range.
    """
    _adjust_dst = True
    _default_day_of_month = 15
    _min_day_of_month = 2

    def __init__(self, n=1, day_of_month=None, normalize=False, **kwds):
        if day_of_month is None:
            self.day_of_month = self._default_day_of_month
        else:
            self.day_of_month = int(day_of_month)
        if not self._min_day_of_month <= self.day_of_month <= 27:
            raise ValueError('day_of_month must be '
                             '{}<=day_of_month<=27, got {}'.format(
                                 self._min_day_of_month, self.day_of_month))
        self.n = int(n)
        self.normalize = normalize
        self.kwds = kwds
        self.kwds['day_of_month'] = self.day_of_month

    @classmethod
    def _from_name(cls, suffix=None):
        """Build an offset from the suffix of a frequency string."""
        return cls(day_of_month=suffix)

    @property
    def rule_code(self):
        """Frequency string: the class prefix followed by ``-<day_of_month>``."""
        suffix = '-{}'.format(self.day_of_month)
        return self._prefix + suffix

    @apply_wraps
    def apply(self, other):
        """Apply the offset to a single datetime-like ``other``.

        A date strictly between the two anchors is first moved onto
        ``day_of_month`` and ``n`` is adjusted for that move; the
        subclass ``_apply`` then performs the remaining steps.
        """
        n = self.n
        if not self.onOffset(other):
            _, days_in_month = tslib.monthrange(other.year, other.month)
            if 1 < other.day < self.day_of_month:
                other += relativedelta(day=self.day_of_month)
                if n > 0:
                    # rollforward so subtract 1
                    n -= 1
            elif self.day_of_month < other.day < days_in_month:
                other += relativedelta(day=self.day_of_month)
                if n < 0:
                    # rollforward in the negative direction so add 1
                    n += 1
                elif n == 0:
                    n = 1

        return self._apply(n, other)

    def _apply(self, n, other):
        """Handle specific apply logic for child classes"""
        raise AbstractMethodError(self)

    @apply_index_wraps
    def apply_index(self, i):
        """Vectorized counterpart of ``apply`` for a datetime index ``i``."""
        # Compare calendar days rather than elapsed time since the start of
        # the month: a timestamp that falls on day_of_month but carries a
        # time of day is on the anchor (as in onOffset), not after it.
        days = i.day

        # get boolean array for each element before the day_of_month
        before_day_of_month = days < self.day_of_month

        # get boolean array for each element after the day_of_month
        after_day_of_month = days > self.day_of_month

        # determine the correct n for each date in i
        roll = self._get_roll(i, before_day_of_month, after_day_of_month)

        # isolate the time since it will be stripped away on the next line
        time = i.to_perioddelta('D')

        # apply the correct number of months
        i = (i.to_period('M') + (roll // 2)).to_timestamp()

        # apply the correct day
        i = self._apply_index_days(i, roll)

        return i + time

    def _get_roll(self, i, before_day_of_month, after_day_of_month):
        """Return an array with the correct n for each date in i.

        The roll array is based on the fact that i gets rolled back to
        the first day of the month.
        """
        raise AbstractMethodError(self)

    def _apply_index_days(self, i, roll):
        """Apply the correct day for each date in i"""
        raise AbstractMethodError(self)


class SemiMonthEnd(SemiMonthOffset):
    """
    Two DateOffset's per month repeating on the last
    day of the month and day_of_month.

    .. versionadded:: 0.18.2

    Parameters
    ----------
    n: int
    normalize : bool, default False
    day_of_month: int, {1, 2,...,27}, default 15
    """
    _prefix = 'SM'
    _min_day_of_month = 1

    def onOffset(self, dt):
        """Return True when ``dt`` falls on day_of_month or on the last day
        of its month (and is normalized, if ``normalize`` is set)."""
        if self.normalize and not _is_normalized(dt):
            return False
        _, days_in_month = tslib.monthrange(dt.year, dt.month)
        return dt.day in (self.day_of_month, days_in_month)

    def _apply(self, n, other):
        """Move ``other`` by ``n`` semi-month-end steps."""
        # if other.day is not day_of_month move to day_of_month and update n
        if other.day < self.day_of_month:
            other += relativedelta(day=self.day_of_month)
            if n > 0:
                n -= 1
        elif other.day > self.day_of_month:
            other += relativedelta(day=self.day_of_month)
            if n == 0:
                n = 1
            else:
                n += 1

        # other now sits on day_of_month: every two steps is one whole month,
        # and an odd remainder lands on a month end
        months = n // 2
        # day=31 relies on relativedelta capping the day at the last day of
        # the target month.
        # NOTE(review): confirm the tests walk through every month of a year,
        # including February in a leap year.
        day = 31 if n % 2 else self.day_of_month
        return other + relativedelta(months=months, day=day)

    def _get_roll(self, i, before_day_of_month, after_day_of_month):
        """Return, per element of ``i``, the number of semi-month steps to
        take counting from the first day of that element's month."""
        n = self.n
        is_month_end = i.is_month_end
        if n > 0:
            # a month end needs one extra step compared with other days
            # on or after day_of_month
            roll_end = np.where(is_month_end, 1, 0)
            roll_before = np.where(before_day_of_month, n, n + 1)
            roll = roll_end + roll_before
        elif n == 0:
            # roll forward to the nearest anchor: 2 for elements flagged as
            # after day_of_month, otherwise 1
            roll_after = np.where(after_day_of_month, 2, 0)
            roll_before = np.where(~after_day_of_month, 1, 0)
            roll = roll_before + roll_after
        else:
            roll = np.where(after_day_of_month, n + 2, n + 1)
        return roll

    def _apply_index_days(self, i, roll):
        """Turn the month starts in ``i`` into the final anchor day."""
        # odd roll: add day_of_month days, then the day subtracted below
        # leaves day_of_month; even roll: only the subtraction applies,
        # giving the last day of the preceding month
        i += (roll % 2) * Timedelta(days=self.day_of_month).value
        return i + Timedelta(days=-1)


class SemiMonthBegin(SemiMonthOffset):
    """
    Two DateOffset's per month repeating on the first
    day of the month and day_of_month.

    .. versionadded:: 0.18.2

    Parameters
    ----------
    n: int
    normalize : bool, default False
    day_of_month: int, {2, 3,...,27}, default 15
    """
    _prefix = 'SMS'

    def onOffset(self, dt):
        """Return True when ``dt`` falls on the 1st or on day_of_month
        (and is normalized, if ``normalize`` is set)."""
        if self.normalize and not _is_normalized(dt):
            return False
        return dt.day in (1, self.day_of_month)

    def _apply(self, n, other):
        """Move ``other`` by ``n`` semi-month-begin steps."""
        # if other.day is not day_of_month move to day_of_month and update n
        if other.day < self.day_of_month:
            other += relativedelta(day=self.day_of_month)
            if n == 0:
                n = -1
            else:
                n -= 1
        elif other.day > self.day_of_month:
            other += relativedelta(day=self.day_of_month)
            if n == 0:
                n = 1
            elif n < 0:
                n += 1

        # other now sits on day_of_month: an odd n ends on the 1st, and
        # n // 2 + n % 2 rounds the month count up so that an odd positive
        # n reaches the 1st of a later month
        months = n // 2 + n % 2
        day = 1 if n % 2 else self.day_of_month
        return other + relativedelta(months=months, day=day)

    def _get_roll(self, i, before_day_of_month, after_day_of_month):
        """Return, per element of ``i``, the number of semi-month steps to
        take counting from the first day of that element's month."""
        n = self.n
        is_month_start = i.is_month_start
        if n > 0:
            roll = np.where(before_day_of_month, n, n + 1)
        elif n == 0:
            # a month start stays put (0); other elements get 1, plus one
            # more when flagged as after day_of_month
            roll_start = np.where(is_month_start, 0, 1)
            roll_after = np.where(after_day_of_month, 1, 0)
            roll = roll_start + roll_after
        else:
            # a month start has to go back one step further than other days
            # that are not after day_of_month
            roll_after = np.where(after_day_of_month, n + 2, n + 1)
            roll_start = np.where(is_month_start, -1, 0)
            roll = roll_after + roll_start
        return roll

    def _apply_index_days(self, i, roll):
        """Turn the month starts in ``i`` into the final anchor day."""
        # odd roll: shift from the 1st to day_of_month; even roll: stay put
        return i + (roll % 2) * Timedelta(days=self.day_of_month - 1).value


class BusinessMonthEnd(MonthOffset):
"""DateOffset increments between business EOM dates"""

Expand Down Expand Up @@ -2720,6 +2930,8 @@ def generate_range(start=None, end=None, periods=None,
CustomBusinessHour, # 'CBH'
MonthEnd, # 'M'
MonthBegin, # 'MS'
SemiMonthEnd, # 'SM'
SemiMonthBegin, # 'SMS'
Week, # 'W'
Second, # 'S'
Minute, # 'T'
Expand Down
Loading