Skip to content

Commit 279f07b

Browse files
authored
daily to dekadal functions (#527)
* daily to dekadal functions * removing superfluous * dekad bins at midnight * groupby_bins takes "bins" that can be bins' size or edges * fixing assert warning * get_attr is your friend * split up functionality to create point array from interval one * generalizing daily_to_dekad * re-add func to swap interval to point dim * another suite of functions (and their tests) to do it all * from assert == to np/xr assertions functions * various improvements on names, types, practice, syntax, doc... * missed that one * remove optional reduction application from regroup * removing the "regroup" intervals experiment * correct xr references
1 parent e64763f commit 279f07b

File tree

2 files changed

+319
-104
lines changed

2 files changed

+319
-104
lines changed

enacts/calc.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,126 @@ def _cess_date(dry_thresh, dry_spell_length_thresh, sm_func, time_coord):
841841

842842

843843
# Time functions
844+
845+
846+
def intervals_to_points(intervals, to_point="mid", keep_attrs=True):
    """Return the left, mid, or right points of an array of pd.Interval.

    Parameters
    ----------
    intervals : xr.DataArray(pd.Interval)
        array of intervals. It must be named: its name is used as the
        coordinate of the returned array and to build the returned array's name.
    to_point : str, optional
        "left", "mid" or "right" point of `intervals`
        default is "mid"
    keep_attrs : boolean, optional
        keep attributes from `intervals` to point array
        default is True

    Returns
    -------
    point_array : xr.DataArray
        array of the left, mid or right points of `intervals`, with
        `intervals` attached as its coordinate. It is named after `intervals`
        with a trailing "_bins" replaced by "_<to_point>", or with
        "_<to_point>" appended when the name has no "_bins" suffix.

    See Also
    --------
    pandas.Interval

    Notes
    -----
    Should work for any type of array, not just time.
    xr.groupby_bins against dim renames the Interval dim_bins,
    not sure if xr.groupby does the same,
    and what other Xarray functions return Intervals but, depending,
    could generalize the returned array name
    """
    interval_name = intervals.name
    point_suffix = f"_{to_point}"
    bins_suffix = "_bins"
    # There might be other automatic cases to cover
    if interval_name.endswith(bins_suffix):
        # Only swap the trailing "_bins"; leave any earlier occurrence alone.
        point_name = interval_name[: -len(bins_suffix)] + point_suffix
    else:
        point_name = interval_name + point_suffix
    return xr.DataArray(
        # `to_point` is looked up as an attribute on each pd.Interval
        data=[getattr(interval, to_point) for interval in intervals.values],
        coords={interval_name: intervals},
        name=point_name,
        attrs=intervals.attrs if keep_attrs else {},
    )
889+
890+
891+
def replace_intervals_with_points(
    interval_data, interval_dim, to_point="mid", keep_attrs=True
):
    """Swap a pd.Interval dimension for one made of points of those intervals.

    The new dimension holds the left edge, center (mid), or right edge of the
    intervals found along `interval_dim`.

    Parameters
    ----------
    interval_data : xr.DataArray or xr.Dataset
        data depending on a pd.Interval dimension
    interval_dim : str
        name of pd.Interval dimension to be replaced
    to_point : str, optional
        "left", "mid" or "right" point of `interval_dim` intervals
        default is "mid"
    keep_attrs : boolean, optional
        keep attributes from `interval_dim` to replacing point-wise dimension
        default is True

    Returns
    -------
    point_data : xr.DataArray or xr.Dataset
        of which interval dimension has been replaced by point dimension.
        The point dimension is named as returned by `intervals_to_points`.

    See Also
    --------
    pandas.Interval, intervals_to_points, xarray.assign_coords, xarray.swap_dims

    Notes
    -----
    NOTE(review): xarray.swap_dims is expected to leave the original interval
    coordinate in place as a non-dimension coordinate -- confirm if callers
    need it dropped.
    """
    point_dim = intervals_to_points(
        interval_data[interval_dim], to_point=to_point, keep_attrs=keep_attrs
    )
    # Use the (dims, data, attrs) tuple form so the attributes selected by
    # `keep_attrs` reach the new coordinate; a (dims, data) tuple drops them.
    point_coord = (interval_dim, point_dim.data, point_dim.attrs)
    with_points = interval_data.assign_coords({point_dim.name: point_coord})
    return with_points.swap_dims({interval_dim: point_dim.name})
927+
928+
929+
def groupby_dekads(daily_data, time_dim="T"):
    """Group `daily_data` by dekads for grouping operations.

    Dekad bins start on the 1st, 11th and 21st of each month, at midnight.
    Bins are closed on the left and open on the right. Only dekads whose
    two edges fall within the span of `daily_data` form a bin.

    Parameters
    ----------
    daily_data : xr.DataArray or xr.Dataset
        daily data
    time_dim : str, optional
        name of daily time dimension, default is "T"

    Returns
    -------
    grouped : xr.core.groupby.DataArrayGroupBy or xr.core.groupby.DatasetGroupBy
        `daily_data` grouped by dekads

    Raises
    ------
    AssertionError
        if `daily_data` spans fewer than two dekad edges, i.e. does not cover
        at least one full dekad

    See Also
    --------
    xarray.groupby_bins, xarray.core.groupby.DataArrayGroupBy,
    xarray.core.groupby.DatasetGroupBy
    """
    time = daily_data[time_dim]
    # dekad edges are located at midnight
    first_midnight = time[0].dt.floor("D").values
    # Go one day past the last time step so a dekad ending on it can be closed.
    last_midnight = (time[-1] + np.timedelta64(1, "D")).dt.floor("D").values
    all_days = pd.date_range(start=first_midnight, end=last_midnight, freq="1D")
    dekad_edges = all_days[all_days.day.isin([1, 11, 21])]
    if dekad_edges.size <= 1:
        # Raised explicitly rather than via `assert` so the check survives
        # `python -O`; the exception type is kept for existing callers.
        raise AssertionError(
            "daily_data must span at least one full dekad (need 2 edges to form 1 bin)"
        )
    return daily_data.groupby_bins(time, dekad_edges, right=False)
962+
963+
844964
def strftimeb2int(strftimeb):
845965
"""Convert month values to integers (1-12) from strings.
846966

0 commit comments

Comments
 (0)