Skip to content

Commit 77607dd

Browse files
code sample for pandas-dev#43329
1 parent 968e9a5 commit 77607dd

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed

bisect/43329.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# BUG: resampling DataFrame with DateTimeIndex with holes and uint64 columns leads to error on pandas==1.3.2 (not in 1.1.0) #43329
#
# Stand-alone reproducer intended for bisecting pandas: the script exits
# cleanly when resampling indicator columns over a gap in the index yields
# NaN for the empty day, and raises (non-zero exit) otherwise.

import numpy as np
import pandas as pd

# Record which pandas build is under test so each bisect step is identifiable.
print(pd.__version__)

# Data generation: DataFrame with DateTimeIndex, one row per hour over three
# days (72 rows), values are 0 or 1.
df = pd.DataFrame(
    index=pd.date_range(start="2000-01-01", end="2000-01-03 23", freq="H"),
    columns=["x"],
    data=[0, 1, 0] * 24,
)

# Remove some rows in order to have a hole in the dataset: everything from
# 2000-01-02 00:00 through 2000-01-03 00:00 (inclusive) is dropped, so the
# second day has no rows at all.
df = df.loc[(df.index < "2000-01-02") | (df.index > "2000-01-03"), :]

# Create dummy indicator columns, one per distinct value of "x" (0 and 1).
# On the affected pandas version this default-dtype call is what leads to
# "RuntimeError: empty group with uint64_t" in the resample step below.
one_hot = pd.get_dummies(df["x"])
# Workaround reported in the issue -- forcing a signed integer dtype gives the
# expected dataframe:
# one_hot = pd.get_dummies(df["x"], dtype=int)

# For each day, keep the maximum indicator value observed that day. The second
# day is an empty group, which is the case that triggers the bug.
df_output = one_hot.resample("D").max()
print(df_output)

# Expected dataframe: both indicators reach 1 on the first and third day, and
# the empty second day is NaN.
df_expected = pd.DataFrame(
    index=pd.date_range(start="2000-01-01", end="2000-01-03", freq="D"),
    data={col: [1, np.nan, 1] for col in [0, 1]},
)

# Fails loudly (non-zero exit status) when the output differs, which is what
# marks a commit as "bad" during a bisect run.
pd.testing.assert_frame_equal(df_expected, df_output)

0 commit comments

Comments
 (0)