Commit 411e97d

build: Use Pandas 2.0 forward compatible API (#582)
1 parent 71d418c commit 411e97d

File tree

6 files changed: 47 additions, 20 deletions

6 files changed

+47
-20
lines changed

python/tests/integration/arcticdb/version_store/test_update_with_date_range.py

Lines changed: 8 additions & 1 deletion

@@ -53,7 +53,14 @@ def __init__(self, wrapped: pd.DataFrame, *, with_timezone_attr: bool, timezone_

     def __getitem__(self, item):
         if isinstance(item, slice):
-            open_ended = slice(item.start + timedelta(microseconds=1), item.stop - timedelta(microseconds=1), item.step)
+            # Comparing datetimes with timezone to datetimes without timezone was deprecated in Pandas 1.2.0
+            # (see https://github.com/pandas-dev/pandas/pull/36148/) and is no longer supported in Pandas 2.0
+            # (see https://github.com/pandas-dev/pandas/pull/49492/).
+            # We explicitly remove the timezone from the start and stop of the slice to be able to use the
+            # index of the wrapped DataFrame.
+            start_wo_tz = item.start.replace(tzinfo=None) + timedelta(microseconds=1)
+            stop_wo_tz = item.stop.replace(tzinfo=None) - timedelta(microseconds=1)
+            open_ended = slice(start_wo_tz, stop_wo_tz, item.step)
         return CustomTimeseries(
             self.wrapped[open_ended],
             with_timezone_attr=self.with_timezone_attr,
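
For context, a minimal sketch of the incompatibility the comment above describes, using a hypothetical toy DataFrame rather than repository code: under Pandas 2.0, slicing a tz-naive DatetimeIndex with tz-aware bounds raises a TypeError, so the bounds must be made tz-naive first.

import pandas as pd
from datetime import timedelta, timezone

# Toy frame with a tz-naive DatetimeIndex (hypothetical, not repository code).
df = pd.DataFrame({"x": range(3)}, index=pd.date_range("2023-01-01", periods=3))

start = pd.Timestamp("2023-01-01", tz=timezone.utc)
stop = pd.Timestamp("2023-01-03", tz=timezone.utc)

# df[start:stop] raises TypeError under Pandas 2.0: tz-aware bounds cannot be
# compared with the tz-naive index. Dropping tzinfo restores the old behaviour.
start_wo_tz = start.replace(tzinfo=None) + timedelta(microseconds=1)
stop_wo_tz = stop.replace(tzinfo=None) - timedelta(microseconds=1)
print(df[start_wo_tz:stop_wo_tz])  # keeps only rows strictly between the bounds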

python/tests/unit/arcticdb/test_column_stats.py

Lines changed: 30 additions & 10 deletions

@@ -19,7 +19,11 @@ def generate_symbol(lib, sym):
     lib.write(sym, df0)
     lib.append(sym, df1)
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1]]
     expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
     expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]

@@ -41,7 +45,7 @@ def test_column_stats_basic_flow(lmdb_version_store_tiny_segment):
     expected_column_stats = generate_symbol(lib, sym)
     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )

@@ -74,7 +78,11 @@ def test_column_stats_infinity(lmdb_version_store_tiny_segment):
     lib.append(sym, df1)
     lib.append(sym, df2)
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1, 2]]
     expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min(), df2["col_1"].min()]
     expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max(), df2["col_1"].max()]

@@ -94,7 +102,7 @@ def test_column_stats_as_of(lmdb_version_store_tiny_segment):
     expected_column_stats = expected_column_stats.iloc[[0]]
     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )

@@ -150,7 +158,7 @@ def test_column_stats_multiple_indexes_different_columns(lmdb_version_store_tiny

     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )
     column_stats = lib.read_column_stats(sym)

@@ -251,7 +259,7 @@ def test_column_stats_multiple_creates(lmdb_version_store_tiny_segment):
     expected_column_stats = base_expected_column_stats.copy()
     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )
     column_stats = lib.read_column_stats(sym)

@@ -287,10 +295,14 @@ def test_column_stats_duplicated_primary_index(lmdb_version_store_tiny_segment):
     lib = lmdb_version_store_tiny_segment
     sym = "test_column_stats_duplicated_primary_index"

-    total_df = df0.append(df1)
+    total_df = pd.concat((df0, df1))
     lib.write(sym, total_df)
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1]]
     expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
     expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]

@@ -324,7 +336,11 @@ def test_column_stats_dynamic_schema_missing_data(lmdb_version_store_tiny_segmen
     df = lib.read(sym).data

     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1, 2, 3, 4]]
     expected_column_stats["v1.0_MIN(col_1)"] = [
         df0["col_1"].min(),

@@ -395,7 +411,11 @@ def test_column_stats_dynamic_schema_types_changing(lmdb_version_store_tiny_segm
     lib.append(sym, df1)

     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1]]
     expected_column_stats["v1.0_MIN(int_widening)"] = [df0["int_widening"].min(), df1["int_widening"].min()]
     expected_column_stats["v1.0_MAX(int_widening)"] = [df0["int_widening"].max(), df1["int_widening"].max()]

python/tests/unit/arcticdb/version_store/test_aggregation_dynamic.py

Lines changed: 1 addition & 1 deletion

@@ -328,7 +328,7 @@ def test_aggregation_grouping_column_missing_from_row_group(lmdb_version_store_d
         {"to_sum": [3, 4]},
         index=np.arange(2, 4),
     )
-    expected = df0.append(df1).groupby("grouping_column").agg({"to_sum": "sum"})
+    expected = pd.concat((df0, df1)).groupby("grouping_column").agg({"to_sum": "sum"})

     symbol = "test_aggregation_grouping_column_missing_from_row_group"
     lib.write(symbol, df0)
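
This append-to-concat substitution is the same in every file that follows. A minimal sketch with hypothetical toy frames (not repository code): DataFrame.append was deprecated in Pandas 1.4 and removed in 2.0, and pd.concat is the forward compatible replacement.

import pandas as pd

df0 = pd.DataFrame({"to_sum": [1, 2]})
df1 = pd.DataFrame({"to_sum": [3, 4]})

# total = df0.append(df1)        # AttributeError under Pandas 2.0
total = pd.concat((df0, df1))    # row-wise concatenation, indexes kept as-is
print(total["to_sum"].tolist())  # [1, 2, 3, 4]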

python/tests/unit/arcticdb/version_store/test_empty_writes.py

Lines changed: 3 additions & 3 deletions

@@ -23,15 +23,15 @@ def test_write_no_rows(lmdb_version_store, sym):
     assert_frame_equal(lmdb_version_store.read(sym).data, df)

     df2 = pd.DataFrame([[1.3, 6, "test"]], columns=column_names, index=[pd.Timestamp(0)])
-    df2 = df.append(df2)
+    df2 = pd.concat((df, df2))
     # coercing not needed
     lmdb_version_store.append(sym, df2, dynamic_strings=True)
     assert_frame_equal(lmdb_version_store.read(sym).data, df2)

     df3 = pd.DataFrame(
         [[3.3, 8, None], [2.3, 10, "test2"]], columns=column_names, index=[pd.Timestamp(1), pd.Timestamp(2)]
     )
-    df2 = df2.append(df3)
+    df2 = pd.concat((df2, df3))
     # coercing not needed
     lmdb_version_store.append(sym, df3, dynamic_strings=True)
     assert_frame_equal(lmdb_version_store.read(sym).data, df2)

@@ -100,7 +100,7 @@ def test_write_no_rows_and_columns(lmdb_version_store_dynamic_schema, sym):
         columns=column_names + ["d"],
         index=[pd.Timestamp(3), pd.Timestamp(4)],
     )
-    df5 = df2.append(df4)
+    df5 = pd.concat((df2, df4))
     lmdb_version_store_dynamic_schema.append(sym, df4, dynamic_strings=True)
     assert_frame_equal(lmdb_version_store_dynamic_schema.read(sym).data, df5)

python/tests/unit/arcticdb/version_store/test_parallel.py

Lines changed: 2 additions & 2 deletions

@@ -108,7 +108,7 @@ def test_sort_merge_write(lmdb_version_store):
         new_df = pd.DataFrame(data=vals, index=index)

         dataframes.append(new_df)
-        df = df.append(new_df)
+        df = pd.concat((df, new_df))
         dt = dt + datetime.timedelta(days=1)

     random.shuffle(dataframes)

@@ -139,7 +139,7 @@ def test_sort_merge_append(lmdb_version_store_dynamic_schema):
         vals = {c: random_floats(num_rows_per_day) for c in cols}
         new_df = pd.DataFrame(data=vals, index=index)
         dataframes.append(new_df)
-        df = df.append(new_df)
+        df = pd.concat((df, new_df))
         dt = dt + datetime.timedelta(days=1)

     half_way = len(dataframes) / 2

python/tests/unit/arcticdb/version_store/test_projection_dynamic.py

Lines changed: 3 additions & 3 deletions

@@ -55,18 +55,18 @@ def test_project_column_types_changing_and_missing(lmdb_version_store_dynamic_sc
     # uint8
     df = pd.DataFrame({"col_to_project": np.arange(2, dtype=np.uint8), "data_col": [2, 3]}, index=np.arange(2, 4))
     lib.append(symbol, df)
-    expected = expected.append(df)
+    expected = pd.concat((expected, df))
     # Missing
     df = pd.DataFrame({"data_col": [4, 5]}, index=np.arange(4, 6))
     lib.append(symbol, df)
-    expected = expected.append(df)
+    expected = pd.concat((expected, df))
     # int16
     df = pd.DataFrame(
         {"col_to_project": np.arange(200, 202, dtype=np.int16), "data_col": [6, 7]}, index=np.arange(6, 8)
     )
     lib.append(symbol, df)

-    expected = expected.append(df)
+    expected = pd.concat((expected, df))
     expected["projected_col"] = expected["col_to_project"] * 2
     q = QueryBuilder()
     q = q.apply("projected_col", q["col_to_project"] * 2)
