build: Use Pandas 2.0 forward compatible API #582

Merged: 1 commit merged on Jul 12, 2023
@@ -53,7 +53,14 @@ def __init__(self, wrapped: pd.DataFrame, *, with_timezone_attr: bool, timezone_

def __getitem__(self, item):
if isinstance(item, slice):
- open_ended = slice(item.start + timedelta(microseconds=1), item.stop - timedelta(microseconds=1), item.step)
+ # Comparing datetimes with a timezone to datetimes without a timezone was deprecated in Pandas 1.2.0
+ # (see https://github.com/pandas-dev/pandas/pull/36148/) and is no longer supported in Pandas 2.0
+ # (see https://github.com/pandas-dev/pandas/pull/49492/).
+ # We explicitly remove the timezone from the start and stop of the slice so that we can use the
+ # index of the wrapped DataFrame.
+ start_wo_tz = item.start.replace(tzinfo=None) + timedelta(microseconds=1)
+ stop_wo_tz = item.stop.replace(tzinfo=None) - timedelta(microseconds=1)
+ open_ended = slice(start_wo_tz, stop_wo_tz, item.step)
return CustomTimeseries(
self.wrapped[open_ended],
with_timezone_attr=self.with_timezone_attr,
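For context on the hunk above, here is a minimal, self-contained sketch (not part of the diff) of the behaviour change it works around; `naive_df` and the two timestamps are hypothetical stand-ins for the wrapped DataFrame and the slice bounds:

```python
from datetime import timedelta, timezone

import numpy as np
import pandas as pd

# Hypothetical frame with a tz-naive DatetimeIndex, like the wrapped DataFrame.
naive_df = pd.DataFrame(
    {"price": np.arange(5.0)},
    index=pd.date_range("2023-01-01", periods=5, freq="H"),
)

# tz-aware bounds, as a caller might pass them to __getitem__.
start = pd.Timestamp("2023-01-01 01:00", tz=timezone.utc)
stop = pd.Timestamp("2023-01-01 03:00", tz=timezone.utc)

# Pandas < 2.0 only warned when slicing a naive index with tz-aware bounds;
# Pandas 2.0 rejects it (the exact exception type depends on the version).
try:
    naive_df[start:stop]
except (KeyError, TypeError) as exc:
    print(f"tz-aware slice rejected: {exc!r}")

# Stripping the timezone first, as the patched __getitem__ does, keeps the slice working.
open_ended = slice(
    start.replace(tzinfo=None) + timedelta(microseconds=1),
    stop.replace(tzinfo=None) - timedelta(microseconds=1),
)
print(naive_df[open_ended])
```

Converting the bounds rather than the index leaves the wrapped DataFrame untouched and preserves the original open-ended slice semantics.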
40 changes: 30 additions & 10 deletions python/tests/unit/arcticdb/test_column_stats.py
@@ -19,7 +19,11 @@ def generate_symbol(lib, sym):
lib.write(sym, df0)
lib.append(sym, df1)
expected_column_stats = lib.read_index(sym)
- expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+ expected_column_stats.drop(
+ expected_column_stats.columns.difference(["start_index", "end_index"]),
+ axis=1,
+ inplace=True,
+ )
expected_column_stats = expected_column_stats.iloc[[0, 1]]
expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]
@@ -41,7 +45,7 @@ def test_column_stats_basic_flow(lmdb_version_store_tiny_segment):
expected_column_stats = generate_symbol(lib, sym)
expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
- 1,
+ axis=1,
inplace=True,
)

@@ -74,7 +78,11 @@ def test_column_stats_infinity(lmdb_version_store_tiny_segment):
lib.append(sym, df1)
lib.append(sym, df2)
expected_column_stats = lib.read_index(sym)
- expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+ expected_column_stats.drop(
+ expected_column_stats.columns.difference(["start_index", "end_index"]),
+ axis=1,
+ inplace=True,
+ )
expected_column_stats = expected_column_stats.iloc[[0, 1, 2]]
expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min(), df2["col_1"].min()]
expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max(), df2["col_1"].max()]
@@ -94,7 +102,7 @@ def test_column_stats_as_of(lmdb_version_store_tiny_segment):
expected_column_stats = expected_column_stats.iloc[[0]]
expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
- 1,
+ axis=1,
inplace=True,
)

@@ -150,7 +158,7 @@ def test_column_stats_multiple_indexes_different_columns(lmdb_version_store_tiny

expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
- 1,
+ axis=1,
inplace=True,
)
column_stats = lib.read_column_stats(sym)
@@ -251,7 +259,7 @@ def test_column_stats_multiple_creates(lmdb_version_store_tiny_segment):
expected_column_stats = base_expected_column_stats.copy()
expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
- 1,
+ axis=1,
inplace=True,
)
column_stats = lib.read_column_stats(sym)
@@ -287,10 +295,14 @@ def test_column_stats_duplicated_primary_index(lmdb_version_store_tiny_segment):
lib = lmdb_version_store_tiny_segment
sym = "test_column_stats_duplicated_primary_index"

- total_df = df0.append(df1)
+ total_df = pd.concat((df0, df1))
lib.write(sym, total_df)
expected_column_stats = lib.read_index(sym)
- expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+ expected_column_stats.drop(
+ expected_column_stats.columns.difference(["start_index", "end_index"]),
+ axis=1,
+ inplace=True,
+ )
expected_column_stats = expected_column_stats.iloc[[0, 1]]
expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]
@@ -324,7 +336,11 @@ def test_column_stats_dynamic_schema_missing_data(lmdb_version_store_tiny_segmen
df = lib.read(sym).data

expected_column_stats = lib.read_index(sym)
- expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+ expected_column_stats.drop(
+ expected_column_stats.columns.difference(["start_index", "end_index"]),
+ axis=1,
+ inplace=True,
+ )
expected_column_stats = expected_column_stats.iloc[[0, 1, 2, 3, 4]]
expected_column_stats["v1.0_MIN(col_1)"] = [
df0["col_1"].min(),
@@ -395,7 +411,11 @@ def test_column_stats_dynamic_schema_types_changing(lmdb_version_store_tiny_segm
lib.append(sym, df1)

expected_column_stats = lib.read_index(sym)
- expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+ expected_column_stats.drop(
+ expected_column_stats.columns.difference(["start_index", "end_index"]),
+ axis=1,
+ inplace=True,
+ )
expected_column_stats = expected_column_stats.iloc[[0, 1]]
expected_column_stats["v1.0_MIN(int_widening)"] = [df0["int_widening"].min(), df1["int_widening"].min()]
expected_column_stats["v1.0_MAX(int_widening)"] = [df0["int_widening"].max(), df1["int_widening"].max()]
@@ -328,7 +328,7 @@ def test_aggregation_grouping_column_missing_from_row_group(lmdb_version_store_d
{"to_sum": [3, 4]},
index=np.arange(2, 4),
)
- expected = df0.append(df1).groupby("grouping_column").agg({"to_sum": "sum"})
+ expected = pd.concat((df0, df1)).groupby("grouping_column").agg({"to_sum": "sum"})

symbol = "test_aggregation_grouping_column_missing_from_row_group"
lib.write(symbol, df0)
@@ -23,15 +23,15 @@ def test_write_no_rows(lmdb_version_store, sym):
assert_frame_equal(lmdb_version_store.read(sym).data, df)

df2 = pd.DataFrame([[1.3, 6, "test"]], columns=column_names, index=[pd.Timestamp(0)])
- df2 = df.append(df2)
+ df2 = pd.concat((df, df2))
# coercing not needed
lmdb_version_store.append(sym, df2, dynamic_strings=True)
assert_frame_equal(lmdb_version_store.read(sym).data, df2)

df3 = pd.DataFrame(
[[3.3, 8, None], [2.3, 10, "test2"]], columns=column_names, index=[pd.Timestamp(1), pd.Timestamp(2)]
)
- df2 = df2.append(df3)
+ df2 = pd.concat((df2, df3))
# coercing not needed
lmdb_version_store.append(sym, df3, dynamic_strings=True)
assert_frame_equal(lmdb_version_store.read(sym).data, df2)
@@ -100,7 +100,7 @@ def test_write_no_rows_and_columns(lmdb_version_store_dynamic_schema, sym):
columns=column_names + ["d"],
index=[pd.Timestamp(3), pd.Timestamp(4)],
)
- df5 = df2.append(df4)
+ df5 = pd.concat((df2, df4))
lmdb_version_store_dynamic_schema.append(sym, df4, dynamic_strings=True)
assert_frame_equal(lmdb_version_store_dynamic_schema.read(sym).data, df5)

4 changes: 2 additions & 2 deletions python/tests/unit/arcticdb/version_store/test_parallel.py
@@ -108,7 +108,7 @@ def test_sort_merge_write(lmdb_version_store):
new_df = pd.DataFrame(data=vals, index=index)

dataframes.append(new_df)
- df = df.append(new_df)
+ df = pd.concat((df, new_df))
dt = dt + datetime.timedelta(days=1)

random.shuffle(dataframes)
@@ -139,7 +139,7 @@ def test_sort_merge_append(lmdb_version_store_dynamic_schema):
vals = {c: random_floats(num_rows_per_day) for c in cols}
new_df = pd.DataFrame(data=vals, index=index)
dataframes.append(new_df)
- df = df.append(new_df)
+ df = pd.concat((df, new_df))
dt = dt + datetime.timedelta(days=1)

half_way = len(dataframes) / 2
@@ -55,18 +55,18 @@ def test_project_column_types_changing_and_missing(lmdb_version_store_dynamic_sc
# uint8
df = pd.DataFrame({"col_to_project": np.arange(2, dtype=np.uint8), "data_col": [2, 3]}, index=np.arange(2, 4))
lib.append(symbol, df)
- expected = expected.append(df)
+ expected = pd.concat((expected, df))
# Missing
df = pd.DataFrame({"data_col": [4, 5]}, index=np.arange(4, 6))
lib.append(symbol, df)
- expected = expected.append(df)
+ expected = pd.concat((expected, df))
# int16
df = pd.DataFrame(
{"col_to_project": np.arange(200, 202, dtype=np.int16), "data_col": [6, 7]}, index=np.arange(6, 8)
)
lib.append(symbol, df)

- expected = expected.append(df)
+ expected = pd.concat((expected, df))
expected["projected_col"] = expected["col_to_project"] * 2
q = QueryBuilder()
q = q.apply("projected_col", q["col_to_project"] * 2)