Skip to content

Update pandas version to 0.24 #451

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ exclude_lines =
pragma: no cover
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
raise NotImplementedError
raise ImportError
assert
9 changes: 9 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ matrix:
- black --check modin/
- flake8 .

- os: linux
dist: trusty
env:
- PYTHON=3.6
- API_COMPAT=1
script:
- export PATH="$HOME/miniconda/bin:$PATH"
- python -m pytest modin/pandas/test/test_api.py

install:
- ./.travis/install-dependencies.sh

Expand Down
42 changes: 29 additions & 13 deletions modin/data_management/query_compiler/pandas_query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
is_datetime_or_timedelta_dtype,
is_bool_dtype,
)
from pandas.core.index import _ensure_index
from pandas.core.index import ensure_index
from pandas.core.base import DataError

from modin.engines.base.block_partitions import BaseBlockPartitions
Expand Down Expand Up @@ -97,7 +97,7 @@ def pandas_index_extraction(df, axis):
return index_obj[new_indices] if compute_diff else new_indices

def _validate_set_axis(self, new_labels, old_labels):
new_labels = _ensure_index(new_labels)
new_labels = ensure_index(new_labels)
old_len = len(old_labels)
new_len = len(new_labels)
if old_len != new_len:
Expand All @@ -118,14 +118,14 @@ def _get_columns(self):

def _set_index(self, new_index):
if self._index_cache is None:
self._index_cache = _ensure_index(new_index)
self._index_cache = ensure_index(new_index)
else:
new_index = self._validate_set_axis(new_index, self._index_cache)
self._index_cache = new_index

def _set_columns(self, new_columns):
if self._columns_cache is None:
self._columns_cache = _ensure_index(new_columns)
self._columns_cache = ensure_index(new_columns)
else:
new_columns = self._validate_set_axis(new_columns, self._columns_cache)
self._columns_cache = new_columns
Expand Down Expand Up @@ -1388,11 +1388,16 @@ def _process_all_any(self, func, **kwargs):

if bool_only:
if axis == 0 and not axis_none and len(not_bool_col) == len(self.columns):
return pandas.Series(dtype=bool)
if len(not_bool_col) == len(self.columns):
query_compiler = self
else:
query_compiler = self.drop(columns=not_bool_col)
# TODO add this line back once pandas-dev/pandas#25101 is resolved
# return pandas.Series(dtype=bool)
pass
# See note above about pandas-dev/pandas#25101
# TODO remove this when pandas 0.24.2 is released.
query_compiler = self
# if len(not_bool_col) == len(self.columns):
# query_compiler = self
# else:
# query_compiler = self.drop(columns=not_bool_col)
else:
if (
bool_only is False
Expand Down Expand Up @@ -2492,11 +2497,22 @@ def _list_like_func(self, func, axis, *args, **kwargs):
Returns:
A new PandasQueryCompiler.
"""
func_prepared = self._prepare_method(lambda df: df.apply(func, *args, **kwargs))
func_prepared = self._prepare_method(
lambda df: df.apply(func, axis, *args, **kwargs)
)
new_data = self._map_across_full_axis(axis, func_prepared)
# When the function is list-like, the function names become the index
new_index = [f if isinstance(f, string_types) else f.__name__ for f in func]
return self.__constructor__(new_data, new_index, self.columns)
# When the function is list-like, the function names become the index/columns
new_index = (
[f if isinstance(f, string_types) else f.__name__ for f in func]
if axis == 0
else self.index
)
new_columns = (
[f if isinstance(f, string_types) else f.__name__ for f in func]
if axis == 1
else self.columns
)
return self.__constructor__(new_data, new_index, new_columns)

def _callable_func(self, func, axis, *args, **kwargs):
"""Apply callable functions across given axis.
Expand Down
2 changes: 2 additions & 0 deletions modin/engines/base/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ def to_sql(
index_label=None,
chunksize=None,
dtype=None,
method=None,
):
ErrorMessage.default_to_pandas("`to_sql`")
df = qc.to_pandas()
Expand All @@ -447,4 +448,5 @@ def to_sql(
index_label=index_label,
chunksize=chunksize,
dtype=dtype,
method=method,
)
6 changes: 2 additions & 4 deletions modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
factorize,
test,
qcut,
match,
Panel,
date_range,
period_range,
Expand Down Expand Up @@ -64,7 +63,7 @@
from .plotting import Plotting as plotting
from .. import __execution_engine__ as execution_engine

__pandas_version__ = "0.23.4"
__pandas_version__ = "0.24.1"

if pandas.__version__ != __pandas_version__:
raise ImportError(
Expand Down Expand Up @@ -131,7 +130,7 @@ def initialize_ray():
if execution_engine == "Ray":
initialize_ray()
num_cpus = ray.global_state.cluster_resources()["CPU"]
elif execution_engine == "Dask":
elif execution_engine == "Dask": # pragma: no cover
from distributed.client import _get_global_client

if threading.current_thread().name == "MainThread":
Expand Down Expand Up @@ -174,7 +173,6 @@ def initialize_ray():
"factorize",
"test",
"qcut",
"match",
"to_datetime",
"get_dummies",
"isna",
Expand Down
Loading