-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Deprecate SparseDataFrame and SparseSeries #26137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d518404
c32e5ff
836d19b
c0d6cf2
8f06d88
380c7c0
21569e2
6a81837
12a8329
01c7710
e9b9b29
b295ce1
ccf71db
7e6fbd6
865f1aa
9915c48
30f3670
b043243
b2aef95
706c5dc
13d30d2
c5fa3fb
101c425
b76745f
f153400
0c49ddc
1903f67
0b03ac2
12d8d83
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,13 +28,24 @@ | |
from pandas.core.sparse.series import SparseSeries | ||
|
||
_shared_doc_kwargs = dict(klass='SparseDataFrame') | ||
depr_msg = """\ | ||
SparseDataFrame is deprecated and will be removed in a future version. | ||
Use a regular DataFrame whose columns are SparseArrays instead. | ||
|
||
See http://pandas.pydata.org/pandas-docs/stable/\ | ||
user_guide/sparse.html#migrating for more. | ||
""" | ||
|
||
|
||
class SparseDataFrame(DataFrame): | ||
""" | ||
DataFrame containing sparse floating point data in the form of SparseSeries | ||
objects | ||
|
||
.. deprecated:: 0.25.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
|
||
Use a DataFrame with sparse values instead. | ||
|
||
Parameters | ||
---------- | ||
data : same types as can be passed to DataFrame or scipy.sparse.spmatrix | ||
|
@@ -56,6 +67,7 @@ class SparseDataFrame(DataFrame): | |
def __init__(self, data=None, index=None, columns=None, default_kind=None, | ||
default_fill_value=None, dtype=None, copy=False): | ||
|
||
warnings.warn(depr_msg, FutureWarning, stacklevel=2) | ||
# pick up the defaults from the Sparse structures | ||
if isinstance(data, SparseDataFrame): | ||
if index is None: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,14 +116,32 @@ def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ), | |
return sparse_matrix, rows, columns | ||
|
||
|
||
def _coo_to_sparse_series(A, dense_index=False): | ||
def _coo_to_sparse_series(A, dense_index: bool = False, | ||
sparse_series: bool = True): | ||
""" | ||
Convert a scipy.sparse.coo_matrix to a Series or SparseSeries.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string here (types too if you can!) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. I'm not really sure on two things
so I left types off for those. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you? Are these types actually checked in our CI? I'd rather not introduce invalid types. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes they should be |
||
Use the defaults given in the SparseSeries constructor. | ||
|
||
Parameters | ||
---------- | ||
A : scipy.sparse.coo.coo_matrix | ||
dense_index : bool, default False | ||
sparse_series : bool, default True | ||
|
||
Returns | ||
------- | ||
Series or SparseSeries | ||
""" | ||
from pandas import SparseDtype | ||
|
||
s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) | ||
s = s.sort_index() | ||
s = s.to_sparse() # TODO: specify kind? | ||
if sparse_series: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why exactly do you need sparse_series flag? why can't we just do the astype after calling this routine? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is called from both Series.sparse and SparseSeries. Previously, this went coo_matrix -> SparseSeries -> Series[sparse], which caused an undesired warning for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok can you add a todo about this then, this is not obvious at all |
||
# TODO(SparseSeries): remove this and the sparse_series keyword. | ||
# This is just here to avoid a DeprecationWarning when | ||
# _coo_to_sparse_series is called via Series.sparse.from_coo | ||
s = s.to_sparse() # TODO: specify kind? | ||
else: | ||
s = s.astype(SparseDtype(s.dtype)) | ||
if dense_index: | ||
# is there a better constructor method to use here? | ||
i = range(A.shape[0]) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,9 +32,24 @@ | |
optional_labels='', optional_axis='') | ||
|
||
|
||
depr_msg = """\ | ||
SparseSeries is deprecated and will be removed in a future version. | ||
Use a Series with sparse values instead. | ||
|
||
>>> series = pd.Series(pd.SparseArray(...)) | ||
|
||
See http://pandas.pydata.org/pandas-docs/stable/\ | ||
user_guide/sparse.html#migrating for more. | ||
""" | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
class SparseSeries(Series): | ||
"""Data structure for labeled, sparse floating point data | ||
|
||
.. deprecated:: 0.25.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo |
||
|
||
Use a Series with sparse values instead. | ||
|
||
Parameters | ||
---------- | ||
data : {array-like, Series, SparseSeries, dict} | ||
|
@@ -60,6 +75,7 @@ class SparseSeries(Series): | |
def __init__(self, data=None, index=None, sparse_index=None, kind='block', | ||
fill_value=None, name=None, dtype=None, copy=False, | ||
fastpath=False): | ||
warnings.warn(depr_msg, FutureWarning, stacklevel=2) | ||
# TODO: Most of this should be refactored and shared with Series | ||
# 1. BlockManager -> array | ||
# 2. Series.index, Series.name, index, name reconciliation | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -215,6 +215,7 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype): | |
assert exp.dtype == dtype | ||
|
||
@pytest.mark.parametrize("fill", [1, np.nan, 0]) | ||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you don't need these as a prior PR added this to setup.cfg There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The setup.cfg has an We have a single test asserting that |
||
def test_sparse_series_round_trip(self, kind, fill): | ||
# see gh-13999 | ||
arr = SparseArray([np.nan, 1, np.nan, 2, 3], | ||
|
@@ -231,6 +232,7 @@ def test_sparse_series_round_trip(self, kind, fill): | |
tm.assert_sp_array_equal(arr, res) | ||
|
||
@pytest.mark.parametrize("fill", [True, False, np.nan]) | ||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") | ||
def test_sparse_series_round_trip2(self, kind, fill): | ||
# see gh-13999 | ||
arr = SparseArray([True, False, True, True], dtype=np.bool, | ||
|
@@ -1098,6 +1100,7 @@ def test_npoints(self): | |
assert arr.npoints == 1 | ||
|
||
|
||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") | ||
class TestAccessor: | ||
|
||
@pytest.mark.parametrize('attr', [ | ||
|
Uh oh!
There was an error while loading. Please reload this page.