diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 251bc6587872d..4db4239ea1b54 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6338,45 +6338,152 @@ def append(self, other, ignore_index=False, 3 3 4 4 """ - if isinstance(other, (Series, dict)): - if isinstance(other, dict): - other = Series(other) - if other.name is None and not ignore_index: + kwargs = { + 'ignore_index': ignore_index, + 'verify_integrity': verify_integrity, + 'sort': sort, + } + + obj_type = type(other) + kwargs['_obj_type'] = obj_type + if issubclass(obj_type, dict): + return self._append_dict(other, **kwargs) + elif issubclass(obj_type, Series): + return self._append_series(other, **kwargs) + elif issubclass(obj_type, DataFrame): + return self._append_frame(other, **kwargs) + elif issubclass(obj_type, list): + + try: + item_type = type(other[0]) + except IndexError: # empty list! + return self._append_list_of_frames(other, **kwargs) + if not all(isinstance(i, item_type) for i in other[1:]): + if issubclass(item_type, (dict, Series, DataFrame)): + raise TypeError("When other is a list, its elements must" + " be all of the same type") + else: + raise TypeError("The value of other must be a" + " DataFrame or Series/dict-like object," + " or list of these") + kwargs['_item_type'] = item_type + + if issubclass(item_type, dict): + return self._append_list_of_dicts(other, **kwargs) + elif issubclass(item_type, Series): + return self._append_list_of_series(other, **kwargs) + elif issubclass(item_type, DataFrame): + return self._append_list_of_frames(other, **kwargs) + else: + raise TypeError("The value of other must be a" + " DataFrame or Series/dict-like object," + " or list of these") + else: + raise TypeError("The value of other must be a" + " DataFrame or Series/dict-like object," + " or list of these") + + def _append_dict(self, other, *args, **kwargs): + return self._append_list_of_dicts([other], *args, **kwargs) + + def _append_series(self, other, *args, **kwargs): + 
return self._append_list_of_series([other], *args, **kwargs) + + def _append_frame(self, other, *args, **kwargs): + return self._append_list_of_frames([other], *args, **kwargs) + + def _append_list_of_dicts(self, other, *args, **kwargs): + if not kwargs['ignore_index']: + raise TypeError('Can only append a dict if ignore_index=True') + return self._append_frame(DataFrame(other), *args, **kwargs) + + def _append_list_of_series(self, other, *args, **kwargs): + if not kwargs['ignore_index']: + if any(series.name is None for series in other): raise TypeError('Can only append a Series if ignore_index=True' ' or if the Series has a name') - if other.name is None: - index = None - else: - # other must have the same index name as self, otherwise - # index name will be reset - index = Index([other.name], name=self.index.name) + if len(other) == 1: + # manually create DF for performance + ser = other[0] + df = DataFrame(ser.values.reshape(1, ser.shape[0]), + index=[ser.name], columns=ser.index) + else: + df = DataFrame(other) - idx_diff = other.index.difference(self.columns) - try: - combined_columns = self.columns.append(idx_diff) - except TypeError: - combined_columns = self.columns.astype(object).append(idx_diff) - other = other.reindex(combined_columns, copy=False) - other = DataFrame(other.values.reshape((1, len(other))), - index=index, - columns=combined_columns) - other = other._convert(datetime=True, timedelta=True) - if not self.columns.equals(combined_columns): - self = self.reindex(columns=combined_columns) - elif isinstance(other, list) and not isinstance(other[0], DataFrame): - other = DataFrame(other) - if (self.columns.get_indexer(other.columns) >= 0).all(): - other = other.loc[:, self.columns] + return self._append_frame(df, *args, **kwargs) + def _append_list_of_frames(self, other, *args, **kwargs): + ignore_index = kwargs['ignore_index'] + verify_integrity = kwargs['verify_integrity'] + sort = kwargs['sort'] + _obj_type = kwargs['_obj_type'] + _item_type = 
kwargs.get('_item_type') + + from pandas.core.indexes.api import _normalize_dataframes from pandas.core.reshape.concat import concat - if isinstance(other, (list, tuple)): - to_concat = [self] + other - else: - to_concat = [self, other] - return concat(to_concat, ignore_index=ignore_index, - verify_integrity=verify_integrity, - sort=sort) + + # sorting behavior when sort=None + # TODO: remove when kwarg value changes + if sort is None: + # establish desired behavior + if _obj_type in (dict, Series): + # dict/ser + + sort = False + warn = False + elif _item_type in (dict, Series): + # [dict]/[ser] + + if (self.columns.get_indexer(other[0].columns) >= 0).all(): + # self.columns >= other[0].columns + sort = False + warn = False + else: + sort = True + types = [df.columns.dtype for df in [self] + other] + common = find_common_type(types) + warn = (common == object) + else: + # frame/[frame] + + if all(self.columns.equals(df.columns) for df in other): + # all values the same + sort = False + warn = False + else: + sort = True + types = [df.columns.dtype for df in [self] + other] + common = find_common_type(types) + warn = (common == object) + + # warn if necessary + if warn: + from pandas.core.indexes.api import _sort_msg + warnings.warn(_sort_msg, FutureWarning) + + # The behavior of concat is a bit problematic as it is. To get around, + # we prepare the DataFrames before feeding them into concat. + to_concat = [self] + other + to_concat_norm = _normalize_dataframes(to_concat, sort=sort) + result = concat(to_concat_norm, ignore_index=ignore_index, + verify_integrity=verify_integrity, sort=sort) + + # preserve base DataFrame indexes names + # XXX: how will this work with MultiIndex (?) + result.columns.name = self.columns.name + if not ignore_index: + result.index.name = self.index.name + + # Reindexing the columns created an artificial float64 where it + # was not needed. We can convert the columns back to the expected + # type.
+ if result.shape[0] == 1: + base_frame = next(df for df in to_concat_norm if df.shape[0] == 1) + dtypes = base_frame.dtypes.to_dict() + result = result.astype(dtypes) # won't work well dups cols + + return result def join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index e50a4b099a8e1..eb1748a10197d 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,11 +1,19 @@ import textwrap import warnings -from pandas.core.indexes.base import (Index, - _new_Index, - ensure_index, - ensure_index_from_sequences, - InvalidIndexError) # noqa +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, + ABCIntervalIndex, + ABCMultiIndex, + ABCPeriodIndex, +) +from pandas.core.indexes.base import ( + Index, + _new_Index, + ensure_index, + ensure_index_from_sequences, + InvalidIndexError, +) from pandas.core.indexes.category import CategoricalIndex # noqa from pandas.core.indexes.multi import MultiIndex # noqa from pandas.core.indexes.interval import IntervalIndex # noqa @@ -29,6 +37,18 @@ """) +class _CannotSortError(Exception): + pass + + +class _CannotSortDuplicatesError(Exception): + pass + + +class _DuplicatesError(Exception): + pass + + # TODO: there are many places that rely on these private methods existing in # pandas.core.index __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index', @@ -160,3 +180,196 @@ def _all_indexes_same(indexes): if not first.equals(index): return False return True + + +def _normalize_dataframes(frame_list, verify_inputs=True, sort=False): + """Normalize the columns from a list of DataFrames + + First, an index is created by merging all the original columns. Then, + all columns are reindexed to match this new index. + + Parameters + ---------- + frame_list: list of DataFrame objects + verify_inputs: boolean, default True + Verify if the input indexes contain duplicate values.
Ignored when all + input indexes share the same identity (a is b). + sort: boolean, default False + Order resulting index. If False, values will come in the order they + appear. + + Raises + ------ + InvalidIndexError: + When there are duplicates in at least one of the indexes (col) + and they are not allowed. + TypeError: + When sort=True and the resulting index (col) could not be sorted. + """ + orig_columns = [df.columns for df in frame_list] + + try: + merged_columns = _merge_index_list( + orig_columns, + verify_dups=verify_inputs, + allow_matching_dups=verify_inputs, # same-id indexes allowed + sort=sort + ) + except _DuplicatesError: + raise InvalidIndexError("Indexes with duplicates are only allowed" + " when they are the same (a is b).") + except _CannotSortDuplicatesError: + raise InvalidIndexError("When sort=True, indexes with duplicate" + " values are not allowed.") + except _CannotSortError: + raise TypeError("The resulting columns could not be sorted." + " You can try setting sort=False or use" + " compatible index types.") + + # Because _merge_index_list may infer the index dtype based on values, + # we have to provide a workaround to conserve the original dtype. + # + # Empty indexes come from DataFrames with no columns, and we do not + # consider them when calculating the final index dtype. + # + # XXX: goes against DataFrame.append behavior for empty columns, where we + # let them be object dtype. + # + # What behavior should be adopted? 
+ relevant_cols = [i for i in orig_columns + if not (len(i) == 0 and i.dtype == 'object')] + if relevant_cols: + from pandas.core.dtypes.cast import find_common_type + types = [i.dtype for i in relevant_cols] + common_type = find_common_type(types) + merged_columns = merged_columns.astype(common_type) + + return [_reindex(df, merged_columns, axis=1) for df in frame_list] + + +def _merge_index_list(index_list, + verify_dups=True, + allow_matching_dups=False, + sort=False): + """Merge a list of indexes into one big index + + Parameters + ---------- + index_list: list of Index objects + verify_dups: boolean, default True + Verify if the input indexes contain duplicate values. + allow_matching_dups: boolean, default False + Only relevant when verify_dups=True. Allow duplicate values when all + indexes have the same identity. + sort: boolean, default False + Order result index. If False, values will come in the order they + appear. + + Raises + ------ + _CannotSortError + When sort=True and the result index is not sortable. + _CannotSortDuplicatesError + When sort=True and at least one of the inputs contains duplicate + values. + _DuplicatesError + When verify_dups=True and at least one of the input indexes contains + duplicate values. This error is not raised if + allow_matching_dups=True and all the indexes have a common identity. + + Notes + ----- + Empty indexes (of object dtype) are forgotten.
+ """ + # unique index list (a is b) + uindex_list = com.get_distinct_objs(index_list) + uindex_list = [i for i in uindex_list if not i.is_empty()] + + # verify duplicates + if sort or verify_dups: + has_dups = any(ix.has_duplicates for ix in uindex_list) + if has_dups: + if sort: + raise _CannotSortDuplicatesError("Cannot sort an index that" + " contains duplicate values.") + elif verify_dups and not allow_matching_dups: + raise _DuplicatesError("Index has duplicate values.") + elif verify_dups and allow_matching_dups and len(uindex_list) >= 2: + raise _DuplicatesError("Index has duplicate values and does" + " not match other indexes.") + + # edge results + if len(uindex_list) == 0: + return Index([]) + elif len(uindex_list) == 1: + return uindex_list[0] + + # reduce to one result + result = uindex_list[0] + for idx in uindex_list[1:]: + result = _merge_indexes(result, idx) + + # sort + return result if not sort else _sort_index(result) + + +def _merge_indexes(index1, index2): + """Merge two indexes together + """ + + # lots of exception handling because we want to allow any + # indexes types to be merged together + + try: + difference = index2.difference(index1) + except (TypeError, ValueError): + if isinstance(index2, (ABCIntervalIndex, ABCPeriodIndex)): + index2 = index2.astype(object) + difference = index2.difference(index1) + else: + raise + + try: + return index1.append(difference) + except TypeError: + if isinstance(index1, ABCCategoricalIndex): + index1 = index1.astype(object) + return index1.append(difference) + raise + + +def _sort_index(index): + """Sort index and raises when not possible + """ + try: + return index.sort_values() + except TypeError: + raise _CannotSortError + + +def _reindex(df, new_index, axis=0): + """Reindex df axis to match new_index + + Parameters + ---------- + + df: a DataFrame object + new_index: an Index object + axis: int or str, default 0 + + Notes + ----- + + Works the same as DataFrame.reindex, but handles IntervalIndex and 
+ MultiIndex errors. + """ + try: + return df.reindex(new_index, axis=axis, copy=False) + except TypeError: + if isinstance(df.columns, ABCIntervalIndex): + df.columns = df.columns.astype(object) + elif isinstance(df.columns, ABCMultiIndex): + df.columns = df.columns.values + else: + raise + return df.reindex(new_index, axis=axis, copy=False) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b2b6e02e908c5..49a2ede8beb3a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1541,6 +1541,9 @@ def is_unique(self): def has_duplicates(self): return not self.is_unique + def is_empty(self): + return self.inferred_type in ['empty'] + def is_boolean(self): return self.inferred_type in ['boolean'] diff --git a/pandas/tests/reshape/test_append.py b/pandas/tests/reshape/test_append.py new file mode 100644 index 0000000000000..d2e07ec8d92ff --- /dev/null +++ b/pandas/tests/reshape/test_append.py @@ -0,0 +1,1086 @@ +from itertools import product + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series +from pandas.core.indexes.base import InvalidIndexError +from pandas.util.testing import assert_frame_equal + + +indexes = [ + # indexes listed here must be sorted + + # base + pd.Index(['A', 'B', 'C']), + pd.Index(['A', 'B', 'C'], name='foo'), + + # numeric + pd.RangeIndex(3), + pd.Int64Index([3, 4, 5]), + pd.UInt64Index([6, 7, 8]), + pd.Float64Index([3.5, 4.5, 5.5]), + pd.Index([9, 10, 11], dtype=object), # fake int64 + + # datetime + pd.to_datetime(['2013-01-01', '2013-01-10', '2013-01-15']), + pd.to_timedelta(['1 day', '2 days', '3 days']), + pd.PeriodIndex(start='2000', periods=3), + + # interval + pd.interval_range(start=0, end=3), + + # categorical + pd.CategoricalIndex('A B C'.split()), + pd.CategoricalIndex('D E F'.split(), ordered=True), + + # multi-index + pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), +] + + +indexes_with_dups = [ + # base + pd.Index(['A', 
'B', 'B']), + pd.Index(['B', 'B', 'A']), + pd.Index(['A', 'B', 'B'], name='foo'), + pd.Index(['B', 'B', 'A'], name='bar'), + + # numeric + pd.Index([9, 10, 10], dtype=object), + pd.Int64Index([3, 4, 4]), + pd.UInt64Index([6, 7, 7]), + pd.Float64Index([3.5, 4.5, 4.5]), + + # datetime + pd.to_datetime(['2013-01-01', '2013-01-10', '2013-01-10']), + pd.to_timedelta(['1 day', '2 days', '2 days']), + pd.PeriodIndex([2000, 2001, 2001], freq='A'), + + # interval + pd.IntervalIndex.from_arrays([0, 1, 1], [1, 2, 2]), + + # categorical + pd.CategoricalIndex('A B B'.split()), + pd.CategoricalIndex('D E E'.split(), ordered=True), + + # multi-index + pd.MultiIndex.from_arrays(['A B B'.split(), 'D E E'.split()]), +] + + +index_sort_groups = [ + # When indexes from the same group are joined, the result is sortable. + # When indexes from different groups are joined, the result is not + # sortable. + + [ # joining produces a string index + pd.Index(['A', 'B', 'C']), + pd.CategoricalIndex('A B C'.split()), + pd.CategoricalIndex('D E F'.split(), ordered=True)], + + [ # numeric indexes + pd.RangeIndex(3), + pd.Int64Index([3, 4, 5]), + pd.UInt64Index([6, 7, 8]), + pd.Float64Index([3.5, 4.5, 5.5]), + pd.Index([9, 10, 11], dtype=object)], + + [pd.to_datetime(['2013-01-01', '2013-01-10', '2013-01-15'])], + [pd.to_timedelta(['1 day', '2 days', '3 days'])], + [pd.PeriodIndex(start='2000', periods=3)], + [pd.interval_range(start=0, end=3)], + [pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()])], +] + + +def cls_name(obj): + return obj.__class__.__name__ + + +@pytest.fixture(params=[True, False]) +def sort(request): + """Boolean sort keyword for DataFrame.append + """ + return request.param + + +class TestAppendBasic(object): + def test_different_types_of_input(self, sort): + # There are 7 types of accepted input by append: + # + # dict + # Series + # DataFrame + # empty list + # list of dicts + # list of Series + # list of DataFrames + # + # Using one or another should always be 
interchangeable. + + # append to dict + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + map = { + 0: 7, + 1: 8, + 2: 9 + } + result = df.append(map, ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to Series + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + ser = pd.Series([7, 8, 9]) + result = df.append(ser, ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to DataFrame + df1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + df2 = pd.DataFrame([[7, 8, 9]]) + result = df1.append(df2, ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to empty list + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + result = df1.append([], sort=sort) + expected = df + assert_frame_equal(result, expected) + + # append to list of dicts + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + map = { + 0: 7, + 1: 8, + 2: 9 + } + result = df.append([map], ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to list of Series + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + ser = pd.Series([7, 8, 9]) + result = df.append([ser], ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to list of DataFrames + df1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + df2 = pd.DataFrame([[7, 8, 9]]) + result = df1.append([df2], ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to list of dicts (2 dicts) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + map = { + 0: 7, + 1: 8, + 2: 9 + } + result = df.append([map, map], ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], 
[7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to list of Series (2 series) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + ser = pd.Series([7, 8, 9]) + result = df.append([ser, ser], ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9]]) + assert_frame_equal(result, expected) + + # append to list of DataFrames (2 dframes) + df1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + df2 = pd.DataFrame([[7, 8, 9]]) + result = df1.append([df2, df2], ignore_index=True, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9]]) + assert_frame_equal(result, expected) + + def test_bad_input_type(self, sort): + # When appending a bad input type, the function + # should raise an exception. + + bad_input_msg = r'The value of other must be .*' + mixed_list_msg = r'When other is a list, its .*' + + # integer input + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError, match=bad_input_msg): + df.append(1, ignore_index=True, sort=sort) + + # string input + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError, match=bad_input_msg): + df.append("1 2 3", ignore_index=True, sort=sort) + + # tuple input + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError, match=bad_input_msg): + df.append((df, ), ignore_index=True, sort=sort) + + # list of integers + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError, match=bad_input_msg): + df.append([1], ignore_index=True, sort=sort) + + # list of strings + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError, match=bad_input_msg): + df.append(["1 2 3"], ignore_index=True, sort=sort) + + # list of lists + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError, match=bad_input_msg): + df.append([[df]], ignore_index=True, sort=sort) + + # list of tuples + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError, match=bad_input_msg): + 
df.append([(df, )], ignore_index=True, sort=sort) + + # mixed list + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + ser = pd.Series([7, 8, 9]) + dict = { + 0: 10, + 1: 11, + 2: 12 + } + with pytest.raises(TypeError, match=mixed_list_msg): + df.append([ser, dict], ignore_index=True, sort=sort) + with pytest.raises(TypeError, match=mixed_list_msg): + df.append([dict, ser], ignore_index=True, sort=sort) + + # mixed list with bad first element + # (when the first element is bad, display the + # bad input msg instead of the mixed list one) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + ser = pd.Series([7, 8, 9]) + with pytest.raises(TypeError, match=bad_input_msg): + df.append([1, ser, ser], ignore_index=True, sort=sort) + + # mixed list with bad second element + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + ser = pd.Series([7, 8, 9]) + with pytest.raises(TypeError, match=mixed_list_msg): + df.append([ser, 1, ser], ignore_index=True, sort=sort) + + # mixed list with bad third element + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + ser = pd.Series([7, 8, 9]) + with pytest.raises(TypeError, match=mixed_list_msg): + df.append([ser, ser, 1], ignore_index=True, sort=sort) + + def test_no_unecessary_upcast(self, sort): + # GH: 22621 + # When appending, the result columns should + # not be float64 without necessity. 
+ + # basic + df1 = pd.DataFrame([[1, 2, 3]]) + df2 = pd.DataFrame([[4, 5, 6]], index=[1]) + result = df1.append(df2, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + assert_frame_equal(result, expected) + + # 0 rows 0 columns + df1 = pd.DataFrame([[1, 2, 3]]) + df2 = pd.DataFrame() + result = df1.append(df2, sort=sort) + expected = df1.copy() + assert_frame_equal(result, expected) + + df1 = pd.DataFrame() + df2 = pd.DataFrame([[1, 2, 3]]) + result = df1.append(df2, sort=sort) + expected = df2.copy() + assert_frame_equal(result, expected) + + # 0 rows 2 columns + df1 = pd.DataFrame([[1, 2, 3]], columns=[0, 1, 2]) + df2 = pd.DataFrame(columns=[3, 4]) + result = df1.append(df2, sort=sort) + expected = pd.DataFrame([[1, 2, 3, np.nan, np.nan]]) + assert_frame_equal(result, expected) + + df1 = pd.DataFrame(columns=[0, 1]) + df2 = pd.DataFrame([[1, 2, 3]], columns=[2, 3, 4]) + result = df1.append(df2, sort=sort) + expected = pd.DataFrame([[np.nan, np.nan, 1, 2, 3]]) + assert_frame_equal(result, expected) + + # big.append(small) + big = pd.DataFrame([[1, 2, 3]]) + small = pd.DataFrame([[4, 5]], index=[1]) + result = big.append(small, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, np.nan]]) + assert_frame_equal(result, expected) + + # small.append(big) + small = pd.DataFrame([[1, 2]]) + big = pd.DataFrame([[3, 4, 5]], index=[1]) + result = small.append(big, sort=sort) + expected = pd.DataFrame([[1, 2, np.nan], [3, 4, 5]]) + assert_frame_equal(result, expected) + + +class TestAppendSortNone(object): + """Regression tests to preserve the behavior of sort=None + """ + + def generate_frames(self, compare, special): + if compare == 'lt': + if special: + df1 = DataFrame([[11, 12]], columns=[2, 1]) + df2 = DataFrame([[13, 14, 15]], columns=[3, 2, 1]) + else: + df1 = DataFrame([[11, 12]], columns=list('ba')) + df2 = DataFrame([[13, 14, 15]], columns=list('cba')) + elif compare == 'eq': + if special: + df1 = DataFrame([[11, 12, 13]], columns=[3, 2, 1]) + df2 
= DataFrame([[14, 15, 16]], columns=[3, 2, 1]) + else: + df1 = DataFrame([[11, 12, 13]], columns=list('cba')) + df2 = DataFrame([[14, 15, 16]], columns=list('cba')) + elif compare == 'gt': + if special: + df1 = DataFrame([[11, 12, 13]], columns=[3, 2, 1]) + df2 = DataFrame([[14, 15]], columns=[2, 1]) + else: + df1 = DataFrame([[11, 12, 13]], columns=list('cba')) + df2 = DataFrame([[14, 15]], columns=list('ba')) + elif compare == 'dups': + # special category for duplicates + # assumes compare = 'eq' + if special: + df1 = DataFrame([[11, 12, 13]], columns=[3, 3, 1]) + df2 = DataFrame([[14, 15, 16]], columns=[3, 3, 1]) + else: + df1 = DataFrame([[11, 12, 13]], columns=list('cca')) + df2 = DataFrame([[14, 15, 16]], columns=list('cca')) + + # avoid upcasting problems + df1 = df1.astype('float64') + df2 = df2.astype('float64') + + return df1, df2 + + def merge_indexes(self, idx1, idx2, sort): + len1 = idx1.size + len2 = idx2.size + + if len1 < len2: + # match 'lt' in self.generate_frames + vals1 = idx1.tolist() + vals2 = [idx2.tolist()[0]] + result = Index(vals1 + vals2) + else: + result = idx1.copy() + + return result.sort_values() if sort else result + + def merge_frames(self, df1, df2, sort): + new_index = self.merge_indexes(df1.columns, df2.columns, sort) + df1 = df1.reindex(new_index, axis=1) + df2 = df2.reindex(new_index, axis=1) + + values = np.vstack([df1.values[0, :], df2.values[0, :]]) + result = DataFrame(values, columns=new_index) + return result + + @pytest.mark.parametrize('input_type', ['series', 'dict']) + @pytest.mark.parametrize('special', [True, False]) + @pytest.mark.parametrize('compare', ['lt', 'eq', 'gt', 'dups']) + def test_append_series_dict(self, compare, special, input_type): + # When appending a Series or dict, the resulting columns come unsorted + # and no warning is raised. 
+ + sorts = False + warns = False + + df1, df2 = self.generate_frames(compare, special) + if input_type == 'series': + other = df2.loc[0] + else: + other = df2.loc[0].to_dict() + if compare == 'dups': + return + + ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None) + expected = self.merge_frames(df1, df2, sorts) + with ctx: + result = df1.append(other, ignore_index=True, sort=None) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('input_type', ['[series]', '[dict]']) + @pytest.mark.parametrize('special', [True, False]) + @pytest.mark.parametrize('compare', ['lt', 'eq', 'gt']) # dups won't work + def test_append_list_of_series_dict(self, compare, special, input_type): + # When appending a list of Series or list of dicts, the behavior is + # as specified below. + + if compare in ('gt', 'eq'): + sorts = False + warns = False + else: + sorts = True + warns = not special + + df1, df2 = self.generate_frames(compare, special) + if input_type == '[series]': + other = [df2.loc[0]] + else: + other = [df2.loc[0].to_dict()] + + ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None) + expected = self.merge_frames(df1, df2, sorts) + with ctx: + result = df1.append(other, ignore_index=True, sort=None) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('input_type', ['dataframe', '[dataframe]']) + @pytest.mark.parametrize('special', [True, False]) + @pytest.mark.parametrize('compare', ['lt', 'eq', 'gt', 'dups']) + def test_append_dframe_list_of_dframe(self, compare, special, input_type): + # When appending a DataFrame or list of DataFrames, the behavior is as + # specified below.
+ + if compare in ('dups', 'eq'): + sorts = False + warns = False + else: + sorts = True + warns = not special + + df1, df2 = self.generate_frames(compare, special) + if input_type == 'dataframe': + other = df2 + else: + other = [df2] + + ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None) + expected = self.merge_frames(df1, df2, sorts) + with ctx: + result = df1.append(other, ignore_index=True, sort=None) + assert_frame_equal(result, expected) + + +class TestAppendColumnsIndex(object): + @pytest.mark.parametrize('idx_name3', [None, 'foo', 'bar', 'baz']) + @pytest.mark.parametrize('idx_name2', [None, 'foo', 'bar', 'baz']) + @pytest.mark.parametrize('idx_name1', [None, 'foo', 'bar', 'baz']) + def test_preserve_index_name(self, sort, idx_name1, idx_name2, idx_name3): + # When appending, the name of the indexes + # of the base DataFrame must always be + # preserved in the result. + + df1 = pd.DataFrame([[1, 2, 3]]) + df2 = pd.DataFrame([[4, 5, 6]], index=[1]) + df3 = pd.DataFrame([[7, 8, 9]], index=[2]) + + df1.columns.name = idx_name1 + df2.columns.name = idx_name2 + df3.columns.name = idx_name3 + + # append [] + result = df1.append([], sort=sort) + expected = df1.copy() + assert_frame_equal(result, expected) + + # append [df] + result = df1.append([df2], sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + expected.columns.name = idx_name1 + assert_frame_equal(result, expected) + + # append [df, df] + result = df1.append([df2, df3], sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + expected.columns.name = idx_name1 + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('index', indexes, ids=cls_name) + def test_preserve_index_type(self, sort, index): + # when there's only one index type in the inputs, + # it must be preserved in the output. 
+ + # basic + df1 = pd.DataFrame([[1, 2, 3]], columns=index) + df2 = pd.DataFrame([[4, 5, 6]], index=[1], columns=index) + result = df1.append(df2, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) + assert_frame_equal(result, expected) + + # big.append(small) + big = pd.DataFrame([[1, 2, 3]], columns=index) + small = pd.DataFrame([[4, 5]], index=[1], columns=index[:2]) + result = big.append(small, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, np.nan]], columns=index) + assert_frame_equal(result, expected) + + # small.append(big) + small = pd.DataFrame([[1, 2]], columns=index[:2]) + big = pd.DataFrame([[3, 4, 5]], index=[1], columns=index) + result = small.append(big, sort=sort) + expected = pd.DataFrame([[1, 2, np.nan], [3, 4, 5]], columns=index) + assert_frame_equal(result, expected) + + def test_ignore_empty_index_dtype(self, sort): + # When one of the indexes is empty and of object dtype, it should be + # ignored in the result (as empty). + + df1 = pd.DataFrame() + df2 = pd.DataFrame([[11, 12, 13]], columns=[1, 2, 3]) + + result1 = df1.append(df2, sort=sort) + result2 = df2.append(df1, sort=sort) + + expected = df2.copy() + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + + def test_account_empty_index_dtype(self, sort): + # When one of the indexes is empty and of dtype different from object, + # it should not be ignored when calculating the result dtype. 
+ + df1 = pd.DataFrame(columns=pd.Float64Index([])) + df2 = pd.DataFrame([[11, 12, 13]], columns=[1, 2, 3]) + + result1 = df1.append(df2, sort=sort) + result2 = df2.append(df1, sort=sort) + + expected = df2.copy() + expected.columns = [1.0, 2.0, 3.0] + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + + @pytest.mark.parametrize('index2', indexes, ids=cls_name) + @pytest.mark.parametrize('index1', indexes, ids=cls_name) + def test_preserve_index_values_without_sort(self, index1, index2): + # When appending indexes of different types, we want + # the resulting index to preserve the exact indexes + # values. + + # Related to GH13626 + from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, ABCMultiIndex, ABCTimedeltaIndex + ) + if isinstance(index1, ABCMultiIndex): + if isinstance(index2, ABCDatetimeIndex): + pytest.xfail("MultiIndex + DatetimeIndex produces bad value") + if isinstance(index2, ABCTimedeltaIndex): + pytest.xfail("MultiIndex + TimedeltaIndex produces bad value") + + df1 = pd.DataFrame([[1, 2, 3]], columns=index1) + df2 = pd.DataFrame([[4, 5, 6]], columns=index2, index=[1]) + result = df1.append(df2, sort=False) + for value in index1: + assert value in result.columns + for value in index2: + assert value in result.columns + + @pytest.mark.parametrize( + 'index1, index2', + [(i1, i2) + for group in index_sort_groups + for i1, i2 in product(group, repeat=2)], + ids=cls_name + ) + def test_preserve_index_values_with_sort(self, index1, index2): + # When appending indexes of different types, we want + # the resulting index to preserve the exact indexes + # values. 
+ + df1 = pd.DataFrame([[1, 2, 3]], columns=index1) + df2 = pd.DataFrame([[4, 5, 6]], columns=index2, index=[1]) + result = df1.append(df2, sort=True) + for value in index1: + assert value in result.columns + for value in index2: + assert value in result.columns + + @pytest.mark.parametrize('col_index', indexes_with_dups, ids=cls_name) + def test_good_duplicates_without_sort(self, col_index): + # When all indexes have the same identity (a is b), duplicates should + # be allowed and append works. + + df1 = pd.DataFrame([[1, 2, 3]], columns=col_index) + df2 = pd.DataFrame([[4, 5, 6]], columns=col_index) + + # df1.append([]) + result = df1.append([], sort=False) + expected = df1.copy() + assert_frame_equal(result, expected) + + # df1.append([df2]) + result = df1.append([df2], ignore_index=True, sort=False) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + expected.columns = col_index + assert_frame_equal(result, expected) + + # df1.append([df2, df2]) + result = df1.append([df2, df2], ignore_index=True, sort=False) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [4, 5, 6]]) + expected.columns = col_index + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('col_index', indexes_with_dups, ids=cls_name) + def test_bad_duplicates_without_sort(self, col_index): + # When the indexes do not share a common identity, duplicates are not + # allowed and append raises. 
+ + df1 = pd.DataFrame([[1, 2, 3]], columns=col_index) + df2 = pd.DataFrame([[4, 5, 6]], columns=col_index) + df3 = pd.DataFrame([[7, 8, 9]], columns=col_index.copy()) # different + ctx = pytest.raises(InvalidIndexError, + match=r'Indexes with duplicates.*a is b.*') + with ctx: + result = df1.append([df3], sort=False) + with ctx: + result = df1.append([df2, df3], sort=False) + with ctx: + result = df1.append([df3, df2], sort=False) + with ctx: + result = df1.append([df3, df3], sort=False) + + @pytest.mark.parametrize('col_index', indexes_with_dups, ids=cls_name) + def test_duplicates_with_sort(self, col_index): + # When sort=True, indexes with duplicate values are not allowed. + + df1 = pd.DataFrame([[1, 2, 3]], columns=col_index) + df2 = pd.DataFrame([[4, 5, 6]], columns=col_index.copy()) + ctx = pytest.raises(InvalidIndexError, + match=r'When sort=True, indexes with dupl.*') + + with ctx: + result = df1.append([], sort=True) + with ctx: + result = df1.append([df1], sort=True) + with ctx: + result = df1.append([df2], sort=True) + with ctx: + result = df1.append([df1, df1], sort=True) + with ctx: + result = df1.append([df1, df2], sort=True) + with ctx: + result = df1.append([df2, df1], sort=True) + with ctx: + result = df1.append([df2, df2], sort=True) + + def test_nosort_basic(self): + # When sort=False, the resulting columns come + # in the order that they appear in the inputs.
+ + nan = np.nan + + # NUMERIC INDEX TESTS + + # append [] + df = pd.DataFrame([[1, 2, 3]], columns=[0, 1, 2]) + result = df.append([], sort=False) + expected = df[[0, 1, 2]] + assert_frame_equal(result, expected) + + df = pd.DataFrame([[1, 2, 3]], columns=[2, 1, 0]) + result = df.append([], sort=False) + expected = df[[2, 1, 0]] + assert_frame_equal(result, expected) + + # append [df] + df1 = pd.DataFrame([[1, 2]], columns=[0.0, 1.0]) + df2 = pd.DataFrame([[1, 2]], columns=[0.5, 1.5], index=[1]) + result = df1.append(df2, sort=False) + expected = pd.DataFrame([[1, 2, nan, nan], + [nan, nan, 1, 2]], + columns=[0.0, 1.0, 0.5, 1.5]) + assert_frame_equal(result, expected) + + # append [df, df] + df1 = pd.DataFrame([[1, 2]], columns=[0.0, 1.0]) + df2 = pd.DataFrame([[1, 2]], columns=[0.3, 1.3], index=[1]) + df3 = pd.DataFrame([[1, 2]], columns=[0.6, 1.6], index=[2]) + result = df1.append([df2, df3], sort=False) + expected = pd.DataFrame([[1, 2, nan, nan, nan, nan], + [nan, nan, 1, 2, nan, nan], + [nan, nan, nan, nan, 1, 2]], + columns=[0.0, 1.0, 0.3, 1.3, 0.6, 1.6]) + assert_frame_equal(result, expected) + + # STRING INDEX TESTS + + # append [] + df = pd.DataFrame([[1, 2, 3]], columns=['a', 'b', 'c']) + result = df.append([], sort=False) + expected = df[['a', 'b', 'c']] + assert_frame_equal(result, expected) + + df = pd.DataFrame([[1, 2, 3]], columns=['c', 'b', 'a']) + result = df.append([], sort=False) + expected = df[['c', 'b', 'a']] + assert_frame_equal(result, expected) + + # append [df] + df1 = pd.DataFrame([[1, 2]], columns=['a', 'c']) + df2 = pd.DataFrame([[1, 2]], columns=['b', 'd'], index=[1]) + result = df1.append(df2, sort=False) + expected = pd.DataFrame([[1, 2, nan, nan], + [nan, nan, 1, 2]], + columns=['a', 'c', 'b', 'd']) + assert_frame_equal(result, expected) + + # append [df, df] + df1 = pd.DataFrame([[1, 2]], columns=['a', 'd']) + df2 = pd.DataFrame([[1, 2]], columns=['b', 'e'], index=[1]) + df3 = pd.DataFrame([[1, 2]], columns=['c', 'f'], index=[2]) 
+ result = df1.append([df2, df3], sort=False) + expected = pd.DataFrame([[1, 2, nan, nan, nan, nan], + [nan, nan, 1, 2, nan, nan], + [nan, nan, nan, nan, 1, 2]], + columns=['a', 'd', 'b', 'e', 'c', 'f']) + assert_frame_equal(result, expected) + + def test_sort_basic(self): + # When sort=True, the resulting columns must come + # out sorted. + + nan = np.nan + + # NUMERIC INDEX TESTS + + # append [] + df = pd.DataFrame([[1, 2, 3]], columns=[0, 1, 2]) + result = df.append([], sort=True) + expected = df[[0, 1, 2]] + assert_frame_equal(result, expected) + + df = pd.DataFrame([[1, 2, 3]], columns=[2, 1, 0]) + result = df.append([], sort=True) + expected = df[[0, 1, 2]] + assert_frame_equal(result, expected) + + # append [df] + df1 = pd.DataFrame([[1, 2]], columns=[0.0, 1.0]) + df2 = pd.DataFrame([[1, 2]], columns=[0.5, 1.5], index=[1]) + result = df1.append(df2, sort=True) + expected = pd.DataFrame([[1, nan, 2, nan], + [nan, 1, nan, 2]], + columns=[0.0, 0.5, 1.0, 1.5]) + assert_frame_equal(result, expected) + + # append [df, df] + df1 = pd.DataFrame([[1, 2]], columns=[0.0, 1.0]) + df2 = pd.DataFrame([[1, 2]], columns=[0.3, 1.3], index=[1]) + df3 = pd.DataFrame([[1, 2]], columns=[0.6, 1.6], index=[2]) + result = df1.append([df2, df3], sort=True) + expected = pd.DataFrame([[1, nan, nan, 2, nan, nan], + [nan, 1, nan, nan, 2, nan], + [nan, nan, 1, nan, nan, 2]], + columns=[0.0, 0.3, 0.6, 1.0, 1.3, 1.6]) + assert_frame_equal(result, expected) + + # STRING INDEX TESTS + + # append [] + df = pd.DataFrame([[1, 2, 3]], columns=['a', 'b', 'c']) + result = df.append([], sort=True) + expected = df[['a', 'b', 'c']] + assert_frame_equal(result, expected) + + df = pd.DataFrame([[1, 2, 3]], columns=['c', 'b', 'a']) + result = df.append([], sort=True) + expected = df[['a', 'b', 'c']] + assert_frame_equal(result, expected) + + # append [df] + df1 = pd.DataFrame([[1, 2]], columns=['a', 'c']) + df2 = pd.DataFrame([[1, 2]], columns=['b', 'd'], index=[1]) + result = df1.append(df2, sort=True) 
+ expected = pd.DataFrame([[1, nan, 2, nan], + [nan, 1, nan, 2]], + columns=['a', 'b', 'c', 'd']) + assert_frame_equal(result, expected) + + # append [df, df] + df1 = pd.DataFrame([[1, 2]], columns=['a', 'd']) + df2 = pd.DataFrame([[1, 2]], columns=['b', 'e'], index=[1]) + df3 = pd.DataFrame([[1, 2]], columns=['c', 'f'], index=[2]) + result = df1.append([df2, df3], sort=True) + expected = pd.DataFrame([[1, nan, nan, 2, nan, nan], + [nan, 1, nan, nan, 2, nan], + [nan, nan, 1, nan, nan, 2]], + columns=['a', 'b', 'c', 'd', 'e', 'f']) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('index2', indexes, ids=cls_name) + @pytest.mark.parametrize('index1', indexes, ids=cls_name) + def test_index_types_without_sort(self, index1, index2): + # We should be able to append to a DataFrame + # regardless of the type of its index. + + # TODO: check end of append and create tests (empty / IntervalIndex) + # TODO: implement different way for df.append([]) + from pandas.core.dtypes.generic import ABCIntervalIndex + if isinstance(index1, ABCIntervalIndex): + pytest.xfail("Cannot do df[interval] for IntervalIndex") + + # the code below should not raise any exceptions + df1 = pd.DataFrame([[1, 2, 3]], columns=index1) + df2 = pd.DataFrame([[4, 5, 6]], columns=index2, index=[1]) + df1.append([], sort=False) + df1.append([df2], sort=False) + df1.append([df2, df2], sort=False) + + @pytest.mark.parametrize( + 'index1, index2', + [(i1, i2) + for group in index_sort_groups + for i1, i2 in product(group, repeat=2)], + ids=cls_name + ) + def test_index_types_with_possible_sort(self, index1, index2): + # When the result of joining two indexes is sortable, + # we should not raise any exceptions. 
+ + # TODO: check end of append and create tests (empty / IntervalIndex) + # TODO: implement different way for df.append([]) + from pandas.core.dtypes.generic import ABCIntervalIndex + if isinstance(index1, ABCIntervalIndex): + pytest.xfail("Cannot do df[interval] for IntervalIndex") + + df1 = pd.DataFrame([[1, 2, 3]], columns=index1) + df2 = pd.DataFrame([[4, 5, 6]], columns=index2, index=[1]) + df1.append([], sort=True) # sorts the original frame + df1.append([df2], sort=True) + df1.append([df2, df2], sort=True) + + @pytest.mark.parametrize( + 'index1, index2', + [(i1, i2) + for g1, g2 in product(index_sort_groups, repeat=2) + # different sort groups + if type(g1[0]) != type(g2[0]) + for i1, i2 in product(g1, g2)], + ids=cls_name + ) + def test_index_types_with_impossible_sort(self, index1, index2): + # When the result of joining two indexes is not sortable, + # we should raise an exception. + + # TODO: check end of append and create tests (empty / IntervalIndex) + # TODO: implement different way for df.append([]) + from pandas.core.dtypes.generic import ABCIntervalIndex + if isinstance(index1, ABCIntervalIndex): + pytest.xfail("Cannot do df[interval] for IntervalIndex") + + err_msg = r'The resulting columns could not be sorted\..*' + + df1 = pd.DataFrame([[1, 2, 3]], columns=index1) + df2 = pd.DataFrame([[4, 5, 6]], columns=index2, index=[1]) + + with pytest.raises(TypeError, match=err_msg): + df1.append([df2], sort=True) + with pytest.raises(TypeError, match=err_msg): + df1.append([df2, df2], sort=True) + + +class TestAppendRowsIndex(object): + @pytest.mark.parametrize('idx_name3', [None, 'foo', 'bar', 'baz']) + @pytest.mark.parametrize('idx_name2', [None, 'foo', 'bar', 'baz']) + @pytest.mark.parametrize('idx_name1', [None, 'foo', 'bar', 'baz']) + def test_preserve_index_name(self, sort, idx_name1, idx_name2, idx_name3): + # When appending, the name of the indexes + # of the base DataFrame must always be + # preserved in the result. 
+ + df1 = pd.DataFrame([[1, 2, 3]]) + df2 = pd.DataFrame([[4, 5, 6]], index=[1]) + df3 = pd.DataFrame([[7, 8, 9]], index=[2]) + + df1.index.name = idx_name1 + df2.index.name = idx_name2 + df3.index.name = idx_name3 + + # append [] + result = df1.append([], sort=sort) + expected = df1.copy() + assert_frame_equal(result, expected) + + # append [df] + result = df1.append([df2], sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + expected.index.name = idx_name1 + assert_frame_equal(result, expected) + + # append [df, df] + result = df1.append([df2, df3], sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + expected.index.name = idx_name1 + assert_frame_equal(result, expected) + + def test_ignore_empty_index_dtype(self, sort): + # When one of the indexes is empty and of object dtype, it should be + # ignored in the result (as empty). + + df1 = pd.DataFrame() + df2 = pd.DataFrame([[11], [12], [13]], index=[1, 2, 3]) + + result1 = df1.append(df2, sort=sort) + result2 = df2.append(df1, sort=sort) + + expected = df2.copy() + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + + def test_account_empty_index_dtype(self, sort): + # When one of the indexes is empty and of dtype different from object, + # it should not be ignored when calculating the result dtype. + + df1 = pd.DataFrame(index=pd.Float64Index([])) + df2 = pd.DataFrame([[11], [12], [13]], index=[1, 2, 3]) + + result1 = df1.append(df2, sort=sort) + result2 = df2.append(df1, sort=sort) + + expected = df2.copy() + expected.index = [1.0, 2.0, 3.0] + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + + @pytest.mark.parametrize('index', indexes, ids=cls_name) + def test_preserve_index_type(self, sort, index): + # when there's only one index type in the inputs, + # it must be preserved in the output. 
+ + index1 = index[:1] + index2 = index[1:2] + index_comb = index1.append(index2) + + df1 = pd.DataFrame([[1, 2, 3]], index=index1) + df2 = pd.DataFrame([[4, 5, 6]], index=index2) + result = df1.append(df2, sort=sort) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]], index=index_comb) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('index2', indexes, ids=cls_name) + @pytest.mark.parametrize('index1', indexes, ids=cls_name) + def test_preserve_index_values(self, sort, index1, index2): + # When appending indexes of different types, we want + # the resulting index to preserve the exact indexes + # values. + + # Related to GH13626 + from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, ABCMultiIndex, ABCTimedeltaIndex + ) + if isinstance(index1, ABCMultiIndex): + if isinstance(index2, ABCDatetimeIndex): + pytest.xfail("MultiIndex + DatetimeIndex produces bad value") + if isinstance(index2, ABCTimedeltaIndex): + pytest.xfail("MultiIndex + TimedeltaIndex produces bad value") + + # Concat raises a TypeError when appending a CategoricalIndex + # with another type + from pandas.core.dtypes.generic import ABCCategoricalIndex + if isinstance(index1, ABCCategoricalIndex): + pytest.xfail("Cannot have a CategoricalIndex append to another typ") + + df1 = pd.DataFrame([[1, 2, 3]], index=index1[:1]) + df2 = pd.DataFrame([[4, 5, 6]], index=index2[:1]) + result = df1.append(df2, sort=sort) + assert index1[0] in result.index + assert index2[0] in result.index + + def test_duplicates_without_verify_integrity(self): + # When verify_integrity=False, the function should + # allow duplicate values in the rows index. + + raise NotImplementedError + + def test_duplicates_with_verify_integrity(self): + # When verify_integrity=True, the function should + # not allow duplicate values in the rows index (whether + # in the input or output). 
+ + raise NotImplementedError + + def test_ignore_index(self): + # When ignore_index=True, the function should completely + # ignore the input indexes and generate one that is brand + # new (RangeIndex). + + raise NotImplementedError + + def test_warning_ignore_index_and_verify_integrity(self): + # It makes no sense to set verify_integrity=True when + # ignore_index=True. To warn of a possible user + # misunderstanding, append should raise a warning in + # this situation. + + raise NotImplementedError + + +class TestAppendDangling(object): + """Tests that have not been concretized yet + """ + + def test_append_unnamed_series_raises(self, sort): + dict_msg = 'Can only append a dict if ignore_index=True' + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + dict = { + 0: 7, + 1: 8, + 2: 9 + } + with pytest.raises(TypeError, match=dict_msg): + df.append(dict, sort=sort) + with pytest.raises(TypeError, match=dict_msg): + df.append([dict], sort=sort) + with pytest.raises(TypeError, match=dict_msg): + df.append([dict, dict], sort=sort) + + series_msg = 'Can only append a Series if ignore_index=True or .*' + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + series = pd.Series([7, 8, 9]) + with pytest.raises(TypeError, match=series_msg): + df.append(series, sort=sort) + with pytest.raises(TypeError, match=series_msg): + df.append([series], sort=sort) + with pytest.raises(TypeError, match=series_msg): + df.append([series, series], sort=sort) + + indexes = [ + None, + pd.Index([0, 1]), + pd.Index(['a', 'b']), + pd.Index(['a', 'b'], name='foo') + ] + + @pytest.mark.parametrize('index1', indexes, ids=lambda x: repr(x)) + @pytest.mark.parametrize('index2', indexes, ids=lambda x: repr(x)) + def test_append_ignore_index(self, sort, index1, index2): + # when appending with ignore_index=True, + # all index content must be forgotten + df1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]], index=index1) + df2 = pd.DataFrame([[1, 2, 3], [4, 5, 6]], index=index2) + + result = df1.append(df2, ignore_index=True) 
+ expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], + [1, 2, 3], [4, 5, 6]]) + assert_frame_equal(result, expected) + + result = df1.append([df2], ignore_index=True) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], + [1, 2, 3], [4, 5, 6]]) + assert_frame_equal(result, expected) + + result = df1.append([df2, df2], ignore_index=True) + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], + [1, 2, 3], [4, 5, 6], + [1, 2, 3], [4, 5, 6]]) + assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 762b04cc3bd4f..b9a5caeb4a2e7 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -904,7 +904,7 @@ def test_append_same_columns_type(self, index): ser_index = index[:2] ser = pd.Series([7, 8], index=ser_index, name=2) result = df.append(ser) - expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]], + expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index) assert_frame_equal(result, expected) @@ -958,13 +958,13 @@ def test_append_different_columns_types_raises( df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other, name=2) - with pytest.raises(TypeError): + with pytest.raises((AttributeError, ValueError, TypeError)): df.append(ser) df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other) ser = pd.Series([7, 8, 9], index=index_can_append, name=2) - with pytest.raises(TypeError): + with pytest.raises((AttributeError, ValueError, TypeError)): df.append(ser) def test_append_dtype_coerce(self, sort):