diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 40efc4c65476a..301cfa53e3e0b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5881,13 +5881,13 @@ def pivot(self, index=None, columns=None, values=None): hierarchical columns whose top level are the function names (inferred from the function objects themselves) If dict is passed, the key is column to aggregate and value - is function or list of functions + is function or list of functions. fill_value : scalar, default None - Value to replace missing values with + Value to replace missing values with. margins : bool, default False - Add all row / columns (e.g. for subtotal / grand totals) + Add all rows / columns (e.g. for subtotal / grand totals). dropna : bool, default True - Do not include columns whose entries are all NaN + Do not include columns whose entries are all NaN. margins_name : str, default 'All' Name of the row / column that will contain the totals when margins is True. @@ -5901,6 +5901,7 @@ def pivot(self, index=None, columns=None, values=None): Returns ------- DataFrame + An Excel style pivot table. See Also -------- diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index c9554016630cd..0c077702b4cb4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1407,24 +1407,24 @@ def interval_range( Parameters ---------- start : numeric or datetime-like, default None - Left bound for generating intervals + Left bound for generating intervals. end : numeric or datetime-like, default None - Right bound for generating intervals + Right bound for generating intervals. periods : int, default None - Number of periods to generate + Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. 
name : str, default None - Name of the resulting IntervalIndex + Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Returns ------- - rng : IntervalIndex + IntervalIndex See Also -------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a0f16789621c7..ca7be9ba512da 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -997,28 +997,29 @@ def memory_usage(self, deep=False): def period_range(start=None, end=None, periods=None, freq=None, name=None): """ - Return a fixed frequency PeriodIndex, with day (calendar) as the default - frequency. + Return a fixed frequency PeriodIndex. + + The day (calendar) is the default frequency. Parameters ---------- start : str or period-like, default None - Left bound for generating periods + Left bound for generating periods. end : str or period-like, default None - Right bound for generating periods + Right bound for generating periods. periods : int, default None - Number of periods to generate + Number of periods to generate. freq : str or DateOffset, optional Frequency alias. By default the freq is taken from `start` or `end` if those are Period objects. Otherwise, the default is ``"D"`` for daily frequency. name : str, default None - Name of the resulting PeriodIndex + Name of the resulting PeriodIndex. Returns ------- - prng : PeriodIndex + PeriodIndex Notes ----- diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 98fee491e0a73..f7d9462d2ec32 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -206,12 +206,12 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): Parameters ---------- df : DataFrame - The wide-format DataFrame + The wide-format DataFrame. stubnames : str or list-like The stub name(s). 
The wide format variables are assumed to start with the stub names. i : str or list-like - Column(s) to use as id variable(s) + Column(s) to use as id variable(s). j : str The name of the sub-observation variable. What you wish to name your suffix in the long format. @@ -219,14 +219,14 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): A character indicating the separation of the variable names in the wide format, to be stripped from the names in the long format. For example, if your column names are A-suffix1, A-suffix2, you - can strip the hyphen by specifying `sep='-'` + can strip the hyphen by specifying `sep='-'`. suffix : str, default '\\d+' A regular expression capturing the wanted suffixes. '\\d+' captures numeric suffixes. Suffixes with no numbers could be specified with the negated character class '\\D+'. You can also further disambiguate suffixes, for example, if your wide variables are of the form A-one, B-two,.., and you have an unrelated column A-rating, you can - ignore the last one by specifying `suffix='(!?one|two)'` + ignore the last one by specifying `suffix='(!?one|two)'`. .. versionchanged:: 0.23.0 When all suffixes are numeric, they are cast to int64/float64. @@ -360,7 +360,7 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): >>> stubnames = sorted( ... set([match[0] for match in df.columns.str.findall( - ... r'[A-B]\(.*\)').values if match != [] ]) + ... r'[A-B]\(.*\)').values if match != []]) ... ) >>> list(stubnames) ['A(weekly)', 'B(weekly)'] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a189b2cd1ab84..956642b51ce97 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -176,9 +176,10 @@ def merge_ordered( how="outer", ): """ - Perform merge with optional filling/interpolation designed for ordered - data like time series data. Optionally perform group-wise merge (see - examples). 
+ Perform merge with optional filling/interpolation. + + Designed for ordered data like time series data. Optionally + perform group-wise merge (see examples). Parameters ---------- @@ -189,18 +190,18 @@ def merge_ordered( left_on : label or list, or array-like Field names to join on in left DataFrame. Can be a vector or list of vectors of the length of the DataFrame to use a particular vector as - the join key instead of columns + the join key instead of columns. right_on : label or list, or array-like Field names to join on in right DataFrame or vector/list of vectors per - left_on docs + left_on docs. left_by : column name or list of column names Group left DataFrame by group columns and merge piece by piece with - right DataFrame + right DataFrame. right_by : column name or list of column names Group right DataFrame by group columns and merge piece by piece with - left DataFrame + left DataFrame. fill_method : {'ffill', None}, default None - Interpolation method for data + Interpolation method for data. suffixes : Sequence, default is ("_x", "_y") A length-2 sequence where each element is optionally a string indicating the suffix to add to overlapping column names in @@ -214,13 +215,13 @@ def merge_ordered( * left: use only keys from left frame (SQL: left outer join) * right: use only keys from right frame (SQL: right outer join) * outer: use union of keys from both frames (SQL: full outer join) - * inner: use intersection of keys from both frames (SQL: inner join) + * inner: use intersection of keys from both frames (SQL: inner join). Returns ------- - merged : DataFrame - The output type will the be same as 'left', if it is a subclass - of DataFrame. + DataFrame + The merged DataFrame output type will be the same as + 'left', if it is a subclass of DataFrame. 
See Also -------- @@ -229,15 +230,21 @@ Examples -------- - >>> A >>> B - key lvalue group key rvalue - 0 a 1 a 0 b 1 - 1 c 2 a 1 c 2 - 2 e 3 a 2 d 3 + >>> A + key lvalue group + 0 a 1 a + 1 c 2 a + 2 e 3 a 3 a 1 b 4 c 2 b 5 e 3 b + >>> B + key rvalue + 0 b 1 + 1 c 2 + 2 d 3 + >>> merge_ordered(A, B, fill_method='ffill', left_by='group') group key lvalue rvalue 0 a a 1 NaN diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 70143e4603a4b..bb8d15896b727 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -577,14 +577,12 @@ def to_datetime( Parameters ---------- - arg : int, float, str, datetime, list, tuple, 1-d array, Series - or DataFrame/dict-like - + arg : int, float, str, datetime, list, tuple, 1-d array, Series or DataFrame/dict-like + The object to convert to a datetime. errors : {'ignore', 'raise', 'coerce'}, default 'raise' - - - If 'raise', then invalid parsing will raise an exception - - If 'coerce', then invalid parsing will be set as NaT - - If 'ignore', then invalid parsing will return the input + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaT. + - If 'ignore', then invalid parsing will return the input. dayfirst : bool, default False Specify a date parse order if `arg` is str or its list-likes. If True, parses dates with the day first, eg 10/11/12 is parsed as @@ -605,7 +603,6 @@ def to_datetime( Return UTC DatetimeIndex if True (converting any tz-aware datetime.datetime objects as well). box : bool, default True - - If True returns a DatetimeIndex or Index-like object - If False returns ndarray of values. @@ -615,17 +612,17 @@ def to_datetime( respectively. format : str, default None - strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse + The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse all the way up to nanoseconds. 
See strftime documentation for more information on choices: - https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior + https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. exact : bool, True by default - + Behaves as: - If True, require an exact format match. - If False, allow the format to match anywhere in the target string. unit : str, default 'ns' - unit of the arg (D,s,ms,us,ns) denote the unit, which is an + The unit of the arg (D,s,ms,us,ns) denotes the unit, which is an integer or float number. This will be based off the origin. Example, with unit='ms' and origin='unix' (the default), this would calculate the number of milliseconds to the unix epoch start. @@ -652,11 +649,12 @@ def to_datetime( .. versionadded:: 0.23.0 .. versionchanged:: 0.25.0 - - changed default value from False to True + - changed default value from False to True. Returns ------- - ret : datetime if parsing succeeded. + datetime + If parsing succeeded. Return type depends on input: - list-like: DatetimeIndex @@ -712,10 +710,10 @@ def to_datetime( 4 3/12/2000 dtype: object - >>> %timeit pd.to_datetime(s,infer_datetime_format=True) # doctest: +SKIP + >>> %timeit pd.to_datetime(s, infer_datetime_format=True) # doctest: +SKIP 100 loops, best of 3: 10.4 ms per loop - >>> %timeit pd.to_datetime(s,infer_datetime_format=False) # doctest: +SKIP + >>> %timeit pd.to_datetime(s, infer_datetime_format=False) # doctest: +SKIP 1 loop, best of 3: 471 ms per loop Using a unix epoch time diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 011ea1b8e42f2..23c370638b572 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -67,11 +67,11 @@ def hash_pandas_object( Parameters ---------- index : bool, default True - include the index in the hash (if Series/DataFrame) + Include the index in the hash (if Series/DataFrame). 
encoding : str, default 'utf8' - encoding for data & key when strings + Encoding for data & key when strings. hash_key : str, default _default_hash_key - hash_key for string key to encode + Hash key for string key to encode. categorize : bool, default True Whether to first categorize object arrays before hashing. This is more efficient when the array contains duplicate values. @@ -253,9 +253,9 @@ def hash_array( ---------- vals : ndarray, Categorical encoding : str, default 'utf8' - encoding for data & key when strings + Encoding for data & key when strings. hash_key : str, default _default_hash_key - hash_key for string key to encode + Hash key for string key to encode. categorize : bool, default True Whether to first categorize object arrays before hashing. This is more efficient when the array contains duplicate values. diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 1f1ad55969d6f..d0ab6dd37596c 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -79,8 +79,6 @@ subset of data is selected with ``usecols``, index_col is based on the subset. usecols : int, str, list-like, or callable default None - Return a subset of the columns. - * If None, then parse all columns. * If int, then indicates last column to be parsed. @@ -98,6 +96,8 @@ * If callable, then evaluate each column name against it and parse the column if the callable returns ``True``. + Returns a subset of the columns according to behavior above. + .. versionadded:: 0.24.0 squeeze : bool, default False diff --git a/pandas/io/packers.py b/pandas/io/packers.py index c0ace7996e1b9..253441ab25813 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -191,7 +191,7 @@ def read_msgpack(path_or_buf, encoding="utf-8", iterator=False, **kwargs): ``StringIO``. encoding : Encoding for decoding msgpack str type iterator : boolean, if True, return an iterator to the unpacker - (default is False). Returns -------