From d77de8aae482a9d0a7f114945c184bc220486e9f Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Sat, 10 Mar 2018 15:07:31 +0100 Subject: [PATCH 01/11] DOC: improve the docstring of DataFrame.plot.hist() --- pandas/plotting/_core.py | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 98fdcf8f94ae0..897e13e6fc735 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2772,20 +2772,47 @@ def box(self, by=None, **kwds): def hist(self, by=None, bins=10, **kwds): """ - Histogram + Draw one histogram of the DataFrame's Series using matplotlib. + + A histogram is a representation of the distribution of data. + This function groups the values of all given Series in the DataFrame + into bins, and draws all bins in only one matplotlib.AxesSubplot. This + is useful when the DataFrame's Series are in a similar scale. Parameters ---------- - by : string or sequence + by : str Column in the DataFrame to group by. - bins: integer, default 10 - Number of histogram bins to be used - `**kwds` : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + bins : int, default 10 + Number of histogram bins to be used. + **kwds : optional + Parameters to pass on to :meth:`pandas.DataFrame.plot`. Returns ------- axes : matplotlib.AxesSubplot or np.array of them + + See Also + -------- + :meth:`pandas.DataFrame.hist` : Draw histograms per DataFrame's Series. + :meth:`pandas.Series.hist` : Draw a histogram with Series' data. + + Examples + -------- + + When using values between 0 and 3, calling hist() with bins = 3 will + create three bins: one that groups values between 0 and 1, another + for values between 1 and 2, and another for values between 2 and 3. + We use alpha parameter to be able to see overlapping columns. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'length': [ 1.5, 0.5, 1.2, 0.9, 3], + ... 
'width': [ 0.7, 0.2, 0.15, 0.2, 1.1] + ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) + >>> hist = df.plot.hist(bins = 3, xticks = range(4), alpha = 0.5) """ return self(kind='hist', by=by, bins=bins, **kwds) From 28f1043b333fdafed6b843b91ff21f012ac936e7 Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Sat, 10 Mar 2018 15:31:33 +0100 Subject: [PATCH 02/11] DOC: Improve pandas.DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 897e13e6fc735..a83f4663f4364 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2790,7 +2790,7 @@ def hist(self, by=None, bins=10, **kwds): Returns ------- - axes : matplotlib.AxesSubplot or np.array of them + axes : matplotlib.AxesSubplot or np.array of them. See Also -------- From a494111056b772e179e0823214cef001198156e0 Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Sat, 10 Mar 2018 15:44:59 +0100 Subject: [PATCH 03/11] DOC: Remove unnecessary empty line in pandas.DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a83f4663f4364..be1db432f5442 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2799,7 +2799,6 @@ def hist(self, by=None, bins=10, **kwds): Examples -------- - When using values between 0 and 3, calling hist() with bins = 3 will create three bins: one that groups values between 0 and 1, another for values between 1 and 2, and another for values between 2 and 3. 
From ddd6ebfe14e5438b2c7fe2ef32de0b7083757eb8 Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Sat, 10 Mar 2018 15:55:06 +0100 Subject: [PATCH 04/11] DOC: improve the pandas.DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index be1db432f5442..50db3dbc7ae61 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2786,11 +2786,11 @@ def hist(self, by=None, bins=10, **kwds): bins : int, default 10 Number of histogram bins to be used. **kwds : optional - Parameters to pass on to :meth:`pandas.DataFrame.plot`. + Keyword arguments to pass on to :meth:`pandas.DataFrame.plot`. Returns ------- - axes : matplotlib.AxesSubplot or np.array of them. + axes : matplotlib.AxesSubplot histogram. See Also -------- From a39598384d861a6cca57e91b8cd93eebc92c661f Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Sat, 10 Mar 2018 17:43:39 +0100 Subject: [PATCH 05/11] DOC: improve the pandas.DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 50db3dbc7ae61..a5c3c87890c54 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2781,7 +2781,7 @@ def hist(self, by=None, bins=10, **kwds): Parameters ---------- - by : str + by : str or sequence Column in the DataFrame to group by. bins : int, default 10 Number of histogram bins to be used. 
From ccadb3479a94b70175ec4537043a23f142acdebc Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Tue, 13 Mar 2018 22:58:02 +0100 Subject: [PATCH 06/11] DOC: fix issues in DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 1f5c97f3045ba..fbe241f398dfb 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2951,7 +2951,7 @@ def box(self, by=None, **kwds): def hist(self, by=None, bins=10, **kwds): """ - Draw one histogram of the DataFrame's Series using matplotlib. + Draw one histogram of the DataFrame's columns. A histogram is a representation of the distribution of data. This function groups the values of all given Series in the DataFrame @@ -2960,7 +2960,7 @@ def hist(self, by=None, bins=10, **kwds): Parameters ---------- - by : str or sequence + by : str or sequence, optional Column in the DataFrame to group by. bins : int, default 10 Number of histogram bins to be used. @@ -2973,8 +2973,8 @@ def hist(self, by=None, bins=10, **kwds): See Also -------- - :meth:`pandas.DataFrame.hist` : Draw histograms per DataFrame's Series. - :meth:`pandas.Series.hist` : Draw a histogram with Series' data. + pandas.DataFrame.hist : Draw histograms per DataFrame's Series. + pandas.Series.hist : Draw a histogram with Series' data. Examples -------- @@ -2989,7 +2989,7 @@ def hist(self, by=None, bins=10, **kwds): >>> df = pd.DataFrame({ ... 'length': [ 1.5, 0.5, 1.2, 0.9, 3], ... 'width': [ 0.7, 0.2, 0.15, 0.2, 1.1] - ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) + ... 
}, index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']) >>> hist = df.plot.hist(bins = 3, xticks = range(4), alpha = 0.5) """ return self(kind='hist', by=by, bins=bins, **kwds) From df0506090bbafc82aec07b2a2cc817af5cda3686 Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Thu, 15 Mar 2018 23:39:18 +0100 Subject: [PATCH 07/11] DOC: fix details in DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index fbe241f398dfb..907ca076a1d30 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2965,7 +2965,7 @@ def hist(self, by=None, bins=10, **kwds): bins : int, default 10 Number of histogram bins to be used. **kwds : optional - Keyword arguments to pass on to :meth:`pandas.DataFrame.plot`. + Additional keyword arguments are documented in :meth:`pandas.DataFrame.plot`. Returns ------- From dbed27e54d3cb891a535bf68cb00338ad4edd85c Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Thu, 15 Mar 2018 23:41:41 +0100 Subject: [PATCH 08/11] DOC: fix PEP8 in DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 907ca076a1d30..35153812c3752 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2965,7 +2965,8 @@ def hist(self, by=None, bins=10, **kwds): bins : int, default 10 Number of histogram bins to be used. **kwds : optional - Additional keyword arguments are documented in :meth:`pandas.DataFrame.plot`. + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. 
Returns ------- From 37adb70d872bb9c6311df5f53bcc0ab8f5d3ad33 Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Sun, 18 Mar 2018 21:35:50 +0100 Subject: [PATCH 09/11] DOC: Improve DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 35153812c3752..163223dc00981 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2955,8 +2955,8 @@ def hist(self, by=None, bins=10, **kwds): A histogram is a representation of the distribution of data. This function groups the values of all given Series in the DataFrame - into bins, and draws all bins in only one matplotlib.AxesSubplot. This - is useful when the DataFrame's Series are in a similar scale. + into bins, and draws all bins in one :class:`matplotlib.axes.Axes`. + This is useful when the DataFrame's Series are in a similar scale. Parameters ---------- @@ -2964,7 +2964,7 @@ def hist(self, by=None, bins=10, **kwds): Column in the DataFrame to group by. bins : int, default 10 Number of histogram bins to be used. - **kwds : optional + **kwds Additional keyword arguments are documented in :meth:`pandas.DataFrame.plot`. @@ -2974,8 +2974,8 @@ def hist(self, by=None, bins=10, **kwds): See Also -------- - pandas.DataFrame.hist : Draw histograms per DataFrame's Series. - pandas.Series.hist : Draw a histogram with Series' data. + DataFrame.hist : Draw histograms per DataFrame's Series. + Series.hist : Draw a histogram with Series' data. 
Examples -------- From 53b43aaf8de6975a6f3569dbf495b2bbfc4530bc Mon Sep 17 00:00:00 2001 From: Julio Martinez Date: Mon, 19 Mar 2018 21:40:58 +0100 Subject: [PATCH 10/11] DOC: Improve example in DataFrame.plot.hist docstring --- pandas/plotting/_core.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 163223dc00981..9e6f73d796026 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2979,19 +2979,18 @@ def hist(self, by=None, bins=10, **kwds): Examples -------- - When using values between 0 and 3, calling hist() with bins = 3 will - create three bins: one that groups values between 0 and 1, another - for values between 1 and 2, and another for values between 2 and 3. - We use alpha parameter to be able to see overlapping columns. + When we roll a die 6000 times, we expect to get each value around 1000 + times. But when we roll two dice and sum the result, the distribution + is going to be quite different. Let's display it. .. plot:: :context: close-figs - >>> df = pd.DataFrame({ - ... 'length': [ 1.5, 0.5, 1.2, 0.9, 3], - ... 'width': [ 0.7, 0.2, 0.15, 0.2, 1.1] - ... }, index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']) - >>> hist = df.plot.hist(bins = 3, xticks = range(4), alpha = 0.5) + >>> df = pd.DataFrame( + ... np.random.randint(1, 7, 6000), + ... 
columns = ['one']) + >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) + >>> hist = df.plot.hist(bins = 12, alpha = 0.5) """ return self(kind='hist', by=by, bins=bins, **kwds) From 627170ed3caf58521adca6363e7eda8d72676c02 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 19 Mar 2018 21:56:36 +0100 Subject: [PATCH 11/11] pep8 --- pandas/plotting/_core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 9e6f73d796026..2da9ad597a0bc 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2981,7 +2981,8 @@ def hist(self, by=None, bins=10, **kwds): -------- When we roll a die 6000 times, we expect to get each value around 1000 times. But when we roll two dice and sum the result, the distribution - is going to be quite different. Let's display it. + is going to be quite different. A histogram illustrates those + distributions. .. plot:: :context: close-figs @@ -2990,7 +2991,7 @@ def hist(self, by=None, bins=10, **kwds): ... np.random.randint(1, 7, 6000), ... columns = ['one']) >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) - >>> hist = df.plot.hist(bins = 12, alpha = 0.5) + >>> ax = df.plot.hist(bins=12, alpha=0.5) """ return self(kind='hist', by=by, bins=bins, **kwds)