@@ -2770,9 +2770,10 @@ def plot_group(group, ax):
2770
2770
return fig
2771
2771
2772
2772
2773
- def hist_frame (data , column = None , by = None , grid = True , xlabelsize = None ,
2774
- xrot = None , ylabelsize = None , yrot = None , ax = None , sharex = False ,
2775
- sharey = False , figsize = None , layout = None , bins = 10 , ** kwds ):
2773
+ def hist_frame (data , column = None , weights = None , by = None , grid = True ,
2774
+ xlabelsize = None , xrot = None , ylabelsize = None , yrot = None , ax = None ,
2775
+ sharex = False , sharey = False , figsize = None , layout = None , bins = 10 ,
2776
+ ** kwds ):
2776
2777
"""
2777
2778
Draw histogram of the DataFrame's series using matplotlib / pylab.
2778
2779
@@ -2781,6 +2782,8 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
2781
2782
data : DataFrame
2782
2783
column : string or sequence
2783
2784
If passed, will be used to limit data to a subset of columns
2785
+ weights : string or sequence
2786
+ If passed, will be used to weight the data
2784
2787
by : object, optional
2785
2788
If passed, then used to form histograms for separate groups
2786
2789
grid : boolean, default True
@@ -2812,7 +2815,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
2812
2815
"""
2813
2816
2814
2817
if by is not None :
2815
- axes = grouped_hist (data , column = column , by = by , ax = ax , grid = grid , figsize = figsize ,
2818
+ axes = grouped_hist (data , column = column , weights = weights , by = by , ax = ax , grid = grid , figsize = figsize ,
2816
2819
sharex = sharex , sharey = sharey , layout = layout , bins = bins ,
2817
2820
xlabelsize = xlabelsize , xrot = xrot , ylabelsize = ylabelsize , yrot = yrot ,
2818
2821
** kwds )
@@ -2916,17 +2919,18 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
2916
2919
return axes
2917
2920
2918
2921
2919
- def grouped_hist (data , column = None , by = None , ax = None , bins = 50 , figsize = None ,
2920
- layout = None , sharex = False , sharey = False , rot = 90 , grid = True ,
2921
- xlabelsize = None , xrot = None , ylabelsize = None , yrot = None ,
2922
- ** kwargs ):
2922
+ def grouped_hist (data , column = None , weights = None , by = None , ax = None , bins = 50 ,
2923
+ figsize = None , layout = None , sharex = False , sharey = False , rot = 90 ,
2924
+ grid = True , xlabelsize = None , xrot = None , ylabelsize = None ,
2925
+ yrot = None , ** kwargs ):
2923
2926
"""
2924
2927
Grouped histogram
2925
2928
2926
2929
Parameters
2927
2930
----------
2928
2931
data: Series/DataFrame
2929
2932
column: object, optional
2933
+ weights: object, optional
2930
2934
by: object, optional
2931
2935
ax: axes, optional
2932
2936
bins: int, default 50
@@ -2942,12 +2946,20 @@ def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None,
2942
2946
-------
2943
2947
axes: collection of Matplotlib Axes
2944
2948
"""
def plot_group(group, ax, weights=None):
    """
    Draw the histogram of a single group on ``ax``.

    ``bins`` and ``kwargs`` are not parameters: they are read from the
    enclosing ``grouped_hist`` call and forwarded to ``ax.hist``.

    Parameters
    ----------
    group : values of one group (presumably a Series -- confirm with caller)
    ax : axes object providing ``hist``
    weights : optional
        Per-observation weights, assumed to share ``group``'s index.
        When None an unweighted histogram is drawn.
    """
    if weights is None:
        # Nothing to align: drop missing observations and plot. ``weights``
        # must not be dereferenced on this path (``None`` has no ``.values``).
        ax.hist(group.dropna().values, bins=bins, **kwargs)
        return

    # An observation is usable only if both its value and its weight are
    # present; the same mask is applied to both so they stay aligned.
    keep = ~(np.isnan(weights) | np.isnan(group))
    ax.hist(group.loc[keep].values, weights=weights.loc[keep].values,
            bins=bins, **kwargs)
2947
2959
2948
2960
xrot = xrot or rot
2949
2961
2950
- fig , axes = _grouped_plot (plot_group , data , column = column ,
2962
+ fig , axes = _grouped_plot (plot_group , data , column = column , weights = weights ,
2951
2963
by = by , sharex = sharex , sharey = sharey , ax = ax ,
2952
2964
figsize = figsize , layout = layout , rot = rot )
2953
2965
@@ -3034,9 +3046,9 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
3034
3046
return ret
3035
3047
3036
3048
3037
- def _grouped_plot (plotf , data , column = None , by = None , numeric_only = True ,
3038
- figsize = None , sharex = True , sharey = True , layout = None ,
3039
- rot = 0 , ax = None , ** kwargs ):
3049
+ def _grouped_plot (plotf , data , column = None , weights = None , by = None ,
3050
+ numeric_only = True , figsize = None , sharex = True , sharey = True ,
3051
+ layout = None , rot = 0 , ax = None , ** kwargs ):
3040
3052
from pandas import DataFrame
3041
3053
3042
3054
if figsize == 'default' :
@@ -3047,6 +3059,8 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
3047
3059
3048
3060
grouped = data .groupby (by )
3049
3061
if column is not None :
3062
+ if weights is not None :
3063
+ weights = grouped [weights ]
3050
3064
grouped = grouped [column ]
3051
3065
3052
3066
naxes = len (grouped )
@@ -3056,11 +3070,20 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
3056
3070
3057
3071
_axes = _flatten (axes )
3058
3072
3073
+ weight = None
3059
3074
for i , (key , group ) in enumerate (grouped ):
3060
3075
ax = _axes [i ]
3076
+ if weights is not None :
3077
+ weight = weights .get_group (key )
3061
3078
if numeric_only and isinstance (group , DataFrame ):
3062
3079
group = group ._get_numeric_data ()
3063
- plotf (group , ax , ** kwargs )
3080
+ if weight is not None :
3081
+ weight = weight ._get_numeric_data ()
3082
+ if weight is not None :
3083
+ plotf (group , ax , weight , ** kwargs )
3084
+ else :
3085
+ # other plot functions (scatter plot etc.) do not accept a weight argument
3086
+ plotf (group , ax , ** kwargs )
3064
3087
ax .set_title (com .pprint_thing (key ))
3065
3088
3066
3089
return fig , axes
0 commit comments