Skip to content

Commit 5e0eb41

Browse files
dberenbaumDave Berenbaum
authored and
Dave Berenbaum
committed
ENH: option to return -inf/inf as lower/upper bound for qcut quantiles, see pandas-dev#17282
1 parent d30c4a0 commit 5e0eb41

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ New features
1717

1818
- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
1919

20+
- :func:`qcut` now accepts ``bounded`` as a keyword argument; passing ``bounded=False`` produces unbounded quantiles whose lower/upper bounds are -inf/inf (:issue:`17282`)
21+
2022
.. _whatsnew_0240.enhancements.extension_array_operators:
2123

2224
``ExtensionArray`` operator support

pandas/core/reshape/tile.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
238238
series_index, name, dtype)
239239

240240

241-
def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
241+
def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise',
242+
bounded=True):
242243
"""
243244
Quantile-based discretization function. Discretize variable into
244245
equal-sized buckets based on rank or based on sample quantiles. For example
@@ -262,6 +263,9 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
262263
The precision at which to store and display the bins labels
263264
duplicates : {default 'raise', 'drop'}, optional
264265
If bin edges are not unique, raise ValueError or drop non-uniques.
266+
bounded : bool, default True
267+
Use the min/max of the distribution as the lower/upper bounds if True,
268+
otherwise use -inf/inf. Ignored if dtype is datetime/timedelta.
265269
266270
.. versionadded:: 0.20.0
267271
@@ -302,6 +306,9 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
302306
else:
303307
quantiles = q
304308
bins = algos.quantile(x, quantiles)
309+
if not bounded and not dtype:
310+
bins[0] = -np.inf
311+
bins[-1] = np.inf
305312
fac, bins = _bins_to_cuts(x, bins, labels=labels,
306313
precision=precision, include_lowest=True,
307314
dtype=dtype, duplicates=duplicates)

pandas/tests/reshape/test_tile.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,12 @@ def test_cut_read_only(self, array_1_writeable, array_2_writeable):
479479
tm.assert_categorical_equal(cut(hundred_elements, array_1),
480480
cut(hundred_elements, array_2))
481481

482+
def test_qcut_unbounded(self):
483+
result = qcut(range(5), 4, bounded=False)
484+
expected = [(-np.inf, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0],
485+
(3.0, np.inf]]
486+
assert result == expected
487+
482488

483489
class TestDatelike(object):
484490

0 commit comments

Comments
 (0)