Skip to content

Commit fbc4425

Browse files
committed
Updates
* indexer -> indices * doc user-facing vs physical * assert na_cmps * test reindex w/ non-NA fill_value
1 parent 741f284 commit fbc4425

File tree

7 files changed

+76
-39
lines changed

7 files changed

+76
-39
lines changed

pandas/core/algorithms.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,8 +1448,9 @@ def func(arr, indexer, out, fill_value=np.nan):
14481448
return func
14491449

14501450

1451-
def take(arr, indexer, allow_fill=False, fill_value=None):
1452-
"""Take elements from an array.
1451+
def take(arr, indices, allow_fill=False, fill_value=None):
1452+
"""
1453+
Take elements from an array.
14531454
14541455
.. versionadded:: 0.23.0
14551456
@@ -1458,22 +1459,23 @@ def take(arr, indexer, allow_fill=False, fill_value=None):
14581459
arr : sequence
14591460
Non array-likes (sequences without a dtype) are coerced
14601461
to an ndarray.
1461-
indexer : sequence of integers
1462+
indices : sequence of integers
14621463
Indices to be taken.
14631464
allow_fill : bool, default False
1464-
How to handle negative values in `indexer`.
1465+
How to handle negative values in `indices`.
14651466
1466-
* False: negative values in `indexer` indicate
1467-
slices from the right (the default)
1467+
* False: negative values in `indices` indicate indexing from
1468+
the right (the default). This is similar to :func:`numpy.take`.
14681469
1469-
* True: negative values in `indexer` indicate
1470+
* True: negative values in `indices` indicate
14701471
missing values. These values are set to `fill_value`. Any other
14711472
negative values raise a ``ValueError``.
14721473
14731474
fill_value : any, optional
14741475
Fill value to use for NA-indices when `allow_fill` is True.
14751476
This may be ``None``, in which case the default NA value for
1476-
the type, ``self.dtype.na_value``, is used.
1477+
the type is used. For ndarrays, :attr:`numpy.nan` is used. For
1478+
ExtensionArrays, a different value may be used.
14771479
14781480
Returns
14791481
-------
@@ -1483,17 +1485,17 @@ def take(arr, indexer, allow_fill=False, fill_value=None):
14831485
Raises
14841486
------
14851487
IndexError
1486-
When the indexer is out of bounds for the array.
1488+
When `indices` is out of bounds for the array.
14871489
ValueError
14881490
When `indices` contains negative values other than ``-1``
14891491
and `allow_fill` is True.
14901492
14911493
Notes
14921494
-----
1493-
When `allow_fill` is False, `indexer` may be whatever dimensionality
1495+
When `allow_fill` is False, `indices` may be whatever dimensionality
14941496
is accepted by NumPy for `arr`.
14951497
1496-
When `allow_fill` is True, `indexer` should be 1-D.
1498+
When `allow_fill` is True, `indices` should be 1-D.
14971499
14981500
See Also
14991501
--------
@@ -1524,15 +1526,15 @@ def take(arr, indexer, allow_fill=False, fill_value=None):
15241526
arr = np.asarray(arr)
15251527

15261528
# Do we require int64 or intp here?
1527-
indexer = np.asarray(indexer, dtype='int')
1529+
indices = np.asarray(indices, dtype='int')
15281530

15291531
if allow_fill:
15301532
# Pandas style, -1 means NA
1531-
validate_indices(indexer, len(arr))
1532-
result = take_1d(arr, indexer, allow_fill=True, fill_value=fill_value)
1533+
validate_indices(indices, len(arr))
1534+
result = take_1d(arr, indices, allow_fill=True, fill_value=fill_value)
15331535
else:
15341536
# NumPy style
1535-
result = arr.take(indexer)
1537+
result = arr.take(indices)
15361538
return result
15371539

15381540

pandas/core/arrays/base.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -463,45 +463,51 @@ def factorize(self, na_sentinel=-1):
463463
# Indexing methods
464464
# ------------------------------------------------------------------------
465465

466-
def take(self, indexer, allow_fill=False, fill_value=None):
466+
def take(self, indices, allow_fill=False, fill_value=None):
467467
# type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
468468
"""Take elements from an array.
469469
470470
Parameters
471471
----------
472-
indexer : sequence of integers
472+
indices : sequence of integers
473473
Indices to be taken. See Notes for how negative indices
474474
are handled.
475475
allow_fill : bool, default False
476-
How to handle negative values in `indexer`.
476+
How to handle negative values in `indices`.
477477
478-
For False values (the default), negative values in `indexer`
478+
For False values (the default), negative values in `indices`
479479
indicate indexing from the right.
480480
481-
For True values, indicies where `indexer` is ``-1`` indicate
481+
For True values, indices where `indices` is ``-1`` indicate
482482
missing values. These values are set to `fill_value`. Any other
483483
negative value should raise a ``ValueError``.
484484
fill_value : any, optional
485485
Fill value to use for NA-indices when `allow_fill` is True.
486486
This may be ``None``, in which case the default NA value for
487487
the type, ``self.dtype.na_value``, is used.
488488
489+
For many ExtensionArrays, there will be two representations of
490+
`fill_value`: a user-facing "boxed" scalar, and a low-level
491+
physical NA value. `fill_value` should be the user-facing version,
492+
and the implementation should handle translating that to the
493+
physical version for processing the take if necessary.
494+
489495
Returns
490496
-------
491497
ExtensionArray
492498
493499
Raises
494500
------
495501
IndexError
496-
When the indexer is out of bounds for the array.
502+
When the indices are out of bounds for the array.
497503
ValueError
498-
When the indexer contains negative values other than ``-1``
504+
When `indices` contains negative values other than ``-1``
499505
and `allow_fill` is True.
500506
501507
Notes
502508
-----
503509
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
504-
``iloc``, when the indexer is a sequence of values. Additionally,
510+
``iloc``, when `indices` is a sequence of values. Additionally,
505511
it's called by :meth:`Series.reindex`, or any other method
506512
that causes realignment, with a `fill_value`.
507513
@@ -518,14 +524,17 @@ def take(self, indexer, allow_fill=False, fill_value=None):
518524
519525
.. code-block:: python
520526
521-
def take(self, indexer, allow_fill=False, fill_value=None):
527+
def take(self, indices, allow_fill=False, fill_value=None):
522528
from pandas.core.algorithms import take
523529
530+
# If the ExtensionArray is backed by an ndarray, then
531+
# just pass that here instead of coercing to object.
524532
data = self.astype(object)
533+
525534
if allow_fill and fill_value is None:
526535
fill_value = self.dtype.na_value
527536
528-
result = take(data, indexer, fill_value=fill_value,
537+
result = take(data, indices, fill_value=fill_value,
529538
allow_fill=allow_fill)
530539
return self._from_sequence(result)
531540
"""

pandas/core/dtypes/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ class _DtypeOpsMixin(object):
1717
# class's methods can be moved to ExtensionDtype and removed.
1818

1919
# na_value is the default NA value to use for this type. This is used in
20-
# e.g. ExtensionArray.take.
20+
# e.g. ExtensionArray.take. This should be the user-facing "boxed" version
21+
# of the NA value, not the physical NA value for storage.
2122
na_value = np.nan
2223

2324
def __eq__(self, other):

pandas/core/internals.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5405,9 +5405,6 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
54055405

54065406
for placement, join_units in concat_plan:
54075407

5408-
# The issue: we have a join unit (or maybe several) that needs to be
5409-
# reindexed.
5410-
54115408
if len(join_units) == 1 and not join_units[0].indexers:
54125409
b = join_units[0].block
54135410
values = b.values

pandas/tests/extension/base/getitem.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,11 @@ def test_take(self, data, na_value, na_cmp):
127127
result = data.take([0, -1])
128128
assert result.dtype == data.dtype
129129
assert result[0] == data[0]
130-
na_cmp(result[1], na_value)
130+
assert result[1] == data[-1]
131+
132+
result = data.take([0, -1], allow_fill=True, fill_value=na_value)
133+
assert result[0] == data[0]
134+
assert na_cmp(result[1], na_value)
131135

132136
with tm.assert_raises_regex(IndexError, "out of bounds"):
133137
data.take([len(data) + 1])
@@ -136,7 +140,7 @@ def test_take_empty(self, data, na_value, na_cmp):
136140
empty = data[:0]
137141

138142
result = empty.take([-1], allow_fill=True)
139-
na_cmp(result[0], na_value)
143+
assert na_cmp(result[0], na_value)
140144

141145
with pytest.raises(IndexError):
142146
empty.take([-1])
@@ -170,7 +174,6 @@ def test_take_out_of_bounds_raises(self, data, allow_fill):
170174
with pytest.raises(IndexError):
171175
arr.take(np.asarray([0, 3]), allow_fill=allow_fill)
172176

173-
@pytest.mark.xfail(reason="Series.take with extension array buggy for -1")
174177
def test_take_series(self, data):
175178
s = pd.Series(data)
176179
result = s.take([0, -1])
@@ -196,3 +199,14 @@ def test_reindex(self, data, na_value):
196199
expected = pd.Series(data._from_sequence([na_value, na_value]),
197200
index=[n, n + 1])
198201
self.assert_series_equal(result, expected)
202+
203+
def test_reindex_non_na_fill_value(self, data_missing):
204+
valid = data_missing[1]
205+
na = data_missing[0]
206+
207+
array = data_missing._from_sequence([na, valid])
208+
ser = pd.Series(array)
209+
result = ser.reindex([0, 1, 2], fill_value=valid)
210+
expected = pd.Series(data_missing._from_sequence([na, valid, valid]))
211+
212+
self.assert_series_equal(result, expected)

pandas/tests/extension/decimal/test_decimal.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,15 @@ class TestReshaping(BaseDecimal, base.BaseReshapingTests):
108108

109109

110110
class TestGetitem(BaseDecimal, base.BaseGetitemTests):
111-
pass
111+
112+
def test_take_na_value_other_decimal(self):
113+
arr = DecimalArray([decimal.Decimal('1.0'),
114+
decimal.Decimal('2.0')])
115+
result = arr.take([0, -1], allow_fill=True,
116+
fill_value=decimal.Decimal('-1.0'))
117+
expected = DecimalArray([decimal.Decimal('1.0'),
118+
decimal.Decimal('-1.0')])
119+
self.assert_extension_array_equal(result, expected)
112120

113121

114122
class TestMissing(BaseDecimal, base.BaseMissingTests):

pandas/tests/extension/json/array.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
"""Test extension array for storing nested data in a pandas container.
2+
3+
The JSONArray stores lists of dictionaries. The storage mechanism is a list,
4+
not an ndarray.
5+
6+
Note:
7+
8+
We currently store lists of UserDicts (Py3 only). Pandas has a few places
9+
internally that specifically check for dicts, and does non-scalar things
10+
in that case. We *want* the dictionaries to be treated as scalars, so we
11+
hack around pandas by using UserDicts.
12+
"""
113
import collections
214
import itertools
315
import numbers
@@ -125,12 +137,6 @@ def take(self, indexer, allow_fill=False, fill_value=None):
125137

126138
return self._from_sequence(output)
127139

128-
# def astype(self, dtype, copy=True):
129-
# # NumPy has issues when all the dicts are the same length.
130-
# # np.array([UserDict(...), UserDict(...)]) fails,
131-
# # but np.array([{...}, {...}]) works, so cast.
132-
# return np.array([dict(x) for x in self], dtype=dtype, copy=copy)
133-
134140
def copy(self, deep=False):
135141
return type(self)(self.data[:])
136142

0 commit comments

Comments
 (0)