Skip to content

Commit 035e512

Browse files
committed
REF: deduplicate _NDFrameIndexer._multi_take code
1 parent 9e982e1 commit 035e512

File tree

2 files changed

+84
-96
lines changed

2 files changed

+84
-96
lines changed

pandas/core/frame.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2723,7 +2723,8 @@ def _getitem_array(self, key):
27232723
indexer = key.nonzero()[0]
27242724
return self._take(indexer, axis=0)
27252725
else:
2726-
indexer = self.loc._convert_to_indexer(key, axis=1)
2726+
indexer = self.loc._convert_to_indexer(key, axis=1,
2727+
raise_missing=True)
27272728
return self._take(indexer, axis=1)
27282729

27292730
def _getitem_multilevel(self, key):

pandas/core/indexing.py

Lines changed: 82 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -927,28 +927,8 @@ def _multi_take(self, tup):
927927
"""
928928
try:
929929
o = self.obj
930-
d = {}
931-
for key, axis in zip(tup, o._AXIS_ORDERS):
932-
ax = o._get_axis(axis)
933-
# Have the index compute an indexer or return None
934-
# if it cannot handle:
935-
indexer, keyarr = ax._convert_listlike_indexer(key,
936-
kind=self.name)
937-
# We only act on all found values:
938-
if indexer is not None and (indexer != -1).all():
939-
self._validate_read_indexer(key, indexer, axis)
940-
d[axis] = (ax[indexer], indexer)
941-
continue
942-
943-
# If we are trying to get actual keys from empty Series, we
944-
# patiently wait for a KeyError later on - otherwise, convert
945-
if len(ax) or not len(key):
946-
key = self._convert_for_reindex(key, axis)
947-
indexer = ax.get_indexer_for(key)
948-
keyarr = ax.reindex(keyarr)[0]
949-
self._validate_read_indexer(keyarr, indexer,
950-
o._get_axis_number(axis))
951-
d[axis] = (keyarr, indexer)
930+
d = {axis: self._get_listlike_indexer(key, axis)
931+
for (key, axis) in zip(tup, o._AXIS_ORDERS)}
952932
return o._reindex_with_indexers(d, copy=True, allow_dups=True)
953933
except (KeyError, IndexingError) as detail:
954934
raise self._exception(detail)
@@ -1124,63 +1104,83 @@ def _getitem_axis(self, key, axis=None):
11241104

11251105
return self._get_label(key, axis=axis)
11261106

1127-
def _getitem_iterable(self, key, axis=None):
1128-
if axis is None:
1129-
axis = self.axis or 0
1107+
def _get_listlike_indexer(self, key, axis, raise_missing=False):
1108+
"""
1109+
Transform a list-like of keys into a new index and an indexer.
11301110
1131-
self._validate_key(key, axis)
1111+
Parameters
1112+
----------
1113+
key : list-like
1114+
Target labels
1115+
axis: int
1116+
Dimension on which the indexing is being made
1117+
raise_missing: bool
1118+
Whether to raise a KeyError if some labels are not found. Will be
1119+
removed in the future, and then this method will always behave as
1120+
if raise_missing=True.
11321121
1133-
labels = self.obj._get_axis(axis)
1122+
Raises
1123+
------
1124+
KeyError
1125+
If at least one key was requested but none was found, and
1126+
raise_missing=True.
11341127
1135-
if com.is_bool_indexer(key):
1136-
key = check_bool_indexer(labels, key)
1137-
inds, = key.nonzero()
1138-
return self.obj._take(inds, axis=axis)
1139-
else:
1128+
Returns
1129+
-------
1130+
keyarr: Index
1131+
New index (coinciding with 'key' if the axis is unique)
1132+
values : array-like
1133+
An indexer for the return object; -1 denotes keys not found
1134+
"""
1135+
try:
1136+
o = self.obj
1137+
ax = o._get_axis(axis)
11401138
# Have the index compute an indexer or return None
1141-
# if it cannot handle; we only act on all found values
1142-
indexer, keyarr = labels._convert_listlike_indexer(
1143-
key, kind=self.name)
1139+
# if it cannot handle:
1140+
indexer, keyarr = ax._convert_listlike_indexer(key,
1141+
kind=self.name)
1142+
# We only act on all found values:
11441143
if indexer is not None and (indexer != -1).all():
1145-
self._validate_read_indexer(key, indexer, axis)
1146-
return self.obj.take(indexer, axis=axis)
1144+
self._validate_read_indexer(key, indexer, axis,
1145+
raise_missing=raise_missing)
1146+
return ax[indexer], indexer
11471147

1148-
ax = self.obj._get_axis(axis)
1149-
# existing labels are unique and indexer are unique
1150-
if labels.is_unique and Index(keyarr).is_unique:
1148+
if ax.is_unique:
1149+
# If we are trying to get actual keys from empty Series, we
1150+
# patiently wait for a KeyError later on - otherwise, convert
1151+
if len(ax) or not len(key):
1152+
key = self._convert_for_reindex(key, axis)
11511153
indexer = ax.get_indexer_for(key)
1152-
self._validate_read_indexer(key, indexer, axis)
1153-
1154-
d = {axis: [ax.reindex(keyarr)[0], indexer]}
1155-
return self.obj._reindex_with_indexers(d, copy=True,
1156-
allow_dups=True)
1157-
1158-
# existing labels are non-unique
1154+
keyarr = ax.reindex(keyarr)[0]
11591155
else:
1156+
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
11601157

1161-
# reindex with the specified axis
1162-
if axis + 1 > self.obj.ndim:
1163-
raise AssertionError("invalid indexing error with "
1164-
"non-unique index")
1165-
1166-
new_target, indexer, new_indexer = labels._reindex_non_unique(
1167-
keyarr)
1158+
self._validate_read_indexer(keyarr, indexer,
1159+
o._get_axis_number(axis),
1160+
raise_missing=raise_missing)
1161+
return keyarr, indexer
1162+
except (KeyError, IndexingError) as detail:
1163+
raise self._exception(detail)
11681164

1169-
if new_indexer is not None:
1170-
result = self.obj._take(indexer[indexer != -1], axis=axis)
1165+
def _getitem_iterable(self, key, axis=None):
1166+
if axis is None:
1167+
axis = self.axis or 0
11711168

1172-
self._validate_read_indexer(key, new_indexer, axis)
1173-
result = result._reindex_with_indexers(
1174-
{axis: [new_target, new_indexer]},
1175-
copy=True, allow_dups=True)
1169+
self._validate_key(key, axis)
11761170

1177-
else:
1178-
self._validate_read_indexer(key, indexer, axis)
1179-
result = self.obj._take(indexer, axis=axis)
1171+
labels = self.obj._get_axis(axis)
11801172

1181-
return result
1173+
if com.is_bool_indexer(key):
1174+
key = check_bool_indexer(labels, key)
1175+
inds, = key.nonzero()
1176+
return self.obj._take(inds, axis=axis)
1177+
else:
1178+
keyarr, indexer = self._get_listlike_indexer(key, axis,
1179+
raise_missing=False)
1180+
return self.obj._reindex_with_indexers({axis: [keyarr, indexer]},
1181+
copy=True, allow_dups=True)
11821182

1183-
def _validate_read_indexer(self, key, indexer, axis):
1183+
def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
11841184
"""
11851185
Check that indexer can be used to return a result (e.g. at least one
11861186
element was found, unless the list of keys was actually empty).
@@ -1193,11 +1193,16 @@ def _validate_read_indexer(self, key, indexer, axis):
11931193
Indices corresponding to the key (with -1 indicating not found)
11941194
axis: int
11951195
Dimension on which the indexing is being made
1196+
raise_missing: bool
1197+
Whether to raise a KeyError if some labels are not found. Will be
1198+
removed in the future, and then this method will always behave as
1199+
if raise_missing=True.
11961200
11971201
Raises
11981202
------
11991203
KeyError
1200-
If at least one key was requested none was found.
1204+
If at least one key was requested but none was found, and
1205+
raise_missing=True.
12011206
"""
12021207

12031208
ax = self.obj._get_axis(axis)
@@ -1214,6 +1219,10 @@ def _validate_read_indexer(self, key, indexer, axis):
12141219
u"None of [{key}] are in the [{axis}]".format(
12151220
key=key, axis=self.obj._get_axis_name(axis)))
12161221

1222+
if not(self.name == 'loc' and not raise_missing):
1223+
not_found = list(set(key) - set(ax))
1224+
raise KeyError("{} not in index".format(not_found))
1225+
12171226
# we skip the warning on Categorical/Interval
12181227
# as this check is actually done (check for
12191228
# non-missing values), but a bit later in the
@@ -1229,9 +1238,10 @@ def _validate_read_indexer(self, key, indexer, axis):
12291238

12301239
if not (ax.is_categorical() or ax.is_interval()):
12311240
warnings.warn(_missing_key_warning,
1232-
FutureWarning, stacklevel=5)
1241+
FutureWarning, stacklevel=6)
12331242

1234-
def _convert_to_indexer(self, obj, axis=None, is_setter=False):
1243+
def _convert_to_indexer(self, obj, axis=None, is_setter=False,
1244+
raise_missing=False):
12351245
"""
12361246
Convert indexing key into something we can use to do actual fancy
12371247
indexing on an ndarray
@@ -1310,33 +1320,10 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False):
13101320
inds, = obj.nonzero()
13111321
return inds
13121322
else:
1313-
1314-
# Have the index compute an indexer or return None
1315-
# if it cannot handle
1316-
indexer, objarr = labels._convert_listlike_indexer(
1317-
obj, kind=self.name)
1318-
if indexer is not None:
1319-
return indexer
1320-
1321-
# unique index
1322-
if labels.is_unique:
1323-
indexer = check = labels.get_indexer(objarr)
1324-
1325-
# non-unique (dups)
1326-
else:
1327-
(indexer,
1328-
missing) = labels.get_indexer_non_unique(objarr)
1329-
# 'indexer' has dupes, create 'check' using 'missing'
1330-
check = np.zeros(len(objarr), dtype=np.intp)
1331-
check[missing] = -1
1332-
1333-
mask = check == -1
1334-
if mask.any():
1335-
raise KeyError('{mask} not in index'
1336-
.format(mask=objarr[mask]))
1337-
1338-
return com._values_from_object(indexer)
1339-
1323+
# When setting, missing keys are not allowed, even with .loc:
1324+
kwargs = {'raise_missing': True if is_setter else
1325+
raise_missing}
1326+
return self._get_listlike_indexer(obj, axis, **kwargs)[1]
13401327
else:
13411328
try:
13421329
return labels.get_loc(obj)

0 commit comments

Comments
 (0)