Skip to content

Commit 489e52a

Browse files
committed
REF: deduplicate _NDFrameIndexer._multi_take code
1 parent 9e982e1 commit 489e52a

File tree

2 files changed

+113
-99
lines changed

2 files changed

+113
-99
lines changed

pandas/core/frame.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2723,7 +2723,8 @@ def _getitem_array(self, key):
27232723
indexer = key.nonzero()[0]
27242724
return self._take(indexer, axis=0)
27252725
else:
2726-
indexer = self.loc._convert_to_indexer(key, axis=1)
2726+
indexer = self.loc._convert_to_indexer(key, axis=1,
2727+
raise_missing=True)
27272728
return self._take(indexer, axis=1)
27282729

27292730
def _getitem_multilevel(self, key):

pandas/core/indexing.py

Lines changed: 111 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -925,33 +925,10 @@ def _multi_take(self, tup):
925925
""" create the reindex map for our objects, raise the _exception if we
926926
can't create the indexer
927927
"""
928-
try:
929-
o = self.obj
930-
d = {}
931-
for key, axis in zip(tup, o._AXIS_ORDERS):
932-
ax = o._get_axis(axis)
933-
# Have the index compute an indexer or return None
934-
# if it cannot handle:
935-
indexer, keyarr = ax._convert_listlike_indexer(key,
936-
kind=self.name)
937-
# We only act on all found values:
938-
if indexer is not None and (indexer != -1).all():
939-
self._validate_read_indexer(key, indexer, axis)
940-
d[axis] = (ax[indexer], indexer)
941-
continue
942-
943-
# If we are trying to get actual keys from empty Series, we
944-
# patiently wait for a KeyError later on - otherwise, convert
945-
if len(ax) or not len(key):
946-
key = self._convert_for_reindex(key, axis)
947-
indexer = ax.get_indexer_for(key)
948-
keyarr = ax.reindex(keyarr)[0]
949-
self._validate_read_indexer(keyarr, indexer,
950-
o._get_axis_number(axis))
951-
d[axis] = (keyarr, indexer)
952-
return o._reindex_with_indexers(d, copy=True, allow_dups=True)
953-
except (KeyError, IndexingError) as detail:
954-
raise self._exception(detail)
928+
o = self.obj
929+
d = {axis: self._get_listlike_indexer(key, axis)
930+
for (key, axis) in zip(tup, o._AXIS_ORDERS)}
931+
return o._reindex_with_indexers(d, copy=True, allow_dups=True)
955932

956933
def _convert_for_reindex(self, key, axis=None):
957934
return key
@@ -1124,63 +1101,110 @@ def _getitem_axis(self, key, axis=None):
11241101

11251102
return self._get_label(key, axis=axis)
11261103

1127-
def _getitem_iterable(self, key, axis=None):
1128-
if axis is None:
1129-
axis = self.axis or 0
1104+
def _get_listlike_indexer(self, key, axis, raise_missing=False):
1105+
"""
1106+
Transform a list-like of keys into a new index and an indexer.
11301107
1131-
self._validate_key(key, axis)
1108+
Parameters
1109+
----------
1110+
key : list-like
1111+
Target labels
1112+
axis: int
1113+
Dimension on which the indexing is being made
1114+
raise_missing: bool
1115+
Whether to raise a KeyError if some labels are not found. Will be
1116+
removed in the future, and then this method will always behave as
1117+
if raise_missing=True.
11321118
1133-
labels = self.obj._get_axis(axis)
1119+
Raises
1120+
------
1121+
KeyError
1122+
If at least one key was requested but none was found, or if some
1123+
keys were not found and raise_missing=True.
11341124
1135-
if com.is_bool_indexer(key):
1136-
key = check_bool_indexer(labels, key)
1137-
inds, = key.nonzero()
1138-
return self.obj._take(inds, axis=axis)
1139-
else:
1125+
Returns
1126+
-------
1127+
keyarr: Index
1128+
New index (coinciding with 'key' if the axis is unique)
1129+
values : array-like
1130+
An indexer for the return object; -1 denotes keys not found
1131+
"""
1132+
o = self.obj
1133+
ax = o._get_axis(axis)
1134+
try:
11401135
# Have the index compute an indexer or return None
1141-
# if it cannot handle; we only act on all found values
1142-
indexer, keyarr = labels._convert_listlike_indexer(
1143-
key, kind=self.name)
1136+
# if it cannot handle:
1137+
indexer, keyarr = ax._convert_listlike_indexer(key,
1138+
kind=self.name)
1139+
# We only act on all found values:
11441140
if indexer is not None and (indexer != -1).all():
1145-
self._validate_read_indexer(key, indexer, axis)
1146-
return self.obj.take(indexer, axis=axis)
1141+
self._validate_read_indexer(key, indexer, axis,
1142+
raise_missing=raise_missing)
1143+
return ax[indexer], indexer
11471144

1148-
ax = self.obj._get_axis(axis)
1149-
# existing labels are unique and indexer are unique
1150-
if labels.is_unique and Index(keyarr).is_unique:
1145+
if ax.is_unique:
1146+
# If we are trying to get actual keys from empty Series, we
1147+
# patiently wait for a KeyError later on - otherwise, convert
1148+
if len(ax) or not len(key):
1149+
key = self._convert_for_reindex(key, axis)
11511150
indexer = ax.get_indexer_for(key)
1152-
self._validate_read_indexer(key, indexer, axis)
1151+
keyarr = ax.reindex(keyarr)[0]
1152+
else:
1153+
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
11531154

1154-
d = {axis: [ax.reindex(keyarr)[0], indexer]}
1155-
return self.obj._reindex_with_indexers(d, copy=True,
1156-
allow_dups=True)
1155+
self._validate_read_indexer(keyarr, indexer,
1156+
o._get_axis_number(axis),
1157+
raise_missing=raise_missing)
1158+
return keyarr, indexer
1159+
except (KeyError, IndexingError) as detail:
1160+
raise self._exception(detail)
11571161

1158-
# existing labels are non-unique
1159-
else:
1162+
def _getitem_iterable(self, key, axis=None):
1163+
"""
1164+
Index current object with an iterable key (which can be a boolean
1165+
indexer, or a collection of keys).
11601166
1161-
# reindex with the specified axis
1162-
if axis + 1 > self.obj.ndim:
1163-
raise AssertionError("invalid indexing error with "
1164-
"non-unique index")
1167+
Parameters
1168+
----------
1169+
key : iterable
1170+
Target labels, or boolean indexer
1171+
axis: int, default None
1172+
Dimension on which the indexing is being made
11651173
1166-
new_target, indexer, new_indexer = labels._reindex_non_unique(
1167-
keyarr)
1174+
Raises
1175+
------
1176+
KeyError
1177+
If no key was found. Will change in the future to raise if not all
1178+
keys were found.
1179+
IndexingError
1180+
If the boolean indexer is unalignable with the object being
1181+
indexed.
11681182
1169-
if new_indexer is not None:
1170-
result = self.obj._take(indexer[indexer != -1], axis=axis)
1183+
Returns
1184+
-------
1185+
scalar, DataFrame, or Series: indexed value(s).
1186+
"""
11711187

1172-
self._validate_read_indexer(key, new_indexer, axis)
1173-
result = result._reindex_with_indexers(
1174-
{axis: [new_target, new_indexer]},
1175-
copy=True, allow_dups=True)
1188+
if axis is None:
1189+
axis = self.axis or 0
11761190

1177-
else:
1178-
self._validate_read_indexer(key, indexer, axis)
1179-
result = self.obj._take(indexer, axis=axis)
1191+
self._validate_key(key, axis)
11801192

1181-
return result
1193+
labels = self.obj._get_axis(axis)
1194+
1195+
if com.is_bool_indexer(key):
1196+
# A boolean indexer
1197+
key = check_bool_indexer(labels, key)
1198+
inds, = key.nonzero()
1199+
return self.obj._take(inds, axis=axis)
1200+
else:
1201+
# A collection of keys
1202+
keyarr, indexer = self._get_listlike_indexer(key, axis,
1203+
raise_missing=False)
1204+
return self.obj._reindex_with_indexers({axis: [keyarr, indexer]},
1205+
copy=True, allow_dups=True)
11821206

1183-
def _validate_read_indexer(self, key, indexer, axis):
1207+
def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
11841208
"""
11851209
Check that indexer can be used to return a result (e.g. at least one
11861210
element was found, unless the list of keys was actually empty).
@@ -1193,11 +1217,16 @@ def _validate_read_indexer(self, key, indexer, axis):
11931217
Indices corresponding to the key (with -1 indicating not found)
11941218
axis: int
11951219
Dimension on which the indexing is being made
1220+
raise_missing: bool
1221+
Whether to raise a KeyError if some labels are not found. Will be
1222+
removed in the future, and then this method will always behave as
1223+
if raise_missing=True.
11961224
11971225
Raises
11981226
------
11991227
KeyError
1200-
If at least one key was requested none was found.
1228+
If at least one key was requested but none was found, or if some
1229+
keys were not found and raise_missing=True.
12011230
"""
12021231

12031232
ax = self.obj._get_axis(axis)
@@ -1214,6 +1243,12 @@ def _validate_read_indexer(self, key, indexer, axis):
12141243
u"None of [{key}] are in the [{axis}]".format(
12151244
key=key, axis=self.obj._get_axis_name(axis)))
12161245

1246+
# We (temporarily) allow for some missing keys with .loc, except in
1247+
some cases (e.g. setting) in which "raise_missing" will be True
1248+
if not(self.name == 'loc' and not raise_missing):
1249+
not_found = list(set(key) - set(ax))
1250+
raise KeyError("{} not in index".format(not_found))
1251+
12171252
# we skip the warning on Categorical/Interval
12181253
# as this check is actually done (check for
12191254
# non-missing values), but a bit later in the
@@ -1229,9 +1264,10 @@ def _validate_read_indexer(self, key, indexer, axis):
12291264

12301265
if not (ax.is_categorical() or ax.is_interval()):
12311266
warnings.warn(_missing_key_warning,
1232-
FutureWarning, stacklevel=5)
1267+
FutureWarning, stacklevel=6)
12331268

1234-
def _convert_to_indexer(self, obj, axis=None, is_setter=False):
1269+
def _convert_to_indexer(self, obj, axis=None, is_setter=False,
1270+
raise_missing=False):
12351271
"""
12361272
Convert indexing key into something we can use to do actual fancy
12371273
indexing on an ndarray
@@ -1310,33 +1346,10 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False):
13101346
inds, = obj.nonzero()
13111347
return inds
13121348
else:
1313-
1314-
# Have the index compute an indexer or return None
1315-
# if it cannot handle
1316-
indexer, objarr = labels._convert_listlike_indexer(
1317-
obj, kind=self.name)
1318-
if indexer is not None:
1319-
return indexer
1320-
1321-
# unique index
1322-
if labels.is_unique:
1323-
indexer = check = labels.get_indexer(objarr)
1324-
1325-
# non-unique (dups)
1326-
else:
1327-
(indexer,
1328-
missing) = labels.get_indexer_non_unique(objarr)
1329-
# 'indexer' has dupes, create 'check' using 'missing'
1330-
check = np.zeros(len(objarr), dtype=np.intp)
1331-
check[missing] = -1
1332-
1333-
mask = check == -1
1334-
if mask.any():
1335-
raise KeyError('{mask} not in index'
1336-
.format(mask=objarr[mask]))
1337-
1338-
return com._values_from_object(indexer)
1339-
1349+
# When setting, missing keys are not allowed, even with .loc:
1350+
kwargs = {'raise_missing': True if is_setter else
1351+
raise_missing}
1352+
return self._get_listlike_indexer(obj, axis, **kwargs)[1]
13401353
else:
13411354
try:
13421355
return labels.get_loc(obj)

0 commit comments

Comments
 (0)