Skip to content

Commit 99e11b4

Browse files
committed
Refactor map to use common code for series and index when possible and add dict performance test
1 parent ef7daba commit 99e11b4

File tree

5 files changed

+108
-59
lines changed

5 files changed

+108
-59
lines changed

asv_bench/benchmarks/series_methods.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,27 @@ def setup(self):
120120

121121
def time_series_dropna_datetime(self):
122122
self.s.dropna()
123+
124+
125+
class series_map_dict(object):
    """Benchmark ``Series.map`` when the mapper is a plain ``dict``."""

    goal_time = 0.2

    def setup(self):
        # 10000 random integer values drawn from [0, map_size); every
        # possible value has a key in the mapping dict.
        map_size = 1000
        self.s = Series(np.random.randint(0, map_size, 10000))
        # Key ``i`` maps to ``map_size - i`` (0 -> 1000, ..., 999 -> 1).
        self.map_dict = dict(zip(range(map_size), range(map_size, 0, -1)))

    def time_series_map_dict(self):
        self.s.map(self.map_dict)
135+
136+
137+
class series_map_series(object):
    """Benchmark ``Series.map`` when the mapper is another ``Series``."""

    goal_time = 0.2

    def setup(self):
        # 10000 random integer values drawn from [0, map_size); every
        # possible value is a label in the mapper's default integer index.
        map_size = 1000
        self.s = Series(np.random.randint(0, map_size, 10000))
        # Position ``i`` holds ``map_size - i`` (1000, 999, ..., 1).
        self.map_series = Series(np.arange(map_size, 0, -1))

    def time_series_map_series(self):
        self.s.map(self.map_series)

pandas/core/base.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88

99
from pandas.core.dtypes.missing import isnull
1010
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass
11-
from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar
11+
from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar, is_extension_type
1212
from pandas.util.validators import validate_bool_kwarg
1313

14-
from pandas.core import common as com
14+
from pandas.core import common as com, algorithms
1515
import pandas.core.nanops as nanops
1616
import pandas._libs.lib as lib
1717
from pandas.compat.numpy import function as nv
@@ -933,6 +933,51 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
933933
klass=self.__class__.__name__, op=name))
934934
return func(**kwds)
935935

936+
def _map_values(self, values, arg, na_action=None):
    """Map ``values`` through ``arg`` and return the mapped array.

    Common implementation used by the ``map`` methods so that dict,
    Series and callable mappers are all handled in one place.

    Parameters
    ----------
    values : array-like
        The values to map. For dict/Series mappers they are looked up
        with ``get_indexer``; for callables they are mapped element-wise.
    arg : function, dict, or Series
        The mapping correspondence. A dict subclass that defines
        ``__missing__`` is treated as a lookup function (GH #15999).
    na_action : {None, 'ignore'}
        If 'ignore', NA values are masked out and are not passed to a
        callable ``arg``.

    Returns
    -------
    new_values : array-like
        The mapped values.

    Raises
    ------
    NotImplementedError
        If ``self.dtype`` is an extension type and ``na_action`` is not
        None.
    """
    # Pick the element-wise mapping routine used when ``arg`` is callable.
    if is_extension_type(self.dtype):
        if na_action is not None:
            raise NotImplementedError

        def map_f(values, f):
            return values.map(f)
    else:
        if na_action == 'ignore':
            def map_f(values, f):
                return lib.map_infer_mask(values, f,
                                          isnull(values).view(np.uint8))
        else:
            map_f = lib.map_infer

    map_values = None
    if isinstance(arg, dict):
        if hasattr(arg, '__missing__'):
            # If a dictionary subclass defines a default value method,
            # convert arg to a lookup function (GH #15999).
            dict_with_default = arg
            arg = lambda x: dict_with_default[x]
        else:
            # Dictionary does not have a default. Thus it's safe to
            # convert to an Index for efficiency.
            from pandas import Index
            idx = Index(arg.keys())
            # Cast to dict so we can get values using lib.fast_multiget
            # if this is a dict subclass (GH #15999)
            map_values = idx._get_values_from_dict(dict(arg))
            arg = idx
    elif isinstance(arg, ABCSeries):
        # Take from ``_values`` rather than ``values`` so the mapper's
        # backing array is used as-is.
        # NOTE(review): ``values`` may coerce extension-backed data to a
        # plain ndarray; confirm Index.map callers are happy with
        # ``_values`` as well.
        map_values = arg._values
        arg = arg.index

    if map_values is not None:
        # Since values were input this means we came from either
        # a dict or a series and arg should be an index
        indexer = arg.get_indexer(values)
        new_values = algorithms.take_1d(map_values, indexer)
    else:
        # arg is a function
        new_values = map_f(values, arg)

    return new_values
979+
980+
936981
def value_counts(self, normalize=False, sort=True, ascending=False,
937982
bins=None, dropna=True):
938983
"""

pandas/core/indexes/base.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2674,7 +2674,7 @@ def get_indexer_for(self, target, **kwargs):
26742674
26752675
Parameters
26762676
----------
2677-
data : dict
2677+
data : {dict, DictWithoutMissing}
26782678
The dictionary from which to extract the values
26792679
26802680
Returns
@@ -2726,43 +2726,36 @@ def groupby(self, values):
27262726

27272727
return result
27282728

2729-
def map(self, mapper):
2730-
"""Apply mapper function to an index.
2729+
def map(self, arg, na_action=None):
2730+
"""Map values of Index using input correspondence (which can be a
2731+
dict, Series, or function)
27312732
27322733
Parameters
27332734
----------
2734-
mapper : {callable, dict, Series}
2735-
Function to be applied or input correspondence object.
2736-
dict and Series support new in 0.20.0.
2735+
arg : function, dict, or Series
2736+
na_action : {None, 'ignore'}
2737+
If 'ignore', propagate NA values, without passing them to the
2738+
mapping function
27372739
27382740
Returns
27392741
-------
2740-
applied : Union[Index, MultiIndex], inferred
2742+
applied : {Index, MultiIndex}, inferred
27412743
The output of the mapping function applied to the index.
27422744
If the function returns a tuple with more than one element
27432745
a MultiIndex will be returned.
27442746
27452747
"""
2746-
from .multi import MultiIndex
2747-
2748-
if isinstance(mapper, ABCSeries):
2749-
indexer = mapper.index.get_indexer(self.values)
2750-
mapped_values = algos.take_1d(mapper.values, indexer)
2751-
elif isinstance(mapper, dict):
2752-
idx = Index(mapper.keys())
2753-
data = idx._get_values_from_dict(mapper)
2754-
indexer = idx.get_indexer(self.values)
2755-
mapped_values = algos.take_1d(data, indexer)
2756-
else:
2757-
mapped_values = self._arrmap(self.values, mapper)
27582748

2749+
from .multi import MultiIndex
2750+
new_values = super(Index, self)._map_values(
2751+
self.values, arg, na_action=na_action)
27592752
attributes = self._get_attributes_dict()
2760-
if mapped_values.size and isinstance(mapped_values[0], tuple):
2761-
return MultiIndex.from_tuples(mapped_values,
2753+
if new_values.size and isinstance(new_values[0], tuple):
2754+
return MultiIndex.from_tuples(new_values,
27622755
names=attributes.get('name'))
27632756

27642757
attributes['copy'] = False
2765-
return Index(mapped_values, **attributes)
2758+
return Index(new_values, **attributes)
27662759

27672760
def isin(self, values, level=None):
27682761
"""

pandas/core/series.py

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2105,43 +2105,13 @@ def map(self, arg, na_action=None):
21052105
3 0
21062106
dtype: int64
21072107
"""
2108-
21092108
if is_extension_type(self.dtype):
2110-
values = self._values
2111-
if na_action is not None:
2112-
raise NotImplementedError
2113-
map_f = lambda values, f: values.map(f)
2109+
input_values = self._values
21142110
else:
2115-
values = self.asobject
2116-
2117-
if na_action == 'ignore':
2118-
def map_f(values, f):
2119-
return lib.map_infer_mask(values, f,
2120-
isnull(values).view(np.uint8))
2121-
else:
2122-
map_f = lib.map_infer
2123-
2124-
if isinstance(arg, dict):
2125-
if hasattr(arg, '__missing__'):
2126-
# If a dictionary subclass defines a default value method,
2127-
# convert arg to a lookup function (GH #15999).
2128-
dict_with_default = arg
2129-
arg = lambda x: dict_with_default[x]
2130-
else:
2131-
# Dictionary does not have a default. Thus it's safe to
2132-
# convert to an indexed series for efficiency.
2133-
arg = self._constructor(arg, index=arg.keys())
2134-
2135-
if isinstance(arg, Series):
2136-
# arg is a Series
2137-
indexer = arg.index.get_indexer(values)
2138-
new_values = algorithms.take_1d(arg._values, indexer)
2139-
else:
2140-
# arg is a function
2141-
new_values = map_f(values, arg)
2142-
2143-
return self._constructor(new_values,
2144-
index=self.index).__finalize__(self)
2111+
input_values = self.asobject
2112+
new_values = super(Series, self)._map_values(
2113+
input_values, arg, na_action=na_action)
2114+
return self._constructor(new_values, index=self.index).__finalize__(self)
21452115

21462116
def _gotitem(self, key, ndim, subset=None):
21472117
"""

pandas/tests/indexes/test_base.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
from datetime import datetime, timedelta
66

7+
from collections import defaultdict
8+
79
import pandas.util.testing as tm
810
from pandas.core.indexes.api import Index, MultiIndex
911
from pandas.tests.indexes.common import Base
@@ -860,6 +862,21 @@ def test_map_with_non_function_missing_values(self):
860862
mapper = {0: 'foo', 2: 2.0, -1: 'baz'}
861863
tm.assert_index_equal(expected, input.map(mapper))
862864

865+
def test_map_na_exclusion(self):
866+
idx = Index([1.5, np.nan, 3, np.nan, 5])
867+
868+
result = idx.map(lambda x: x * 2, na_action='ignore')
869+
exp = idx * 2
870+
tm.assert_index_equal(result, exp)
871+
872+
def test_map_defaultdict(self):
873+
idx = Index([1, 2, 3])
874+
default_dict = defaultdict(lambda: 'blank')
875+
default_dict[1] = 'stuff'
876+
result = idx.map(default_dict)
877+
expected = Index(['stuff', 'blank', 'blank'])
878+
tm.assert_index_equal(result, expected)
879+
863880
def test_append_multiple(self):
864881
index = Index(['a', 'b', 'c', 'd', 'e', 'f'])
865882

0 commit comments

Comments
 (0)