Skip to content

Commit 699055e

Browse files
committed
Refactor map to use common code for series and index when possible and add dict performance test
1 parent 65aed3b commit 699055e

File tree

5 files changed

+107
-58
lines changed

5 files changed

+107
-58
lines changed

asv_bench/benchmarks/series_methods.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,30 @@ def time_series_dropna_datetime(self):
123123
self.s.dropna()
124124

125125

126+
class series_map_dict(object):
127+
goal_time = 0.2
128+
129+
def setup(self):
130+
map_size = 1000
131+
self.s = Series(np.random.randint(0, map_size, 10000))
132+
self.map_dict = {i: map_size - i for i in range(map_size)}
133+
134+
def time_series_map_dict(self):
135+
self.s.map(self.map_dict)
136+
137+
138+
class series_map_series(object):
139+
goal_time = 0.2
140+
141+
def setup(self):
142+
map_size = 1000
143+
self.s = Series(np.random.randint(0, map_size, 10000))
144+
self.map_series = Series(map_size - np.arange(map_size))
145+
146+
def time_series_map_series(self):
147+
self.s.map(self.map_series)
148+
149+
126150
class series_clip(object):
127151
goal_time = 0.2
128152

pandas/core/base.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar
1212
from pandas.util._validators import validate_bool_kwarg
1313

14-
from pandas.core import common as com
14+
from pandas.core import common as com, algorithms
1515
import pandas.core.nanops as nanops
1616
import pandas._libs.lib as lib
1717
from pandas.compat.numpy import function as nv
@@ -897,6 +897,51 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
897897
klass=self.__class__.__name__, op=name))
898898
return func(**kwds)
899899

900+
def _map_values(self, values, arg, na_action=None):
901+
if is_extension_type(self.dtype):
902+
if na_action is not None:
903+
raise NotImplementedError
904+
map_f = lambda values, f: values.map(f)
905+
else:
906+
if na_action == 'ignore':
907+
def map_f(values, f):
908+
return lib.map_infer_mask(values, f,
909+
isnull(values).view(np.uint8))
910+
else:
911+
map_f = lib.map_infer
912+
913+
map_values = None
914+
if isinstance(arg, dict):
915+
if hasattr(arg, '__missing__'):
916+
# If a dictionary subclass defines a default value method,
917+
# convert arg to a lookup function (GH #15999).
918+
dict_with_default = arg
919+
arg = lambda x: dict_with_default[x]
920+
else:
921+
# Dictionary does not have a default. Thus it's safe to
922+
# convert to an Index for efficiency.
923+
from pandas import Index
924+
idx = Index(arg.keys())
925+
# Cast to dict so we can get values using lib.fast_multiget
926+
# if this is a dict subclass (GH #15999)
927+
map_values = idx._get_values_from_dict(dict(arg))
928+
arg = idx
929+
elif isinstance(arg, ABCSeries):
930+
map_values = arg.values
931+
arg = arg.index
932+
933+
if map_values is not None:
934+
# map_values is only set when arg was a dict or a Series; by this
935+
# point arg has been replaced by the Index of its keys
936+
indexer = arg.get_indexer(values)
937+
new_values = algorithms.take_1d(map_values, indexer)
938+
else:
939+
# arg is a function
940+
new_values = map_f(values, arg)
941+
942+
return new_values
943+
944+
900945
def value_counts(self, normalize=False, sort=True, ascending=False,
901946
bins=None, dropna=True):
902947
"""

pandas/core/indexes/base.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2675,7 +2675,7 @@ def get_indexer_for(self, target, **kwargs):
26752675
26762676
Parameters
26772677
----------
2678-
data : dict
2678+
data : {dict, DictWithoutMissing}
26792679
The dictionary from which to extract the values
26802680
26812681
Returns
@@ -2727,43 +2727,36 @@ def groupby(self, values):
27272727

27282728
return result
27292729

2730-
def map(self, mapper):
2731-
"""Apply mapper function to an index.
2730+
def map(self, arg, na_action=None):
2731+
"""Map values of Index using input correspondence (which can be a
2732+
dict, Series, or function)
27322733
27332734
Parameters
27342735
----------
2735-
mapper : {callable, dict, Series}
2736-
Function to be applied or input correspondence object.
2737-
dict and Series support new in 0.20.0.
2736+
arg : function, dict, or Series
2737+
na_action : {None, 'ignore'}
2738+
If 'ignore', propagate NA values, without passing them to the
2739+
mapping function
27382740
27392741
Returns
27402742
-------
2741-
applied : Union[Index, MultiIndex], inferred
2743+
applied : {Index, MultiIndex}, inferred
27422744
The output of the mapping function applied to the index.
27432745
If the function returns a tuple with more than one element,
27442746
a MultiIndex will be returned.
27452747
27462748
"""
2747-
from .multi import MultiIndex
2748-
2749-
if isinstance(mapper, ABCSeries):
2750-
indexer = mapper.index.get_indexer(self.values)
2751-
mapped_values = algos.take_1d(mapper.values, indexer)
2752-
elif isinstance(mapper, dict):
2753-
idx = Index(mapper.keys())
2754-
data = idx._get_values_from_dict(mapper)
2755-
indexer = idx.get_indexer(self.values)
2756-
mapped_values = algos.take_1d(data, indexer)
2757-
else:
2758-
mapped_values = self._arrmap(self.values, mapper)
27592749

2750+
from .multi import MultiIndex
2751+
new_values = super(Index, self)._map_values(
2752+
self.values, arg, na_action=na_action)
27602753
attributes = self._get_attributes_dict()
2761-
if mapped_values.size and isinstance(mapped_values[0], tuple):
2762-
return MultiIndex.from_tuples(mapped_values,
2754+
if new_values.size and isinstance(new_values[0], tuple):
2755+
return MultiIndex.from_tuples(new_values,
27632756
names=attributes.get('name'))
27642757

27652758
attributes['copy'] = False
2766-
return Index(mapped_values, **attributes)
2759+
return Index(new_values, **attributes)
27672760

27682761
def isin(self, values, level=None):
27692762
"""

pandas/core/series.py

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,43 +2133,13 @@ def map(self, arg, na_action=None):
21332133
3 0
21342134
dtype: int64
21352135
"""
2136-
21372136
if is_extension_type(self.dtype):
2138-
values = self._values
2139-
if na_action is not None:
2140-
raise NotImplementedError
2141-
map_f = lambda values, f: values.map(f)
2137+
input_values = self._values
21422138
else:
2143-
values = self.asobject
2144-
2145-
if na_action == 'ignore':
2146-
def map_f(values, f):
2147-
return lib.map_infer_mask(values, f,
2148-
isnull(values).view(np.uint8))
2149-
else:
2150-
map_f = lib.map_infer
2151-
2152-
if isinstance(arg, dict):
2153-
if hasattr(arg, '__missing__'):
2154-
# If a dictionary subclass defines a default value method,
2155-
# convert arg to a lookup function (GH #15999).
2156-
dict_with_default = arg
2157-
arg = lambda x: dict_with_default[x]
2158-
else:
2159-
# Dictionary does not have a default. Thus it's safe to
2160-
# convert to an indexed series for efficiency.
2161-
arg = self._constructor(arg, index=arg.keys())
2162-
2163-
if isinstance(arg, Series):
2164-
# arg is a Series
2165-
indexer = arg.index.get_indexer(values)
2166-
new_values = algorithms.take_1d(arg._values, indexer)
2167-
else:
2168-
# arg is a function
2169-
new_values = map_f(values, arg)
2170-
2171-
return self._constructor(new_values,
2172-
index=self.index).__finalize__(self)
2139+
input_values = self.asobject
2140+
new_values = super(Series, self)._map_values(
2141+
input_values, arg, na_action=na_action)
2142+
return self._constructor(new_values, index=self.index).__finalize__(self)
21732143

21742144
def _gotitem(self, key, ndim, subset=None):
21752145
"""

pandas/tests/indexes/test_base.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
from datetime import datetime, timedelta
66

7+
from collections import defaultdict
8+
79
import pandas.util.testing as tm
810
from pandas.core.indexes.api import Index, MultiIndex
911
from pandas.tests.indexes.common import Base
@@ -859,6 +861,21 @@ def test_map_with_non_function_missing_values(self):
859861
mapper = {0: 'foo', 2: 2.0, -1: 'baz'}
860862
tm.assert_index_equal(expected, input.map(mapper))
861863

864+
def test_map_na_exclusion(self):
865+
idx = Index([1.5, np.nan, 3, np.nan, 5])
866+
867+
result = idx.map(lambda x: x * 2, na_action='ignore')
868+
exp = idx * 2
869+
tm.assert_index_equal(result, exp)
870+
871+
def test_map_defaultdict(self):
872+
idx = Index([1, 2, 3])
873+
default_dict = defaultdict(lambda: 'blank')
874+
default_dict[1] = 'stuff'
875+
result = idx.map(default_dict)
876+
expected = Index(['stuff', 'blank', 'blank'])
877+
tm.assert_index_equal(result, expected)
878+
862879
def test_append_multiple(self):
863880
index = Index(['a', 'b', 'c', 'd', 'e', 'f'])
864881

0 commit comments

Comments
 (0)