Skip to content

Commit 014fae0

Browse files
committed
Refactor tests and documentation
1 parent 22d4892 commit 014fae0

File tree

2 files changed

+125
-174
lines changed

2 files changed

+125
-174
lines changed

pandas/core/accessors.py

Lines changed: 46 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -1,181 +1,73 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3-
"""
4-
5-
An example/recipe for creating a custom accessor.
6-
7-
8-
The primary use case for accessors is when a Series contains instances
9-
of a particular class and we want to access properties/methods of these
10-
instances in Series form.
11-
12-
Suppose we have a custom State class representing US states:
13-
14-
class State(object):
15-
def __repr__(self):
16-
return repr(self.name)
17-
18-
def __init__(self, name):
19-
self.name = name
20-
self._abbrev_dict = {'California': 'CA', 'Alabama': 'AL'}
21-
22-
@property
23-
def abbrev(self):
24-
return self._abbrev_dict[self.name]
25-
26-
@abbrev.setter
27-
def abbrev(self, value):
28-
self._abbrev_dict[self.name] = value
29-
30-
def fips(self):
31-
return {'California': 6, 'Alabama': 1}[self.name]
32-
33-
34-
We can construct a series of these objects:
35-
36-
>>> ser = pd.Series([State('Alabama'), State('California')])
37-
>>> ser
38-
0 'Alabama'
39-
1 'California'
40-
dtype: object
3+
from pandas.core.base import PandasObject
414

42-
We would like direct access to the `abbrev` property and `fips` method.
43-
One option is to access these manually with `apply`:
445

45-
>>> ser.apply(lambda x: x.fips())
46-
0 1
47-
1 6
48-
dtype: int64
6+
class PandasDelegate(PandasObject):
7+
""" an abstract base class for delegating methods/properties
498
50-
But doing that repeatedly gets old in a hurry, so we decide to make a
51-
custom accessor. This entails subclassing `PandasDelegate` to specify
52-
what should be accessed and how.
9+
Usage: To make a custom accessor, subclass `PandasDelegate`, overriding
10+
the methods below. Then decorate this subclass with
11+
`accessors.wrap_delegate_names` describing the methods and properties
12+
that should be delegated.
5313
54-
There are four methods that *may* be defined in this subclass, one of which
55-
*must* be defined. The mandatory method is a classmethod called
56-
`_make_accessor`. `_make_accessor` is responsible for doing any validation on
57-
inputs for the accessor. In this case, the inputs must be a Series
58-
containing State objects.
14+
Examples can be found in:
5915
16+
pandas.core.accessors.CategoricalAccessor
17+
pandas.core.indexes.accessors (complicated example)
18+
pandas.core.indexes.category.CategoricalIndex
19+
pandas.core.strings.StringMethods
20+
pandas.tests.test_accessors
6021
61-
class StateDelegate(PandasDelegate):
22+
"""
6223

6324
def __init__(self, values):
25+
"""
26+
The subclassed constructor will generally only be called by
27+
_make_accessor. See _make_accessor.__doc__.
28+
"""
6429
self.values = values
6530

6631
@classmethod
67-
def _make_accessor(cls, data):
68-
if not isinstance(data, pd.Series):
69-
raise ValueError('Input must be a Series of States')
70-
elif not data.apply(lambda x: isinstance(x, State)).all():
71-
raise ValueError('All entries must be State objects')
72-
return StateDelegate(data)
73-
74-
75-
With `_make_accessor` defined, we have enough to create the accessor, but
76-
not enough to actually do anything useful with it. In order to access
77-
*methods* of State objects, we implement `_delegate_method`.
78-
`_delegate_method` calls the underlying method for each object in the
79-
series and wraps these in a new Series. The simplest version looks like:
80-
81-
def _delegate_method(self, name, *args, **kwargs):
82-
state_method = lambda x: getattr(x, name)(*args, **kwargs)
83-
return self.values.apply(state_method)
84-
85-
Similarly in order to access *properties* of State objects, we need to
86-
implement `_delegate_property_get`:
87-
88-
def _delegate_property_get(self, name):
89-
state_property = lambda x: getattr(x, name)
90-
return self.values.apply(state_property)
91-
92-
93-
On occasion, we may want to be able to *set* the property being accessed.
94-
This is discouraged, but allowed (as long as the class being accessed
95-
allows the property to be set). Doing so requires implementing
96-
`_delegate_property_set`:
97-
98-
def _delegate_property_set(self, name, new_values):
99-
for (obj, val) in zip(self.values, new_values):
100-
setattr(obj, name, val)
101-
102-
103-
With these implemented, `StateDelegate` knows how to handle methods and
104-
properties. We just need to tell it what names and properties it is
105-
supposed to handle. This is done by decorating the `StateDelegate`
106-
class with `pd.accessors.wrap_delegate_names`. We apply the decorator
107-
once with a list of all the methods the accessor should recognize and
108-
once with a list of all the properties the accessor should recognize.
109-
110-
111-
@wrap_delegate_names(delegate=State,
112-
accessors=["fips"],
113-
typ="method")
114-
@wrap_delegate_names(delegate=State,
115-
accessors=["abbrev"],
116-
typ="property")
117-
class StateDelegate(PandasDelegate):
118-
[...]
119-
120-
121-
We can now pin the `state` accessor to the pd.Series class (we could
122-
alternatively pin it to the pd.Index class with a slightly different
123-
implementation above):
124-
125-
pd.Series.state = accessors.AccessorProperty(StateDelegate)
126-
127-
128-
>>> ser = pd.Series([State('Alabama'), State('California')])
129-
>>> isinstance(ser.state, StateDelegate)
130-
True
131-
132-
>>> ser.state.abbrev
133-
0 AL
134-
1 CA
135-
dtype: object
136-
137-
>>> ser.state.fips()
138-
0 1
139-
1 6
140-
141-
>>> ser.state.abbrev = ['Foo', 'Bar']
142-
>>> ser.state.abbrev
143-
0 Foo
144-
1 Bar
145-
dtype: object
146-
147-
148-
149-
"""
150-
from pandas.core.base import PandasObject
151-
from pandas.core import common as com
152-
153-
154-
class PandasDelegate(PandasObject):
155-
""" an abstract base class for delegating methods/properties
32+
def _make_accessor(cls, data): # pragma: no cover
33+
"""
34+
_make_accessor should implement any necessary validation on the
35+
data argument to ensure that the properties/methods being
36+
accessed will be available.
15637
157-
Usage: To make a custom accessor, start by subclassing `Delegate`.
158-
See example in the module-level docstring.
38+
_make_accessor should return cls(data). If necessary, the arguments
39+
to the constructor can be expanded. In this case, __init__ will
40+
need to be overridden as well.
15941
160-
"""
42+
Parameters
43+
----------
44+
data : the underlying object being accessed, usually Series or Index
16145
162-
def __init__(self, values):
163-
self.values = values
164-
# #self._freeze()
46+
Returns
47+
-------
48+
Delegate : instance of PandasDelegate or subclass
16549
166-
@classmethod
167-
def _make_accessor(cls, data): # pragma: no cover
50+
"""
16851
raise NotImplementedError(
16952
'It is up to subclasses to implement '
17053
'_make_accessor. This does input validation on the object to '
17154
'which the accessor is being pinned. '
17255
'It should return an instance of `cls`.')
56+
# return cls(data)
17357

17458
def _delegate_property_get(self, name, *args, **kwargs):
17559
raise TypeError("You cannot access the "
17660
"property {name}".format(name=name))
17761

17862
def _delegate_property_set(self, name, value, *args, **kwargs):
63+
"""
64+
Overriding _delegate_property_set is discouraged. It is generally
65+
better to directly interact with the underlying data than to
66+
alter it via the accessor.
67+
68+
An example that ignores this advice can be found in
69+
tests.test_accessors.TestVectorizedAccessor
70+
"""
17971
raise TypeError("The property {name} cannot be set".format(name=name))
18072

18173
def _delegate_method(self, name, *args, **kwargs):
@@ -242,14 +134,8 @@ def create_delegator_method(name, delegate):
242134
def func(self, *args, **kwargs):
243135
return self._delegate_method(name, *args, **kwargs)
244136

245-
if callable(name):
246-
# A function/method was passed directly instead of a name
247-
# This may also render the `delegate` arg unnecessary.
248-
func.__name__ = name.__name__ # TODO: is this generally valid?
249-
func.__doc__ = name.__doc__
250-
else:
251-
func.__name__ = name
252-
func.__doc__ = getattr(delegate, name).__doc__
137+
func.__name__ = name
138+
func.__doc__ = getattr(delegate, name).__doc__
253139
return func
254140

255141
@staticmethod
@@ -294,13 +180,10 @@ def add_delegate_accessors(cls):
294180
else:
295181
func = Delegator.create_delegator_method(name, delegate)
296182

297-
# Allow for a callable to be passed instead of a name.
298-
title = com._get_callable_name(name)
299-
title = title or name
300183
# don't overwrite existing methods/properties unless
301184
# specifically told to do so
302-
if overwrite or not hasattr(cls, title):
303-
setattr(cls, title, func)
185+
if overwrite or not hasattr(cls, name):
186+
setattr(cls, name, func)
304187

305188
return cls
306189

pandas/tests/test_accessors.py

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,22 @@
55
An example/recipe/test for implementing custom accessors.
66
77
"""
8+
import unittest
9+
import pandas.util.testing as tm
810

911
import pandas as pd
1012

1113
from pandas.core.accessors import (wrap_delegate_names,
1214
PandasDelegate, AccessorProperty)
1315

16+
# Example 1:
17+
# An accessor for attributes of custom class in a Series with object dtype.
18+
1419

1520
class State(object):
21+
"""
22+
A dummy class for which only two states have the attributes implemented.
23+
"""
1624
def __repr__(self):
1725
return repr(self.name)
1826

@@ -72,20 +80,80 @@ def _delegate_property_set(self, name, new_values):
7280
setattr(obj, name, val)
7381

7482

75-
def test_geo_state_accessor():
76-
import pandas.util.testing as tm
83+
class TestVectorizedAccessor(unittest.TestCase):
84+
85+
@classmethod
86+
def setup_class(cls):
87+
pd.Series.state = AccessorProperty(StateDelegate)
88+
89+
cls.ser = pd.Series([State('Alabama'), State('California')])
90+
91+
@classmethod
92+
def teardown_class(cls):
93+
del pd.Series.state
94+
# TODO: is there a nicer way to do this with `mock`?
95+
96+
def test_method(self):
97+
ser = self.ser
98+
fips = pd.Series([1, 6])
99+
tm.assert_series_equal(ser.state.fips(), fips)
100+
101+
def test_property_get(self):
102+
ser = self.ser
103+
abbrev = pd.Series(['AL', 'CA'])
104+
tm.assert_series_equal(ser.state.abbrev, abbrev)
105+
106+
def test_property_set(self):
107+
ser = self.ser.copy()
108+
109+
ser.state.abbrev = ['Foo', 'Bar']
110+
new_abbrev = pd.Series(['Foo', 'Bar'])
111+
tm.assert_series_equal(ser.state.abbrev, new_abbrev)
112+
77113

78-
pd.Series.state = AccessorProperty(StateDelegate)
114+
@wrap_delegate_names(delegate=pd.Series,
115+
accessors=["real", "imag"],
116+
typ="property")
117+
@wrap_delegate_names(delegate=pd.Series,
118+
accessors=["abs"],
119+
typ="method")
120+
class ForgotToOverride(PandasDelegate):
121+
# A case where the relevant methods were not overridden. Everything
122+
# should raise NotImplementedError or TypeError
123+
@classmethod
124+
def _make_accessor(cls, data):
125+
return cls(data)
126+
127+
128+
class TestUnDelegated(unittest.TestCase):
129+
130+
@classmethod
131+
def setup_class(cls):
132+
pd.Series.forgot = AccessorProperty(ForgotToOverride)
133+
134+
cls.ser = pd.Series(range(-2, 2))
135+
136+
@classmethod
137+
def teardown_class(cls):
138+
del pd.Series.forgot
79139

80-
ser = pd.Series([State('Alabama'), State('California')])
140+
def test_get_fails(self):
141+
forgot = self.ser.forgot
142+
with self.assertRaises(TypeError):
143+
forgot.real
81144

82-
abbrev = pd.Series(['AL', 'CA'])
83-
tm.assert_series_equal(ser.state.abbrev, abbrev)
145+
with self.assertRaises(TypeError):
146+
forgot.imag
84147

85-
fips = pd.Series([1, 6])
86-
tm.assert_series_equal(ser.state.fips(), fips)
148+
def test_set_fails(self):
149+
forgot = self.ser.forgot
150+
with self.assertRaises(TypeError):
151+
forgot.real = range(5)
87152

88-
ser.state.abbrev = ['Foo', 'Bar']
153+
# Check that the underlying hasn't been affected
154+
tm.assert_series_equal(self.ser, pd.Series(range(-2, 2)))
89155

90-
new_abbrev = pd.Series(['Foo', 'Bar'])
91-
tm.assert_series_equal(ser.state.abbrev, new_abbrev)
156+
def test_method_fails(self):
157+
forgot = self.ser.forgot
158+
with self.assertRaises(TypeError):
159+
forgot.abs()

0 commit comments

Comments
 (0)