Skip to content

Commit 468644b

Browse files
MAC Address support (#12)
1 parent df6853a commit 468644b

15 files changed

+495
-320
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ install:
2121
script:
2222
- echo "script start"
2323
- source activate test-environment
24-
- pytest cyberpandas
25-
- flake8 cyberpandas
24+
- pytest
25+
- flake8
2626
- source ./ci/build.sh
2727

2828
after_success:

cyberpandas/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
IPType,
55
IPArray,
66
IPAccessor,
7-
IPAddressIndex,
87
)
98
from .parser import to_ipaddress # noqa
109

cyberpandas/base.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import operator
2+
3+
import numpy as np
4+
5+
import pandas as pd
6+
from pandas.core.arrays import ExtensionArray
7+
8+
from ._utils import refactorize
9+
10+
11+
class NumPyBackedExtensionArrayMixin(ExtensionArray):
    """Shared ExtensionArray behaviour for arrays backed by ``self.data``.

    ``self.data`` is used throughout as a NumPy array (it is sliced, copied,
    concatenated with ``np.concatenate`` and passed to ``np.unique``).

    This class does not define the following names; the methods below read
    them, so the class it is mixed into has to provide them:
    ``data``, ``_dtype`` (whose ``_record_type`` is used by ``factorize``),
    ``_itemsize``, ``_box_scalar``, ``_format_values``, ``_from_ndarray``
    and ``isna``.
    """

    @property
    def dtype(self):
        """The dtype for this extension array (the stored ``_dtype``)."""
        return self._dtype

    @classmethod
    def _constructor_from_sequence(cls, scalars):
        """Build a new array by passing ``scalars`` to the constructor."""
        return cls(scalars)

    @property
    def shape(self):
        """One-element tuple holding the length of ``self.data``."""
        return (len(self.data),)

    def __len__(self):
        """Number of elements in ``self.data``."""
        return len(self.data)

    def __getitem__(self, *args):
        """Index into ``self.data``, boxing scalars and re-wrapping arrays.

        Returns
        -------
        scalar or same type as ``self``
            A tuple result, or a 0-dimensional result (after ``.item()``),
            is passed through ``self._box_scalar``. Anything else is wrapped
            in a new instance of ``type(self)``.
        """
        result = operator.getitem(self.data, *args)
        if isinstance(result, tuple):
            return self._box_scalar(result)
        elif result.ndim == 0:
            return self._box_scalar(result.item())
        else:
            return type(self)(result)

    def setitem(self, indexer, value):
        """Set ``value`` at ``indexer`` in place and return ``self``.

        The assignment itself is delegated to ``self[indexer] = value``, so
        it relies on ``__setitem__``, which is not defined in this class.
        """
        # Kept separate from __setitem__ on purpose: this method has to
        # return the array, whereas __setitem__ does not return anything.
        self[indexer] = value
        return self

    @property
    def nbytes(self):
        """``self._itemsize`` multiplied by the number of elements."""
        return self._itemsize * len(self)

    def _formatting_values(self):
        """Object-dtype ndarray of the values from ``self._format_values()``."""
        return np.array(self._format_values(), dtype='object')

    def copy(self, deep=False):
        """Return a new array wrapping a copy of ``self.data``.

        ``deep`` is accepted but not consulted; the underlying data is
        always copied.
        """
        return type(self)(self.data.copy())

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate the ``data`` of each array in ``to_concat``."""
        return cls(np.concatenate([array.data for array in to_concat]))

    def tolist(self):
        """Return ``self.data.tolist()`` (the values are not boxed)."""
        return self.data.tolist()

    def argsort(self, axis=-1, kind='quicksort', order=None):
        """Return the indices that would sort ``self.data``.

        ``axis``, ``kind`` and ``order`` are accepted but not forwarded;
        ``self.data.argsort()`` is called with its defaults.
        """
        return self.data.argsort()

    def unique(self):
        # type: () -> ExtensionArray
        """Return the distinct values, in order of first appearance."""
        # https://github.com/pandas-dev/pandas/pull/19869
        # np.unique returns the index of each value's first occurrence;
        # sorting those indices restores the original ordering.
        _, indices = np.unique(self.data, return_index=True)
        data = self.data.take(np.sort(indices))
        return self._from_ndarray(data)

    def factorize(self, na_sentinel=-1):
        """Factorize the array into integer labels and unique values.

        Calling :meth:`pandas.Series.factorize` or :meth:`pandas.factorize`
        will dispatch to this method.

        Parameters
        ----------
        na_sentinel : int, default -1
            The value in `labels` to use for indicating missing values in
            `self`.

        Returns
        -------
        labels : ndarray
            An integer-type ndarray the same length as `self`. Each newly-
            observed value in `self` will be assigned the next integer.
            Missing values in self are assigned `na_sentinel`.
        uniques : same type as `self`
            The unique values in `self` in order of appearance, not
            including values that ``isna()`` reports as missing (for an
            ``IPArray`` that is ``IPv4Address('0.0.0.0')``).

        See Also
        --------
        pandas.factorize, pandas.Series.factorize

        Examples
        --------
        >>> arr = IPArray([2, 2, 0, 1, 2, 2**64 + 1])
        >>> arr
        IPArray(['0.0.0.2', '0.0.0.2', '0.0.0.0', '0.0.0.1',
                 '0.0.0.2', '::1:0:0:0:1'])

        >>> labels, uniques = arr.factorize()
        >>> labels
        array([ 0,  0, -1,  1,  0,  2])

        Notice that `uniques` does not include the missing value.
        >>> uniques
        IPArray(['0.0.0.2', '0.0.0.1', '::1:0:0:0:1'])
        """
        # OK, so here's the plan.
        # Start with factorizing `self.data`, which has two unfortunate issues
        # 1. Requires casting to object.
        # 2. Gets the NA logic wrong, since (0, 0) isn't NA to pandas.
        # For now, we can't help with 1. Maybe someday.
        # For 2, we can "fix" things with a little post-factorization cleanup.
        labels, uniques = pd.factorize(self.data)
        mask = self.isna()
        any_na = mask.any()

        if any_na:
            # argmax on a boolean mask gives the position of the first True,
            # i.e. the first missing value.
            first_na = mask.argmax()
            refactorize(labels, first_na, na_sentinel=na_sentinel)  # inplace op

        # uniques is an ndarray of tuples. Go to our record type, then wrap
        # it in our own array type.
        result = type(self)(uniques.astype(self.dtype._record_type))
        # May have a missing value.
        if any_na:
            result = result[~result.isna()]
        return labels, result

0 commit comments

Comments
 (0)