Description
Code Sample, a copy-pastable example if possible
In [2]: i = pd.Series(list('abcdefghijk'*10**5))
In [3]: alt = [-1, 'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GR']*6
In [4]: res = i[:10**6].isin(alt)
In [5]: res = i[:10**6+1].isin(alt)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-f3e21d855671> in <module>()
----> 1 res = i[:10**6+1].isin(alt)
/home/pietro/nobackup/repo/pandas/pandas/core/series.py in isin(self, values)
2458
2459 """
-> 2460 result = algorithms.isin(_values_from_object(self), values)
2461 return self._constructor(result, index=self.index).__finalize__(self)
2462
/home/pietro/nobackup/repo/pandas/pandas/core/algorithms.py in isin(comps, values)
421 comps = comps.astype(object)
422
--> 423 return f(comps, values)
424
425
/home/pietro/nobackup/repo/pandas/pandas/core/algorithms.py in <lambda>(x, y)
401 f = lambda x, y: htable.ismember_object(x, values)
402 if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
--> 403 f = lambda x, y: np.in1d(x, y)
404 elif is_integer_dtype(comps):
405 try:
/usr/lib/python3/dist-packages/numpy/lib/arraysetops.py in in1d(ar1, ar2, assume_unique, invert)
399 if not assume_unique:
400 ar1, rev_idx = np.unique(ar1, return_inverse=True)
--> 401 ar2 = np.unique(ar2)
402
403 ar = np.concatenate((ar1, ar2))
/usr/lib/python3/dist-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts)
212 aux = ar[perm]
213 else:
--> 214 ar.sort()
215 aux = ar
216 flag = np.concatenate(([True], aux[1:] != aux[:-1]))
TypeError: unorderable types: str() > int()
Problem description
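Although the length of `alt` also matters in some way, even with a completely different dataset the problem still started at 1M elements in the `Series` (or even `Index`) being searched. The traceback above shows where the threshold comes from: when `len(comps) > 1000000`, `algorithms.isin` switches to `np.in1d`, which sorts the values through `np.unique` and so fails on a list mixing `int` and `str` under Python 3. By the way, triggering the error takes much more time than the successful operation.
Might be related to #13432, although that one is unrelated to the length of the `Series`.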
Expected Output
Like `In [4]`, just with one more element.
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.7.0-1-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.utf8
LOCALE: it_IT.UTF-8
pandas: 0.19.0+783.gcd35d22a0
pytest: 3.0.6
pip: 9.0.1
setuptools: 33.1.1
Cython: 0.25.2
numpy: 1.12.0
scipy: 0.18.1
xarray: 0.9.1
IPython: 5.1.0.dev
sphinx: 1.4.9
patsy: 0.3.0-dev
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: 1.2.0
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.0.0
openpyxl: 2.3.0
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.6
lxml: 3.7.1
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.8
s3fs: None
pandas_gbq: None
pandas_datareader: 0.2.1