Description
Code Sample
Running the following code, which converts every object-dtype column to the category dtype, works as expected:
import pandas as pd
df = pd.DataFrame({'a': ['1',1,3], 'b' : [1,2,3]})
print(df.dtypes)
categoricals = list(df.select_dtypes(include='object').columns.values)
df[categoricals] = df[categoricals].astype('category')
print(df.dtypes)
which prints:
a object
b int64
dtype: object
a category
b int64
dtype: object
If a duplicate column name is mistakenly added to the list (here 'a' is added a second time):
import pandas as pd
df = pd.DataFrame({'a': ['1',1,3], 'b' : [1,2,3]})
print(df.dtypes)
categoricals = list(df.select_dtypes(include='object').columns.values)
categoricals =categoricals + ['a']
df[categoricals] = df[categoricals].astype('category')
print(df.dtypes)
Python crashes with
a object
b int64
dtype: object
Fatal Python error: Cannot recover from stack overflow.
Current thread 0x00007f806cac8700 (most recent call first):
File "<frozen importlib._bootstrap>", line 172 in _get_module_lock
File "<frozen importlib._bootstrap>", line 148 in __enter__
File "<frozen importlib._bootstrap>", line 960 in _find_and_load
File "<frozen importlib._bootstrap>", line 205 in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 936 in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 961 in _find_and_load
File "<frozen importlib._bootstrap>", line 205 in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 936 in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 961 in _find_and_load
File "/home/runner/.site-packages/pandas/core/indexes/base.py", line 4960 in _ensure_index
File "/home/runner/.site-packages/pandas/core/indexes/base.py", line 3363 in get_indexer_non_unique
File "/home/runner/.site-packages/pandas/core/indexes/base.py", line 3386 in get_indexer_for
File "/home/runner/.site-packages/pandas/core/internals.py", line 4132 in get
File "/home/runner/.site-packages/pandas/core/frame.py", line 2698 in _getitem_column
File "/home/runner/.site-packages/pandas/core/frame.py", line 2671 in __getitem__
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
File "/home/runner/.site-packages/pandas/core/generic.py", line 4996 in <genexpr>
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 256 in __init__
File "/home/runner/.site-packages/pandas/core/reshape/concat.py", line 225 in concat
File "/home/runner/.site-packages/pandas/core/generic.py", line 5005 in astype
File "/home/runner/.site-packages/pandas/util/_decorators.py", line 178 in wrapper
Problem description
One would expect pandas either to raise an error stating that the column list contains duplicates, or to drop the duplicates, instead of recursing until the interpreter crashes with a stack overflow.
I'm using Python 3.6.1 and pandas-0.23.4.
Expected Output
"The list of columns you have supplied has duplicates"
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.6.1.final.0
python-bits: 64
OS: Linux
OS-release: 4.13.0-1011-gcp
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: C.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.23.4
pytest: None
pip: 9.0.1
setuptools: 40.6.2
Cython: None
numpy: 1.15.4
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: None
sphinx: None
patsy: None
dateutil: 2.7.5
pytz: 2018.7
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 2.2.3
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: None
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None