Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
q_period = pd.period_range('2022-1-1', '2022-12-31', freq='Q')
y_period = pd.period_range('2019-1-1', '2023-1-1', freq='Y')
q_df = pd.DataFrame([range(len(q_period))], columns=q_period)
y_df = pd.DataFrame([range(len(y_period))], columns=y_period)
df1 = pd.concat([q_df, y_df], axis=1)
df2 = pd.concat([q_df, y_df], keys=['Quarterly', 'Yearly'], axis=1)
Issue Description
When the `keys` parameter is passed to `pd.concat`, this code raises an `IncompatibleFrequency` exception. This does not happen when `keys` is omitted, because both period indexes are then converted to a plain `Index` of `Period` instances.
---------------------------------------------------------------------------
IncompatibleFrequency Traceback (most recent call last)
<ipython-input-91-9d5cb81992f8> in <cell line: 8>()
6
7 df1 = pd.concat([q_df, y_df], axis=1)
----> 8 df2 = pd.concat([q_df, y_df], keys=['Quarterly', 'Yearly'], axis=1)
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
332
333 # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
366 1 3 4
367 """
--> 368 op = _Concatenator(
369 objs,
370 axis=axis,
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
561 self.copy = copy
562
--> 563 self.new_axes = self._get_new_axes()
564
565 def get_result(self):
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self)
631 def _get_new_axes(self) -> list[Index]:
632 ndim = self._get_result_dim()
--> 633 return [
634 self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
635 for i in range(ndim)
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/reshape/concat.py in <listcomp>(.0)
632 ndim = self._get_result_dim()
633 return [
--> 634 self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
635 for i in range(ndim)
636 ]
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/reshape/concat.py in _get_concat_axis(self)
691 concat_axis = _concat_indexes(indexes)
692 else:
--> 693 concat_axis = _make_concat_multiindex(
694 indexes, self.keys, self.levels, self.names
695 )
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/reshape/concat.py in _make_concat_multiindex(indexes, keys, levels, names)
762 codes_list.extend(concat_index.codes)
763 else:
--> 764 codes, categories = factorize_from_iterable(concat_index)
765 levels.append(categories)
766 codes_list.append(codes)
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/arrays/categorical.py in factorize_from_iterable(values)
2978 # but only the resulting categories, the order of which is independent
2979 # from ordered. Set ordered to False as default. See GH #15457
-> 2980 cat = Categorical(values, ordered=False)
2981 categories = cat.categories
2982 codes = cat.codes
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath, copy)
439 if dtype.categories is None:
440 try:
--> 441 codes, categories = factorize(values, sort=True)
442 except TypeError as err:
443 codes, categories = factorize(values, sort=False)
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/algorithms.py in factorize(values, sort, na_sentinel, use_na_sentinel, size_hint)
830 # TODO: Can remove when na_sentinel=na_sentinel as in TODO above
831 na_sentinel = -1
--> 832 uniques, codes = safe_sort(
833 uniques, codes, na_sentinel=na_sentinel, assume_unique=True, verify=False
834 )
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/core/algorithms.py in safe_sort(values, codes, na_sentinel, assume_unique, verify)
1868 else:
1869 try:
-> 1870 sorter = values.argsort()
1871 if is_mi:
1872 # Operate on original object instead of casted array (MultiIndex)
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/_libs/tslibs/period.pyx in pandas._libs.tslibs.period._Period.__richcmp__()
~/devel/stackoverflow/venv/lib/python3.9/site-packages/pandas/_libs/tslibs/period.pyx in pandas._libs.tslibs.period.PeriodMixin._require_matching_freq()
IncompatibleFrequency: Input has different freq=Q-DEC from Period(freq=A-DEC)
Expected Behavior
I don't expect anything in particular; I just want to understand the behavior. I answered this SO question. In my original answer, I used `pd.concat` with `keys` to create a `MultiIndex`. As it didn't work as expected, I downcast the `Period` instances to strings.
Maybe, instead of raising an exception, pandas could convert the columns to a plain `Index` (as it does when the `keys` parameter is not used) and raise a `UserWarning` or `RuntimeWarning` about the incompatible `PeriodIndex` frequencies?
(You are doing a great job with Pandas, thank you so much!)
Installed Versions
INSTALLED VERSIONS
commit : 2e218d1
python : 3.9.16.final.0
python-bits : 64
OS : Linux
OS-release : 6.1.7-arch1-1
Version : #1 SMP PREEMPT_DYNAMIC Wed, 18 Jan 2023 19:54:38 +0000
machine : x86_64
processor :
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.5.3
numpy : 1.21.5
pytz : 2022.1
dateutil : 2.8.2
setuptools : 65.6.3
pip : 22.3.1
Cython : 0.29.30
pytest : 7.2.0
hypothesis : None
sphinx : 5.0.1
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 4.7.1
html5lib : 1.1
pymysql : 1.0.2
psycopg2 : 2.9.3
jinja2 : 3.1.2
IPython : 7.34.0
pandas_datareader: 0.10.0
bs4 : 4.10.0
bottleneck : None
brotli : 1.0.9
fastparquet : None
fsspec : 2022.01.0
gcsfs : None
matplotlib : 3.5.2
numba : 0.55.1
numexpr : 2.8.3
odfpy : None
openpyxl : 3.1.0
pandas_gbq : None
pyarrow : 6.0.1
pyreadstat : None
pyxlsb : 1.0.8
s3fs : None
scipy : 1.7.1
snappy : None
sqlalchemy : 1.4.25
tables : 3.8.0
tabulate : 0.8.9
xarray : 0.20.1
xlrd : 2.0.1
xlwt : None
zstandard : 0.19.0
tzdata : None