Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
import warnings
warnings.simplefilter("error")
df = pd.DataFrame({"a": [1, 2, 3], "b": pd.Categorical(["x", "x", "y"])})
df.value_counts()
---------------------------------------------------------------------------
FutureWarning Traceback (most recent call last)
Cell In [2], line 5
3 warnings.simplefilter("error")
4 df = pd.DataFrame({"a": [1, 2, 3], "b": pd.Categorical(["x", "x", "y"])})
----> 5 df.value_counts()
File ~/miniconda/envs/py310/lib/python3.10/site-packages/pandas/core/frame.py:7266, in DataFrame.value_counts(self, subset, normalize, sort, ascending, dropna)
7263 subset = self.columns.tolist()
7265 name = "proportion" if normalize else "count"
-> 7266 counts = self.groupby(subset, dropna=dropna).grouper.size()
7267 counts.name = name
7269 if sort:
File ~/miniconda/envs/py310/lib/python3.10/site-packages/pandas/core/frame.py:8870, in DataFrame.groupby(self, by, axis, level, as_index, sort, group_keys, observed, dropna)
8867 if level is None and by is None:
8868 raise TypeError("You have to supply one of 'by' and 'level'")
-> 8870 return DataFrameGroupBy(
8871 obj=self,
8872 keys=by,
8873 axis=axis,
8874 level=level,
8875 as_index=as_index,
8876 sort=sort,
8877 group_keys=group_keys,
8878 observed=observed,
8879 dropna=dropna,
8880 )
File ~/miniconda/envs/py310/lib/python3.10/site-packages/pandas/core/groupby/groupby.py:1283, in GroupBy.__init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, observed, dropna)
1281 if observed is lib.no_default:
1282 if any(ping._passed_categorical for ping in grouper.groupings):
-> 1283 warnings.warn(
1284 "The default of observed=False is deprecated and will be changed "
1285 "to True in a future version of pandas. Pass observed=False to "
1286 "retain current behavior or observed=True to adopt the future "
1287 "default and silence this warning.",
1288 FutureWarning,
1289 stacklevel=find_stack_level(),
1290 )
1291 observed = False
1292 self.observed = observed
FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
Issue Description
`DataFrame.value_counts` issues a `FutureWarning` when at least one of the columns has a categorical dtype. AFAICT, this warning is non-actionable by the user because it is triggered from a groupby operation internal to `value_counts`, and the relevant parameter (`observed`) is not exposed in the `value_counts` API.
I guess the warning itself is valid and indicates that the behavior of `DataFrame.value_counts` is going to change in the future. But the warning's advice does not apply here: the user cannot actually pass a parameter to (a) ensure forward-compatibility and (b) make the warning go away.
Expected Behavior
I think there are two options:
- `DataFrame.value_counts` could pass `observed=False` internally to avoid triggering the warning and to prevent its behavior from changing in the future.
- `DataFrame.value_counts` could expose the `observed` parameter so that users can control how the upcoming change to `GroupBy` affects their code.
Installed Versions
`pd.show_versions` is erroring out for me, but my pandas version is `2.1.0rc0`. I'll put the traceback in the details in case it's useful...
/Users/mwaskom/miniconda/envs/py310/lib/python3.10/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils.
warnings.warn("Setuptools is replacing distutils.")
---------------------------------------------------------------------------
SystemError Traceback (most recent call last)
Cell In [2], line 1
----> 1 pd.show_versions()
File ~/miniconda/envs/py310/lib/python3.10/site-packages/pandas/util/_print_versions.py:141, in show_versions(as_json)
104 """
105 Provide useful information, important for bug reports.
106
(...)
138 ...
139 """
140 sys_info = _get_sys_info()
--> 141 deps = _get_dependency_info()
143 if as_json:
144 j = {"system": sys_info, "dependencies": deps}
File ~/miniconda/envs/py310/lib/python3.10/site-packages/pandas/util/_print_versions.py:98, in _get_dependency_info()
96 result: dict[str, JSONSerializable] = {}
97 for modname in deps:
---> 98 mod = import_optional_dependency(modname, errors="ignore")
99 result[modname] = get_version(mod) if mod else None
100 return result
File ~/miniconda/envs/py310/lib/python3.10/site-packages/pandas/compat/_optional.py:143, in import_optional_dependency(name, extra, errors, min_version)
138 msg = (
139 f"Missing optional dependency '{install_name}'. {extra} "
140 f"Use pip or conda to install {install_name}."
141 )
142 try:
--> 143 module = importlib.import_module(name)
144 except ImportError:
145 if errors == "raise":
File ~/miniconda/envs/py310/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1006, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:688, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:883, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File ~/miniconda/envs/py310/lib/python3.10/site-packages/numba/__init__.py:42
38 from numba.core.decorators import (cfunc, generated_jit, jit, njit, stencil,
39 jit_module)
41 # Re-export vectorize decorators and the thread layer querying function
---> 42 from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
43 get_num_threads, set_num_threads,
44 set_parallel_chunksize, get_parallel_chunksize,
45 get_thread_id)
47 # Re-export Numpy helpers
48 from numba.np.numpy_support import carray, farray, from_dtype
File ~/miniconda/envs/py310/lib/python3.10/site-packages/numba/np/ufunc/__init__.py:3
1 # -*- coding: utf-8 -*-
----> 3 from numba.np.ufunc.decorators import Vectorize, GUVectorize, vectorize, guvectorize
4 from numba.np.ufunc._internal import PyUFunc_None, PyUFunc_Zero, PyUFunc_One
5 from numba.np.ufunc import _internal, array_exprs
File ~/miniconda/envs/py310/lib/python3.10/site-packages/numba/np/ufunc/decorators.py:3
1 import inspect
----> 3 from numba.np.ufunc import _internal
4 from numba.np.ufunc.parallel import ParallelUFuncBuilder, ParallelGUFuncBuilder
6 from numba.core.registry import DelayedRegistry
SystemError: initialization of _internal failed without raising an exception