Skip to content

Commit b303665

Browse files
authored
DEPR: ArrayManager (#55044)
* DEPR: ArrayManager
* Fixup
* Test fixup
* debug CI
* Test fixup
* warn if PANDAS_DATA_MANAGER is set
* single_cpu
1 parent 49c89d2 commit b303665

File tree

12 files changed

+98
-29
lines changed

12 files changed

+98
-29
lines changed

doc/source/whatsnew/v2.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ Deprecations
199199
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
200200
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
201201
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
202+
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
202203
- Deprecating downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
203204

204205
.. ---------------------------------------------------------------------------

pandas/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
from __future__ import annotations
22

3+
import os
4+
import warnings
5+
36
__docformat__ = "restructuredtext"
47

58
# Let users know if they're missing any of our hard dependencies
@@ -190,6 +193,17 @@
190193
__git_version__ = v.get("full-revisionid")
191194
del get_versions, v
192195

196+
# GH#55043 - deprecation of the data_manager option
197+
if "PANDAS_DATA_MANAGER" in os.environ:
198+
warnings.warn(
199+
"The env variable PANDAS_DATA_MANAGER is set. The data_manager option is "
200+
"deprecated and will be removed in a future version. Only the BlockManager "
201+
"will be available. Unset this environment variable to silence this warning.",
202+
FutureWarning,
203+
stacklevel=2,
204+
)
205+
# Don't allow users to use pandas.os or pandas.warnings
206+
del os, warnings
193207

194208
# module level doc-string
195209
__doc__ = """

pandas/conftest.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
utc,
5050
)
5151

52+
from pandas._config.config import _get_option
53+
5254
import pandas.util._test_decorators as td
5355

5456
from pandas.core.dtypes.dtypes import (
@@ -1983,15 +1985,18 @@ def using_array_manager() -> bool:
19831985
"""
19841986
Fixture to check if the array manager is being used.
19851987
"""
1986-
return pd.options.mode.data_manager == "array"
1988+
return _get_option("mode.data_manager", silent=True) == "array"
19871989

19881990

19891991
@pytest.fixture
19901992
def using_copy_on_write() -> bool:
19911993
"""
19921994
Fixture to check if Copy-on-Write is enabled.
19931995
"""
1994-
return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
1996+
return (
1997+
pd.options.mode.copy_on_write
1998+
and _get_option("mode.data_manager", silent=True) == "block"
1999+
)
19952000

19962001

19972002
warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]

pandas/core/config_init.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,13 @@ def use_inf_as_na_cb(key) -> None:
454454
validator=is_one_of_factory(["block", "array"]),
455455
)
456456

457+
cf.deprecate_option(
458+
# GH#55043
459+
"mode.data_manager",
460+
"data_manager option is deprecated and will be removed in a future "
461+
"version. Only the BlockManager will be available.",
462+
)
463+
457464

458465
# TODO better name?
459466
copy_on_write_doc = """

pandas/core/frame.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
get_option,
4444
using_copy_on_write,
4545
)
46+
from pandas._config.config import _get_option
4647

4748
from pandas._libs import (
4849
algos as libalgos,
@@ -694,7 +695,7 @@ def __init__(
694695
NDFrame.__init__(self, data)
695696
return
696697

697-
manager = get_option("mode.data_manager")
698+
manager = _get_option("mode.data_manager", silent=True)
698699

699700
# GH47215
700701
if isinstance(index, set):
@@ -2411,7 +2412,7 @@ def maybe_reorder(
24112412

24122413
columns = columns.drop(exclude)
24132414

2414-
manager = get_option("mode.data_manager")
2415+
manager = _get_option("mode.data_manager", silent=True)
24152416
mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)
24162417

24172418
return cls(mgr)
@@ -2612,7 +2613,7 @@ def _from_arrays(
26122613
if dtype is not None:
26132614
dtype = pandas_dtype(dtype)
26142615

2615-
manager = get_option("mode.data_manager")
2616+
manager = _get_option("mode.data_manager", silent=True)
26162617
columns = ensure_index(columns)
26172618
if len(columns) != len(arrays):
26182619
raise ValueError("len(columns) must match len(arrays)")

pandas/core/series.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,8 @@
2626

2727
import numpy as np
2828

29-
from pandas._config import (
30-
get_option,
31-
using_copy_on_write,
32-
)
29+
from pandas._config import using_copy_on_write
30+
from pandas._config.config import _get_option
3331

3432
from pandas._libs import (
3533
lib,
@@ -404,7 +402,7 @@ def __init__(
404402
if fastpath:
405403
# data is a ndarray, index is defined
406404
if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
407-
manager = get_option("mode.data_manager")
405+
manager = _get_option("mode.data_manager", silent=True)
408406
if manager == "block":
409407
data = SingleBlockManager.from_array(data, index)
410408
elif manager == "array":
@@ -510,7 +508,7 @@ def __init__(
510508
else:
511509
data = sanitize_array(data, index, dtype, copy)
512510

513-
manager = get_option("mode.data_manager")
511+
manager = _get_option("mode.data_manager", silent=True)
514512
if manager == "block":
515513
data = SingleBlockManager.from_array(data, index, refs=refs)
516514
elif manager == "array":

pandas/io/parquet.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from warnings import catch_warnings
1414

1515
from pandas._config import using_pyarrow_string_dtype
16+
from pandas._config.config import _get_option
1617

1718
from pandas._libs import lib
1819
from pandas.compat._optional import import_optional_dependency
@@ -258,7 +259,7 @@ def read(
258259
elif using_pyarrow_string_dtype():
259260
to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()
260261

261-
manager = get_option("mode.data_manager")
262+
manager = _get_option("mode.data_manager", silent=True)
262263
if manager == "array":
263264
to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment]
264265

pandas/tests/extension/conftest.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import pytest
44

5+
from pandas._config.config import _get_option
6+
57
from pandas import (
68
Series,
79
options,
@@ -212,4 +214,7 @@ def using_copy_on_write() -> bool:
212214
"""
213215
Fixture to check if Copy-on-Write is enabled.
214216
"""
215-
return options.mode.copy_on_write and options.mode.data_manager == "block"
217+
return (
218+
options.mode.copy_on_write
219+
and _get_option("mode.data_manager", silent=True) == "block"
220+
)

pandas/tests/internals/test_managers.py

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
"""
22
Testing interaction between the different managers (BlockManager, ArrayManager)
33
"""
4+
import os
5+
import subprocess
6+
import sys
7+
8+
import pytest
9+
410
from pandas.core.dtypes.missing import array_equivalent
511

612
import pandas as pd
@@ -14,12 +20,19 @@
1420

1521

1622
def test_dataframe_creation():
17-
with pd.option_context("mode.data_manager", "block"):
18-
df_block = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
23+
msg = "data_manager option is deprecated"
24+
with tm.assert_produces_warning(FutureWarning, match=msg):
25+
with pd.option_context("mode.data_manager", "block"):
26+
df_block = pd.DataFrame(
27+
{"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}
28+
)
1929
assert isinstance(df_block._mgr, BlockManager)
2030

21-
with pd.option_context("mode.data_manager", "array"):
22-
df_array = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
31+
with tm.assert_produces_warning(FutureWarning, match=msg):
32+
with pd.option_context("mode.data_manager", "array"):
33+
df_array = pd.DataFrame(
34+
{"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}
35+
)
2336
assert isinstance(df_array._mgr, ArrayManager)
2437

2538
# also ensure both are seen as equal
@@ -45,12 +58,15 @@ def test_dataframe_creation():
4558

4659

4760
def test_series_creation():
48-
with pd.option_context("mode.data_manager", "block"):
49-
s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
61+
msg = "data_manager option is deprecated"
62+
with tm.assert_produces_warning(FutureWarning, match=msg):
63+
with pd.option_context("mode.data_manager", "block"):
64+
s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
5065
assert isinstance(s_block._mgr, SingleBlockManager)
5166

52-
with pd.option_context("mode.data_manager", "array"):
53-
s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
67+
with tm.assert_produces_warning(FutureWarning, match=msg):
68+
with pd.option_context("mode.data_manager", "array"):
69+
s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
5470
assert isinstance(s_array._mgr, SingleArrayManager)
5571

5672
# also ensure both are seen as equal
@@ -68,3 +84,20 @@ def test_series_creation():
6884
result = s_array._as_manager("block")
6985
assert isinstance(result._mgr, SingleBlockManager)
7086
tm.assert_series_equal(result, s_array)
87+
88+
89+
@pytest.mark.single_cpu
90+
@pytest.mark.parametrize("manager", ["block", "array"])
91+
def test_array_manager_depr_env_var(manager):
92+
# GH#55043
93+
test_env = os.environ.copy()
94+
test_env["PANDAS_DATA_MANAGER"] = manager
95+
response = subprocess.run(
96+
[sys.executable, "-c", "import pandas"],
97+
capture_output=True,
98+
env=test_env,
99+
check=True,
100+
)
101+
msg = "FutureWarning: The env variable PANDAS_DATA_MANAGER is set"
102+
stderr_msg = response.stderr.decode("utf-8")
103+
assert msg in stderr_msg, stderr_msg

pandas/tests/io/test_parquet.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import (
12-
get_option,
13-
using_copy_on_write,
14-
)
11+
from pandas._config import using_copy_on_write
12+
from pandas._config.config import _get_option
1513

1614
from pandas.compat import is_platform_windows
1715
from pandas.compat.pyarrow import (
@@ -61,7 +59,8 @@
6159
pytest.param(
6260
"fastparquet",
6361
marks=pytest.mark.skipif(
64-
not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array",
62+
not _HAVE_FASTPARQUET
63+
or _get_option("mode.data_manager", silent=True) == "array",
6564
reason="fastparquet is not installed or ArrayManager is used",
6665
),
6766
),
@@ -88,7 +87,7 @@ def pa():
8887
def fp():
8988
if not _HAVE_FASTPARQUET:
9089
pytest.skip("fastparquet is not installed")
91-
elif get_option("mode.data_manager") == "array":
90+
elif _get_option("mode.data_manager", silent=True) == "array":
9291
pytest.skip("ArrayManager is not supported with fastparquet")
9392
return "fastparquet"
9493

pandas/util/_test_decorators.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ def test_foo():
3838

3939
if TYPE_CHECKING:
4040
from pandas._typing import F
41+
42+
from pandas._config.config import _get_option
43+
4144
from pandas.compat import (
4245
IS64,
4346
is_platform_windows,
@@ -230,12 +233,12 @@ def mark_array_manager_not_yet_implemented(request) -> None:
230233

231234

232235
skip_array_manager_not_yet_implemented = pytest.mark.xfail(
233-
get_option("mode.data_manager") == "array",
236+
_get_option("mode.data_manager", silent=True) == "array",
234237
reason="Not yet implemented for ArrayManager",
235238
)
236239

237240
skip_array_manager_invalid_test = pytest.mark.skipif(
238-
get_option("mode.data_manager") == "array",
241+
_get_option("mode.data_manager", silent=True) == "array",
239242
reason="Test that relies on BlockManager internals or specific behaviour",
240243
)
241244

scripts/validate_unwanted_patterns.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
"_chained_assignment_msg",
5252
"_chained_assignment_method_msg",
5353
"_version_meson",
54+
# TODO(3.0): GH#55043 - remove upon removal of ArrayManager
55+
"_get_option",
5456
}
5557

5658

0 commit comments

Comments
 (0)