Skip to content

Commit f466f0f

Browse files
SQUASHED [ArrayManager] Add SingleArrayManager to back a Series
1 parent e742820 commit f466f0f

File tree

15 files changed

+278
-58
lines changed

15 files changed

+278
-58
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,9 @@ jobs:
163163
pytest pandas/tests/resample/ --array-manager
164164
pytest pandas/tests/reshape/merge --array-manager
165165
166+
pytest pandas/tests/series/methods --array-manager
167+
pytest pandas/tests/series/test_* --array-manager
168+
166169
# indexing subset (temporary since other tests don't pass yet)
167170
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager
168171
pytest pandas/tests/frame/indexing/test_where.py --array-manager

pandas/_libs/reduction.pyx

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,16 +59,20 @@ cdef class _BaseGrouper:
5959
cached_typ = self.typ(
6060
vslider.buf, dtype=vslider.buf.dtype, index=cached_ityp, name=self.name
6161
)
62+
self.has_block = hasattr(cached_typ._mgr, "_block")
6263
else:
6364
# See the comment in indexes/base.py about _index_data.
6465
# We need this for EA-backed indexes that have a reference
6566
# to a 1-d ndarray like datetime / timedelta / period.
6667
object.__setattr__(cached_ityp, '_index_data', islider.buf)
6768
cached_ityp._engine.clear_mapping()
6869
cached_ityp._cache.clear() # e.g. inferred_freq must go
69-
object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf)
70-
object.__setattr__(cached_typ._mgr._block, 'mgr_locs',
71-
slice(len(vslider.buf)))
70+
if self.has_block:
71+
object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf)
72+
object.__setattr__(cached_typ._mgr._block, 'mgr_locs',
73+
slice(len(vslider.buf)))
74+
else:
75+
cached_typ._mgr.arrays[0] = vslider.buf
7276
object.__setattr__(cached_typ, '_index', cached_ityp)
7377
object.__setattr__(cached_typ, 'name', self.name)
7478

@@ -108,6 +112,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
108112
cdef public:
109113
ndarray arr, index, dummy_arr, dummy_index
110114
object values, f, bins, typ, ityp, name
115+
bint has_block
111116

112117
def __init__(self, object series, object f, object bins):
113118

@@ -201,6 +206,7 @@ cdef class SeriesGrouper(_BaseGrouper):
201206
cdef public:
202207
ndarray arr, index, dummy_arr, dummy_index
203208
object f, labels, values, typ, ityp, name
209+
bint has_block
204210

205211
def __init__(self, object series, object f, object labels,
206212
Py_ssize_t ngroups):

pandas/_typing.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@
5858
from pandas.core.internals import (
5959
ArrayManager,
6060
BlockManager,
61+
SingleArrayManager,
62+
SingleBlockManager,
6163
)
6264
from pandas.core.resample import Resampler
6365
from pandas.core.series import Series
@@ -184,3 +186,4 @@
184186

185187
# internals
186188
Manager = Union["ArrayManager", "BlockManager"]
189+
SingleManager = Union["SingleArrayManager", "SingleBlockManager"]

pandas/core/construction.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -481,7 +481,6 @@ def sanitize_array(
481481
DataFrame constructor, as the dtype keyword there may be interpreted as only
482482
applying to a subset of columns, see GH#24435.
483483
"""
484-
485484
if isinstance(data, ma.MaskedArray):
486485
data = sanitize_masked_array(data)
487486

@@ -555,6 +554,7 @@ def sanitize_array(
555554
inferred = lib.infer_dtype(subarr, skipna=False)
556555
if inferred in {"interval", "period"}:
557556
subarr = array(subarr)
557+
subarr = extract_array(subarr, extract_numpy=True)
558558

559559
return subarr
560560

pandas/core/generic.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,7 @@
138138
from pandas.core.internals import (
139139
ArrayManager,
140140
BlockManager,
141+
SingleArrayManager,
141142
)
142143
from pandas.core.internals.construction import mgr_to_mgr
143144
from pandas.core.missing import find_valid_index
@@ -5563,7 +5564,7 @@ def _protect_consolidate(self, f):
55635564
Consolidate _mgr -- if the blocks have changed, then clear the
55645565
cache
55655566
"""
5566-
if isinstance(self._mgr, ArrayManager):
5567+
if isinstance(self._mgr, (ArrayManager, SingleArrayManager)):
55675568
return f()
55685569
blocks_before = len(self._mgr.blocks)
55695570
result = f()

pandas/core/groupby/generic.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1152,16 +1152,18 @@ def py_fallback(values: ArrayLike) -> ArrayLike:
11521152
result = result._consolidate()
11531153
assert isinstance(result, (Series, DataFrame)) # for mypy
11541154
mgr = result._mgr
1155-
assert isinstance(mgr, BlockManager)
1156-
1157-
# unwrap DataFrame to get array
1158-
if len(mgr.blocks) != 1:
1159-
# We've split an object block! Everything we've assumed
1160-
# about a single block input returning a single block output
1161-
# is a lie. See eg GH-39329
1162-
return mgr.as_array()
1155+
if isinstance(mgr, BlockManager):
1156+
# unwrap DataFrame to get array
1157+
if len(mgr.blocks) != 1:
1158+
# We've split an object block! Everything we've assumed
1159+
# about a single block input returning a single block output
1160+
# is a lie. See eg GH-39329
1161+
return mgr.as_array()
1162+
else:
1163+
result = mgr.blocks[0].values
1164+
return result
11631165
else:
1164-
result = mgr.blocks[0].values
1166+
result = mgr.arrays[0]
11651167
return result
11661168

11671169
def array_func(values: ArrayLike) -> ArrayLike:

pandas/core/indexing.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1590,7 +1590,11 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
15901590

15911591
# if there is only one block/type, still have to take split path
15921592
# unless the block is one-dimensional or it can hold the value
1593-
if not take_split_path and self.obj._mgr.blocks and self.ndim > 1:
1593+
if (
1594+
not take_split_path
1595+
and getattr(self.obj._mgr, "blocks", False)
1596+
and self.ndim > 1
1597+
):
15941598
# in case of dict, keys are indices
15951599
val = list(value.values()) if isinstance(value, dict) else value
15961600
blk = self.obj._mgr.blocks[0]

pandas/core/internals/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,11 @@
1-
from pandas.core.internals.array_manager import ArrayManager
2-
from pandas.core.internals.base import DataManager
1+
from pandas.core.internals.array_manager import (
2+
ArrayManager,
3+
SingleArrayManager,
4+
)
5+
from pandas.core.internals.base import (
6+
DataManager,
7+
SingleDataManager,
8+
)
39
from pandas.core.internals.blocks import ( # io.pytables, io.packers
410
Block,
511
CategoricalBlock,
@@ -34,7 +40,9 @@
3440
"DataManager",
3541
"ArrayManager",
3642
"BlockManager",
43+
"SingleDataManager",
3744
"SingleBlockManager",
45+
"SingleArrayManager",
3846
"concatenate_managers",
3947
# those two are preserved here for downstream compatibility (GH-33892)
4048
"create_block_manager_from_arrays",

0 commit comments

Comments
 (0)