Skip to content

Commit dac3a8a

Browse files
authored
PERF: DataFrame.values (#43160)
1 parent 34926ff commit dac3a8a

File tree

2 files changed: +25 −32 lines changed

pandas/core/frame.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1653,6 +1653,8 @@ def to_numpy(
16531653
[2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object)
16541654
"""
16551655
self._consolidate_inplace()
1656+
if dtype is not None:
1657+
dtype = np.dtype(dtype)
16561658
result = self._mgr.as_array(
16571659
transpose=self._AXIS_REVERSED, dtype=dtype, copy=copy, na_value=na_value
16581660
)

pandas/core/internals/managers.py

Lines changed: 23 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1425,7 +1425,7 @@ def to_dict(self, copy: bool = True):
14251425
def as_array(
14261426
self,
14271427
transpose: bool = False,
1428-
dtype: npt.DTypeLike | None = None,
1428+
dtype: np.dtype | None = None,
14291429
copy: bool = False,
14301430
na_value=lib.no_default,
14311431
) -> np.ndarray:
@@ -1436,7 +1436,7 @@ def as_array(
14361436
----------
14371437
transpose : bool, default False
14381438
If True, transpose the return array.
1439-
dtype : object, default None
1439+
dtype : np.dtype or None, default None
14401440
Data type of the return array.
14411441
copy : bool, default False
14421442
If True then guarantee that a copy is returned. A value of
@@ -1465,15 +1465,7 @@ def as_array(
14651465
# error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no
14661466
# attribute "to_numpy"
14671467
arr = blk.values.to_numpy( # type: ignore[union-attr]
1468-
# pandas/core/internals/managers.py:1428: error: Argument "dtype" to
1469-
# "to_numpy" of "ExtensionArray" has incompatible type
1470-
# "Optional[Union[dtype[Any], None, type, _SupportsDType, str,
1471-
# Union[Tuple[Any, int], Tuple[Any, Union[SupportsIndex,
1472-
# Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any,
1473-
# Any]]]]"; expected "Optional[Union[ExtensionDtype, Union[str,
1474-
# dtype[Any]], Type[str], Type[float], Type[int], Type[complex],
1475-
# Type[bool], Type[object]]]"
1476-
dtype=dtype, # type: ignore[arg-type]
1468+
dtype=dtype,
14771469
na_value=na_value,
14781470
).reshape(blk.shape)
14791471
else:
@@ -1495,34 +1487,44 @@ def as_array(
14951487

14961488
def _interleave(
14971489
self,
1498-
dtype: npt.DTypeLike | ExtensionDtype | None = None,
1490+
dtype: np.dtype | None = None,
14991491
na_value=lib.no_default,
15001492
) -> np.ndarray:
15011493
"""
15021494
Return ndarray from blocks with specified item order
15031495
Items must be contained in the blocks
15041496
"""
15051497
if not dtype:
1506-
dtype = interleaved_dtype([blk.dtype for blk in self.blocks])
1498+
# Incompatible types in assignment (expression has type
1499+
# "Optional[Union[dtype[Any], ExtensionDtype]]", variable has
1500+
# type "Optional[dtype[Any]]")
1501+
dtype = interleaved_dtype( # type: ignore[assignment]
1502+
[blk.dtype for blk in self.blocks]
1503+
)
15071504

15081505
# TODO: https://github.com/pandas-dev/pandas/issues/22791
15091506
# Give EAs some input on what happens here. Sparse needs this.
15101507
if isinstance(dtype, SparseDtype):
15111508
dtype = dtype.subtype
1509+
dtype = cast(np.dtype, dtype)
15121510
elif isinstance(dtype, ExtensionDtype):
15131511
dtype = np.dtype("object")
15141512
elif is_dtype_equal(dtype, str):
15151513
dtype = np.dtype("object")
15161514

1517-
# error: Argument "dtype" to "empty" has incompatible type
1518-
# "Union[ExtensionDtype, str, dtype[Any], Type[object], None]"; expected
1519-
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
1520-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
1521-
# Tuple[Any, Any]]]"
1522-
result = np.empty(self.shape, dtype=dtype) # type: ignore[arg-type]
1515+
result = np.empty(self.shape, dtype=dtype)
15231516

15241517
itemmask = np.zeros(self.shape[0])
15251518

1519+
if dtype == np.dtype("object") and na_value is lib.no_default:
1520+
# much more performant than using to_numpy below
1521+
for blk in self.blocks:
1522+
rl = blk.mgr_locs
1523+
arr = blk.get_values(dtype)
1524+
result[rl.indexer] = arr
1525+
itemmask[rl.indexer] = 1
1526+
return result
1527+
15261528
for blk in self.blocks:
15271529
rl = blk.mgr_locs
15281530
if blk.is_extension:
@@ -1531,22 +1533,11 @@ def _interleave(
15311533
# error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no
15321534
# attribute "to_numpy"
15331535
arr = blk.values.to_numpy( # type: ignore[union-attr]
1534-
# pandas/core/internals/managers.py:1485: error: Argument "dtype" to
1535-
# "to_numpy" of "ExtensionArray" has incompatible type
1536-
# "Union[dtype[Any], None, type, _SupportsDType, str, Tuple[Any,
1537-
# Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any],
1538-
# _DTypeDict, Tuple[Any, Any], ExtensionDtype]"; expected
1539-
# "Optional[Union[ExtensionDtype, Union[str, dtype[Any]], Type[str],
1540-
# Type[float], Type[int], Type[complex], Type[bool], Type[object]]]"
1541-
# [arg-type]
1542-
dtype=dtype, # type: ignore[arg-type]
1536+
dtype=dtype,
15431537
na_value=na_value,
15441538
)
15451539
else:
1546-
# error: Argument 1 to "get_values" of "Block" has incompatible type
1547-
# "Union[ExtensionDtype, str, dtype[Any], Type[object], None]"; expected
1548-
# "Union[dtype[Any], ExtensionDtype, None]"
1549-
arr = blk.get_values(dtype) # type: ignore[arg-type]
1540+
arr = blk.get_values(dtype)
15501541
result[rl.indexer] = arr
15511542
itemmask[rl.indexer] = 1
15521543

0 commit comments

Comments
 (0)