-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: add Series.info #31796
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: add Series.info #31796
Changes from all commits
2b1e5fc
a4ad077
c7bfb94
01fd802
abbae9a
1a474fe
b30ce1b
6d8c765
99411e4
4651bd7
7de4703
99472fd
2902fe7
8b8adfa
c6d8a76
d0b2e1f
2225810
8c6c6f5
8afcb82
71260f3
127f84f
acae58f
9654198
c1006a7
27e45e1
3592e8e
af771e6
317a148
5082bc5
ae0065b
c36d4c4
751d346
631d914
23bd173
304f445
a2d6e43
f33f0df
05c9091
22de3c5
8a58bd6
9568d03
21d263c
cfa8039
3811545
6bcbef7
a245484
c04dabf
d9993ee
700801b
ad39d85
cad1391
a53033b
f0e2290
53e8c20
ee717c8
4d7a211
6eccf00
81d22eb
f2ca520
669ff38
6f8f8b1
97dc73c
0707f32
2a2324b
0c08335
c93f1ad
ddf9efc
21d94b2
a213d9c
089ce24
4581385
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
from abc import ABCMeta, abstractmethod | ||
import sys | ||
from typing import IO, TYPE_CHECKING, List, Optional, Tuple, Union | ||
from typing import IO, TYPE_CHECKING, List, NamedTuple, Optional, Tuple, Union, cast | ||
|
||
from pandas._config import get_option | ||
|
||
|
@@ -15,6 +15,32 @@ | |
from pandas.core.series import Series # noqa: F401 | ||
|
||
|
||
class CountConfigs(NamedTuple):
    """
    Bundle of settings used to render the non-null count column.

    The class-based ``typing.NamedTuple`` syntax is used on purpose: it is
    the form shown in the Python ``typing`` documentation and the one
    recommended by the mypy documentation.

    Attributes
    ----------
    counts : Series
        Non-null count of the Series (or of every column of a DataFrame).
    count_header : str
        Text printed above the non-null counts in the output.
    space_count : int
        Width reserved for the count column, including the gap that
        precedes the `dtypes` column.
    len_count : int
        Length of the count header.
    count_temp : str
        Template string that gets formatted with the non-null count.
    """

    counts: "Series"
    count_header: str
    space_count: int
    len_count: int
    count_temp: str
|
||
|
||
def _put_str(s: Union[str, Dtype], space: int) -> str: | ||
""" | ||
Make string of specified length, padding to the right if necessary. | ||
|
@@ -72,6 +98,134 @@ def _sizeof_fmt(num: Union[int, float], size_qualifier: str) -> str: | |
return f"{num:3.1f}{size_qualifier} PB" | ||
|
||
|
||
def _get_count_configs(
    counts: "Series", col_space: int, show_counts: bool, col_count: Optional[int] = None
) -> CountConfigs:
    """
    Get configs for displaying counts, depending on the value of `show_counts`.

    Parameters
    ----------
    counts : Series
        Non-null count of Series (or of each column of DataFrame).
    col_space : int
        How many spaces to leave between non-null count and dtype columns.
    show_counts : bool
        Whether to display non-null counts.
    col_count : int, optional
        Number of columns in DataFrame. When given (and `show_counts` is
        true) it must equal ``len(counts)``.

    Returns
    -------
    CountConfigs

    Raises
    ------
    AssertionError
        If `show_counts` is true and `col_count` is given but differs from
        ``len(counts)``.
    """
    if not show_counts:
        # No count column at all: empty header, zero width, bare template.
        return CountConfigs(counts, "", 0, 0, "{count}")

    if col_count is not None and col_count != len(counts):  # pragma: no cover
        raise AssertionError(
            f"Columns must equal counts ({col_count} != {len(counts)})"
        )

    count_header = "Non-Null Count"
    non_null = " non-null"
    len_count = len(count_header)
    # default=0 keeps an empty `counts` from making max() raise ValueError;
    # the column then simply takes the width of its header.
    widest_count = max((len(pprint_thing(k)) for k in counts), default=0)
    space_count = max(len_count, widest_count + len(non_null)) + col_space
    return CountConfigs(
        counts, count_header, space_count, len_count, "{count}" + non_null
    )
|
||
|
||
def _display_counts_and_dtypes(
    lines: List[str],
    ids: "Index",
    dtypes: "Series",
    show_counts: bool,
    count_configs: CountConfigs,
    space_dtype: int,
    space: int = 0,
    space_num: int = 0,
) -> None:
    """
    Append count and dtype of Series (or of each column of Frame) to `lines`.

    One row is appended per entry of `ids`; `lines` is modified in place.

    Parameters
    ----------
    lines : List[str]
        At this stage, this contains the main header and the info table headers.
    ids : Index
        Series name (or names of DataFrame columns).
    dtypes : Series
        Series dtype (or dtypes of DataFrame columns).
    show_counts : bool
        Whether to show non-null counts.
    count_configs : CountConfigs
        Configs with which to display counts.
    space_dtype : int
        Number of spaces that `dtypes` column should occupy.
    space : int = 0
        Number of spaces that `Column` header should occupy
        (including space before `non-null count` column).
    space_num : int = 0
        Number of spaces that ` # ` header should occupy (including space
        before `Column` column), only applicable for `DataFrame.info`.
    """
    count_template = count_configs.count_temp
    space_count = count_configs.space_count

    for i, col in enumerate(ids):
        # `i` is a position, not a label. Plain `series[i]` is label-based
        # when the index holds integers, so integer column names would pick
        # the wrong row or raise KeyError. `.iloc` is always positional.
        dtype = dtypes.iloc[i]
        count = count_configs.counts.iloc[i] if show_counts else ""

        lines.append(
            _put_str(f" {i}", space_num)
            + _put_str(pprint_thing(col), space)
            + _put_str(count_template.format(count=count), space_count)
            + _put_str(dtype, space_dtype)
        )
|
||
|
||
def _get_header_and_spaces(
    dtypes: "Series", space_count: int, count_header: str, header: str = ""
) -> Tuple[int, str, int]:
    """
    Append extra columns (count and type) to header, if applicable.

    Parameters
    ----------
    dtypes : Series
        Series dtype (or dtypes of DataFrame columns).
    space_count : int
        Number of spaces that count_header should occupy
        (including space before `dtypes` column).
    count_header : str
        Header that will be printed out above non-null counts in output.
    header : str
        Current header.

    Returns
    -------
    space_dtype : int
        Number of spaces that `dtypes` column should occupy.
    header : str
        Header with extra columns (count and type) appended.
    len_dtype : int
        Length of dtype header.
    """
    dtype_header = "Dtype"
    len_dtype = len(dtype_header)
    # default=0: with an empty `dtypes` the column falls back to the width of
    # its header instead of max() raising ValueError.
    widest_dtype = max((len(pprint_thing(k)) for k in dtypes), default=0)
    space_dtype = max(len_dtype, widest_dtype)
    header += _put_str(count_header, space_count) + _put_str(dtype_header, space_dtype)
    return space_dtype, header, len_dtype
|
||
|
||
class BaseInfo(metaclass=ABCMeta): | ||
def __init__( | ||
self, | ||
|
@@ -297,55 +451,68 @@ def _verbose_repr( | |
space_num = max(max_id, len_id) + col_space | ||
|
||
header = _put_str(id_head, space_num) + _put_str(column_head, space) | ||
if show_counts: | ||
counts = self.data.count() | ||
if col_count != len(counts): # pragma: no cover | ||
raise AssertionError( | ||
f"Columns must equal counts ({col_count} != {len(counts)})" | ||
) | ||
count_header = "Non-Null Count" | ||
len_count = len(count_header) | ||
non_null = " non-null" | ||
max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null) | ||
space_count = max(len_count, max_count) + col_space | ||
count_temp = "{count}" + non_null | ||
else: | ||
count_header = "" | ||
space_count = len(count_header) | ||
len_count = space_count | ||
count_temp = "{count}" | ||
|
||
dtype_header = "Dtype" | ||
len_dtype = len(dtype_header) | ||
max_dtypes = max(len(pprint_thing(k)) for k in dtypes) | ||
space_dtype = max(len_dtype, max_dtypes) | ||
header += _put_str(count_header, space_count) + _put_str( | ||
dtype_header, space_dtype | ||
counts = self.data.count() | ||
count_configs = _get_count_configs(counts, col_space, show_counts, col_count) | ||
|
||
space_dtype, header, len_dtype = _get_header_and_spaces( | ||
dtypes, count_configs.space_count, count_configs.count_header, header | ||
) | ||
|
||
lines.append(header) | ||
lines.append( | ||
_put_str("-" * len_id, space_num) | ||
+ _put_str("-" * len_column, space) | ||
+ _put_str("-" * len_count, space_count) | ||
+ _put_str("-" * count_configs.len_count, count_configs.space_count) | ||
+ _put_str("-" * len_dtype, space_dtype) | ||
) | ||
|
||
for i, col in enumerate(ids): | ||
dtype = dtypes[i] | ||
col = pprint_thing(col) | ||
_display_counts_and_dtypes( | ||
lines, | ||
ids, | ||
dtypes, | ||
show_counts, | ||
count_configs, | ||
space_dtype, | ||
space, | ||
space_num, | ||
) | ||
|
||
line_no = _put_str(f" {i}", space_num) | ||
count = "" | ||
if show_counts: | ||
count = counts[i] | ||
def _non_verbose_repr(self, lines: List[str], ids: "Index") -> None: | ||
lines.append(ids._summary(name="Columns")) | ||
|
||
lines.append( | ||
line_no | ||
+ _put_str(col, space) | ||
+ _put_str(count_temp.format(count=count), space_count) | ||
+ _put_str(dtype, space_dtype) | ||
) | ||
|
||
class SeriesInfo(BaseInfo):
    """
    Series-specific hooks for ``BaseInfo``.
    """

    def _get_mem_usage(self, deep: bool) -> int:
        """
        Return ``self.data.memory_usage`` with the index included.
        """
        usage = self.data.memory_usage(index=True, deep=deep)
        return usage

    def _get_ids_and_dtypes(self) -> Tuple["Index", "Series"]:
        """
        Return the name in a one-element Index, and the dtype wrapped by
        the data's own constructor.
        """
        name_index = Index([self.data.name])
        wrapped_dtype = self.data._constructor(self.data.dtypes)
        return name_index, cast("Series", wrapped_dtype)

    def _verbose_repr(
        self, lines: List[str], ids: "Index", dtypes: "Series", show_counts: bool
    ) -> None:
        """
        Append the name line, the table header, its underline and the
        count/dtype row to `lines` in place.
        """
        lines.append(f"Series name: {self.data.name}")

        # Gap left between the non-null count and dtype columns.
        id_space = 2

        # The count is wrapped with the data's constructor so the shared
        # helpers receive the same kind of object as on the DataFrame path.
        wrapped_count = self.data._constructor(self.data.count())
        count_configs = _get_count_configs(
            cast("Series", wrapped_count), id_space, show_counts
        )
        count_width = count_configs.space_count

        space_dtype, header, len_dtype = _get_header_and_spaces(
            dtypes, count_width, count_configs.count_header
        )
        underline = _put_str("-" * count_configs.len_count, count_width) + _put_str(
            "-" * len_dtype, space_dtype
        )
        lines.extend([header, underline])

        _display_counts_and_dtypes(
            lines, ids, dtypes, show_counts, count_configs, space_dtype
        )

    def _non_verbose_repr(self, lines: List[str], ids: "Index") -> None:
        """
        No-op: nothing is appended to `lines`.
        """
Uh oh!
There was an error while loading. Please reload this page.