Skip to content

Commit ff70d33

Browse files
author
Pratap Vardhan
committed
df.info column summary line numbers with header separator
1 parent ba8f01c commit ff70d33

File tree

3 files changed

+66
-22
lines changed

3 files changed

+66
-22
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,35 @@ Current Behavior:
6666

6767
result
6868

69+
.. _whatsnew_0240.enhancements.output_formatting:
70+
71+
Output Formatting Enhancements
72+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
73+
74+
- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`)
75+
76+
.. ipython:: python
77+
78+
df = pd.DataFrame({
79+
'int_col': [1, 2, 3, 4, 5],
80+
'text_col': ['alpha', 'beta', 'gamma', 'delta', 'epsilon'],
81+
'float_col': [0.0, 0.25, 0.5, 0.75, 1.0]})
82+
df.info()
83+
84+
Previous Behavior:
85+
86+
.. code-block:: python
87+
88+
In [1]: df.info()
89+
<class 'pandas.core.frame.DataFrame'>
90+
RangeIndex: 5 entries, 0 to 4
91+
Data columns (total 3 columns):
92+
int_col 5 non-null int64
93+
text_col 5 non-null object
94+
float_col 5 non-null float64
95+
dtypes: float64(1), int64(1), object(1)
96+
memory usage: 200.0+ bytes
97+
6998
.. _whatsnew_0240.enhancements.other:
7099

71100
Other Enhancements

pandas/core/frame.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2121,9 +2121,11 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
21212121
<class 'pandas.core.frame.DataFrame'>
21222122
RangeIndex: 5 entries, 0 to 4
21232123
Data columns (total 3 columns):
2124-
int_col 5 non-null int64
2125-
text_col 5 non-null object
2126-
float_col 5 non-null float64
2124+
#. Column Non-Null Count
2125+
--- ------ --------------
2126+
0 int_col 5 non-null int64
2127+
1 text_col 5 non-null object
2128+
2 float_col 5 non-null float64
21272129
dtypes: float64(1), int64(1), object(1)
21282130
memory usage: 200.0+ bytes
21292131
@@ -2161,19 +2163,23 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
21612163
<class 'pandas.core.frame.DataFrame'>
21622164
RangeIndex: 1000000 entries, 0 to 999999
21632165
Data columns (total 3 columns):
2164-
column_1 1000000 non-null object
2165-
column_2 1000000 non-null object
2166-
column_3 1000000 non-null object
2166+
#. Column Non-Null Count
2167+
--- ------ --------------
2168+
0 column_1 1000000 non-null object
2169+
1 column_2 1000000 non-null object
2170+
2 column_3 1000000 non-null object
21672171
dtypes: object(3)
21682172
memory usage: 22.9+ MB
21692173
21702174
>>> df.info(memory_usage='deep')
21712175
<class 'pandas.core.frame.DataFrame'>
21722176
RangeIndex: 1000000 entries, 0 to 999999
21732177
Data columns (total 3 columns):
2174-
column_1 1000000 non-null object
2175-
column_2 1000000 non-null object
2176-
column_3 1000000 non-null object
2178+
#. Column Non-Null Count
2179+
--- ------ --------------
2180+
0 column_1 1000000 non-null object
2181+
1 column_2 1000000 non-null object
2182+
2 column_3 1000000 non-null object
21772183
dtypes: object(3)
21782184
memory usage: 188.8 MB
21792185
"""
@@ -2210,25 +2216,33 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
22102216
def _verbose_repr():
22112217
lines.append('Data columns (total '
22122218
'{count} columns):'.format(count=cols_count))
2213-
space = max([len(pprint_thing(k)) for k in cols])
2214-
space = max(space, len(pprint_thing('Column'))) + 4
2219+
space = max(len(pprint_thing(k)) for k in cols)
2220+
len_column = len(pprint_thing('Column'))
2221+
space = max(space, len_column) + 4
22152222
space_num = len(pprint_thing(cols_count))
2216-
space_num = max(space_num, len(pprint_thing('Index'))) + 2
2223+
len_id = len(pprint_thing(' #.'))
2224+
space_num = max(space_num, len_id) + 2
22172225
counts = None
22182226

2219-
header = _put_str('Index', space_num) + _put_str('Column', space)
2220-
tmpl = '{count}{dtype}'
2227+
header = _put_str(' #.', space_num) + _put_str('Column', space)
22212228
if show_counts:
22222229
counts = self.count()
22232230
if len(cols) != len(counts): # pragma: no cover
22242231
raise AssertionError(
22252232
'Columns must equal counts '
22262233
'({cols_count} != {count})'.format(
22272234
cols_count=cols_count, count=len(counts)))
2228-
header += 'Non-Null Count'
2235+
col_header = 'Non-Null Count'
22292236
tmpl = '{count} non-null {dtype}'
2237+
else:
2238+
col_header = 'dtype'
2239+
tmpl = '{count}{dtype}'
2240+
header += col_header
22302241

22312242
lines.append(header)
2243+
lines.append(_put_str('-' * len_id, space_num) +
2244+
_put_str('-' * len_column, space) +
2245+
'-' * len(pprint_thing(col_header)))
22322246
dtypes = self.dtypes
22332247
for i, col in enumerate(cols):
22342248
dtype = dtypes.iloc[i]

pandas/tests/frame/test_repr_info.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ def test_info_memory(self):
217217
<class 'pandas.core.frame.DataFrame'>
218218
RangeIndex: 2 entries, 0 to 1
219219
Data columns (total 1 columns):
220-
a 2 non-null int64
220+
#. Column Non-Null Count
221+
--- ------ --------------
222+
0 a 2 non-null int64
221223
dtypes: int64(1)
222224
memory usage: {} bytes
223225
""".format(bytes))
@@ -259,8 +261,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
259261
frame.info(buf=io)
260262
io.seek(0)
261263
lines = io.readlines()
262-
assert ' 0 a 1 non-null int64\n' == lines[4]
263-
assert ' 1 a 1 non-null float64\n' == lines[5]
264+
assert ' 0 a 1 non-null int64\n' == lines[5]
265+
assert ' 1 a 1 non-null float64\n' == lines[6]
264266

265267
def test_info_shows_column_dtypes(self):
266268
dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
@@ -280,24 +282,23 @@ def test_info_shows_column_dtypes(self):
280282

281283
def test_info_max_cols(self):
282284
df = DataFrame(np.random.randn(10, 5))
283-
for len_, verbose in [(5, None), (5, False), (11, True)]:
285+
for len_, verbose in [(5, None), (5, False), (12, True)]:
284286
# For verbose always ^ setting ^ summarize ^ full output
285287
with option_context('max_info_columns', 4):
286288
buf = StringIO()
287289
df.info(buf=buf, verbose=verbose)
288290
res = buf.getvalue()
289291
assert len(res.strip().split('\n')) == len_
290292

291-
for len_, verbose in [(11, None), (5, False), (11, True)]:
292-
293+
for len_, verbose in [(12, None), (5, False), (12, True)]:
293294
# max_cols not exceeded
294295
with option_context('max_info_columns', 5):
295296
buf = StringIO()
296297
df.info(buf=buf, verbose=verbose)
297298
res = buf.getvalue()
298299
assert len(res.strip().split('\n')) == len_
299300

300-
for len_, max_cols in [(11, 5), (5, 4)]:
301+
for len_, max_cols in [(12, 5), (5, 4)]:
301302
# setting truncates
302303
with option_context('max_info_columns', 4):
303304
buf = StringIO()

0 commit comments

Comments
 (0)