Skip to content

Commit 234b042

Browse files
committed
ENH: pd.DataFrame.info() to show line numbers GH17304
1 parent 96f92eb commit 234b042

File tree

3 files changed

+23
-18
lines changed

3 files changed

+23
-18
lines changed

doc/source/whatsnew/v0.21.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ Other Enhancements
129129
- `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
130130
- :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
131131
- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and are lazy in all cases (:issue:`13918`, :issue:`17213`)
132+
- :func:`DataFrame.info()` now shows line numbers for the column summary (:issue:`17304`)
132133

133134

134135

pandas/core/frame.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,50 +1746,54 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
17461746
lines.append(self.index.summary())
17471747

17481748
if len(self.columns) == 0:
1749-
lines.append('Empty %s' % type(self).__name__)
1749+
lines.append('Empty {name}'.format(name=type(self).__name__))
17501750
_put_lines(buf, lines)
17511751
return
17521752

17531753
cols = self.columns
1754+
cols_count = len(cols)
17541755

17551756
# hack
17561757
if max_cols is None:
1757-
max_cols = get_option('display.max_info_columns',
1758-
len(self.columns) + 1)
1758+
max_cols = get_option('display.max_info_columns', cols_count + 1)
17591759

17601760
max_rows = get_option('display.max_info_rows', len(self) + 1)
17611761

17621762
if null_counts is None:
1763-
show_counts = ((len(self.columns) <= max_cols) and
1763+
show_counts = ((cols_count <= max_cols) and
17641764
(len(self) < max_rows))
17651765
else:
17661766
show_counts = null_counts
1767-
exceeds_info_cols = len(self.columns) > max_cols
1767+
exceeds_info_cols = cols_count > max_cols
17681768

17691769
def _verbose_repr():
1770-
lines.append('Data columns (total %d columns):' %
1771-
len(self.columns))
1772-
space = max([len(pprint_thing(k)) for k in self.columns]) + 4
1770+
lines.append('Data columns (total '
1771+
'{count} columns):'.format(count=cols_count))
1772+
space = max([len(pprint_thing(k)) for k in cols]) + 4
1773+
space_num = len(pprint_thing(cols_count)) + 2
17731774
counts = None
17741775

1775-
tmpl = "%s%s"
1776+
tmpl = '{count}{dtype}'
17761777
if show_counts:
17771778
counts = self.count()
17781779
if len(cols) != len(counts): # pragma: no cover
1779-
raise AssertionError('Columns must equal counts (%d != %d)'
1780-
% (len(cols), len(counts)))
1781-
tmpl = "%s non-null %s"
1780+
raise AssertionError(
1781+
'Columns must equal counts '
1782+
'({cols_count} != {count})'.format(
1783+
cols_count=cols_count, count=len(counts)))
1784+
tmpl = '{count} non-null {dtype}'
17821785

17831786
dtypes = self.dtypes
1784-
for i, col in enumerate(self.columns):
1787+
for i, col in enumerate(cols):
17851788
dtype = dtypes.iloc[i]
17861789
col = pprint_thing(col)
1787-
1788-
count = ""
1790+
line_no = '{num}. '.format(num=i + 1).rjust(space_num)
1791+
count = ''
17891792
if show_counts:
17901793
count = counts.iloc[i]
17911794

1792-
lines.append(_put_str(col, space) + tmpl % (count, dtype))
1795+
lines.append(line_no + _put_str(col, space) +
1796+
tmpl.format(count=count, dtype=dtype))
17931797

17941798
def _non_verbose_repr():
17951799
lines.append(self.columns.summary(name='Columns'))

pandas/tests/frame/test_repr_info.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
238238
frame.info(buf=io)
239239
io.seek(0)
240240
lines = io.readlines()
241-
assert 'a 1 non-null int64\n' == lines[3]
242-
assert 'a 1 non-null float64\n' == lines[4]
241+
assert '1. a 1 non-null int64\n' == lines[3]
242+
assert '2. a 1 non-null float64\n' == lines[4]
243243

244244
def test_info_shows_column_dtypes(self):
245245
dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',

0 commit comments

Comments
 (0)