Skip to content

Commit ba8f01c

Browse files
pratapvardhan (Pratap Vardhan)
authored and
Pratap Vardhan
committed
ENH: pd.DataFrame.info() to show line numbers GH17304
1 parent 384c9b3 commit ba8f01c

File tree

2 files changed

+29
-22
lines changed

2 files changed

+29
-22
lines changed

pandas/core/frame.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2192,48 +2192,54 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
21922192
return
21932193

21942194
cols = self.columns
2195+
cols_count = len(cols)
21952196

21962197
# hack
21972198
if max_cols is None:
2198-
max_cols = get_option('display.max_info_columns',
2199-
len(self.columns) + 1)
2199+
max_cols = get_option('display.max_info_columns', cols_count + 1)
22002200

22012201
max_rows = get_option('display.max_info_rows', len(self) + 1)
22022202

22032203
if null_counts is None:
2204-
show_counts = ((len(self.columns) <= max_cols) and
2204+
show_counts = ((cols_count <= max_cols) and
22052205
(len(self) < max_rows))
22062206
else:
22072207
show_counts = null_counts
2208-
exceeds_info_cols = len(self.columns) > max_cols
2208+
exceeds_info_cols = cols_count > max_cols
22092209

22102210
def _verbose_repr():
2211-
lines.append('Data columns (total %d columns):' %
2212-
len(self.columns))
2213-
space = max(len(pprint_thing(k)) for k in self.columns) + 4
2211+
lines.append('Data columns (total '
2212+
'{count} columns):'.format(count=cols_count))
2213+
space = max([len(pprint_thing(k)) for k in cols])
2214+
space = max(space, len(pprint_thing('Column'))) + 4
2215+
space_num = len(pprint_thing(cols_count))
2216+
space_num = max(space_num, len(pprint_thing('Index'))) + 2
22142217
counts = None
22152218

2216-
tmpl = "{count}{dtype}"
2219+
header = _put_str('Index', space_num) + _put_str('Column', space)
2220+
tmpl = '{count}{dtype}'
22172221
if show_counts:
22182222
counts = self.count()
22192223
if len(cols) != len(counts): # pragma: no cover
22202224
raise AssertionError(
22212225
'Columns must equal counts '
2222-
'({cols:d} != {counts:d})'.format(
2223-
cols=len(cols), counts=len(counts)))
2224-
tmpl = "{count} non-null {dtype}"
2226+
'({cols_count} != {count})'.format(
2227+
cols_count=cols_count, count=len(counts)))
2228+
header += 'Non-Null Count'
2229+
tmpl = '{count} non-null {dtype}'
22252230

2231+
lines.append(header)
22262232
dtypes = self.dtypes
2227-
for i, col in enumerate(self.columns):
2233+
for i, col in enumerate(cols):
22282234
dtype = dtypes.iloc[i]
22292235
col = pprint_thing(col)
2230-
2231-
count = ""
2236+
line_no = _put_str(' {num}'.format(num=i), space_num)
2237+
count = ''
22322238
if show_counts:
22332239
count = counts.iloc[i]
22342240

2235-
lines.append(_put_str(col, space) + tmpl.format(count=count,
2236-
dtype=dtype))
2241+
lines.append(line_no + _put_str(col, space) +
2242+
tmpl.format(count=count, dtype=dtype))
22372243

22382244
def _non_verbose_repr():
22392245
lines.append(self.columns._summary(name='Columns'))

pandas/tests/frame/test_repr_info.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
259259
frame.info(buf=io)
260260
io.seek(0)
261261
lines = io.readlines()
262-
assert 'a 1 non-null int64\n' == lines[3]
263-
assert 'a 1 non-null float64\n' == lines[4]
262+
assert ' 0 a 1 non-null int64\n' == lines[4]
263+
assert ' 1 a 1 non-null float64\n' == lines[5]
264264

265265
def test_info_shows_column_dtypes(self):
266266
dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
@@ -274,20 +274,21 @@ def test_info_shows_column_dtypes(self):
274274
df.info(buf=buf)
275275
res = buf.getvalue()
276276
for i, dtype in enumerate(dtypes):
277-
name = '%d %d non-null %s' % (i, n, dtype)
277+
name = '%s %d non-null %s' % (i, n, dtype)
278+
278279
assert name in res
279280

280281
def test_info_max_cols(self):
281282
df = DataFrame(np.random.randn(10, 5))
282-
for len_, verbose in [(5, None), (5, False), (10, True)]:
283+
for len_, verbose in [(5, None), (5, False), (11, True)]:
283284
# For verbose always ^ setting ^ summarize ^ full output
284285
with option_context('max_info_columns', 4):
285286
buf = StringIO()
286287
df.info(buf=buf, verbose=verbose)
287288
res = buf.getvalue()
288289
assert len(res.strip().split('\n')) == len_
289290

290-
for len_, verbose in [(10, None), (5, False), (10, True)]:
291+
for len_, verbose in [(11, None), (5, False), (11, True)]:
291292

292293
# max_cols not exceeded
293294
with option_context('max_info_columns', 5):
@@ -296,7 +297,7 @@ def test_info_max_cols(self):
296297
res = buf.getvalue()
297298
assert len(res.strip().split('\n')) == len_
298299

299-
for len_, max_cols in [(10, 5), (5, 4)]:
300+
for len_, max_cols in [(11, 5), (5, 4)]:
300301
# setting truncates
301302
with option_context('max_info_columns', 4):
302303
buf = StringIO()

0 commit comments

Comments
 (0)