pandas-dev · pratapvardhan · Aug 25, 2017 · Jul 9, 2018 · Jul 10, 2018 · jreback
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -66,6 +66,35 @@ Current Behavior:
 
     result
 
+.. _whatsnew_0240.enhancements.output_formatting:
+
+Output Formatting Enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- :func:`DataFrame.info` now shows a header row and the positional number of each column in the columns summary (:issue:`17304`)
+
+.. ipython:: python
+
+    df = pd.DataFrame({
+            'int_col': [1, 2, 3, 4, 5],
+            'text_col': ['alpha', 'beta', 'gamma', 'delta', 'epsilon'],
+            'float_col': [0.0, 0.25, 0.5, 0.75, 1.0]})
+    df.info()
+
+Previous Behavior:
+
+.. code-block:: python
+
+    In [1]: df.info()
+    <class 'pandas.core.frame.DataFrame'>
+    RangeIndex: 5 entries, 0 to 4
+    Data columns (total 3 columns):
+    int_col      5 non-null int64
+    text_col     5 non-null object
+    float_col    5 non-null float64
+    dtypes: float64(1), int64(1), object(1)
+    memory usage: 200.0+ bytes
+
 .. _whatsnew_0240.enhancements.other:
 
 Other Enhancements

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2121,9 +2121,11 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 5 entries, 0 to 4
         Data columns (total 3 columns):
-        int_col      5 non-null int64
-        text_col     5 non-null object
-        float_col    5 non-null float64
+         #.  Column       Non-Null Count & Dtype
+        ---  ------       ----------------------
+         0   int_col      5 non-null int64
+         1   text_col     5 non-null object
+         2   float_col    5 non-null float64
         dtypes: float64(1), int64(1), object(1)
         memory usage: 200.0+ bytes
 
@@ -2161,19 +2163,23 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 1000000 entries, 0 to 999999
         Data columns (total 3 columns):
-        column_1    1000000 non-null object
-        column_2    1000000 non-null object
-        column_3    1000000 non-null object
+         #.  Column      Non-Null Count & Dtype
+        ---  ------      ----------------------
+         0   column_1    1000000 non-null object
+         1   column_2    1000000 non-null object
+         2   column_3    1000000 non-null object
         dtypes: object(3)
         memory usage: 22.9+ MB
 
         >>> df.info(memory_usage='deep')
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 1000000 entries, 0 to 999999
         Data columns (total 3 columns):
-        column_1    1000000 non-null object
-        column_2    1000000 non-null object
-        column_3    1000000 non-null object
+         #.  Column      Non-Null Count & Dtype
+        ---  ------      ----------------------
+         0   column_1    1000000 non-null object
+         1   column_2    1000000 non-null object
+         2   column_3    1000000 non-null object
         dtypes: object(3)
         memory usage: 188.8 MB
         """
@@ -2192,48 +2198,62 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
             return
 
         cols = self.columns
+        cols_count = len(cols)
 
         # hack
         if max_cols is None:
-            max_cols = get_option('display.max_info_columns',
-                                  len(self.columns) + 1)
+            max_cols = get_option('display.max_info_columns', cols_count + 1)
 
         max_rows = get_option('display.max_info_rows', len(self) + 1)
 
         if null_counts is None:
-            show_counts = ((len(self.columns) <= max_cols) and
+            show_counts = ((cols_count <= max_cols) and
                            (len(self) < max_rows))
         else:
             show_counts = null_counts
-        exceeds_info_cols = len(self.columns) > max_cols
+        exceeds_info_cols = cols_count > max_cols
 
         def _verbose_repr():
-            lines.append('Data columns (total %d columns):' %
-                         len(self.columns))
-            space = max(len(pprint_thing(k)) for k in self.columns) + 4
+            lines.append('Data columns (total '
+                         '{count} columns):'.format(count=cols_count))
+            space = max(len(pprint_thing(k)) for k in cols)
+            len_column = len(pprint_thing('Column'))
+            space = max(space, len_column) + 4
+            space_num = len(pprint_thing(cols_count))
+            len_id = len(pprint_thing(' #.'))
+            space_num = max(space_num, len_id) + 2
             counts = None
 
-            tmpl = "{count}{dtype}"
+            header = _put_str(' #.', space_num) + _put_str('Column', space)
             if show_counts:
                 counts = self.count()
                 if len(cols) != len(counts):  # pragma: no cover
                     raise AssertionError(
                         'Columns must equal counts '
-                        '({cols:d} != {counts:d})'.format(
-                            cols=len(cols), counts=len(counts)))
-                tmpl = "{count} non-null {dtype}"
-
+                        '({cols_count} != {count})'.format(
+                            cols_count=cols_count, count=len(counts)))
+                col_header = 'Non-Null Count & Dtype'
+                tmpl = '{count} non-null {dtype}'
+            else:
+                col_header = 'Dtype'
+                tmpl = '{count}{dtype}'
+            header += col_header
+
+            lines.append(header)
+            lines.append(_put_str('-' * len_id, space_num) +
+                         _put_str('-' * len_column, space) +
+                         '-' * len(pprint_thing(col_header)))
             dtypes = self.dtypes
-            for i, col in enumerate(self.columns):
+            for i, col in enumerate(cols):
                 dtype = dtypes.iloc[i]
                 col = pprint_thing(col)
-
-                count = ""
+                line_no = _put_str(' {num}'.format(num=i), space_num)
+                count = ''
                 if show_counts:
                     count = counts.iloc[i]
 
-                lines.append(_put_str(col, space) + tmpl.format(count=count,
-                                                                dtype=dtype))
+                lines.append(line_no + _put_str(col, space) +
+                             tmpl.format(count=count, dtype=dtype))
 
         def _non_verbose_repr():
             lines.append(self.columns._summary(name='Columns'))

diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
@@ -217,13 +217,33 @@ def test_info_memory(self):
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 2 entries, 0 to 1
         Data columns (total 1 columns):
-        a    2 non-null int64
+         #.  Column    Non-Null Count & Dtype
+        ---  ------    ----------------------
+         0   a         2 non-null int64
         dtypes: int64(1)
         memory usage: {} bytes
         """.format(bytes))
 
         assert result == expected
 
+    def test_info_without_null_counts(self):
+        df = pd.DataFrame({'a': [1, 2]})
+        buf = StringIO()
+        df.info(buf=buf, null_counts=False)
+        buf.seek(0)
+        lines = buf.readlines()
+        result = ''.join(lines[:-1])
+        expected = textwrap.dedent('''\
+        <class 'pandas.core.frame.DataFrame'>
+        RangeIndex: 2 entries, 0 to 1
+        Data columns (total 1 columns):
+         #.  Column    Dtype
+        ---  ------    -----
+         0   a         int64
+        dtypes: int64(1)
+        ''')
+        assert result == expected
+
     def test_info_wide(self):
         from pandas import set_option, reset_option
         io = StringIO()
@@ -259,8 +279,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
         frame.info(buf=io)
         io.seek(0)
         lines = io.readlines()
-        assert 'a    1 non-null int64\n' == lines[3]
-        assert 'a    1 non-null float64\n' == lines[4]
+        assert ' 0   a         1 non-null int64\n' == lines[5]
+        assert ' 1   a         1 non-null float64\n' == lines[6]
 
     def test_info_shows_column_dtypes(self):
         dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
@@ -274,29 +294,29 @@ def test_info_shows_column_dtypes(self):
         df.info(buf=buf)
         res = buf.getvalue()
         for i, dtype in enumerate(dtypes):
-            name = '%d    %d non-null %s' % (i, n, dtype)
+            name = '%s         %d non-null %s' % (i, n, dtype)
+
             assert name in res
 
     def test_info_max_cols(self):
         df = DataFrame(np.random.randn(10, 5))
-        for len_, verbose in [(5, None), (5, False), (10, True)]:
+        for len_, verbose in [(5, None), (5, False), (12, True)]:
             # For verbose always      ^ setting  ^ summarize ^ full output
             with option_context('max_info_columns', 4):
                 buf = StringIO()
                 df.info(buf=buf, verbose=verbose)
                 res = buf.getvalue()
                 assert len(res.strip().split('\n')) == len_
 
-        for len_, verbose in [(10, None), (5, False), (10, True)]:
-
+        for len_, verbose in [(12, None), (5, False), (12, True)]:
             # max_cols no exceeded
             with option_context('max_info_columns', 5):
                 buf = StringIO()
                 df.info(buf=buf, verbose=verbose)
                 res = buf.getvalue()
                 assert len(res.strip().split('\n')) == len_
 
-        for len_, max_cols in [(10, 5), (5, 4)]:
+        for len_, max_cols in [(12, 5), (5, 4)]:
             # setting truncates
             with option_context('max_info_columns', 4):
                 buf = StringIO()