DEPR, DOC: Deprecate buffer_lines in read_csv

gfyoung · gfyoung · commit a72ecbe84c1a · 2016-06-04T21:58:19.000+01:00
The 'buffer_lines' parameter is not respected by the
implementation, because its value is determined internally
by the C parser.

[ci skip]
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -176,6 +176,12 @@ low_memory : boolean, default ``True``
   Note that the entire file is read into a single DataFrame regardless,
   use the ``chunksize`` or ``iterator`` parameter to return the data in chunks.
   (Only valid with C parser)
+buffer_lines : int, default None
+  DEPRECATED: this argument will be removed in a future version because its
+  value is not respected by the parser.
+
+  If ``low_memory`` is ``True``, specify the number of rows to be read for
+  each chunk. (Only valid with C parser)
 compact_ints : boolean, default False
   DEPRECATED: this argument will be removed in a future version
 
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -294,6 +294,7 @@ Deprecations
 ^^^^^^^^^^^^
 
 - ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv`` and will be removed in a future version (:issue:`13320`)
+- ``buffer_lines`` has been deprecated in ``pd.read_csv`` and will be removed in a future version (:issue:`13360`)
 
 .. _whatsnew_0182.performance:
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -227,14 +227,19 @@
     Note that the entire file is read into a single DataFrame regardless,
     use the `chunksize` or `iterator` parameter to return the data in chunks.
     (Only valid with C parser)
+buffer_lines : int, default None
+    DEPRECATED: this argument will be removed in a future version because its
+    value is not respected by the parser.
+
+    If low_memory is True, specify the number of rows to be read for each
+    chunk. (Only valid with C parser)
 compact_ints : boolean, default False
     DEPRECATED: this argument will be removed in a future version
 
     If compact_ints is True, then for any column that is of integer dtype,
     the parser will attempt to cast it as the smallest integer dtype possible,
     either signed or unsigned depending on the specification from the
     `use_unsigned` parameter.
-
 use_unsigned : boolean, default False
     DEPRECATED: this argument will be removed in a future version
 
@@ -448,6 +453,7 @@ def _read(filepath_or_buffer, kwds):
     'float_precision',
 ])
 _deprecated_args = set([
+    'buffer_lines',
     'compact_ints',
     'use_unsigned',
 ])
@@ -806,7 +812,8 @@ def _clean_options(self, options, engine):
         _validate_header_arg(options['header'])
 
         for arg in _deprecated_args:
-            if result[arg] != _c_parser_defaults[arg]:
+            parser_default = _c_parser_defaults[arg]
+            if result.get(arg, parser_default) != parser_default:
                 warnings.warn("The '{arg}' argument has been deprecated "
                               "and will be removed in a future version"
                               .format(arg=arg), FutureWarning, stacklevel=2)
diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py
@@ -72,14 +72,12 @@ def read_csv(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
         kwds['low_memory'] = self.low_memory
-        kwds['buffer_lines'] = 2
         return read_csv(*args, **kwds)
 
     def read_table(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
         kwds['low_memory'] = True
-        kwds['buffer_lines'] = 2
         return read_table(*args, **kwds)
 
 
diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
@@ -124,6 +124,7 @@ def test_deprecated_args(self):
 
         # deprecated arguments with non-default values
         deprecated = {
+            'buffer_lines': True,
             'compact_ints': True,
             'use_unsigned': True,
         }
@@ -132,6 +133,10 @@ def test_deprecated_args(self):
 
         for engine in engines:
             for arg, non_default_val in deprecated.items():
+                if engine == 'python' and arg == 'buffer_lines':
+                    # unsupported by python engine: raises before warning
+                    continue
+
                 with tm.assert_produces_warning(
                         FutureWarning, check_stacklevel=False):
                     kwargs = {arg: non_default_val}