pandas-dev · jreback · Sep 15, 2018 · Sep 9, 2018 · WillAyd · Sep 11, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -734,7 +734,7 @@ I/O
 - :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
 - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
 - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
--
+- :func:`read_sas()` will correctly parse numbers in sas7bdat files that have a width of less than 8 bytes. (:issue:`21616`)
 
 Plotting
 ^^^^^^^^

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
@@ -614,7 +614,7 @@ def read(self, nrows=None):
         ns = (self.column_types == b's').sum()
 
         self._string_chunk = np.empty((ns, nrows), dtype=np.object)
-        self._byte_chunk = np.empty((nd, 8 * nrows), dtype=np.uint8)
+        self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)
 
         self._current_row_in_chunk_index = 0
         p = Parser(self)

diff --git a/pandas/tests/io/sas/data/cars.sas7bdat b/pandas/tests/io/sas/data/cars.sas7bdat
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
@@ -183,6 +183,22 @@ def test_date_time(datapath):
     tm.assert_frame_equal(df, df0)
 
 
+def test_compact_numerical_values(datapath):
+    # Regression test for #21616
+    path = datapath("io", "sas", "data", "cars.sas7bdat")
+    frame = pd.read_sas(path, encoding='latin-1')
+    # CYL and WGT are stored in cars.sas7bdat with a column width
+    # of less than 8 bytes and hold only integral values, so each
+    # column, when parsed correctly, must be exactly equal to its
+    # own rounded copy; any spurious decimals mean that the reader
+    # corrupted the numbers.
+    # Each column is compared separately, WGT first and then CYL.
+    for column in ('WGT', 'CYL'):
+        result = frame[column]
+        expected = result.round()
+        tm.assert_series_equal(result, expected, check_exact=True)
+
+
 def test_zero_variables(datapath):
     # Check if the SAS file has zero variables (PR #18184)
     fname = datapath("io", "sas", "data", "zero_variables.sas7bdat")