BUG: float truncation in eval with py 2

chris-b1 · jreback · commit 14a1c80fd01d · 2016-09-22T06:18:57.000-04:00
closes #14241

Python 2 only - apparently `str()` rounds a float to fewer digits than `repr()` does:

```
In [1]: f = 1000000000.006

In [2]: str(f)
Out[2]: '1000000000.01'

In [3]: repr(f)
Out[3]: '1000000000.006'
```

Author: Chris <cbartak@gmail.com>

Closes #14255 from chris-b1/eval-float-cast and squashes the following commits:

d679aa3 [Chris] actual lint fix
8b0cb57 [Chris] lint
87df38d [Chris] better tests
c1e77b6 [Chris] lint fixup
05de4ae [Chris] fix rounding in pytables query
b386184 [Chris] BUG: float trunc in eval with py 2
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -1568,7 +1568,7 @@ Bug Fixes
 - Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)
 - Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`)
 - Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`)
-
+- Bug in ``pd.eval()`` and ``HDFStore`` queries truncating long float literals on Python 2 (:issue:`14241`)
 - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
 - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
 - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment.
diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py
@@ -166,6 +166,11 @@ def _resolve_name(self):
     def name(self):
         return self.value
 
+    def __unicode__(self):
+        # Python 2 str(float) keeps fewer significant digits than
+        # repr(), so use repr() to avoid truncating the literal
+        return repr(self.name)
+
 
 _bool_op_map = {'not': '~', 'and': '&', 'or': '|'}
 
diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py
@@ -611,10 +611,14 @@ def __init__(self, value, converted, kind):
     def tostring(self, encoding):
         """ quote the string if not encoded
             else encode and return """
-        if self.kind == u('string'):
+        if self.kind == u'string':
             if encoding is not None:
                 return self.converted
             return '"%s"' % self.converted
+        elif self.kind == u'float':
+            # Python 2 str(float) rounds to fewer digits than repr()
+            # and may not round-trip, so emit repr() instead
+            return repr(self.converted)
         return self.converted
 
 
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
@@ -678,6 +678,31 @@ def test_line_continuation(self):
         result = pd.eval(exp, engine=self.engine, parser=self.parser)
         self.assertEqual(result, 12)
 
+    def test_float_truncation(self):
+        # GH 14241: float literals must keep full precision on Python 2
+        exp = '1000000000.006'
+        result = pd.eval(exp, engine=self.engine, parser=self.parser)
+        expected = np.float64(exp)
+        self.assertEqual(result, expected)
+
+        df = pd.DataFrame({'A': [1000000000.0009,
+                                 1000000000.0011,
+                                 1000000000.0015]})
+        cutoff = 1000000000.0006
+        result = df.query("A < %.4f" % cutoff)
+        self.assertTrue(result.empty)
+
+        cutoff = 1000000000.0010
+        result = df.query("A > %.4f" % cutoff)
+        expected = df.loc[[1, 2], :]
+        tm.assert_frame_equal(expected, result)
+
+        exact = 1000000000.0011
+        result = df.query('A == %.4f' % exact)
+        expected = df.loc[[1], :]
+        tm.assert_frame_equal(expected, result)
+
+
 
 class TestEvalNumexprPython(TestEvalNumexprPandas):
 
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -5002,6 +5002,29 @@ def test_read_from_py_localpath(self):
 
         tm.assert_frame_equal(expected, actual)
 
+    def test_query_long_float_literal(self):
+        # GH 14241: store queries must not truncate long float literals
+        df = pd.DataFrame({'A': [1000000000.0009,
+                                 1000000000.0011,
+                                 1000000000.0015]})
+
+        with ensure_clean_store(self.path) as store:
+            store.append('test', df, format='table', data_columns=True)
+
+            cutoff = 1000000000.0006
+            result = store.select('test', "A < %.4f" % cutoff)
+            self.assertTrue(result.empty)
+
+            cutoff = 1000000000.0010
+            result = store.select('test', "A > %.4f" % cutoff)
+            expected = df.loc[[1, 2], :]
+            tm.assert_frame_equal(expected, result)
+
+            exact = 1000000000.0011
+            result = store.select('test', 'A == %.4f' % exact)
+            expected = df.loc[[1], :]
+            tm.assert_frame_equal(expected, result)
+
 
 class TestHDFComplexValues(Base):
     # GH10447