Skip to content

Commit 14a1c80

Browse files
chris-b1 authored and jreback committed
BUG: float truncation in eval with py 2
closes #14241

Python 2 only - apparently `str()` rounds shorter than `repr()`

```
In [1]: f = 1000000000.006

In [2]: str(f)
Out[2]: '1000000000.01'

In [3]: repr(f)
Out[3]: '1000000000.006'
```

Author: Chris <[email protected]>

Closes #14255 from chris-b1/eval-float-cast and squashes the following commits:

d679aa3 [Chris] actual lint fix
8b0cb57 [Chris] lint
87df38d [Chris] better tests
c1e77b6 [Chris] lint fixup
05de4ae [Chris] fix rounding in pytables query
b386184 [Chris] BUG: float trunc in eval with py 2
1 parent 3c96442 commit 14a1c80

File tree

5 files changed

+59
-2
lines changed

5 files changed

+59
-2
lines changed

doc/source/whatsnew/v0.19.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1568,7 +1568,7 @@ Bug Fixes
15681568
- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)
15691569
- Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`)
15701570
- Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`)
1571-
1571+
- Bug in ``pd.eval()`` and ``HDFStore`` query truncating long float literals with python 2 (:issue:`14241`)
15721572
- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
15731573
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
15741574
- Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment.

pandas/computation/ops.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,11 @@ def _resolve_name(self):
166166
def name(self):
167167
return self.value
168168

169+
def __unicode__(self):
170+
# in python 2 str() of float
171+
# can truncate shorter than repr()
172+
return repr(self.name)
173+
169174

170175
_bool_op_map = {'not': '~', 'and': '&', 'or': '|'}
171176

pandas/computation/pytables.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,10 +611,14 @@ def __init__(self, value, converted, kind):
611611
def tostring(self, encoding):
612612
""" quote the string if not encoded
613613
else encode and return """
614-
if self.kind == u('string'):
614+
if self.kind == u'string':
615615
if encoding is not None:
616616
return self.converted
617617
return '"%s"' % self.converted
618+
elif self.kind == u'float':
619+
# python 2 str(float) is not always
620+
# round-trippable so use repr()
621+
return repr(self.converted)
618622
return self.converted
619623

620624

pandas/computation/tests/test_eval.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,31 @@ def test_line_continuation(self):
678678
result = pd.eval(exp, engine=self.engine, parser=self.parser)
679679
self.assertEqual(result, 12)
680680

681+
def test_float_truncation(self):
682+
# GH 14241
683+
exp = '1000000000.006'
684+
result = pd.eval(exp, engine=self.engine, parser=self.parser)
685+
expected = np.float64(exp)
686+
self.assertEqual(result, expected)
687+
688+
df = pd.DataFrame({'A': [1000000000.0009,
689+
1000000000.0011,
690+
1000000000.0015]})
691+
cutoff = 1000000000.0006
692+
result = df.query("A < %.4f" % cutoff)
693+
self.assertTrue(result.empty)
694+
695+
cutoff = 1000000000.0010
696+
result = df.query("A > %.4f" % cutoff)
697+
expected = df.loc[[1, 2], :]
698+
tm.assert_frame_equal(expected, result)
699+
700+
exact = 1000000000.0011
701+
result = df.query('A == %.4f' % exact)
702+
expected = df.loc[[1], :]
703+
tm.assert_frame_equal(expected, result)
704+
705+
681706

682707
class TestEvalNumexprPython(TestEvalNumexprPandas):
683708

pandas/io/tests/test_pytables.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5002,6 +5002,29 @@ def test_read_from_py_localpath(self):
50025002

50035003
tm.assert_frame_equal(expected, actual)
50045004

5005+
def test_query_long_float_literal(self):
5006+
# GH 14241
5007+
df = pd.DataFrame({'A': [1000000000.0009,
5008+
1000000000.0011,
5009+
1000000000.0015]})
5010+
5011+
with ensure_clean_store(self.path) as store:
5012+
store.append('test', df, format='table', data_columns=True)
5013+
5014+
cutoff = 1000000000.0006
5015+
result = store.select('test', "A < %.4f" % cutoff)
5016+
self.assertTrue(result.empty)
5017+
5018+
cutoff = 1000000000.0010
5019+
result = store.select('test', "A > %.4f" % cutoff)
5020+
expected = df.loc[[1, 2], :]
5021+
tm.assert_frame_equal(expected, result)
5022+
5023+
exact = 1000000000.0011
5024+
result = store.select('test', 'A == %.4f' % exact)
5025+
expected = df.loc[[1], :]
5026+
tm.assert_frame_equal(expected, result)
5027+
50055028

50065029
class TestHDFComplexValues(Base):
50075030
# GH10447

0 commit comments

Comments
 (0)