Skip to content

Commit f1995f8

Browse files
authored
BUG: use greater precision when serializing floating points (#336)
* BUG: use greater precision when serializing floating points

  This allows the exact binary representation to be transferred correctly, round-trip.

* blacken
* remove f-string
* adjust string formatting
1 parent 94a93ca commit f1995f8

File tree

5 files changed

+86
-10
lines changed

5 files changed

+86
-10
lines changed

conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Shared pytest fixtures for system tests."""
1+
"""Shared pytest fixtures for `tests/system` and `samples/tests` tests."""
22

33
import os
44
import os.path

docs/source/changelog.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
Changelog
22
=========
33

4+
.. _changelog-0.14.1:
5+
6+
0.14.1 / TBD
7+
------------
8+
9+
Bug fixes
10+
~~~~~~~~~
11+
12+
- Encode floating point values with greater precision. (:issue:`326`)
13+
14+
415
.. _changelog-0.14.0:
516

617
0.14.0 / 2020-10-05

pandas_gbq/load.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def encode_chunk(dataframe):
1919
index=False,
2020
header=False,
2121
encoding="utf-8",
22-
float_format="%.15g",
22+
float_format="%.17g",
2323
date_format="%Y-%m-%d %H:%M:%S.%f",
2424
)
2525

tests/system/test_to_gbq.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import functools
2+
import pandas
3+
import pandas.testing
4+
5+
import pytest
6+
7+
8+
pytest.importorskip("google.cloud.bigquery", minversion="1.24.0")
9+
10+
11+
@pytest.fixture
12+
def method_under_test(credentials):
13+
import pandas_gbq
14+
15+
return functools.partial(pandas_gbq.to_gbq, credentials=credentials)
16+
17+
18+
def test_float_round_trip(
19+
method_under_test, random_dataset_id, bigquery_client
20+
):
21+
"""Ensure that 64-bit floating point numbers are unchanged.
22+
23+
See: https://github.com/pydata/pandas-gbq/issues/326
24+
"""
25+
26+
table_id = "{}.float_round_trip".format(random_dataset_id)
27+
input_floats = pandas.Series(
28+
[
29+
0.14285714285714285,
30+
0.4406779661016949,
31+
1.05148,
32+
1.05153,
33+
1.8571428571428572,
34+
2.718281828459045,
35+
3.141592653589793,
36+
2.0988936657440586e43,
37+
],
38+
name="float_col",
39+
)
40+
df = pandas.DataFrame({"float_col": input_floats})
41+
method_under_test(df, table_id)
42+
43+
round_trip = bigquery_client.list_rows(table_id).to_dataframe()
44+
round_trip_floats = round_trip["float_col"].sort_values()
45+
pandas.testing.assert_series_equal(
46+
round_trip_floats,
47+
input_floats,
48+
check_exact=True,
49+
)

tests/unit/test_load.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
import textwrap
34
from io import StringIO
45

56
import numpy
@@ -24,17 +25,32 @@ def test_encode_chunk_with_unicode():
2425

2526

2627
def test_encode_chunk_with_floats():
27-
"""Test that floats in a dataframe are encoded with at most 15 significant
28+
"""Test that floats in a dataframe are encoded with at most 17 significant
2829
figures.
2930
30-
See: https://github.com/pydata/pandas-gbq/issues/192
31+
See: https://github.com/pydata/pandas-gbq/issues/192 and
32+
https://github.com/pydata/pandas-gbq/issues/326
3133
"""
32-
input_csv = StringIO(u"01/01/17 23:00,1.05148,1.05153,1.05148,1.05153,4")
33-
df = pandas.read_csv(input_csv, header=None)
34-
csv_buffer = load.encode_chunk(df)
35-
csv_bytes = csv_buffer.read()
36-
csv_string = csv_bytes.decode("utf-8")
37-
assert "1.05153" in csv_string
34+
input_csv = textwrap.dedent(
35+
"""01/01/17 23:00,0.14285714285714285,4
36+
01/02/17 22:00,1.05148,3
37+
01/03/17 21:00,1.05153,2
38+
01/04/17 20:00,3.141592653589793,1
39+
01/05/17 19:00,2.0988936657440586e+43,0
40+
"""
41+
)
42+
input_df = pandas.read_csv(
43+
StringIO(input_csv), header=None, float_precision="round_trip"
44+
)
45+
csv_buffer = load.encode_chunk(input_df)
46+
round_trip = pandas.read_csv(
47+
csv_buffer, header=None, float_precision="round_trip"
48+
)
49+
pandas.testing.assert_frame_equal(
50+
round_trip,
51+
input_df,
52+
check_exact=True,
53+
)
3854

3955

4056
def test_encode_chunk_with_newlines():

0 commit comments

Comments
 (0)