Skip to content

Commit 933d470

Browse files
committed
fix: read out-of-bounds DATETIME values such as 0001-01-01 00:00:00
deps: require google-cloud-bigquery 1.26.1 or later
1 parent 928e47b commit 933d470

File tree

5 files changed

+66
-11
lines changed

5 files changed

+66
-11
lines changed

ci/requirements-3.7-0.24.2.conda

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
codecov
22
coverage
3-
db-dtypes==0.3.0
3+
db-dtypes==0.3.1
44
fastavro
55
flake8
66
numpy==1.16.6
7-
google-cloud-bigquery==1.11.1
7+
google-cloud-bigquery==1.26.1
8+
google-cloud-bigquery-storage==1.1.0
89
pyarrow==3.0.0
910
pydata-google-auth
1011
pytest

pandas_gbq/load.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,13 @@ def cast_dataframe_for_parquet(
9494
# .astype() with DateDtype. With .astype(), I get the error:
9595
#
9696
# TypeError: Cannot interpret '<db_dtypes.DateDtype ...>' as a data type
97-
cast_column = pandas.Series(
98-
dataframe[column_name], dtype=db_dtypes.DateDtype()
97+
cast_column = dataframe[column_name].astype(
98+
dtype=db_dtypes.DateDtype(),
99+
# Return the original column if there was an error converting
100+
# to the dtype, such as if there is a date outside the
101+
# supported range.
102+
# https://github.com/googleapis/python-bigquery-pandas/issues/441
103+
errors="ignore",
99104
)
100105
elif column_type in {"NUMERIC", "DECIMAL", "BIGNUMERIC", "BIGDECIMAL"}:
101106
cast_column = dataframe[column_name].map(decimal.Decimal)

setup.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,17 @@
2323
release_status = "Development Status :: 4 - Beta"
2424
dependencies = [
2525
"setuptools",
26-
"db-dtypes >=0.3.0,<2.0.0",
27-
"numpy>=1.16.6",
28-
"pandas>=0.24.2",
26+
"db-dtypes >=0.3.1,<2.0.0",
27+
"numpy >=1.16.6",
28+
"pandas >=0.24.2",
2929
"pyarrow >=3.0.0, <7.0dev",
3030
"pydata-google-auth",
3131
"google-auth",
3232
"google-auth-oauthlib",
3333
# 2.4.* has a bug where waiting for the query can hang indefinitely.
3434
# https://github.com/pydata/pandas-gbq/issues/343
35-
"google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*",
35+
"google-cloud-bigquery >=1.26.1,<3.0.0dev,!=2.4.*",
36+
"google-cloud-bigquery-storage >=1.1.0,<3.0.0dev",
3637
]
3738
extras = {
3839
"tqdm": "tqdm>=4.23.0",

testing/constraints-3.7.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
#
66
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
77
# Then this file should have foo==1.14.0
8-
db-dtypes==0.3.0
9-
google-auth==1.4.1
8+
db-dtypes==0.3.1
9+
google-auth==1.18.0
1010
google-auth-oauthlib==0.0.1
11-
google-cloud-bigquery==1.11.1
11+
google-cloud-bigquery==1.26.1
1212
google-cloud-bigquery-storage==1.1.0
1313
numpy==1.16.6
1414
pandas==0.24.2

tests/system/test_to_gbq.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,54 @@ def test_series_round_trip(
188188
{"name": "num_col", "type": "NUMERIC"},
189189
],
190190
),
191+
pytest.param(
192+
*DataFrameRoundTripTestCase(
193+
input_df=pandas.DataFrame(
194+
{
195+
"row_num": [1, 2, 3],
196+
# DATE values outside the pandas range for timestamp
197+
# aren't supported by the db-dtypes package.
198+
# https://github.com/googleapis/python-bigquery-pandas/issues/441
199+
"date_col": [
200+
datetime.date(1, 1, 1),
201+
datetime.date(1970, 1, 1),
202+
datetime.date(9999, 12, 31),
203+
],
204+
# DATETIME values outside of the range for pandas timestamp
205+
# require `date_as_object` parameter in
206+
# google-cloud-bigquery versions 1.x and 2.x.
207+
# https://github.com/googleapis/python-bigquery-pandas/issues/365
208+
"datetime_col": [
209+
datetime.datetime(1, 1, 1),
210+
datetime.datetime(1970, 1, 1),
211+
datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
212+
],
213+
"timestamp_col": [
214+
datetime.datetime(1, 1, 1, tzinfo=datetime.timezone.utc),
215+
datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
216+
datetime.datetime(
217+
9999,
218+
12,
219+
31,
220+
23,
221+
59,
222+
59,
223+
999999,
224+
tzinfo=datetime.timezone.utc,
225+
),
226+
],
227+
},
228+
columns=["row_num", "date_col", "datetime_col", "timestamp_col"],
229+
),
230+
table_schema=[
231+
{"name": "row_num", "type": "INTEGER"},
232+
{"name": "date_col", "type": "DATE"},
233+
{"name": "datetime_col", "type": "DATETIME"},
234+
{"name": "timestamp_col", "type": "TIMESTAMP"},
235+
],
236+
),
237+
id="issue365-extreme-datetimes",
238+
),
191239
]
192240

193241

0 commit comments

Comments
 (0)