Skip to content

Commit 2104b71

Browse files
fix!: `to_gbq` uploads `ArrowDtype(pa.timestamp(...))` without timezone as `DATETIME` type (#832)
* fix!: `to_gbq` uploads `ArrowDtype(pa.timestamp(...))` without timezone as `DATETIME` type

  Release-As: 0.25.0

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 78aa01e commit 2104b71

File tree

2 files changed: +43 additions, -2 deletions

pandas_gbq/schema/pyarrow_to_bigquery.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,15 @@
3838

3939

4040
def arrow_type_to_bigquery_field(name, type_) -> Optional[schema.SchemaField]:
41+
# Since both TIMESTAMP/DATETIME use pyarrow.timestamp(...), we need to use
42+
# a special case to disambiguate them. See:
43+
# https://github.com/googleapis/python-bigquery-pandas/issues/450
44+
if pyarrow.types.is_timestamp(type_):
45+
if type_.tz is None:
46+
return schema.SchemaField(name, "DATETIME")
47+
else:
48+
return schema.SchemaField(name, "TIMESTAMP")
49+
4150
detected_type = _ARROW_SCALAR_IDS_TO_BQ.get(type_.id, None)
4251
if detected_type is not None:
4352
return schema.SchemaField(name, detected_type)

tests/unit/schema/test_pyarrow_to_bigquery.py

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,21 +2,53 @@
22
# Use of this source code is governed by a BSD-style
33
# license that can be found in the LICENSE file.
44

5+
from google.cloud import bigquery
56
import pyarrow
7+
import pytest
68

79
from pandas_gbq.schema import pyarrow_to_bigquery
810

911

12+
@pytest.mark.parametrize(
13+
(
14+
"pyarrow_type",
15+
"bigquery_type",
16+
),
17+
(
18+
# All integer types should map to BigQuery INT64 (or INTEGER since
19+
# SchemaField uses the legacy SQL names). See:
20+
# https://github.com/googleapis/python-bigquery-pandas/issues/616
21+
(pyarrow.int8(), "INTEGER"),
22+
(pyarrow.int16(), "INTEGER"),
23+
(pyarrow.int32(), "INTEGER"),
24+
(pyarrow.int64(), "INTEGER"),
25+
(pyarrow.uint8(), "INTEGER"),
26+
(pyarrow.uint16(), "INTEGER"),
27+
(pyarrow.uint32(), "INTEGER"),
28+
(pyarrow.uint64(), "INTEGER"),
29+
# If there is no associated timezone, assume a naive (timezone-less)
30+
# DATETIME. See:
31+
# https://github.com/googleapis/python-bigquery-pandas/issues/450
32+
(pyarrow.timestamp("ns"), "DATETIME"),
33+
(pyarrow.timestamp("ns", tz="UTC"), "TIMESTAMP"),
34+
),
35+
)
36+
def test_arrow_type_to_bigquery_field_scalar_types(pyarrow_type, bigquery_type):
37+
field: bigquery.SchemaField = pyarrow_to_bigquery.arrow_type_to_bigquery_field(
38+
"test_name", pyarrow_type
39+
)
40+
assert field.name == "test_name"
41+
assert field.field_type == bigquery_type
42+
43+
1044
def test_arrow_type_to_bigquery_field_unknown():
11-
# Default types should be picked at a higher layer.
1245
assert (
1346
pyarrow_to_bigquery.arrow_type_to_bigquery_field("test_name", pyarrow.null())
1447
is None
1548
)
1649

1750

1851
def test_arrow_type_to_bigquery_field_list_of_unknown():
19-
# Default types should be picked at a higher layer.
2052
assert (
2153
pyarrow_to_bigquery.arrow_type_to_bigquery_field(
2254
"test_name", pyarrow.list_(pyarrow.null())

0 commit comments

Comments
 (0)