
Commit 0e3e3f0

BUG: update library to support google-cloud-bigquery 2.0 (#334)
* BUG: update library to support google-cloud-bigquery 2.0. Removes references to the BQ Storage API beta endpoint.
* fix unit test mocks
* add date to changelog
1 parent 46c579a commit 0e3e3f0
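
In short: instead of constructing a bigquery_storage_v1beta1.BigQueryStorageClient by hand, the library now relies on the create_bqstorage_client keyword that RowIterator.to_dataframe gained in google-cloud-bigquery 1.24.0. A minimal sketch of the new call pattern (the query is illustrative):

from google.cloud import bigquery

client = bigquery.Client()
query_job = client.query("SELECT 1 AS int_col")  # illustrative query
query_job.result()
rows = client.list_rows(client.get_table(query_job.destination))

# With google-cloud-bigquery >= 1.24.0, to_dataframe() creates (and
# cleans up) the BigQuery Storage client itself when asked.
df = rows.to_dataframe(create_bqstorage_client=True)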

8 files changed: +85, -56 lines changed

conftest.py

Lines changed: 4 additions & 1 deletion
@@ -56,9 +56,12 @@ def bigquery_client(project_id, private_key_path):
 @pytest.fixture()
 def random_dataset_id(bigquery_client):
     import google.api_core.exceptions
+    from google.cloud import bigquery
 
     dataset_id = "".join(["pandas_gbq_", str(uuid.uuid4()).replace("-", "_")])
-    dataset_ref = bigquery_client.dataset(dataset_id)
+    dataset_ref = bigquery.DatasetReference(
+        bigquery_client.project, dataset_id
+    )
     yield dataset_id
     try:
         bigquery_client.delete_dataset(dataset_ref, delete_contents=True)
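
Context for this change: the Client.dataset() helper was deprecated in google-cloud-bigquery 1.x and removed in 2.0, so the fixture builds the reference directly. A minimal sketch of the replacement pattern, assuming a default client and an illustrative dataset ID:

from google.cloud import bigquery

client = bigquery.Client()
# Removed in google-cloud-bigquery 2.0: client.dataset("my_dataset")
# Replacement: construct the reference explicitly.
dataset_ref = bigquery.DatasetReference(client.project, "my_dataset")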

docs/source/changelog.rst

Lines changed: 3 additions & 2 deletions
@@ -3,8 +3,8 @@ Changelog
 
 .. _changelog-0.14.0:
 
-0.14.0 / TBD
-------------
+0.14.0 / 2020-10-05
+-------------------
 
 - Add ``dtypes`` argument to ``read_gbq``. Use this argument to override the
   default ``dtype`` for a particular column in the query results. For

@@ -22,6 +22,7 @@ Changelog
 Dependency updates
 ~~~~~~~~~~~~~~~~~~
 
+- Support ``google-cloud-bigquery-storage`` 2.0 and higher. (:issue:`329`)
 - Update the minimum version of ``pandas`` to 0.20.1.
   (:issue:`331`)
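
For reference, the ``dtypes`` argument mentioned in the changelog maps column names in the query result to pandas dtypes. A hedged usage sketch (query, column, and dtype are illustrative):

import pandas_gbq

# Force int_col to pandas' nullable Int64 dtype instead of the default.
df = pandas_gbq.read_gbq(
    "SELECT 1 AS int_col",
    dialect="standard",
    dtypes={"int_col": "Int64"},
)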

pandas_gbq/exceptions.py

Lines changed: 9 additions & 0 deletions
@@ -12,3 +12,12 @@ class InvalidPrivateKeyFormat(ValueError):
     """
 
     pass
+
+
+class PerformanceWarning(RuntimeWarning):
+    """
+    Raised when a performance-related feature is requested, but unsupported.
+
+    Such warnings can occur when dependencies for the requested feature
+    aren't up-to-date.
+    """

pandas_gbq/gbq.py

Lines changed: 35 additions & 43 deletions
@@ -14,14 +14,8 @@
     bigquery = None
     google_exceptions = None
 
-try:
-    # The BigQuery Storage API client is an optional dependency. It is only
-    # required when use_bqstorage_api=True.
-    from google.cloud import bigquery_storage_v1beta1
-except ImportError:  # pragma: NO COVER
-    bigquery_storage_v1beta1 = None
-
 from pandas_gbq.exceptions import AccessDenied
+from pandas_gbq.exceptions import PerformanceWarning
 import pandas_gbq.schema
 import pandas_gbq.timestamp
 

@@ -30,7 +24,9 @@
 
 BIGQUERY_INSTALLED_VERSION = None
 BIGQUERY_CLIENT_INFO_VERSION = "1.12.0"
+BIGQUERY_BQSTORAGE_VERSION = "1.24.0"
 HAS_CLIENT_INFO = False
+HAS_BQSTORAGE_SUPPORT = False
 
 try:
     import tqdm  # noqa

@@ -39,26 +35,32 @@
 
 
 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, HAS_CLIENT_INFO, SHOW_VERBOSE_DEPRECATION
+    global BIGQUERY_INSTALLED_VERSION, HAS_CLIENT_INFO, HAS_BQSTORAGE_SUPPORT, SHOW_VERBOSE_DEPRECATION
 
     try:
         import pkg_resources
 
     except ImportError:
         raise ImportError("Could not import pkg_resources (setuptools).")
 
-    # https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md
+    # https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md
     bigquery_minimum_version = pkg_resources.parse_version("1.11.0")
     bigquery_client_info_version = pkg_resources.parse_version(
         BIGQUERY_CLIENT_INFO_VERSION
     )
+    bigquery_bqstorage_version = pkg_resources.parse_version(
+        BIGQUERY_BQSTORAGE_VERSION
+    )
     BIGQUERY_INSTALLED_VERSION = pkg_resources.get_distribution(
         "google-cloud-bigquery"
     ).parsed_version
 
     HAS_CLIENT_INFO = (
         BIGQUERY_INSTALLED_VERSION >= bigquery_client_info_version
     )
+    HAS_BQSTORAGE_SUPPORT = (
+        BIGQUERY_INSTALLED_VERSION >= bigquery_bqstorage_version
+    )
 
     if BIGQUERY_INSTALLED_VERSION < bigquery_minimum_version:
         raise ImportError(

@@ -548,14 +550,30 @@ def _download_results(
         if user_dtypes is None:
             user_dtypes = {}
 
-        try:
-            bqstorage_client = None
-            if max_results is None:
-                # Only use the BigQuery Storage API if the full result set is requested.
-                bqstorage_client = _make_bqstorage_client(
-                    self.use_bqstorage_api, self.credentials
-                )
+        if self.use_bqstorage_api and not HAS_BQSTORAGE_SUPPORT:
+            warnings.warn(
+                (
+                    "use_bqstorage_api was set, but have google-cloud-bigquery "
+                    "version {}. Requires google-cloud-bigquery version "
+                    "{} or later."
+                ).format(
+                    BIGQUERY_INSTALLED_VERSION, BIGQUERY_BQSTORAGE_VERSION
+                ),
+                PerformanceWarning,
+                stacklevel=4,
+            )
+
+        create_bqstorage_client = self.use_bqstorage_api
+        if max_results is not None:
+            create_bqstorage_client = False
+
+        to_dataframe_kwargs = {}
+        if HAS_BQSTORAGE_SUPPORT:
+            to_dataframe_kwargs[
+                "create_bqstorage_client"
+            ] = create_bqstorage_client
 
+        try:
             query_job.result()
             # Get the table schema, so that we can list rows.
             destination = self.client.get_table(query_job.destination)

@@ -568,16 +586,11 @@ def _download_results(
             conversion_dtypes.update(user_dtypes)
             df = rows_iter.to_dataframe(
                 dtypes=conversion_dtypes,
-                bqstorage_client=bqstorage_client,
                 progress_bar_type=progress_bar_type,
+                **to_dataframe_kwargs
             )
         except self.http_error as ex:
             self.process_http_error(ex)
-        finally:
-            if bqstorage_client:
-                # Clean up open socket resources. See:
-                # https://github.com/pydata/pandas-gbq/issues/294
-                bqstorage_client.transport.channel.close()
 
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)

@@ -763,27 +776,6 @@ def _cast_empty_df_dtypes(schema_fields, df):
     return df
 
 
-def _make_bqstorage_client(use_bqstorage_api, credentials):
-    if not use_bqstorage_api:
-        return None
-
-    if bigquery_storage_v1beta1 is None:
-        raise ImportError(
-            "Install the google-cloud-bigquery-storage and fastavro/pyarrow "
-            "packages to use the BigQuery Storage API."
-        )
-
-    import google.api_core.gapic_v1.client_info
-    import pandas
-
-    client_info = google.api_core.gapic_v1.client_info.ClientInfo(
-        user_agent="pandas-{}".format(pandas.__version__)
-    )
-    return bigquery_storage_v1beta1.BigQueryStorageClient(
-        credentials=credentials, client_info=client_info
-    )
-
-
 def read_gbq(
     query,
     project_id=None,
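
Taken together, the gbq.py changes make the download path: warn (but continue over the REST API) when google-cloud-bigquery is older than 1.24.0, and otherwise let to_dataframe() manage the Storage client. A condensed sketch of that gating logic, mirroring the module constants above (the helper name is illustrative):

import pkg_resources

BIGQUERY_BQSTORAGE_VERSION = "1.24.0"

def to_dataframe_kwargs(use_bqstorage_api, max_results=None):
    # Sketch of the decision made in _download_results above.
    installed = pkg_resources.get_distribution(
        "google-cloud-bigquery"
    ).parsed_version
    has_support = installed >= pkg_resources.parse_version(
        BIGQUERY_BQSTORAGE_VERSION
    )
    # Only use the Storage API when the full result set is requested.
    create = use_bqstorage_api and max_results is None
    return {"create_bqstorage_client": create} if has_support else {}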

setup.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def readme():
     "pydata-google-auth",
     "google-auth",
     "google-auth-oauthlib",
-    "google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<2.0.0dev",
+    "google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev",
 ]
 
 extras = {"tqdm": "tqdm>=4.23.0"}

tests/system/conftest.py

Lines changed: 6 additions & 2 deletions
@@ -28,7 +28,9 @@ def gbq_connector(project, credentials):
 def random_dataset(bigquery_client, random_dataset_id):
     from google.cloud import bigquery
 
-    dataset_ref = bigquery_client.dataset(random_dataset_id)
+    dataset_ref = bigquery.DatasetReference(
+        bigquery_client.project, random_dataset_id
+    )
     dataset = bigquery.Dataset(dataset_ref)
     bigquery_client.create_dataset(dataset)
     return dataset

@@ -38,7 +40,9 @@ def random_dataset(bigquery_client, random_dataset_id):
 def tokyo_dataset(bigquery_client, random_dataset_id):
     from google.cloud import bigquery
 
-    dataset_ref = bigquery_client.dataset(random_dataset_id)
+    dataset_ref = bigquery.DatasetReference(
+        bigquery_client.project, random_dataset_id
+    )
     dataset = bigquery.Dataset(dataset_ref)
     dataset.location = "asia-northeast1"
     bigquery_client.create_dataset(dataset)

tests/system/test_read_gbq_with_bqstorage.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 import pytest
 
 
-pytest.importorskip("google.cloud.bigquery_storage_v1beta1")
+pytest.importorskip("google.cloud.bigquery", minversion="1.24.0")
 
 
 @pytest.fixture
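
pytest.importorskip with minversion compares against the imported module's __version__, so the whole test module is skipped on older installs. The same guard, as a standalone sketch:

import pytest

# Skip unless google-cloud-bigquery >= 1.24.0 (the first release with
# create_bqstorage_client support) is installed.
bigquery = pytest.importorskip("google.cloud.bigquery", minversion="1.24.0")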

tests/unit/test_gbq.py

Lines changed: 26 additions & 6 deletions
@@ -13,7 +13,7 @@
 from pandas_gbq import gbq
 
 
-pytestmark = pytest.mark.filter_warnings(
+pytestmark = pytest.mark.filterwarnings(
     "ignore:credentials from Google Cloud SDK"
 )
 pandas_installed_version = pkg_resources.get_distribution(

@@ -490,9 +490,30 @@ def test_read_gbq_passes_dtypes(
 
     mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100)
 
+    _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args
+    assert to_dataframe_kwargs["dtypes"] == {"int_col": "my-custom-dtype"}
+
+
+def test_read_gbq_use_bqstorage_api(
+    mock_bigquery_client, mock_service_account_credentials
+):
+    gbq._check_google_client_version()
+    if not gbq.HAS_BQSTORAGE_SUPPORT:
+        pytest.skip("requires BigQuery Storage API")
+
+    mock_service_account_credentials.project_id = "service_account_project_id"
+    df = gbq.read_gbq(
+        "SELECT 1 AS int_col",
+        dialect="standard",
+        credentials=mock_service_account_credentials,
+        use_bqstorage_api=True,
+    )
+    assert df is not None
+
+    mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100)
     mock_list_rows.to_dataframe.assert_called_once_with(
-        dtypes={"int_col": "my-custom-dtype"},
-        bqstorage_client=mock.ANY,
+        create_bqstorage_client=True,
+        dtypes=mock.ANY,
         progress_bar_type=mock.ANY,
     )

@@ -511,6 +532,5 @@ def test_read_gbq_calls_tqdm(
 
     mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100)
 
-    mock_list_rows.to_dataframe.assert_called_once_with(
-        dtypes=mock.ANY, bqstorage_client=mock.ANY, progress_bar_type="foobar"
-    )
+    _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args
+    assert to_dataframe_kwargs["progress_bar_type"] == "foobar"
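
Asserting on call_args rather than assert_called_once_with keeps these tests agnostic to whether create_bqstorage_client is passed, which depends on the installed google-cloud-bigquery version. The inspection pattern in isolation (the mock and kwargs are illustrative):

from unittest import mock

to_dataframe = mock.Mock()
to_dataframe(dtypes=None, progress_bar_type="foobar")

# call_args is an (args, kwargs) pair; check only the kwargs under test
# and ignore any others that may or may not be present.
_, kwargs = to_dataframe.call_args
assert kwargs["progress_bar_type"] == "foobar"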
