-
Notifications
You must be signed in to change notification settings - Fork 125
test: improve `to_gbq` logic unit test coverage
#449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 38 commits
9a9d3fd
6adf233
73a791a
9b1eb0d
ec9ddaf
9cc7c74
dd51ad8
e1ad679
d29bc2a
cb8f24f
56b73b2
8a61e97
3f7900b
3c53f1f
5ce125f
ea660f4
3d93c78
93e872e
e0ae455
28b72f0
ed61f6d
1d6831f
fcf8276
dfb107e
76b38a3
2fd1e32
d97102e
c1f8055
c48f997
9e67138
83c4513
4f12c78
4fedaaf
2bfd5a1
24574a8
3cb788e
95f0478
0f9baa8
7ac9b6a
d562ee9
5599915
5223fa4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,7 +121,20 @@ class InvalidSchema(ValueError): | |
table in BigQuery. | ||
""" | ||
|
||
pass | ||
def __init__( | ||
self, message: str, local_schema: Dict[str, Any], remote_schema: Dict[str, Any] | ||
plamut marked this conversation as resolved.
Show resolved
Hide resolved
|
||
): | ||
super().__init__(message) | ||
self._local_schema = local_schema | ||
self._remote_schema = remote_schema | ||
|
||
@property | ||
def local_schema(self) -> Dict[str, Any]: | ||
return self._local_schema | ||
|
||
@property | ||
def remote_schema(self) -> Dict[str, Any]: | ||
return self._remote_schema | ||
|
||
|
||
class NotFoundException(ValueError): | ||
|
@@ -354,19 +367,12 @@ def sizeof_fmt(num, suffix="B"): | |
return fmt % (num, "Y", suffix) | ||
|
||
def get_client(self): | ||
import google.api_core.client_info | ||
import pandas | ||
|
||
try: | ||
# This module was added in google-api-core 1.11.0. | ||
# We don't have a hard requirement on that version, so only | ||
# populate the client_info if available. | ||
import google.api_core.client_info | ||
|
||
client_info = google.api_core.client_info.ClientInfo( | ||
user_agent="pandas-{}".format(pandas.__version__) | ||
) | ||
except ImportError: | ||
client_info = None | ||
client_info = google.api_core.client_info.ClientInfo( | ||
user_agent="pandas-{}".format(pandas.__version__) | ||
) | ||
|
||
# In addition to new enough version of google-api-core, a new enough | ||
# version of google-cloud-bigquery is required to populate the | ||
|
@@ -1057,7 +1063,7 @@ def to_gbq( | |
DeprecationWarning, | ||
stacklevel=2, | ||
) | ||
elif api_method == "load_csv": | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's already a check above for known |
||
warnings.warn( | ||
"chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq", | ||
PendingDeprecationWarning, | ||
|
@@ -1122,12 +1128,14 @@ def to_gbq( | |
) | ||
elif if_exists == "replace": | ||
connector.delete_and_recreate_table(dataset_id, table_id, table_schema) | ||
elif if_exists == "append": | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's already a check above for known |
||
if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema): | ||
raise InvalidSchema( | ||
"Please verify that the structure and " | ||
"data types in the DataFrame match the " | ||
"schema of the destination table." | ||
"schema of the destination table.", | ||
table_schema, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Towards #349 |
||
original_schema, | ||
) | ||
|
||
# Update the local `table_schema` so mode (NULLABLE/REQUIRED) | ||
|
@@ -1283,9 +1291,6 @@ def delete(self, table_id): | |
""" | ||
from google.api_core.exceptions import NotFound | ||
|
||
if not self.exists(table_id): | ||
raise NotFoundException("Table does not exist") | ||
|
||
table_ref = self._table_ref(table_id) | ||
try: | ||
self.client.delete_table(table_ref) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,36 @@ | |
import pytest | ||
|
||
|
||
def mock_get_credentials(*args, **kwargs): | ||
import google.auth.credentials | ||
|
||
mock_credentials = mock.create_autospec(google.auth.credentials.Credentials) | ||
return mock_credentials, "default-project" | ||
|
||
|
||
@pytest.fixture | ||
def mock_service_account_credentials(): | ||
import google.oauth2.service_account | ||
|
||
mock_credentials = mock.create_autospec(google.oauth2.service_account.Credentials) | ||
return mock_credentials | ||
|
||
|
||
@pytest.fixture | ||
def mock_compute_engine_credentials(): | ||
import google.auth.compute_engine | ||
|
||
mock_credentials = mock.create_autospec(google.auth.compute_engine.Credentials) | ||
return mock_credentials | ||
|
||
|
||
@pytest.fixture(autouse=True) | ||
def no_auth(monkeypatch): | ||
import pydata_google_auth | ||
|
||
monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials) | ||
|
||
|
||
@pytest.fixture(autouse=True, scope="function") | ||
def reset_context(): | ||
import pandas_gbq | ||
|
@@ -20,41 +50,12 @@ def reset_context(): | |
@pytest.fixture(autouse=True) | ||
def mock_bigquery_client(monkeypatch): | ||
import google.cloud.bigquery | ||
import google.cloud.bigquery.table | ||
|
||
mock_client = mock.create_autospec(google.cloud.bigquery.Client) | ||
# Constructor returns the mock itself, so this mock can be treated as the | ||
# constructor or the instance. | ||
mock_client.return_value = mock_client | ||
|
||
mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) | ||
mock_query.job_id = "some-random-id" | ||
mock_query.state = "DONE" | ||
mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) | ||
mock_rows.total_rows = 1 | ||
|
||
mock_rows.__iter__.return_value = [(1,)] | ||
mock_query.result.return_value = mock_rows | ||
mock_client.list_rows.return_value = mock_rows | ||
mock_client.query.return_value = mock_query | ||
# Mock table creation. | ||
monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client) | ||
mock_client.reset_mock() | ||
|
||
# Mock out SELECT 1 query results. | ||
def generate_schema(): | ||
query = mock_client.query.call_args[0][0] if mock_client.query.call_args else "" | ||
if query == "SELECT 1 AS int_col": | ||
return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] | ||
else: | ||
return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] | ||
|
||
type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) | ||
|
||
# Mock out get_table. | ||
def get_table(table_ref_or_id, **kwargs): | ||
return google.cloud.bigquery.Table(table_ref_or_id) | ||
|
||
mock_client.get_table.side_effect = get_table | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was messing with the |
||
|
||
return mock_client |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still not at 100% coverage, but it's solid progress. I don't want this PR to get too long. Mostly I started working on unit tests because they're just mindless enough that I can handle it while I'm sick this week.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's fine to merge a meaningful piece of work into `main`; it doesn't have to be all-or-nothing. This PR by itself is already a significant improvement. (I will review this tomorrow; I did not manage to go through everything today.)