import numpy as np
import pandas
import pandas.api.types
-import pandas.util.testing as tm
+import pandas.testing as tm
from pandas import DataFrame, NaT

try:
@@ -21,6 +21,7 @@
import pytz

from pandas_gbq import gbq
+from pandas_gbq.features import FEATURES
import pandas_gbq.schema


@@ -32,6 +33,18 @@ def test_imports():
    gbq._test_google_api_imports()


+def make_mixed_dataframe_v1():
+    # Re-implementation of private pandas.util.testing.makeMixedDataFrame
+    return pandas.DataFrame(
+        {
+            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
+            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
+            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
+            "D": pandas.bdate_range("1/1/2009", periods=5),
+        }
+    )
+
+
def make_mixed_dataframe_v2(test_size):
    # create df to test for all BQ datatypes except RECORD
    bools = np.random.randint(2, size=(1, test_size)).astype(bool)
@@ -168,7 +181,7 @@ def test_should_properly_handle_valid_integers(self, project_id):
            credentials=self.credentials,
            dialect="standard",
        )
-        tm.assert_frame_equal(df, DataFrame({"valid_integer": [3]}))
+        tm.assert_frame_equal(df, DataFrame({"valid_integer": [3]}, dtype="Int64"))

    def test_should_properly_handle_nullable_integers(self, project_id):
        query = """SELECT * FROM
@@ -194,7 +207,7 @@ def test_should_properly_handle_valid_longs(self, project_id):
            credentials=self.credentials,
            dialect="standard",
        )
-        tm.assert_frame_equal(df, DataFrame({"valid_long": [1 << 62]}))
+        tm.assert_frame_equal(df, DataFrame({"valid_long": [1 << 62]}, dtype="Int64"))

    def test_should_properly_handle_nullable_longs(self, project_id):
        query = """SELECT * FROM
@@ -433,7 +446,10 @@ def test_should_properly_handle_null_boolean(self, project_id):
            credentials=self.credentials,
            dialect="legacy",
        )
-        tm.assert_frame_equal(df, DataFrame({"null_boolean": [None]}))
+        expected_dtype = "boolean" if FEATURES.pandas_has_boolean_dtype else None
+        tm.assert_frame_equal(
+            df, DataFrame({"null_boolean": [None]}, dtype=expected_dtype)
+        )

    def test_should_properly_handle_nullable_booleans(self, project_id):
        query = """SELECT * FROM
@@ -445,8 +461,9 @@ def test_should_properly_handle_nullable_booleans(self, project_id):
            credentials=self.credentials,
            dialect="legacy",
        )
+        expected_dtype = "boolean" if FEATURES.pandas_has_boolean_dtype else None
        tm.assert_frame_equal(
-            df, DataFrame({"nullable_boolean": [True, None]}).astype(object)
+            df, DataFrame({"nullable_boolean": [True, None]}, dtype=expected_dtype)
        )

    def test_unicode_string_conversion_and_normalization(self, project_id):
@@ -629,7 +646,7 @@ def test_one_row_one_column(self, project_id):
            credentials=self.credentials,
            dialect="standard",
        )
-        expected_result = DataFrame(dict(v=[3]))
+        expected_result = DataFrame(dict(v=[3]), dtype="Int64")
        tm.assert_frame_equal(df, expected_result)

    def test_legacy_sql(self, project_id):
@@ -719,7 +736,7 @@ def test_query_with_parameters(self, project_id):
            configuration=config,
            dialect="legacy",
        )
-        tm.assert_frame_equal(df, DataFrame({"valid_result": [3]}))
+        tm.assert_frame_equal(df, DataFrame({"valid_result": [3]}, dtype="Int64"))

    def test_query_inside_configuration(self, project_id):
        query_no_use = 'SELECT "PI_WRONG" AS valid_string'
@@ -842,7 +859,11 @@ def test_struct(self, project_id):
            dialect="standard",
        )
        expected = DataFrame(
-            [[1, {"letter": "a", "num": 1}]], columns=["int_field", "struct_field"],
+            {
+                "int_field": pandas.Series([1], dtype="Int64"),
+                "struct_field": [{"letter": "a", "num": 1}],
+            },
+            columns=["int_field", "struct_field"],
        )
        tm.assert_frame_equal(df, expected)

@@ -874,7 +895,12 @@ def test_array_length_zero(self, project_id):
            dialect="standard",
        )
        expected = DataFrame(
-            [["a", [""], 1], ["b", [], 0]], columns=["letter", "array_field", "len"],
+            {
+                "letter": ["a", "b"],
+                "array_field": [[""], []],
+                "len": pandas.Series([1, 0], dtype="Int64"),
+            },
+            columns=["letter", "array_field", "len"],
        )
        tm.assert_frame_equal(df, expected)

@@ -908,7 +934,13 @@ def test_array_of_floats(self, project_id):
            credentials=self.credentials,
            dialect="standard",
        )
-        tm.assert_frame_equal(df, DataFrame([[[1.1, 2.2, 3.3], 4]], columns=["a", "b"]))
+        tm.assert_frame_equal(
+            df,
+            DataFrame(
+                {"a": [[1.1, 2.2, 3.3]], "b": pandas.Series([4], dtype="Int64")},
+                columns=["a", "b"],
+            ),
+        )

    def test_tokyo(self, tokyo_dataset, tokyo_table, project_id):
        df = gbq.read_gbq(
@@ -1021,7 +1053,7 @@ def test_upload_data_if_table_exists_append(self, project_id):
        test_id = "3"
        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
-        df_different_schema = tm.makeMixedDataFrame()
+        df_different_schema = make_mixed_dataframe_v1()

        # Initialize table with sample data
        gbq.to_gbq(
@@ -1101,7 +1133,7 @@ def test_upload_data_if_table_exists_replace(self, project_id):
        test_id = "4"
        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
-        df_different_schema = tm.makeMixedDataFrame()
+        df_different_schema = make_mixed_dataframe_v1()

        # Initialize table with sample data
        gbq.to_gbq(
@@ -1225,7 +1257,7 @@ def test_upload_data_with_newlines(self, project_id):
        result = result_df["s"].sort_values()
        expected = df["s"].sort_values()

-        tm.assert_numpy_array_equal(expected.values, result.values)
+        tm.assert_series_equal(expected, result)

    def test_upload_data_flexible_column_order(self, project_id):
        test_id = "13"
@@ -1254,7 +1286,7 @@ def test_upload_data_flexible_column_order(self, project_id):
    def test_upload_data_with_valid_user_schema(self, project_id):
        # Issue #46; tests test scenarios with user-provided
        # schemas
-        df = tm.makeMixedDataFrame()
+        df = make_mixed_dataframe_v1()
        test_id = "18"
        test_schema = [
            {"name": "A", "type": "FLOAT"},
@@ -1276,7 +1308,7 @@ def test_upload_data_with_valid_user_schema(self, project_id):
        )

    def test_upload_data_with_invalid_user_schema_raises_error(self, project_id):
-        df = tm.makeMixedDataFrame()
+        df = make_mixed_dataframe_v1()
        test_id = "19"
        test_schema = [
            {"name": "A", "type": "FLOAT"},
@@ -1295,7 +1327,7 @@ def test_upload_data_with_invalid_user_schema_raises_error(self, project_id):
        )

    def test_upload_data_with_missing_schema_fields_raises_error(self, project_id):
-        df = tm.makeMixedDataFrame()
+        df = make_mixed_dataframe_v1()
        test_id = "20"
        test_schema = [
            {"name": "A", "type": "FLOAT"},
@@ -1351,7 +1383,7 @@ def test_upload_data_with_timestamp(self, project_id):
        tm.assert_series_equal(expected, result)

    def test_upload_data_with_different_df_and_user_schema(self, project_id):
-        df = tm.makeMixedDataFrame()
+        df = make_mixed_dataframe_v1()
        df["A"] = df["A"].astype(str)
        df["B"] = df["B"].astype(str)
        test_id = "22"
@@ -1460,13 +1492,13 @@ def test_dataset_does_not_exist(gbq_dataset, random_dataset_id):


def test_create_table(gbq_table):
-    schema = gbq._generate_bq_schema(tm.makeMixedDataFrame())
+    schema = gbq._generate_bq_schema(make_mixed_dataframe_v1())
    gbq_table.create("test_create_table", schema)
    assert gbq_table.exists("test_create_table")


def test_create_table_already_exists(gbq_table):
-    schema = gbq._generate_bq_schema(tm.makeMixedDataFrame())
+    schema = gbq._generate_bq_schema(make_mixed_dataframe_v1())
    gbq_table.create("test_create_table_exists", schema)
    with pytest.raises(gbq.TableCreationError):
        gbq_table.create("test_create_table_exists", schema)