@@ -1834,8 +1834,7 @@ For example:
1834
1834
1835
1835
.. code-block :: python
1836
1836
1837
- DataFrame([1.0 , 2.0 , complex (1.0 , 2.0 )]).to_json() # raises
1838
-
1837
+ >> > DataFrame([1.0 , 2.0 , complex (1.0 , 2.0 )]).to_json() # raises
1839
1838
RuntimeError : Unhandled numpy dtype 15
1840
1839
1841
1840
can be dealt with by specifying a simple ``default_handler ``:
@@ -2411,8 +2410,8 @@ columns to strings.
2411
2410
.. code-block :: python
2412
2411
2413
2412
url_mcc = ' https://en.wikipedia.org/wiki/Mobile_country_code'
2414
- dfs = pd.read_html(url_mcc, match = ' Telekom Albania' , header = 0 , converters = { ' MNC ' :
2415
- str })
2413
+ dfs = pd.read_html(url_mcc, match = ' Telekom Albania' , header = 0 ,
2414
+ converters = { ' MNC ' : str })
2416
2415
2417
2416
.. versionadded:: 0.19
2418
2417
@@ -2724,7 +2723,8 @@ different parameters:
2724
2723
data = {}
2725
2724
# For when Sheet1's format differs from Sheet2
2726
2725
with pd.ExcelFile(' path_to_file.xls' ) as xls:
2727
- data[' Sheet1' ] = pd.read_excel(xls, ' Sheet1' , index_col = None , na_values = [' NA' ])
2726
+ data[' Sheet1' ] = pd.read_excel(xls, ' Sheet1' , index_col = None ,
2727
+ na_values = [' NA' ])
2728
2728
data[' Sheet2' ] = pd.read_excel(xls, ' Sheet2' , index_col = 1 )
2729
2729
2730
2730
Note that if the same parsing parameters are used for all sheets, a list
@@ -2735,11 +2735,14 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc
2735
2735
# using the ExcelFile class
2736
2736
data = {}
2737
2737
with pd.ExcelFile(' path_to_file.xls' ) as xls:
2738
- data[' Sheet1' ] = read_excel(xls, ' Sheet1' , index_col = None , na_values = [' NA' ])
2739
- data[' Sheet2' ] = read_excel(xls, ' Sheet2' , index_col = None , na_values = [' NA' ])
2738
+ data[' Sheet1' ] = read_excel(xls, ' Sheet1' , index_col = None ,
2739
+ na_values = [' NA' ])
2740
+ data[' Sheet2' ] = read_excel(xls, ' Sheet2' , index_col = None ,
2741
+ na_values = [' NA' ])
2740
2742
2741
2743
# equivalent using the read_excel function
2742
- data = read_excel(' path_to_file.xls' , [' Sheet1' , ' Sheet2' ], index_col = None , na_values = [' NA' ])
2744
+ data = read_excel(' path_to_file.xls' , [' Sheet1' , ' Sheet2' ],
2745
+ index_col = None , na_values = [' NA' ])
2743
2746
2744
2747
.. _io.excel.specifying_sheets:
2745
2748
@@ -2899,7 +2902,10 @@ missing data to recover integer dtype:
2899
2902
2900
2903
.. code-block :: python
2901
2904
2902
- cfun = lambda x : int (x) if x else - 1
2905
+ def cfun (x ):
2906
+ return int (x) if x else - 1
2907
+
2908
+
2903
2909
read_excel(' path_to_file.xls' , ' Sheet1' , converters = {' MyInts' : cfun})
2904
2910
2905
2911
dtype Specifications
@@ -3040,7 +3046,7 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are:
3040
3046
writer = ExcelWriter(' path_to_file.xlsx' , engine = ' xlsxwriter' )
3041
3047
3042
3048
# Or via pandas configuration.
3043
- from pandas import options
3049
+ from pandas import options # noqa: E402
3044
3050
options.io.excel.xlsx.writer = ' xlsxwriter'
3045
3051
3046
3052
df.to_excel(' path_to_file.xlsx' , sheet_name = ' Sheet1' )
@@ -3067,7 +3073,7 @@ which takes the contents of the clipboard buffer and passes them to the
3067
3073
``read_csv`` method. For instance, you can copy the following text to the
3068
3074
clipboard (CTRL-C on many operating systems):
3069
3075
3070
- .. code-block :: python
3076
+ .. code-block :: console
3071
3077
3072
3078
A B C
3073
3079
x 1 4 p
@@ -3476,9 +3482,9 @@ This format is specified by default when using ``put`` or ``to_hdf`` or by ``for
3476
3482
3477
3483
.. code-block :: python
3478
3484
3479
- pd.DataFrame(randn(10 , 2 )).to_hdf(' test_fixed.h5' , ' df' )
3485
+ >> > pd.DataFrame(randn(10 , 2 )).to_hdf(' test_fixed.h5' , ' df' )
3480
3486
3481
- pd.read_hdf(' test_fixed.h5' , ' df' , where = ' index>5' )
3487
+ >> > pd.read_hdf(' test_fixed.h5' , ' df' , where = ' index>5' )
3482
3488
TypeError: cannot pass a where specification when reading a fixed format.
3483
3489
this store must be selected in its entirety
3484
3490
@@ -3576,12 +3582,11 @@ will yield a tuple for each group key along with the relative keys of its conten
3576
3582
3577
3583
.. code-block :: python
3578
3584
3579
- In [ 8 ]: store.foo.bar.bah
3585
+ >> > store.foo.bar.bah
3580
3586
AttributeError : ' HDFStore' object has no attribute ' foo'
3581
3587
3582
3588
# you can directly access the actual PyTables node but using the root node
3583
- In [9 ]: store.root.foo.bar.bah
3584
- Out[9 ]:
3589
+ >> > store.root.foo.bar.bah
3585
3590
/ foo/ bar/ bah (Group) ' '
3586
3591
children := [' block0_items' (Array), ' block0_values' (Array), ' axis0' (Array), ' axis1' (Array)]
3587
3592
@@ -3735,7 +3740,7 @@ The right-hand side of the sub-expression (after a comparison operator) can be:
3735
3740
.. code-block :: python
3736
3741
3737
3742
string = " HolyMoly'"
3738
- store.select(' df' , ' index == %s ' % string)
3743
+ store.select(' df' , ' index == %s ' % string)
3739
3744
3740
3745
The latter will **not** work and will raise a ``SyntaxError``. Note that
3741
3746
there's a single quote followed by a double quote in the ``string ``
@@ -3941,7 +3946,7 @@ The default is 50,000 rows returned in a chunk.
3941
3946
3942
3947
.. code-block :: python
3943
3948
3944
- for df in pd.read_hdf(' store.h5' ,' df' , chunksize = 3 ):
3949
+ for df in pd.read_hdf(' store.h5' , ' df' , chunksize = 3 ):
3945
3950
print (df)
3946
3951
3947
3952
Note that the chunksize keyword applies to the **source** rows. So if you
@@ -4841,7 +4846,8 @@ to pass to :func:`pandas.to_datetime`:
4841
4846
.. code-block :: python
4842
4847
4843
4848
pd.read_sql_table(' data' , engine, parse_dates = {' Date' : ' %Y-%m-%d ' })
4844
- pd.read_sql_table(' data' , engine, parse_dates = {' Date' : {' format' : ' %Y-%m-%d %H:%M:%S' }})
4849
+ pd.read_sql_table(' data' , engine,
4850
+ parse_dates = {' Date' : {' format' : ' %Y-%m-%d %H:%M:%S' }})
4845
4851
4846
4852
4847
4853
You can check if a table exists using :func:`~pandas.io.sql.has_table`
@@ -5349,62 +5355,81 @@ And here's the code:
5349
5355
sz = 1000000
5350
5356
df = pd.DataFrame({' A' : randn(sz), ' B' : [1 ] * sz})
5351
5357
5358
+
5352
5359
def test_sql_write (df ):
5353
5360
if os.path.exists(' test.sql' ):
5354
5361
os.remove(' test.sql' )
5355
5362
sql_db = sqlite3.connect(' test.sql' )
5356
5363
df.to_sql(name = ' test_table' , con = sql_db)
5357
5364
sql_db.close()
5358
5365
5366
+
5359
5367
def test_sql_read ():
5360
5368
sql_db = sqlite3.connect(' test.sql' )
5361
5369
pd.read_sql_query(" select * from test_table" , sql_db)
5362
5370
sql_db.close()
5363
5371
5372
+
5364
5373
def test_hdf_fixed_write (df ):
5365
5374
df.to_hdf(' test_fixed.hdf' , ' test' , mode = ' w' )
5366
5375
5376
+
5367
5377
def test_hdf_fixed_read ():
5368
5378
pd.read_hdf(' test_fixed.hdf' , ' test' )
5369
5379
5380
+
5370
5381
def test_hdf_fixed_write_compress (df ):
5371
5382
df.to_hdf(' test_fixed_compress.hdf' , ' test' , mode = ' w' , complib = ' blosc' )
5372
5383
5384
+
5373
5385
def test_hdf_fixed_read_compress ():
5374
5386
pd.read_hdf(' test_fixed_compress.hdf' , ' test' )
5375
5387
5388
+
5376
5389
def test_hdf_table_write (df ):
5377
5390
df.to_hdf(' test_table.hdf' , ' test' , mode = ' w' , format = ' table' )
5378
5391
5392
+
5379
5393
def test_hdf_table_read ():
5380
5394
pd.read_hdf(' test_table.hdf' , ' test' )
5381
5395
5396
+
5382
5397
def test_hdf_table_write_compress (df ):
5383
- df.to_hdf(' test_table_compress.hdf' , ' test' , mode = ' w' , complib = ' blosc' , format = ' table' )
5398
+ df.to_hdf(' test_table_compress.hdf' , ' test' , mode = ' w' ,
5399
+ complib = ' blosc' , format = ' table' )
5400
+
5384
5401
5385
5402
def test_hdf_table_read_compress ():
5386
5403
pd.read_hdf(' test_table_compress.hdf' , ' test' )
5387
5404
5405
+
5388
5406
def test_csv_write (df ):
5389
5407
df.to_csv(' test.csv' , mode = ' w' )
5390
5408
5409
+
5391
5410
def test_csv_read ():
5392
5411
pd.read_csv(' test.csv' , index_col = 0 )
5393
5412
5413
+
5394
5414
def test_feather_write (df ):
5395
5415
df.to_feather(' test.feather' )
5396
5416
5417
+
5397
5418
def test_feather_read ():
5398
5419
pd.read_feather(' test.feather' )
5399
5420
5421
+
5400
5422
def test_pickle_write (df ):
5401
5423
df.to_pickle(' test.pkl' )
5402
5424
5425
+
5403
5426
def test_pickle_read ():
5404
5427
pd.read_pickle(' test.pkl' )
5405
5428
5429
+
5406
5430
def test_pickle_write_compress (df ):
5407
5431
df.to_pickle(' test.pkl.compress' , compression = ' xz' )
5408
5432
5433
+
5409
5434
def test_pickle_read_compress ():
5410
5435
pd.read_pickle(' test.pkl.compress' , compression = ' xz' )
0 commit comments