Skip to content

Commit 4b73d6a

Browse files
committed
ENH: Add if_exists parameter to ExcelWriter
1 parent 81114eb commit 4b73d6a

File tree

4 files changed

+94
-2
lines changed

4 files changed

+94
-2
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ Other enhancements
125125
- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
126126
- Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
127127
- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
128+
- :class:`pandas.ExcelWriter` now accepts an ``if_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
128129
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
129130
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
130131
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)

pandas/io/excel/_base.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,16 @@ class ExcelWriter(metaclass=abc.ABCMeta):
666666
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
667667
668668
.. versionadded:: 1.2.0
669+
if_exists : {'new_sheet', 'overwrite_sheet', 'overwrite_cells'}, default 'new_sheet'
670+
How to behave when trying to write to a sheet that already
671+
exists (append mode only; passing it in any other mode raises ``ValueError``).
672+
673+
* new_sheet: Create a new sheet with a different name.
674+
* overwrite_sheet: Delete the contents of the sheet, then write to it.
675+
* overwrite_cells: Write directly to the named sheet
676+
without deleting the previous contents.
677+
678+
.. versionadded:: 1.3.0
669679
670680
Attributes
671681
----------
@@ -834,6 +844,7 @@ def __init__(
834844
datetime_format=None,
835845
mode: str = "w",
836846
storage_options: StorageOptions = None,
847+
if_exists: Optional[str] = None,
837848
**engine_kwargs,
838849
):
839850
# validate that this engine can handle the extension
@@ -868,6 +879,18 @@ def __init__(
868879

869880
self.mode = mode
870881

882+
if if_exists and "r+" not in mode:
883+
raise ValueError("if_exists is only valid in append mode (mode='a')")
884+
if if_exists is not None and if_exists not in {
885+
"new_sheet",
886+
"overwrite_sheet",
887+
"overwrite_cells",
888+
}:
889+
raise ValueError(f"'{if_exists}' is not valid for if_exists")
890+
if if_exists is None and "r+" in mode:
891+
if_exists = "new_sheet"
892+
self.if_exists = if_exists
893+
871894
def __fspath__(self):
872895
return getattr(self.handles.handle, "name", "")
873896

pandas/io/excel/_openpyxl.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,18 @@ def __init__(
3737
engine=None,
3838
mode: str = "w",
3939
storage_options: StorageOptions = None,
40+
if_exists: Optional[str] = None,
4041
**engine_kwargs,
4142
):
4243
# Use the openpyxl module as the Excel writer.
4344
from openpyxl.workbook import Workbook
4445

4546
super().__init__(
46-
path, mode=mode, storage_options=storage_options, **engine_kwargs
47+
path,
48+
mode=mode,
49+
storage_options=storage_options,
50+
if_exists=if_exists,
51+
**engine_kwargs,
4752
)
4853

4954
# ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
@@ -53,6 +58,8 @@ def __init__(
5358

5459
self.book = load_workbook(self.handles.handle)
5560
self.handles.handle.seek(0)
61+
self.sheets = {name: self.book[name] for name in self.book.sheetnames}
62+
5663
else:
5764
# Create workbook object with default optimized_write=True.
5865
self.book = Workbook()
@@ -412,7 +419,21 @@ def write_cells(
412419
_style_cache: Dict[str, Dict[str, Serialisable]] = {}
413420

414421
if sheet_name in self.sheets:
415-
wks = self.sheets[sheet_name]
422+
if "r+" in self.mode:
423+
if self.if_exists == "new_sheet":
424+
wks = self.book.create_sheet()
425+
# the title is taken, so openpyxl makes it unique by appending digits
426+
wks.title = sheet_name
427+
self.sheets[wks.title] = wks
428+
elif self.if_exists == "overwrite_sheet":
429+
wks = self.sheets[sheet_name]
430+
wks.delete_cols(1, wks.max_column)
431+
elif self.if_exists == "overwrite_cells" or self.if_exists is None:
432+
wks = self.sheets[sheet_name]
433+
else:
434+
raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
435+
else:
436+
wks = self.sheets[sheet_name]
416437
else:
417438
wks = self.book.create_sheet()
418439
wks.title = sheet_name

pandas/tests/io/excel/test_openpyxl.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from pathlib import Path
2+
import re
23

34
import numpy as np
45
import pytest
@@ -109,6 +110,52 @@ def test_write_append_mode(ext, mode, expected):
109110
assert wb2.worksheets[index]["A1"].value == cell_value
110111

111112

113+
@pytest.mark.parametrize(
114+
"if_exists,num_sheets,expected",
115+
[
116+
("new_sheet", 2, ["apple", "banana"]),
117+
("overwrite_sheet", 1, ["pear"]),
118+
("overwrite_cells", 1, ["pear", "banana"]),
119+
],
120+
)
121+
def test_if_exists_append_modes(ext, if_exists, num_sheets, expected):
122+
# GH 40230
123+
df1 = DataFrame({"fruit": ["apple", "banana"]})
124+
df2 = DataFrame({"fruit": ["pear"]})
125+
126+
with tm.ensure_clean(ext) as f:
127+
with pd.ExcelWriter(f, engine="openpyxl", mode="w") as writer:
128+
df1.to_excel(writer, sheet_name="foo", index=False)
129+
with pd.ExcelWriter(
130+
f, engine="openpyxl", mode="a", if_exists=if_exists
131+
) as writer:
132+
df2.to_excel(writer, sheet_name="foo", index=False)
133+
134+
wb = openpyxl.load_workbook(f)
135+
assert len(wb.sheetnames) == num_sheets
136+
assert wb.sheetnames[0] == "foo"
137+
result = pd.read_excel(wb, "foo", engine="openpyxl")
138+
assert list(result["fruit"]) == expected
139+
if len(wb.sheetnames) == 2:
140+
# currently the second sheet is given the name "foo1"
141+
# but we don't want the test to fail if openpyxl changes this
142+
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
143+
assert result.equals(df2)
144+
wb.close()
145+
146+
147+
def test_if_exists_raises(ext):
148+
if_exists_msg = "if_exists is only valid in append mode (mode='a')"
149+
invalid_msg = "'invalid' is not valid for if_exists"
150+
151+
with tm.ensure_clean(ext) as f:
152+
with pytest.raises(ValueError, match=re.escape(if_exists_msg)):
153+
ExcelWriter(f, engine="openpyxl", mode="w", if_exists="new_sheet")
154+
with tm.ensure_clean(ext) as f:
155+
with pytest.raises(ValueError, match=invalid_msg):
156+
ExcelWriter(f, engine="openpyxl", mode="a", if_exists="invalid")
157+
158+
112159
def test_to_excel_with_openpyxl_engine(ext):
113160
# GH 29854
114161
with tm.ensure_clean(ext) as filename:

0 commit comments

Comments
 (0)