Skip to content

Commit fe60c2a

Browse files
committed
FEAT: Add if_exists parameter to ExcelWriter
1 parent 81114eb commit fe60c2a

File tree

8 files changed

+87
-10
lines changed

8 files changed

+87
-10
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ Other enhancements
125125
- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
126126
- Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
127127
- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
128+
- :class:`pandas.ExcelWriter` now accepts an ``if_exists`` parameter to control the behavior of append mode when writing to a sheet that already exists (:issue:`40230`)
128129
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
129130
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
130131
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)

pandas/io/excel/_base.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,16 @@ class ExcelWriter(metaclass=abc.ABCMeta):
666666
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
667667
668668
.. versionadded:: 1.2.0
669+
if_exists : {'new_sheet', 'overwrite_sheet', 'overwrite_cells'}, default 'new_sheet'
670+
How to behave when trying to write to a sheet that already
671+
exists (append mode only; passing it in any other mode raises ``ValueError``).
672+
673+
* new_sheet: Create a new sheet with a different name.
674+
* overwrite_sheet: Delete the contents of the sheet, then write to it.
675+
* overwrite_cells: Write directly to the named sheet
676+
without deleting the previous contents.
677+
678+
.. versionadded:: 1.3.0
669679
670680
Attributes
671681
----------
@@ -834,6 +844,7 @@ def __init__(
834844
datetime_format=None,
835845
mode: str = "w",
836846
storage_options: StorageOptions = None,
847+
if_exists: Optional[str] = None,
837848
**engine_kwargs,
838849
):
839850
# validate that this engine can handle the extension
@@ -951,8 +962,7 @@ def close(self):
951962

952963
@doc(storage_options=_shared_docs["storage_options"])
953964
def inspect_excel_format(
954-
content_or_path: FilePathOrBuffer,
955-
storage_options: StorageOptions = None,
965+
content_or_path: FilePathOrBuffer, storage_options: StorageOptions = None,
956966
) -> str:
957967
"""
958968
Inspect the path or content of an excel file and get its format.

pandas/io/excel/_odswriter.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,15 @@ def __init__(
2828
engine: Optional[str] = None,
2929
mode: str = "w",
3030
storage_options: StorageOptions = None,
31+
if_exists: Optional[str] = None,
3132
**engine_kwargs,
3233
):
3334
from odf.opendocument import OpenDocumentSpreadsheet
3435

3536
engine_kwargs["engine"] = engine
3637

37-
if mode == "a":
38-
raise ValueError("Append mode is not supported with odf!")
38+
if mode == "a" or if_exists:
39+
raise ValueError("Append mode and if_exists are not supported with odf!")
3940

4041
super().__init__(
4142
path, mode=mode, storage_options=storage_options, **engine_kwargs

pandas/io/excel/_openpyxl.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def __init__(
3737
engine=None,
3838
mode: str = "w",
3939
storage_options: StorageOptions = None,
40+
if_exists: Optional[str] = None,
4041
**engine_kwargs,
4142
):
4243
# Use the openpyxl module as the Excel writer.
@@ -53,12 +54,26 @@ def __init__(
5354

5455
self.book = load_workbook(self.handles.handle)
5556
self.handles.handle.seek(0)
57+
self.sheets = {name: self.book[name] for name in self.book.sheetnames}
58+
59+
if if_exists is None:
60+
self.if_exists = "new_sheet"
61+
elif if_exists in {"new_sheet", "overwrite_sheet", "overwrite_cells"}:
62+
self.if_exists = if_exists
63+
else:
64+
raise ValueError(f"'{if_exists}' is not valid for if_exists")
5665
else:
5766
# Create workbook object with default optimized_write=True.
5867
self.book = Workbook()
5968

6069
if self.book.worksheets:
6170
self.book.remove(self.book.worksheets[0])
71+
if if_exists is None:
72+
self.if_exists = None
73+
else:
74+
raise ValueError(
75+
"The 'if_exists' parameter is only valid with mode='a' (append)"
76+
)
6277

6378
def save(self):
6479
"""
@@ -412,7 +427,18 @@ def write_cells(
412427
_style_cache: Dict[str, Dict[str, Serialisable]] = {}
413428

414429
if sheet_name in self.sheets:
415-
wks = self.sheets[sheet_name]
430+
if self.if_exists == "new_sheet":
431+
wks = self.book.create_sheet()
432+
# openpyxl de-duplicates the title by appending digits, so wks.title may differ from sheet_name
433+
wks.title = sheet_name
434+
self.sheets[wks.title] = wks
435+
elif self.if_exists == "overwrite_sheet":
436+
wks = self.sheets[sheet_name]
437+
wks.delete_cols(1, wks.max_column)
438+
elif self.if_exists == "overwrite_cells" or self.if_exists is None:
439+
wks = self.sheets[sheet_name]
440+
else:
441+
raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
416442
else:
417443
wks = self.book.create_sheet()
418444
wks.title = sheet_name

pandas/io/excel/_xlsxwriter.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from typing import (
22
Dict,
33
List,
4+
Optional,
45
Tuple,
56
)
67

@@ -175,13 +176,16 @@ def __init__(
175176
datetime_format=None,
176177
mode: str = "w",
177178
storage_options: StorageOptions = None,
179+
if_exists: Optional[str] = None,
178180
**engine_kwargs,
179181
):
180182
# Use the xlsxwriter module as the Excel writer.
181183
from xlsxwriter import Workbook
182184

183-
if mode == "a":
184-
raise ValueError("Append mode is not supported with xlsxwriter!")
185+
if mode == "a" or if_exists:
186+
raise ValueError(
187+
"Append mode and if_exists are not supported with xlsxwriter!"
188+
)
185189

186190
super().__init__(
187191
path,

pandas/io/excel/_xlwt.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from typing import (
22
TYPE_CHECKING,
33
Dict,
4+
Optional,
45
)
56

67
import pandas._libs.json as json
@@ -24,15 +25,16 @@ def __init__(
2425
encoding=None,
2526
mode: str = "w",
2627
storage_options: StorageOptions = None,
28+
if_exists: Optional[str] = None,
2729
**engine_kwargs,
2830
):
2931
# Use the xlwt module as the Excel writer.
3032
import xlwt
3133

3234
engine_kwargs["engine"] = engine
3335

34-
if mode == "a":
35-
raise ValueError("Append mode is not supported with xlwt!")
36+
if mode == "a" or if_exists:
37+
raise ValueError("Append mode and if_exists are not supported with xlwt!")
3638

3739
super().__init__(
3840
path, mode=mode, storage_options=storage_options, **engine_kwargs

pandas/tests/io/excel/test_openpyxl.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,39 @@ def test_write_append_mode(ext, mode, expected):
109109
assert wb2.worksheets[index]["A1"].value == cell_value
110110

111111

112+
@pytest.mark.parametrize(
113+
"if_exists,num_sheets,expected",
114+
[
115+
("new_sheet", 2, ["apple", "banana"]),
116+
("overwrite_sheet", 1, ["pear"]),
117+
("overwrite_cells", 1, ["pear", "banana"]),
118+
],
119+
)
120+
def test_if_exists_append_modes(ext, if_exists, num_sheets, expected):
121+
df1 = DataFrame({"fruit": ["apple", "banana"]})
122+
df2 = DataFrame({"fruit": ["pear"]})
123+
124+
with tm.ensure_clean(ext) as f:
125+
with pd.ExcelWriter(f, engine="openpyxl", mode="w") as writer:
126+
df1.to_excel(writer, sheet_name="foo", index=False)
127+
with pd.ExcelWriter(
128+
f, engine="openpyxl", mode="a", if_exists=if_exists
129+
) as writer:
130+
df2.to_excel(writer, sheet_name="foo", index=False)
131+
132+
wb = openpyxl.load_workbook(f)
133+
assert len(wb.sheetnames) == num_sheets
134+
assert wb.sheetnames[0] == "foo"
135+
result = pd.read_excel(wb, "foo", engine="openpyxl")
136+
assert list(result["fruit"]) == expected
137+
if len(wb.sheetnames) == 2:
138+
# at the moment the name given for the second sheet will be "foo1"
139+
# but we don't want the test to fail if openpyxl changes this
140+
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
141+
assert result.equals(df2)
142+
wb.close()
143+
144+
112145
def test_to_excel_with_openpyxl_engine(ext):
113146
# GH 29854
114147
with tm.ensure_clean(ext) as filename:

pandas/tests/io/excel/test_xlsxwriter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def test_column_format(ext):
5656

5757

5858
def test_write_append_mode_raises(ext):
59-
msg = "Append mode is not supported with xlsxwriter!"
59+
msg = "Append mode and if_exists are not supported with xlsxwriter!"
6060

6161
with tm.ensure_clean(ext) as f:
6262
with pytest.raises(ValueError, match=msg):

0 commit comments

Comments
 (0)