pandas-dev · rhshadrach · Apr 22, 2021 · Mar 1, 2021 · Mar 5, 2021 · Mar 5, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -196,6 +196,7 @@ Other enhancements
 - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
 - Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
 - :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
+- :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
 - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
 - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
 - :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -664,6 +664,15 @@ class ExcelWriter(metaclass=abc.ABCMeta):
         be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
 
         .. versionadded:: 1.2.0
+    if_sheet_exists : {'error', 'new', 'replace'}, default 'error'
+        How to behave when trying to write to a sheet that already
+        exists (append mode only).
+
+        * error: Raise a ValueError.
+        * new: Create a new sheet, with a name determined by the engine.
+        * replace: Delete the contents of the sheet before writing to it.
+
+        .. versionadded:: 1.3.0
     engine_kwargs : dict, optional
         Keyword arguments to be passed into the engine.
 
@@ -760,6 +769,7 @@ def __new__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: str | None = None,
         engine_kwargs: dict | None = None,
         **kwargs,
     ):
@@ -861,6 +871,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: str | None = None,
         engine_kwargs: dict | None = None,
         **kwargs,
     ):
@@ -896,6 +907,17 @@ def __init__(
 
         self.mode = mode
 
+        if if_sheet_exists not in [None, "error", "new", "replace"]:
+            raise ValueError(
+                f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
+                "Valid options are 'error', 'new' and 'replace'."
+            )
+        if if_sheet_exists and "r+" not in mode:
+            raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
+        if if_sheet_exists is None:
+            if_sheet_exists = "error"
+        self.if_sheet_exists = if_sheet_exists
+
     def __fspath__(self):
         return getattr(self.handles.handle, "name", "")
 

diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
@@ -30,6 +30,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         engine_kwargs: Optional[Dict[str, Any]] = None,
     ):
         from odf.opendocument import OpenDocumentSpreadsheet
@@ -41,6 +42,7 @@ def __init__(
             path,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -37,6 +37,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: str | None = None,
         engine_kwargs: dict[str, Any] | None = None,
     ):
         # Use the openpyxl module as the Excel writer.
@@ -46,6 +47,7 @@ def __init__(
             path,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 
@@ -56,6 +58,8 @@ def __init__(
 
             self.book = load_workbook(self.handles.handle)
             self.handles.handle.seek(0)
+            self.sheets = {name: self.book[name] for name in self.book.sheetnames}
+
         else:
             # Create workbook object with default optimized_write=True.
             self.book = Workbook()
@@ -414,8 +418,26 @@ def write_cells(
 
         _style_cache: dict[str, dict[str, Serialisable]] = {}
 
-        if sheet_name in self.sheets:
-            wks = self.sheets[sheet_name]
+        if sheet_name in self.sheets and self.if_sheet_exists != "new":
+            if "r+" in self.mode:
+                if self.if_sheet_exists == "replace":
+                    old_wks = self.sheets[sheet_name]
+                    target_index = self.book.index(old_wks)
+                    del self.book[sheet_name]
+                    wks = self.book.create_sheet(sheet_name, target_index)
+                    self.sheets[sheet_name] = wks
+                elif self.if_sheet_exists == "error":
+                    raise ValueError(
+                        f"Sheet '{sheet_name}' already exists and "
+                        f"if_sheet_exists is set to 'error'."
+                    )
+                else:
+                    raise ValueError(
+                        f"'{self.if_sheet_exists}' is not valid for if_sheet_exists. "
+                        "Valid options are 'error', 'new' and 'replace'."
+                    )
+            else:
+                wks = self.sheets[sheet_name]
         else:
             wks = self.book.create_sheet()
             wks.title = sheet_name

diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
@@ -177,6 +177,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         engine_kwargs: Optional[Dict[str, Any]] = None,
     ):
         # Use the xlsxwriter module as the Excel writer.
@@ -194,6 +195,7 @@ def __init__(
             datetime_format=datetime_format,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 

diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py
@@ -28,6 +28,7 @@ def __init__(
         encoding=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         engine_kwargs: Optional[Dict[str, Any]] = None,
     ):
         # Use the xlwt module as the Excel writer.
@@ -40,6 +41,7 @@ def __init__(
             path,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+import re
 
 import numpy as np
 import pytest
@@ -109,6 +110,66 @@ def test_write_append_mode(ext, mode, expected):
             assert wb2.worksheets[index]["A1"].value == cell_value
 
 
+@pytest.mark.parametrize(
+    "if_sheet_exists,num_sheets,expected",
+    [
+        ("new", 2, ["apple", "banana"]),
+        ("replace", 1, ["pear"]),
+    ],
+)
+def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
+    # GH 40230
+    df1 = DataFrame({"fruit": ["apple", "banana"]})
+    df2 = DataFrame({"fruit": ["pear"]})
+
+    with tm.ensure_clean(ext) as f:
+        df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
+        with ExcelWriter(
+            f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
+        ) as writer:
+            df2.to_excel(writer, sheet_name="foo", index=False)
+
+        wb = openpyxl.load_workbook(f)
+        assert len(wb.sheetnames) == num_sheets
+        assert wb.sheetnames[0] == "foo"
+        result = pd.read_excel(wb, "foo", engine="openpyxl")
+        assert list(result["fruit"]) == expected
+        if len(wb.sheetnames) == 2:
+            result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
+            tm.assert_frame_equal(result, df2)
+        wb.close()
+
+
+@pytest.mark.parametrize(
+    "if_sheet_exists,msg",
+    [
+        (
+            "invalid",
+            "'invalid' is not valid for if_sheet_exists. Valid options "
+            "are 'error', 'new' and 'replace'.",
+        ),
+        (
+            "error",
+            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
+        ),
+        (
+            None,
+            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
+        ),
+    ],
+)
+def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
+    # GH 40230
+    df = DataFrame({"fruit": ["pear"]})
+    with tm.ensure_clean(ext) as f:
+        with pytest.raises(ValueError, match=re.escape(msg)):
+            df.to_excel(f, "foo", engine="openpyxl")
+            with ExcelWriter(
+                f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
+            ) as writer:
+                df.to_excel(writer, sheet_name="foo")
+
+
 def test_to_excel_with_openpyxl_engine(ext):
     # GH 29854
     with tm.ensure_clean(ext) as filename:
@@ -175,7 +236,9 @@ def test_append_mode_file(ext):
     with tm.ensure_clean(ext) as f:
         df.to_excel(f, engine="openpyxl")
 
-        with ExcelWriter(f, mode="a", engine="openpyxl") as writer:
+        with ExcelWriter(
+            f, mode="a", engine="openpyxl", if_sheet_exists="new"
+        ) as writer:
             df.to_excel(writer)
 
         # make sure that zip files are not concatenated by making sure that

diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -1325,6 +1325,14 @@ def test_excel_duplicate_columns_with_names(self, path):
         expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"])
         tm.assert_frame_equal(result, expected)
 
+    def test_if_sheet_exists_raises(self, ext):
+        # GH 40230
+        msg = "if_sheet_exists is only valid in append mode (mode='a')"
+
+        with tm.ensure_clean(ext) as f:
+            with pytest.raises(ValueError, match=re.escape(msg)):
+                ExcelWriter(f, if_sheet_exists="replace")
+
 
 class TestExcelWriterEngineTests:
     @pytest.mark.parametrize(