pandas-dev · rhshadrach · Apr 22, 2021 · Mar 1, 2021 · Mar 5, 2021 · Mar 5, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -125,6 +125,7 @@ Other enhancements
 - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
 - Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
 - :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
+- :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
 - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
 - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
 - :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -666,6 +666,17 @@ class ExcelWriter(metaclass=abc.ABCMeta):
         be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
 
         .. versionadded:: 1.2.0
+    if_sheet_exists : {'new', 'replace', 'overwrite', 'fail'}, default 'new'
+        How to behave when trying to write to a sheet that already
+        exists (append mode only).
+
+        * new: Create a new sheet with a different name.
+        * replace: Delete the contents of the sheet before writing to it.
+        * overwrite: Write directly to the named sheet
+          without deleting the previous contents.
+        * fail: Raise a ValueError.
+
+        .. versionadded:: 1.3.0
 
     Attributes
     ----------
@@ -834,6 +845,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         **engine_kwargs,
     ):
         # validate that this engine can handle the extension
@@ -868,6 +880,15 @@ def __init__(
 
         self.mode = mode
 
+        ise_valid = [None, "new", "replace", "overwrite", "fail"]
+        if if_sheet_exists not in ise_valid:
+            raise ValueError(f"'{if_sheet_exists}' is not valid for if_sheet_exists")
+        if if_sheet_exists and "r+" not in mode:
+            raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
+        if if_sheet_exists is None and "r+" in mode:
+            if_sheet_exists = "new"
+        self.if_sheet_exists = if_sheet_exists
+
     def __fspath__(self):
         return getattr(self.handles.handle, "name", "")
 

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -37,13 +37,18 @@ def __init__(
         engine=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         **engine_kwargs,
     ):
         # Use the openpyxl module as the Excel writer.
         from openpyxl.workbook import Workbook
 
         super().__init__(
-            path, mode=mode, storage_options=storage_options, **engine_kwargs
+            path,
+            mode=mode,
+            storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
+            **engine_kwargs,
         )
 
         # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
@@ -53,6 +58,8 @@ def __init__(
 
             self.book = load_workbook(self.handles.handle)
             self.handles.handle.seek(0)
+            self.sheets = {name: self.book[name] for name in self.book.sheetnames}
+
         else:
             # Create workbook object with default optimized_write=True.
             self.book = Workbook()
@@ -412,7 +419,25 @@ def write_cells(
         _style_cache: Dict[str, Dict[str, Serialisable]] = {}
 
         if sheet_name in self.sheets:
-            wks = self.sheets[sheet_name]
+            if "r+" in self.mode:
+                if self.if_sheet_exists == "new":
+                    wks = self.book.create_sheet()
+                    # openpyxl will create a name for the new sheet by appending digits
+                    wks.title = sheet_name
+                    self.sheets[wks.title] = wks
+                elif self.if_sheet_exists == "replace":
+                    wks = self.sheets[sheet_name]
+                    wks.delete_cols(1, wks.max_column)
+                elif self.if_sheet_exists == "overwrite":
+                    wks = self.sheets[sheet_name]
+                elif self.if_sheet_exists == "fail":
+                    raise ValueError(f"Sheet '{sheet_name}' already exists.")
+                else:
+                    raise ValueError(
+                        f"'{self.if_sheet_exists}' is not valid for if_sheet_exists"
+                    )
+            else:
+                wks = self.sheets[sheet_name]
         else:
             wks = self.book.create_sheet()
             wks.title = sheet_name

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+import re
 
 import numpy as np
 import pytest
@@ -109,6 +110,63 @@ def test_write_append_mode(ext, mode, expected):
             assert wb2.worksheets[index]["A1"].value == cell_value
 
 
+@pytest.mark.parametrize(
+    "if_sheet_exists,num_sheets,expected",
+    [
+        ("new", 2, ["apple", "banana"]),
+        (None, 2, ["apple", "banana"]),
+        ("replace", 1, ["pear"]),
+        ("overwrite", 1, ["pear", "banana"]),
+    ],
+)
+def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
+    # GH 40230: appending to an existing sheet honours if_sheet_exists
+    original = DataFrame({"fruit": ["apple", "banana"]})
+    appended = DataFrame({"fruit": ["pear"]})
+
+    with tm.ensure_clean(ext) as path:
+        original.to_excel(path, engine="openpyxl", sheet_name="foo", index=False)
+        with pd.ExcelWriter(
+            path, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
+        ) as writer:
+            appended.to_excel(writer, sheet_name="foo", index=False)
+
+        workbook = openpyxl.load_workbook(path)
+        sheet_names = workbook.sheetnames
+        assert len(sheet_names) == num_sheets
+        assert sheet_names[0] == "foo"
+        first_sheet = pd.read_excel(workbook, "foo", engine="openpyxl")
+        assert list(first_sheet["fruit"]) == expected
+        if len(sheet_names) == 2:
+            # openpyxl picks the extra sheet's name (currently "foo1"); use its index
+            extra_sheet = pd.read_excel(workbook, sheet_names[1], engine="openpyxl")
+            tm.assert_frame_equal(extra_sheet, appended)
+        workbook.close()
+
+
+def test_if_sheet_exists_raises(ext):
+    # GH 40230: invalid uses of if_sheet_exists must raise a ValueError
+    mode_msg = "if_sheet_exists is only valid in append mode (mode='a')"
+    invalid_msg = "'invalid' is not valid for if_sheet_exists"
+    fail_msg = "Sheet 'foo' already exists."
+    df = DataFrame({"fruit": ["pear"]})
+    with tm.ensure_clean(ext) as f:
+        with pytest.raises(ValueError, match=re.escape(mode_msg)):
+            ExcelWriter(f, engine="openpyxl", mode="w", if_sheet_exists="new")
+
+    with tm.ensure_clean(ext) as f:
+        with pytest.raises(ValueError, match=re.escape(invalid_msg)):
+            ExcelWriter(f, engine="openpyxl", mode="a", if_sheet_exists="invalid")
+
+    with tm.ensure_clean(ext) as f:
+        df.to_excel(f, sheet_name="foo", engine="openpyxl")  # setup, must not raise
+        with pytest.raises(ValueError, match=re.escape(fail_msg)):
+            with pd.ExcelWriter(
+                f, engine="openpyxl", mode="a", if_sheet_exists="fail"
+            ) as writer:
+                df.to_excel(writer, sheet_name="foo")
+
+
 def test_to_excel_with_openpyxl_engine(ext):
     # GH 29854
     with tm.ensure_clean(ext) as filename:

diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py
@@ -1,3 +1,4 @@
+import re
 import warnings
 
 import pytest
@@ -57,7 +58,10 @@ def test_column_format(ext):
 
 def test_write_append_mode_raises(ext):
     msg = "Append mode is not supported with xlsxwriter!"
+    ise_msg = "if_sheet_exists is only valid in append mode (mode='a')"  # GH 40230
 
     with tm.ensure_clean(ext) as f:
         with pytest.raises(ValueError, match=msg):
             ExcelWriter(f, engine="xlsxwriter", mode="a")
+        with pytest.raises(ValueError, match=re.escape(ise_msg)):  # mode not "a"
+            ExcelWriter(f, engine="xlsxwriter", if_sheet_exists="replace")