Skip to content

ENH: Adding new pandas option for integer formatting, Issue#57177 #58519

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v2.2.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ including other versions of pandas.
{{ header }}

.. ---------------------------------------------------------------------------
.. _whatsnew_222.enhancements:

Enhancements
~~~~~~~~~~~~
- Added new option ``display.integer_format`` for integer formatting within DataFrames (:issue:`57177`)

.. _whatsnew_222.regressions:

Fixed regressions
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ def use_numba_cb(key: str) -> None:
df.info() is called. Valid values True,False,'deep'
"""

# Help text for the ``display.integer_format`` option; it is passed as the doc
# argument when the option is registered.
pc_integer_format_doc = """
: str or None
    Thousands separator used when displaying integer values in a DataFrame.
    Must be "," or "_"; the default, None, displays integers without a
    separator.
"""


def table_schema_cb(key: str) -> None:
from pandas.io.formats.printing import enable_data_resource_formatter
Expand Down Expand Up @@ -388,6 +394,12 @@ def is_terminal() -> bool:
cf.register_option(
"max_dir_items", 100, pc_max_dir_items, validator=is_nonnegative_int
)
cf.register_option(
"integer_format",
None,
pc_integer_format_doc,
validator=is_instance_factory((type(None), str)),
)

tc_sim_interactive_doc = """
: boolean
Expand All @@ -412,9 +424,11 @@ def is_terminal() -> bool:
"copy_on_write",
# Get the default from an environment variable, if set, otherwise defaults
# to False. This environment variable can be set for testing.
"warn"
if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
(
"warn"
if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1"
),
copy_on_write_doc,
validator=is_one_of_factory([True, False, "warn"]),
)
Expand Down
42 changes: 33 additions & 9 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
CompressionOptions,
FilePath,
FloatFormatType,
IntegerFormatType,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised this worked because _typing.py has no IntegerFormatType

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh did not notice that until I created a PR, just pushed something that fixes that.

FormattersType,
IndexLabel,
SequenceNotStr,
Expand Down Expand Up @@ -434,6 +435,7 @@ def __init__(
formatters: FormattersType | None = None,
justify: str | None = None,
float_format: FloatFormatType | None = None,
integer_format: IntegerFormatType | None = None,
sparsify: bool | None = None,
index_names: bool = True,
max_rows: int | None = None,
Expand All @@ -453,6 +455,7 @@ def __init__(
self.formatters = self._initialize_formatters(formatters)
self.justify = self._initialize_justify(justify)
self.float_format = float_format
self.integer_format = integer_format
self.sparsify = self._initialize_sparsify(sparsify)
self.show_index_names = index_names
self.decimal = decimal
Expand Down Expand Up @@ -756,6 +759,7 @@ def format_col(self, i: int) -> list[str]:
frame.iloc[:, i]._values,
formatter,
float_format=self.float_format,
integer_format=self.integer_format,
na_rep=self.na_rep,
space=self.col_space.get(frame.columns[i]),
decimal=self.decimal,
Expand Down Expand Up @@ -789,9 +793,11 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
fmt_columns = columns._format_flat(include_name=False)
str_columns = [
[
" " + x
if not self._get_formatter(i) and is_numeric_dtype(dtype)
else x
(
" " + x
if not self._get_formatter(i) and is_numeric_dtype(dtype)
else x
)
]
for i, (x, dtype) in enumerate(zip(fmt_columns, self.frame.dtypes))
]
Expand Down Expand Up @@ -1063,6 +1069,7 @@ def format_array(
values: ArrayLike,
formatter: Callable | None,
float_format: FloatFormatType | None = None,
integer_format: IntegerFormatType | None = None,
na_rep: str = "NaN",
digits: int | None = None,
space: str | int | None = None,
Expand Down Expand Up @@ -1124,6 +1131,9 @@ def format_array(
if float_format is None:
float_format = get_option("display.float_format")

if integer_format is None:
integer_format = get_option("display.integer_format")

if digits is None:
digits = get_option("display.precision")

Expand All @@ -1132,6 +1142,7 @@ def format_array(
digits=digits,
na_rep=na_rep,
float_format=float_format,
integer_format=integer_format,
formatter=formatter,
space=space,
justify=justify,
Expand All @@ -1153,8 +1164,10 @@ def __init__(
na_rep: str = "NaN",
space: str | int = 12,
float_format: FloatFormatType | None = None,
integer_format: IntegerFormatType | None = None,
justify: str = "right",
decimal: str = ".",
delimiter: str | None = None,
quoting: int | None = None,
fixed_width: bool = True,
leading_space: bool | None = True,
Expand All @@ -1166,8 +1179,10 @@ def __init__(
self.space = space
self.formatter = formatter
self.float_format = float_format
self.integer_format = integer_format
self.justify = justify
self.decimal = decimal
self.delimiter = delimiter
self.quoting = quoting
self.fixed_width = fixed_width
self.leading_space = leading_space
Expand Down Expand Up @@ -1455,13 +1470,22 @@ def _format_strings(self) -> list[str]:

class _IntArrayFormatter(_GenericArrayFormatter):
    """Array formatter for integer values with an optional thousands separator."""

    def _format_strings(self) -> list[str]:
        """
        Render every element of ``self.values`` as a string.

        ``self.integer_format`` selects the thousands separator: ``","``,
        ``"_"`` or ``None`` (no separator). Unless ``self.leading_space`` is
        False, a space is reserved for the sign of non-negative values. A
        user-supplied ``self.formatter`` takes precedence over the built-in
        formatting.

        Returns
        -------
        list of str
            One formatted string per element of ``self.values``.

        Raises
        ------
        ValueError
            If ``self.integer_format`` is not one of ``","``, ``"_"`` or None.
        """
        integer_format = self.integer_format
        # Resolve the separator into a local instead of overwriting
        # ``self.integer_format``: rewriting None to "" on the instance would
        # make a second call fail validation, because "" is not an allowed value.
        if integer_format is None:
            separator = ""
        elif integer_format in (",", "_"):
            separator = integer_format
        else:
            raise ValueError("integer_format must be one of ',','_', or None")

        # " " in the sign position pads non-negative numbers with one space.
        sign = "" if self.leading_space is False else " "
        # Keep the explicit "d" presentation type so that the default
        # (separator-less) output is identical to plain decimal formatting.
        spec = f"{sign}{separator}d"

        def formatter_str(x) -> str:
            return format(x, spec)

        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]


class _Datetime64Formatter(_GenericArrayFormatter):
Expand Down
66 changes: 66 additions & 0 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2259,3 +2259,69 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
msg = "buf is not a file name and it has no write method"
with pytest.raises(TypeError, match=msg):
getattr(float_frame, method)(buf=object())


class TestIntArrayFormatter:
    """Check how ``display.integer_format`` affects the repr of a DataFrame."""

    @staticmethod
    def _mixed_frame():
        # One integer column (the option under test) and one float column,
        # whose rendering is compared against plain float strings.
        return pd.DataFrame({"A": [1000, 20000, 30], "B": [4.1, 50000.2, 600.0]})

    @staticmethod
    def _repr_with_int_strings(int_strings):
        # Build the expected repr from a frame that already holds the
        # formatted values as strings.
        as_text = pd.DataFrame({"A": int_strings, "B": ["4.1", "50000.2", "600.0"]})
        return repr(as_text)

    def test_format_comma(self):
        # "," is a valid thousands separator.
        with option_context("display.integer_format", ","):
            result = repr(self._mixed_frame())
            expected = self._repr_with_int_strings(["1,000", "20,000", "30"])

            assert result == expected

    def test_format_underscore(self):
        # "_" is a valid thousands separator.
        with option_context("display.integer_format", "_"):
            result = repr(self._mixed_frame())
            expected = self._repr_with_int_strings(["1_000", "20_000", "30"])

            assert result == expected

    def test_format_empty(self):
        # None means integers are shown without any separator.
        with option_context("display.integer_format", None):
            result = repr(self._mixed_frame())
            expected = self._repr_with_int_strings(["1000", "20000", "30"])

            assert result == expected

    def test_format_invalid_to_none(self):
        # An unsupported separator is rejected when the frame is rendered.
        with option_context("display.integer_format", "."):
            frame = self._mixed_frame()

            with pytest.raises(
                ValueError, match="integer_format must be one of ',','_', or None"
            ):
                repr(frame)

        # With the option set to None again, rendering works without a separator.
        with option_context("display.integer_format", None):
            result = repr(self._mixed_frame())
            expected = self._repr_with_int_strings(["1000", "20000", "30"])

            assert result == expected