Skip to content

BUG: extra leading space in to_string when index=False #29670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,7 @@ I/O
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`)
- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by ``record_path`` would raise a ``TypeError`` (:issue:`30148`)
- :func:`read_excel` now accepts binary data (:issue:`15914`)

Expand Down
42 changes: 33 additions & 9 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,11 +341,18 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]:
return fmt_index, have_header

def _get_formatted_values(self) -> List[str]:
    """
    Return the values of the truncated series formatted as strings.

    When the index is displayed, ``leading_space="compat"`` is forwarded to
    ``format_array``; when the index is hidden, ``leading_space=False`` is
    forwarded instead.
    """
    # Choose the padding mode up front so the call below stays flat.
    leading_space: Union[bool, str] = "compat" if self.index else False

    return format_array(
        self.tr_series._values,
        None,
        float_format=self.float_format,
        na_rep=self.na_rep,
        leading_space=leading_space,
    )

def to_string(self) -> str:
Expand Down Expand Up @@ -947,13 +954,20 @@ def to_latex(
def _format_col(self, i: int) -> List[str]:
frame = self.tr_frame
formatter = self._get_formatter(i)

leading_space: Union[bool, str]
if self.index:
leading_space = "compat"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems there was already some discussion about this here: https://github.com/pandas-dev/pandas/pull/25000/files#r252237505

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think I already addressed it in this PR?

else:
leading_space = False
return format_array(
frame.iloc[:, i]._values,
formatter,
float_format=self.float_format,
na_rep=self.na_rep,
space=self.col_space,
decimal=self.decimal,
leading_space=leading_space,
)

def to_html(
Expand Down Expand Up @@ -1105,7 +1119,7 @@ def format_array(
space: Optional[Union[str, int]] = None,
justify: str = "right",
decimal: str = ".",
leading_space: Optional[bool] = None,
leading_space: Union[str, bool] = "compat",
) -> List[str]:
"""
Format an array for printing.
Expand All @@ -1120,7 +1134,7 @@ def format_array(
space
justify
decimal
leading_space : bool, optional
leading_space : bool or 'compat', default is 'compat'
Whether the array should be formatted with a leading space.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as the previous one here: what is the intended type of this?

Copy link
Member Author

@charlesdong1991 charlesdong1991 Nov 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I didn't notice this, so I just copy-pasted the change from my previous PR. My bad; changed.

When an array as a column of a Series or DataFrame, we do want
the leading space to pad between columns.
Expand Down Expand Up @@ -1187,7 +1201,7 @@ def __init__(
decimal: str = ".",
quoting: Optional[int] = None,
fixed_width: bool = True,
leading_space: Optional[bool] = None,
leading_space: Union[str, bool] = "compat",
):
self.values = values
self.digits = digits
Expand Down Expand Up @@ -1251,7 +1265,7 @@ def _format(x):

is_float_type = lib.map_infer(vals, is_float) & notna(vals)
leading_space = self.leading_space
if leading_space is None:
if leading_space == "compat":
leading_space = is_float_type.any()

fmt_values = []
Expand Down Expand Up @@ -1391,9 +1405,11 @@ def format_values_with(float_format):
float_format: Optional[float_format_type]
if self.float_format is None:
if self.fixed_width:
float_format = partial(
"{value: .{digits:d}f}".format, digits=self.digits
)
if self.leading_space is not False:
fmt_str = "{value: .{digits:d}f}"
else:
fmt_str = "{value:.{digits:d}f}"
float_format = partial(fmt_str.format, digits=self.digits)
else:
float_format = self.float_format
else:
Expand Down Expand Up @@ -1425,7 +1441,11 @@ def format_values_with(float_format):
).any()

if has_small_values or (too_long and has_large_values):
float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
if self.leading_space is not False:
fmt_str = "{value: .{digits:d}e}"
else:
fmt_str = "{value:.{digits:d}e}"
float_format = partial(fmt_str.format, digits=self.digits)
formatted_values = format_values_with(float_format)

return formatted_values
Expand All @@ -1440,7 +1460,11 @@ def _format_strings(self) -> List[str]:

class IntArrayFormatter(GenericArrayFormatter):
def _format_strings(self) -> List[str]:
formatter = self.formatter or (lambda x: "{x: d}".format(x=x))
if self.leading_space is False:
fmt_str = "{x:d}"
else:
fmt_str = "{x: d}"
formatter = self.formatter or (lambda x: fmt_str.format(x=x))
fmt_values = [formatter(x) for x in self.values]
return fmt_values

Expand Down
42 changes: 38 additions & 4 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1508,11 +1508,11 @@ def test_to_string_no_index(self):

df_s = df.to_string(index=False)
# Leading space is expected for positive numbers.
expected = " x y z\n 11 33 AAA\n 22 -44 "
expected = " x y z\n11 33 AAA\n22 -44 "
assert df_s == expected

df_s = df[["y", "x", "z"]].to_string(index=False)
expected = " y x z\n 33 11 AAA\n-44 22 "
expected = " y x z\n 33 11 AAA\n-44 22 "
assert df_s == expected

def test_to_string_line_width_no_index(self):
Expand All @@ -1527,7 +1527,7 @@ def test_to_string_line_width_no_index(self):
df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]})

df_s = df.to_string(line_width=1, index=False)
expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 "
expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 "

assert df_s == expected

Expand Down Expand Up @@ -2222,7 +2222,7 @@ def test_to_string_without_index(self):
# GH 11729 Test index=False option
s = Series([1, 2, 3, 4])
result = s.to_string(index=False)
expected = " 1\n" + " 2\n" + " 3\n" + " 4"
expected = "1\n" + "2\n" + "3\n" + "4"
assert result == expected

def test_unicode_name_in_footer(self):
Expand Down Expand Up @@ -3277,3 +3277,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
msg = "buf is not a file name and it has no write method"
with pytest.raises(TypeError, match=msg):
getattr(float_frame, method)(buf=object())


@pytest.mark.parametrize(
    "input_array, expected",
    [
        ("a", "a"),
        (["a", "b"], "a\nb"),
        ([1, "a"], "1\na"),
        (1, "1"),
        ([0, -1], " 0\n-1"),
        (1.0, "1.0"),
        ([" a", " b"], " a\n b"),
        ([".1", "1"], ".1\n 1"),
        (["10", "-10"], " 10\n-10"),
    ],
)
def test_format_remove_leading_space_series(input_array, expected):
    """Check ``Series.to_string(index=False)`` against the expected text."""
    # GH: 24980
    series = pd.Series(input_array)
    rendered = series.to_string(index=False)
    assert rendered == expected


@pytest.mark.parametrize(
    "input_array, expected",
    [
        ({"A": ["a"]}, "A\na"),
        ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"),
        ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"),
    ],
)
def test_format_remove_leading_space_dataframe(input_array, expected):
    """Check ``DataFrame.to_string(index=False)`` against the expected text."""
    # GH: 24980
    frame = pd.DataFrame(input_array)
    rendered = frame.to_string(index=False)
    assert rendered == expected
22 changes: 11 additions & 11 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame):
withoutindex_result = df.to_latex(index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
a & b \\
a & b \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down Expand Up @@ -413,7 +413,7 @@ def test_to_latex_longtable(self):
withoutindex_result = df.to_latex(index=False, longtable=True)
withoutindex_expected = r"""\begin{longtable}{rl}
\toprule
a & b \\
a & b \\
\midrule
\endhead
\midrule
Expand All @@ -423,8 +423,8 @@ def test_to_latex_longtable(self):

\bottomrule
\endlastfoot
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\end{longtable}
"""

Expand Down Expand Up @@ -614,8 +614,8 @@ def test_to_latex_no_header(self):
withoutindex_result = df.to_latex(index=False, header=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand All @@ -641,10 +641,10 @@ def test_to_latex_specified_header(self):
withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA & BB \\
AA & BB \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down