Skip to content

Commit 7a7e2d3

Browse files
committed
Merge remote-tracking branch 'upstream/master' into remove-sparse
2 parents 58b848a + 9ef67b1 commit 7a7e2d3

File tree

22 files changed

+137
-77
lines changed

22 files changed

+137
-77
lines changed

doc/source/development/contributing.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ We'll now kick off a three-step process:
208208
209209
# Build and install pandas
210210
python setup.py build_ext --inplace -j 4
211-
python -m pip install -e --no-build-isolation .
211+
python -m pip install -e . --no-build-isolation
212212
213213
At this point you should be able to import pandas from your locally built version::
214214

@@ -252,7 +252,7 @@ You'll need to have at least python3.5 installed on your system.
252252
253253
# Build and install pandas
254254
python setup.py build_ext --inplace -j 4
255-
python -m pip install -e --no-build-isolation .
255+
python -m pip install -e . --no-build-isolation
256256
257257
Creating a branch
258258
-----------------

doc/source/user_guide/options.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ display.max_colwidth 50 The maximum width in charac
353353
a column in the repr of a pandas
354354
data structure. When the column overflows,
355355
a "..." placeholder is embedded in
356-
the output.
356+
the output. 'None' value means unlimited.
357357
display.max_info_columns 100 max_info_columns is used in DataFrame.info
358358
method to decide if per column information
359359
will be printed.

doc/source/whatsnew/v1.0.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ including other versions of pandas.
2121
Enhancements
2222
~~~~~~~~~~~~
2323

24+
- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`)
25+
-
2426

2527
.. _whatsnew_1000.enhancements.other:
2628

@@ -200,6 +202,7 @@ I/O
200202
- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`)
201203
- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`)
202204
- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`)
205+
- Bug in :meth:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)
203206

204207
Plotting
205208
^^^^^^^^

pandas/core/arrays/sparse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,10 +244,10 @@ def construct_from_string(cls, string):
244244
if string.startswith("Sparse"):
245245
try:
246246
sub_type, has_fill_value = cls._parse_subtype(string)
247-
result = SparseDtype(sub_type)
248-
except Exception:
247+
except ValueError:
249248
raise TypeError(msg)
250249
else:
250+
result = SparseDtype(sub_type)
251251
msg = (
252252
"Could not construct SparseDtype from '{}'.\n\nIt "
253253
"looks like the fill_value in the string is not "

pandas/core/base.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,17 +1289,17 @@ def value_counts(
12891289
12901290
Parameters
12911291
----------
1292-
normalize : boolean, default False
1292+
normalize : bool, default False
12931293
If True then the object returned will contain the relative
12941294
frequencies of the unique values.
1295-
sort : boolean, default True
1295+
sort : bool, default True
12961296
Sort by frequencies.
1297-
ascending : boolean, default False
1297+
ascending : bool, default False
12981298
Sort in ascending order.
1299-
bins : integer, optional
1299+
bins : int, optional
13001300
Rather than count values, group them into half-open bins,
13011301
a convenience for ``pd.cut``, only works with numeric data.
1302-
dropna : boolean, default True
1302+
dropna : bool, default True
13031303
Don't include counts of NaN.
13041304
13051305
Returns
@@ -1496,7 +1496,7 @@ def memory_usage(self, deep=False):
14961496
size_hint="",
14971497
sort=textwrap.dedent(
14981498
"""\
1499-
sort : boolean, default False
1499+
sort : bool, default False
15001500
Sort `uniques` and shuffle `labels` to maintain the
15011501
relationship.
15021502
"""

pandas/core/common.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -445,15 +445,15 @@ def pipe(obj, func, *args, **kwargs):
445445
446446
Parameters
447447
----------
448-
func : callable or tuple of (callable, string)
448+
func : callable or tuple of (callable, str)
449449
Function to apply to this object or, alternatively, a
450450
``(callable, data_keyword)`` tuple where ``data_keyword`` is a
451451
string indicating the keyword of ``callable`` that expects the
452452
object.
453-
args : iterable, optional
454-
positional arguments passed into ``func``.
455-
kwargs : dict, optional
456-
a dictionary of keyword arguments passed into ``func``.
453+
*args : iterable, optional
454+
Positional arguments passed into ``func``.
455+
**kwargs : dict, optional
456+
A dictionary of keyword arguments passed into ``func``.
457457
458458
Returns
459459
-------

pandas/core/config_init.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,10 @@ def use_numexpr_cb(key):
148148
"""
149149

150150
max_colwidth_doc = """
151-
: int
151+
: int or None
152152
The maximum width in characters of a column in the repr of
153153
a pandas data structure. When the column overflows, a "..."
154-
placeholder is embedded in the output.
154+
placeholder is embedded in the output. A 'None' value means unlimited.
155155
"""
156156

157157
colheader_justify_doc = """
@@ -340,7 +340,9 @@ def is_terminal():
340340
validator=is_instance_factory([type(None), int]),
341341
)
342342
cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int)
343-
cf.register_option("max_colwidth", 50, max_colwidth_doc, validator=is_int)
343+
cf.register_option(
344+
"max_colwidth", 50, max_colwidth_doc, validator=is_nonnegative_int
345+
)
344346
if is_terminal():
345347
max_cols = 0 # automatically determine optimal number of columns
346348
else:

pandas/core/dtypes/common.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2034,10 +2034,8 @@ def pandas_dtype(dtype):
20342034
# raise a consistent TypeError if failed
20352035
try:
20362036
npdtype = np.dtype(dtype)
2037-
except Exception:
2038-
# we don't want to force a repr of the non-string
2039-
if not isinstance(dtype, str):
2040-
raise TypeError("data type not understood")
2037+
except SyntaxError:
2038+
# np.dtype uses `eval` which can raise SyntaxError
20412039
raise TypeError("data type '{}' not understood".format(dtype))
20422040

20432041
# Any invalid dtype (such as pd.Timestamp) should raise an error.

pandas/core/frame.py

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,7 @@ def __repr__(self):
641641
max_rows = get_option("display.max_rows")
642642
min_rows = get_option("display.min_rows")
643643
max_cols = get_option("display.max_columns")
644+
max_colwidth = get_option("display.max_colwidth")
644645
show_dimensions = get_option("display.show_dimensions")
645646
if get_option("display.expand_frame_repr"):
646647
width, _ = console.get_console_size()
@@ -652,6 +653,7 @@ def __repr__(self):
652653
min_rows=min_rows,
653654
max_cols=max_cols,
654655
line_width=width,
656+
max_colwidth=max_colwidth,
655657
show_dimensions=show_dimensions,
656658
)
657659

@@ -730,12 +732,17 @@ def to_string(
730732
show_dimensions=False,
731733
decimal=".",
732734
line_width=None,
735+
max_colwidth=None,
733736
):
734737
"""
735738
Render a DataFrame to a console-friendly tabular output.
736739
%(shared_params)s
737740
line_width : int, optional
738741
Width to wrap a line in characters.
742+
max_colwidth : int, optional
743+
Max width to truncate each column in characters. By default, no limit.
744+
745+
.. versionadded:: 1.0.0
739746
%(returns)s
740747
See Also
741748
--------
@@ -752,26 +759,29 @@ def to_string(
752759
2 3 6
753760
"""
754761

755-
formatter = fmt.DataFrameFormatter(
756-
self,
757-
columns=columns,
758-
col_space=col_space,
759-
na_rep=na_rep,
760-
formatters=formatters,
761-
float_format=float_format,
762-
sparsify=sparsify,
763-
justify=justify,
764-
index_names=index_names,
765-
header=header,
766-
index=index,
767-
min_rows=min_rows,
768-
max_rows=max_rows,
769-
max_cols=max_cols,
770-
show_dimensions=show_dimensions,
771-
decimal=decimal,
772-
line_width=line_width,
773-
)
774-
return formatter.to_string(buf=buf)
762+
from pandas import option_context
763+
764+
with option_context("display.max_colwidth", max_colwidth):
765+
formatter = fmt.DataFrameFormatter(
766+
self,
767+
columns=columns,
768+
col_space=col_space,
769+
na_rep=na_rep,
770+
formatters=formatters,
771+
float_format=float_format,
772+
sparsify=sparsify,
773+
justify=justify,
774+
index_names=index_names,
775+
header=header,
776+
index=index,
777+
min_rows=min_rows,
778+
max_rows=max_rows,
779+
max_cols=max_cols,
780+
show_dimensions=show_dimensions,
781+
decimal=decimal,
782+
line_width=line_width,
783+
)
784+
return formatter.to_string(buf=buf)
775785

776786
# ----------------------------------------------------------------------
777787

@@ -2098,8 +2108,12 @@ def to_parquet(
20982108
Name of the compression to use. Use ``None`` for no compression.
20992109
index : bool, default None
21002110
If ``True``, include the dataframe's index(es) in the file output.
2101-
If ``False``, they will not be written to the file. If ``None``,
2102-
the behavior depends on the chosen engine.
2111+
If ``False``, they will not be written to the file.
2112+
If ``None``, similar to ``True`` the dataframe's index(es)
2113+
will be saved. However, instead of being saved as values,
2114+
the RangeIndex will be stored as a range in the metadata so it
2115+
doesn't require much space and is faster. Other indexes will
2116+
be included as columns in the file output.
21032117
21042118
.. versionadded:: 0.24.0
21052119

pandas/core/generic.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2310,7 +2310,7 @@ def to_json(
23102310
floating point values.
23112311
force_ascii : bool, default True
23122312
Force encoded string to be ASCII.
2313-
date_unit : string, default 'ms' (milliseconds)
2313+
date_unit : str, default 'ms' (milliseconds)
23142314
The time unit to encode to, governs timestamp and ISO8601
23152315
precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
23162316
microsecond, and nanosecond respectively.
@@ -2530,8 +2530,9 @@ def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs):
25302530
25312531
Parameters
25322532
----------
2533-
path : string File path, buffer-like, or None
2534-
if None, return generated bytes
2533+
path : str, buffer-like, or None
2534+
Destination for the serialized object.
2535+
If None, return generated bytes.
25352536
append : bool whether to append to an existing msgpack
25362537
(default is False)
25372538
compress : type of compressor (zlib or blosc), default to None (no
@@ -4618,8 +4619,9 @@ def filter(self, items=None, like=None, regex=None, axis=None):
46184619
Keep labels from axis for which "like in label == True".
46194620
regex : str (regular expression)
46204621
Keep labels from axis for which re.search(regex, label) == True.
4621-
axis : int or string axis name
4622-
The axis to filter on. By default this is the info axis,
4622+
axis : {0 or 'index', 1 or 'columns', None}, default None
4623+
The axis to filter on, expressed either as an index (int)
4624+
or axis name (str). By default this is the info axis,
46234625
'index' for Series, 'columns' for DataFrame.
46244626
46254627
Returns
@@ -4852,7 +4854,7 @@ def sample(
48524854
random_state : int or numpy.random.RandomState, optional
48534855
Seed for the random number generator (if int), or numpy RandomState
48544856
object.
4855-
axis : int or string, optional
4857+
axis : {0 or 'index', 1 or 'columns', None}, default None
48564858
Axis to sample. Accepts axis number or name. Default is stat axis
48574859
for given data type (0 for Series and DataFrames).
48584860
@@ -8445,7 +8447,7 @@ def first(self, offset):
84458447
84468448
Parameters
84478449
----------
8448-
offset : string, DateOffset, dateutil.relativedelta
8450+
offset : str, DateOffset, dateutil.relativedelta
84498451
84508452
Returns
84518453
-------
@@ -8508,7 +8510,7 @@ def last(self, offset):
85088510
85098511
Parameters
85108512
----------
8511-
offset : string, DateOffset, dateutil.relativedelta
8513+
offset : str, DateOffset, dateutil.relativedelta
85128514
85138515
Returns
85148516
-------
@@ -9116,7 +9118,7 @@ def _where(
91169118
91179119
Parameters
91189120
----------
9119-
cond : boolean %(klass)s, array-like, or callable
9121+
cond : bool %(klass)s, array-like, or callable
91209122
Where `cond` is %(cond)s, keep the original value. Where
91219123
%(cond_rev)s, replace with corresponding value from `other`.
91229124
If `cond` is callable, it is computed on the %(klass)s and
@@ -9423,9 +9425,10 @@ def tshift(self, periods=1, freq=None, axis=0):
94239425
----------
94249426
periods : int
94259427
Number of periods to move, can be positive or negative
9426-
freq : DateOffset, timedelta, or time rule string, default None
9427-
Increment to use from the tseries module or time rule (e.g. 'EOM')
9428-
axis : int or basestring
9428+
freq : DateOffset, timedelta, or str, default None
9429+
Increment to use from the tseries module
9430+
or time rule expressed as a string (e.g. 'EOM')
9431+
axis : {0 or 'index', 1 or 'columns', None}, default 0
94299432
Corresponds to the axis that contains the Index
94309433
94319434
Returns
@@ -9483,9 +9486,9 @@ def truncate(self, before=None, after=None, axis=None, copy=True):
94839486
94849487
Parameters
94859488
----------
9486-
before : date, string, int
9489+
before : date, str, int
94879490
Truncate all rows before this index value.
9488-
after : date, string, int
9491+
after : date, str, int
94899492
Truncate all rows after this index value.
94909493
axis : {0 or 'index', 1 or 'columns'}, optional
94919494
Axis to truncate. Truncates the index (rows) by default.
@@ -10294,7 +10297,7 @@ def _check_percentile(self, q):
1029410297
How to handle NAs before computing percent changes.
1029510298
limit : int, default None
1029610299
The number of consecutive NAs to fill before stopping.
10297-
freq : DateOffset, timedelta, or offset alias string, optional
10300+
freq : DateOffset, timedelta, or str, optional
1029810301
Increment to use from time series API (e.g. 'M' or BDay()).
1029910302
**kwargs
1030010303
Additional keyword arguments are passed into

pandas/io/clipboards.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
131131

132132
if isinstance(obj, ABCDataFrame):
133133
# str(df) has various unhelpful defaults, like truncation
134-
with option_context("display.max_colwidth", 999999):
134+
with option_context("display.max_colwidth", None):
135135
objstr = obj.to_string(**kwargs)
136136
else:
137137
objstr = str(obj)

pandas/io/formats/html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ def _write_header(self, indent: int) -> None:
377377
self.write("</thead>", indent)
378378

379379
def _get_formatted_values(self) -> Dict[int, List[str]]:
380-
with option_context("display.max_colwidth", 999999):
380+
with option_context("display.max_colwidth", None):
381381
fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)}
382382
return fmt_values
383383

pandas/io/parquet.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,12 @@ def to_parquet(
227227
Name of the compression to use. Use ``None`` for no compression.
228228
index : bool, default None
229229
If ``True``, include the dataframe's index(es) in the file output. If
230-
``False``, they will not be written to the file. If ``None``, the
231-
engine's default behavior will be used.
230+
``False``, they will not be written to the file.
231+
If ``None``, similar to ``True`` the dataframe's index(es)
232+
will be saved. However, instead of being saved as values,
233+
the RangeIndex will be stored as a range in the metadata so it
234+
doesn't require much space and is faster. Other indexes will
235+
be included as columns in the file output.
232236
233237
.. versionadded:: 0.24.0
234238

pandas/io/pytables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1782,7 +1782,7 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
17821782
# making an Index instance could throw a number of different errors
17831783
try:
17841784
self.values = Index(values, **kwargs)
1785-
except Exception: # noqa: E722
1785+
except Exception:
17861786

17871787
# if the output freq is different than what we recorded,
17881788
# it should be None (see also 'doc example part 2')

0 commit comments

Comments
 (0)