Skip to content

Commit a240b29

Browse files
committed
Merge y-p/unicode__ #2224
2 parents f6d48ca + 436bf36 commit a240b29

File tree

9 files changed

+330
-106
lines changed

9 files changed

+330
-106
lines changed

pandas/core/format.py

Lines changed: 26 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
string representation of NAN to use, default 'NaN'
3737
formatters : list or dict of one-parameter functions, optional
3838
formatter functions to apply to columns' elements by position or name,
39-
default None
39+
default None. If the result is a string, it must be a unicode string.
4040
float_format : one-parameter function, optional
4141
formatter function to apply to columns' elements if they are floats
4242
default None
@@ -62,7 +62,7 @@ class SeriesFormatter(object):
6262
def __init__(self, series, buf=None, header=True, length=True,
6363
na_rep='NaN', name=False, float_format=None):
6464
self.series = series
65-
self.buf = buf if buf is not None else StringIO()
65+
self.buf = buf if buf is not None else StringIO(u"")
6666
self.name = name
6767
self.na_rep = na_rep
6868
self.length = length
@@ -112,7 +112,7 @@ def to_string(self):
112112
series = self.series
113113

114114
if len(series) == 0:
115-
return ''
115+
return u''
116116

117117
fmt_index, have_header = self._get_formatted_index()
118118
fmt_values = self._get_formatted_values()
@@ -135,9 +135,7 @@ def to_string(self):
135135
if footer:
136136
result.append(footer)
137137

138-
if py3compat.PY3:
139-
return unicode(u'\n'.join(result))
140-
return com.console_encode(u'\n'.join(result))
138+
return unicode(u'\n'.join(result))
141139

142140
if py3compat.PY3: # pragma: no cover
143141
_encode_diff = lambda x: 0
@@ -200,10 +198,15 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
200198
else:
201199
self.columns = frame.columns
202200

203-
def _to_str_columns(self, force_unicode=False):
201+
def _to_str_columns(self, force_unicode=None):
204202
"""
205203
Render a DataFrame to a list of columns (as lists of strings).
206204
"""
205+
import warnings
206+
if force_unicode is not None: # pragma: no cover
207+
warnings.warn("force_unicode is deprecated, it will have no effect",
208+
FutureWarning)
209+
207210
# may include levels names also
208211
str_index = self._get_formatted_index()
209212
str_columns = self._get_formatted_column_labels()
@@ -237,32 +240,17 @@ def _to_str_columns(self, force_unicode=False):
237240
if self.index:
238241
strcols.insert(0, str_index)
239242

240-
if not py3compat.PY3:
241-
if force_unicode:
242-
def make_unicode(x):
243-
if isinstance(x, unicode):
244-
return x
245-
return x.decode('utf-8')
246-
strcols = map(lambda col: map(make_unicode, col), strcols)
247-
else:
248-
# Generally everything is plain strings, which has ascii
249-
# encoding. Problem is when there is a char with value over
250-
# 127. Everything then gets converted to unicode.
251-
try:
252-
map(lambda col: map(str, col), strcols)
253-
except UnicodeError:
254-
def make_unicode(x):
255-
if isinstance(x, unicode):
256-
return x
257-
return x.decode('utf-8')
258-
strcols = map(lambda col: map(make_unicode, col), strcols)
259-
260243
return strcols
261244

262-
def to_string(self, force_unicode=False):
245+
def to_string(self, force_unicode=None):
263246
"""
264247
Render a DataFrame to a console-friendly tabular output.
265248
"""
249+
import warnings
250+
if force_unicode is not None: # pragma: no cover
251+
warnings.warn("force_unicode is deprecated, it will have no effect",
252+
FutureWarning)
253+
266254
frame = self.frame
267255

268256
if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -272,15 +260,20 @@ def to_string(self, force_unicode=False):
272260
com.pprint_thing(frame.index)))
273261
text = info_line
274262
else:
275-
strcols = self._to_str_columns(force_unicode)
263+
strcols = self._to_str_columns()
276264
text = adjoin(1, *strcols)
277265

278266
self.buf.writelines(text)
279267

280-
def to_latex(self, force_unicode=False, column_format=None):
268+
def to_latex(self, force_unicode=None, column_format=None):
281269
"""
282270
Render a DataFrame to a LaTeX tabular environment output.
283271
"""
272+
import warnings
273+
if force_unicode is not None: # pragma: no cover
274+
warnings.warn("force_unicode is deprecated, it will have no effect",
275+
FutureWarning)
276+
284277
frame = self.frame
285278

286279
if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -289,7 +282,7 @@ def to_latex(self, force_unicode=False, column_format=None):
289282
frame.columns, frame.index))
290283
strcols = [[info_line]]
291284
else:
292-
strcols = self._to_str_columns(force_unicode)
285+
strcols = self._to_str_columns()
293286

294287
if column_format is None:
295288
column_format = '|l|%s|' % '|'.join('c' for _ in strcols)
@@ -726,18 +719,10 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
726719
self.justify = justify
727720

728721
def get_result(self):
729-
if self._have_unicode():
730-
fmt_values = self._format_strings(use_unicode=True)
731-
else:
732-
fmt_values = self._format_strings(use_unicode=False)
733-
722+
fmt_values = self._format_strings()
734723
return _make_fixed_width(fmt_values, self.justify)
735724

736-
def _have_unicode(self):
737-
mask = lib.map_infer(self.values, lambda x: isinstance(x, unicode))
738-
return mask.any()
739-
740-
def _format_strings(self, use_unicode=False):
725+
def _format_strings(self):
741726
if self.float_format is None:
742727
float_format = print_config.float_format
743728
if float_format is None:

pandas/core/frame.py

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -608,20 +608,51 @@ def _need_info_repr_(self):
608608
else:
609609
return False
610610

611-
def __repr__(self):
611+
def __str__(self):
612+
"""
613+
Return a string representation for a particular DataFrame
614+
615+
Invoked by str(df) in both py2/py3.
616+
Yields Bytestring in Py2, Unicode String in py3.
617+
"""
618+
619+
if py3compat.PY3:
620+
return self.__unicode__()
621+
return self.__bytes__()
622+
623+
def __bytes__(self):
612624
"""
613625
Return a string representation for a particular DataFrame
626+
627+
Invoked by bytes(df) in py3 only.
628+
Yields a bytestring in both py2/py3.
629+
"""
630+
return com.console_encode(self.__unicode__())
631+
632+
def __unicode__(self):
633+
"""
634+
Return a string representation for a particular DataFrame
635+
636+
Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3.
614637
"""
615-
buf = StringIO()
638+
buf = StringIO(u"")
616639
if self._need_info_repr_():
617640
self.info(buf=buf, verbose=self._verbose_info)
618641
else:
619642
self.to_string(buf=buf)
643+
620644
value = buf.getvalue()
645+
assert type(value) == unicode
621646

622-
if py3compat.PY3:
623-
return unicode(value)
624-
return com.console_encode(value)
647+
return value
648+
649+
def __repr__(self):
650+
"""
651+
Return a string representation for a particular DataFrame
652+
653+
Yields Bytestring in Py2, Unicode String in py3.
654+
"""
655+
return str(self)
625656

626657
def _repr_html_(self):
627658
"""
@@ -1389,19 +1420,21 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
13891420
def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
13901421
header=True, index=True, na_rep='NaN', formatters=None,
13911422
float_format=None, sparsify=None, nanRep=None,
1392-
index_names=True, justify=None, force_unicode=False):
1423+
index_names=True, justify=None, force_unicode=None):
13931424
"""
13941425
Render a DataFrame to a console-friendly tabular output.
13951426
"""
1427+
import warnings
1428+
if force_unicode is not None: # pragma: no cover
1429+
warnings.warn("force_unicode is deprecated, it will have no effect",
1430+
FutureWarning)
13961431

13971432
if nanRep is not None: # pragma: no cover
1398-
import warnings
13991433
warnings.warn("nanRep is deprecated, use na_rep",
14001434
FutureWarning)
14011435
na_rep = nanRep
14021436

14031437
if colSpace is not None: # pragma: no cover
1404-
import warnings
14051438
warnings.warn("colSpace is deprecated, use col_space",
14061439
FutureWarning)
14071440
col_space = colSpace
@@ -1414,15 +1447,10 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
14141447
justify=justify,
14151448
index_names=index_names,
14161449
header=header, index=index)
1417-
formatter.to_string(force_unicode=force_unicode)
1450+
formatter.to_string()
14181451

14191452
if buf is None:
14201453
result = formatter.buf.getvalue()
1421-
if not force_unicode:
1422-
try:
1423-
result = str(result)
1424-
except ValueError:
1425-
pass
14261454
return result
14271455

14281456
@Appender(fmt.docstring_to_string, indents=1)

pandas/core/index.py

Lines changed: 79 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,50 @@ def __repr__(self):
139139
data = self[:3].tolist() + ["..."] + self[-3:].tolist()
140140
else:
141141
data = self
142+
143+
def __str__(self):
144+
"""
145+
Return a string representation for a particular Index
146+
147+
Invoked by str(index) in both py2/py3.
148+
Yields Bytestring in Py2, Unicode String in py3.
149+
"""
150+
142151
if py3compat.PY3:
143-
prepr = com.pprint_thing(data)
152+
return self.__unicode__()
153+
return self.__bytes__()
154+
155+
def __bytes__(self):
156+
"""
157+
Return a string representation for a particular Index
158+
159+
Invoked by bytes(index) in py3 only.
160+
Yields a bytestring in both py2/py3.
161+
"""
162+
return com.console_encode(self.__unicode__())
163+
164+
def __unicode__(self):
165+
"""
166+
Return a string representation for a particular Index
167+
168+
Invoked by unicode(index) in py2 only. Yields a Unicode String in both py2/py3.
169+
"""
170+
if len(self) > 6 and len(self) > np.get_printoptions()['threshold']:
171+
data = self[:3].tolist() + ["..."] + self[-3:].tolist()
144172
else:
145-
prepr = com.pprint_thing_encoded(data)
173+
data = self
174+
175+
prepr = com.pprint_thing(data)
146176
return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype)
147177

178+
def __repr__(self):
179+
"""
180+
Return a string representation for a particular Index
181+
182+
Yields Bytestring in Py2, Unicode String in py3.
183+
"""
184+
return str(self)
185+
148186
def astype(self, dtype):
149187
return Index(self.values.astype(dtype), name=self.name,
150188
dtype=dtype)
@@ -213,15 +251,6 @@ def summary(self, name=None):
213251
name = type(self).__name__
214252
return '%s: %s entries%s' % (name, len(self), index_summary)
215253

216-
def __str__(self):
217-
try:
218-
return np.array_repr(self.values)
219-
except UnicodeError:
220-
converted = u','.join(com.pprint_thing(x) for x in self.values)
221-
result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted,
222-
str(self.values.dtype))
223-
return com.console_encode(result)
224-
225254
def _mpl_repr(self):
226255
# how to represent ourselves to matplotlib
227256
return self.values
@@ -400,8 +429,8 @@ def format(self, name=False):
400429
result = []
401430
for dt in self:
402431
if dt.time() != zero_time or dt.tzinfo is not None:
403-
return header + ['%s' % x for x in self]
404-
result.append('%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
432+
return header + [u'%s' % x for x in self]
433+
result.append(u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
405434
return header + result
406435

407436
values = self.values
@@ -1325,7 +1354,33 @@ def _array_values(self):
13251354
def dtype(self):
13261355
return np.dtype('O')
13271356

1328-
def __repr__(self):
1357+
def __str__(self):
1358+
"""
1359+
Return a string representation for a particular Index
1360+
1361+
Invoked by str(index) in both py2/py3.
1362+
Yields Bytestring in Py2, Unicode String in py3.
1363+
"""
1364+
1365+
if py3compat.PY3:
1366+
return self.__unicode__()
1367+
return self.__bytes__()
1368+
1369+
def __bytes__(self):
1370+
"""
1371+
Return a string representation for a particular Index
1372+
1373+
Invoked by bytes(index) in py3 only.
1374+
Yields a bytestring in both py2/py3.
1375+
"""
1376+
return com.console_encode(self.__unicode__())
1377+
1378+
def __unicode__(self):
1379+
"""
1380+
Return a string representation for a particular Index
1381+
1382+
Invoked by unicode(index) in py2 only. Yields a Unicode String in both py2/py3.
1383+
"""
13291384
output = 'MultiIndex\n%s'
13301385

13311386
options = np.get_printoptions()
@@ -1341,10 +1396,15 @@ def __repr__(self):
13411396

13421397
np.set_printoptions(threshold=options['threshold'])
13431398

1344-
if py3compat.PY3:
1345-
return output % summary
1346-
else:
1347-
return com.console_encode(output % summary)
1399+
return output % summary
1400+
1401+
def __repr__(self):
1402+
"""
1403+
Return a string representation for a particular Index
1404+
1405+
Yields Bytestring in Py2, Unicode String in py3.
1406+
"""
1407+
return str(self)
13481408

13491409
def __len__(self):
13501410
return len(self.labels[0])
@@ -1502,7 +1562,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
15021562
formatted = lev.take(lab).format()
15031563
else:
15041564
# weird all NA case
1505-
formatted = [str(x) for x in com.take_1d(lev.values, lab)]
1565+
formatted = [com.pprint_thing(x) for x in com.take_1d(lev.values, lab)]
15061566
stringified_levels.append(formatted)
15071567

15081568
result_levels = []

0 commit comments

Comments
 (0)