Skip to content

Commit f801a3a

Browse files
committed
fix(enc): apply surrogateescape when enc/decoding cmd-streams also
Try to fix #543 unicode woes in "core" position (i.e. TMPDIR): + Apply surrogate-escapes (PEP 383) also when decoding. + Ensure all file-paths and cmd-streams are surrogate-escape de/encoded. + test_utils: check if lock works with unicode paths. + git.compat: FIX undefined exc to raise in `replace_surrogate_encode()` and flake8 fixes. CI results: + Linux: + py2.7 FAIL 53 TCs + py3: fixed + Windows: all were OK
1 parent 1a09edb commit f801a3a

File tree

9 files changed

+45
-30
lines changed

9 files changed

+45
-30
lines changed

git/cmd.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@
2121
from git.compat import (
2222
string_types,
2323
defenc,
24-
force_bytes,
2524
PY3,
2625
# just to satisfy flake8 on py3
2726
unicode,
2827
safe_decode,
28+
safe_encode,
2929
is_posix,
3030
is_win,
3131
)
@@ -274,13 +274,13 @@ def wait(self, stderr=b''): # TODO: Bad choice to mimic `proc.wait()` but with
274274
:raise GitCommandError: if the return status is not 0"""
275275
if stderr is None:
276276
stderr = b''
277-
stderr = force_bytes(stderr)
277+
stderr = safe_encode(stderr)
278278

279279
status = self.proc.wait()
280280

281281
def read_all_from_possibly_closed_stream(stream):
282282
try:
283-
return stderr + force_bytes(stream.read())
283+
return stderr + safe_encode(stream.read())
284284
except ValueError:
285285
return stderr or b''
286286

git/compat.py

+17-14
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121
from gitdb.utils.encoding import (
2222
string_types, # @UnusedImport
2323
text_type, # @UnusedImport
24-
force_bytes, # @UnusedImport
25-
force_text # @UnusedImport
24+
force_text, # @UnusedImport
2625
)
2726

2827

@@ -77,7 +76,7 @@ def safe_decode(s):
7776
def safe_encode(s):
7877
"""Safely decodes a binary string to unicode"""
7978
if isinstance(s, unicode):
80-
return s.encode(defenc)
79+
return s.encode(defenc, 'surrogateescape')
8180
elif isinstance(s, bytes):
8281
return s
8382
elif s is not None:
@@ -123,8 +122,8 @@ def __str__(self):
123122
else: # Python 2
124123
def __str__(self):
125124
return self.__unicode__().encode(defenc)
126-
127-
125+
126+
128127
"""
129128
This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
130129
handler of Python 3.
@@ -139,12 +138,14 @@ def __str__(self):
139138
# # -- Python 2/3 compatibility -------------------------------------
140139
# FS_ERRORS = 'my_surrogateescape'
141140

141+
142142
def u(text):
143143
if PY3:
144144
return text
145145
else:
146146
return text.decode('unicode_escape')
147147

148+
148149
def b(data):
149150
if PY3:
150151
return data.encode('latin1')
@@ -155,9 +156,10 @@ def b(data):
155156
_unichr = chr
156157
bytes_chr = lambda code: bytes((code,))
157158
else:
158-
_unichr = unichr
159+
_unichr = unichr # @UndefinedVariable
159160
bytes_chr = chr
160161

162+
161163
def surrogateescape_handler(exc):
162164
"""
163165
Pure Python implementation of the PEP 383: the "surrogateescape" error
@@ -204,7 +206,7 @@ def replace_surrogate_encode(mystring):
204206
# The following magic comes from Py3.3's Python/codecs.c file:
205207
if not 0xD800 <= code <= 0xDCFF:
206208
# Not a surrogate. Fail with the original exception.
207-
raise exc
209+
raise
208210
# mybytes = [0xe0 | (code >> 12),
209211
# 0x80 | ((code >> 6) & 0x3f),
210212
# 0x80 | (code & 0x3f)]
@@ -256,9 +258,8 @@ def encodefilename(fn):
256258
elif 0xDC80 <= code <= 0xDCFF:
257259
ch = bytes_chr(code - 0xDC00)
258260
else:
259-
raise UnicodeEncodeError(FS_ENCODING,
260-
fn, index, index+1,
261-
'ordinal not in range(128)')
261+
raise UnicodeEncodeError(FS_ENCODING, fn, index, index + 1,
262+
'ordinal not in range(128)')
262263
encoded.append(ch)
263264
return bytes().join(encoded)
264265
elif FS_ENCODING == 'utf-8':
@@ -272,20 +273,22 @@ def encodefilename(fn):
272273
ch = bytes_chr(code - 0xDC00)
273274
encoded.append(ch)
274275
else:
275-
raise UnicodeEncodeError(
276-
FS_ENCODING,
277-
fn, index, index+1, 'surrogates not allowed')
276+
raise UnicodeEncodeError(FS_ENCODING, fn, index, index + 1,
277+
'surrogates not allowed')
278278
else:
279279
ch_utf8 = ch.encode('utf-8')
280280
encoded.append(ch_utf8)
281281
return bytes().join(encoded)
282282
else:
283283
return fn.encode(FS_ENCODING, FS_ERRORS)
284284

285+
285286
def decodefilename(fn):
286287
return fn.decode(FS_ENCODING, FS_ERRORS)
287288

288-
FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
289+
FS_ENCODING = 'ascii'
290+
fn = b('[abc\xff]')
291+
encoded = u('[abc\udcff]')
289292
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
290293
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
291294

git/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,7 @@ def get_value(self, section, option, default=None):
551551
def _value_to_string(self, value):
552552
if isinstance(value, (int, float, bool)):
553553
return str(value)
554-
return force_text(value)
554+
return force_text(value) # No `safe_decode()`, let any unicode errors bubble-up.
555555

556556
@needs_values
557557
@set_dirty_and_flush_changes

git/index/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
izip,
1616
xrange,
1717
string_types,
18-
force_bytes,
1918
defenc,
2019
mviter,
21-
is_win
20+
is_win,
21+
safe_encode
2222
)
2323
from git.exc import (
2424
GitCommandError,
@@ -597,7 +597,7 @@ def _store_path(self, filepath, fprogress):
597597
st = os.lstat(filepath) # handles non-symlinks as well
598598
if S_ISLNK(st.st_mode):
599599
# in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8
600-
open_stream = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc))
600+
open_stream = lambda: BytesIO(safe_encode(os.readlink(filepath)))
601601
else:
602602
open_stream = lambda: open(filepath, 'rb')
603603
with open_stream() as stream:

git/index/fun.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
from git.compat import (
1818
PY3,
1919
defenc,
20-
force_text,
21-
force_bytes,
2220
is_posix,
2321
safe_encode,
2422
safe_decode,
@@ -91,8 +89,8 @@ def run_commit_hook(name, index):
9189
stdout = ''.join(stdout)
9290
stderr = ''.join(stderr)
9391
if cmd.returncode != 0:
94-
stdout = force_text(stdout, defenc)
95-
stderr = force_text(stderr, defenc)
92+
stdout = safe_decode(stdout)
93+
stderr = safe_decode(stderr)
9694
raise HookExecutionError(hp, cmd.returncode, stdout, stderr)
9795
# end handle return code
9896

@@ -136,7 +134,7 @@ def write_cache(entries, stream, extension_data=None, ShaStreamCls=IndexFileSHA1
136134
write(entry[4]) # ctime
137135
write(entry[5]) # mtime
138136
path = entry[3]
139-
path = force_bytes(path, encoding=defenc)
137+
path = safe_encode(path)
140138
plen = len(path) & CE_NAMEMASK # path length
141139
assert plen == len(path), "Path %s too long to fit into index" % entry[3]
142140
flags = plen | (entry[2] & CE_NAMEMASK_INV) # clear possible previous values

git/remote.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
import re
1010

1111
from git.cmd import handle_process_output, Git
12-
from git.compat import (defenc, force_text, is_win)
12+
from git.compat import (
13+
defenc,
14+
is_win,
15+
safe_decode,
16+
)
1317
from git.exc import GitCommandError
1418
from git.util import (
1519
LazyMixin,
@@ -640,7 +644,7 @@ def _get_fetch_info_from_stderr(self, proc, progress):
640644
log.warning("Error lines received while fetching: %s", stderr_text)
641645

642646
for line in progress.other_lines:
643-
line = force_text(line)
647+
line = safe_decode(line)
644648
for cmd in cmds:
645649
if len(line) > 1 and line[0] == ' ' and line[1] == cmd:
646650
fetch_info_lines.append(line)

git/test/test_repo.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
is_win,
3636
string_types,
3737
win_encode,
38+
safe_encode,
3839
)
3940
from git.exc import (
4041
BadObject,
@@ -804,7 +805,7 @@ def test_git_file(self, rwrepo):
804805

805806
# Test using an absolute gitdir path in the .git file.
806807
with open(git_file_path, 'wb') as fp:
807-
fp.write(('gitdir: %s\n' % real_path_abs).encode('ascii'))
808+
fp.write(safe_encode(('gitdir: %s\n' % real_path_abs)))
808809
git_file_repo = Repo(rwrepo.working_tree_dir)
809810
self.assertEqual(osp.abspath(git_file_repo.git_dir), real_path_abs)
810811

git/test/test_util.py

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# test_utils.py
23
# Copyright (C) 2008, 2009 Michael Trier ([email protected]) and contributors
34
#
@@ -153,6 +154,14 @@ def test_lock_file(self):
153154
lock_file._obtain_lock_or_raise()
154155
lock_file._release_lock()
155156

157+
def test_lock_file_unicode(self):
158+
my_file = tempfile.mktemp(prefix=u'καλημερα_')
159+
lock_file = LockFile(my_file)
160+
assert not lock_file._has_lock()
161+
lock_file._obtain_lock_or_raise()
162+
assert lock_file._has_lock()
163+
lock_file._release_lock()
164+
156165
def test_blocking_lock_file(self):
157166
my_file = tempfile.mktemp()
158167
lock_file = BlockingLockFile(my_file)

0 commit comments

Comments
 (0)