Skip to content

Commit 05cf33a

Browse files
Harmon758 authored and Byron committed
Remove surrogateescape error handler for Python 2
1 parent e50ee0a commit 05cf33a

File tree

1 file changed

+0
-179
lines changed

1 file changed

+0
-179
lines changed

‎git/compat.py

-179
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import locale
1111
import os
1212
import sys
13-
import codecs
1413

1514

1615
from gitdb.utils.compat import (
@@ -91,181 +90,3 @@ def __str__(self):
9190
else: # Python 2
9291
def __str__(self):
9392
return self.__unicode__().encode(defenc)
94-
95-
96-
"""
97-
This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
98-
handler of Python 3.
99-
Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
100-
"""
101-
102-
# This code is released under the Python license and the BSD 2-clause license
103-
104-
105-
FS_ERRORS = 'surrogateescape'
106-
107-
# # -- Python 2/3 compatibility -------------------------------------
108-
# FS_ERRORS = 'my_surrogateescape'
109-
110-
def u(text):
    """Return *text* as a text string on either major Python version.

    When ``PY3`` is true the argument is returned untouched; otherwise it
    is decoded with the ``unicode_escape`` codec.
    """
    return text if PY3 else text.decode('unicode_escape')
114-
115-
def b(data):
    """Return *data* as a byte string on either major Python version.

    When ``PY3`` is true the argument is encoded as latin1; otherwise it is
    returned untouched.
    """
    return data.encode('latin1') if PY3 else data
119-
120-
def surrogateescape_handler(exc):
    """
    Codec error handler implementing PEP 383 ("surrogateescape") in pure
    Python.

    While decoding, each undecodable byte is replaced by a Unicode character
    U+DCxx; while encoding, those characters are translated back into the
    original bytes.

    :param exc: the UnicodeDecodeError or UnicodeEncodeError being handled
    :return: tuple of (replacement string, position at which to resume)
    :raise: *exc* itself when it is of another type or when the offending
        slice cannot be translated
    """
    # The part of the input the codec could not process.
    offending = exc.object[exc.start:exc.end]

    try:
        if isinstance(exc, UnicodeDecodeError):
            # Decoding: the offending slice is a byte string.
            replacement = replace_surrogate_decode(offending)
        elif isinstance(exc, UnicodeEncodeError):
            # Note from the original author: for something like
            # u'\udcc3'.encode('ascii', <this handler>) both Python 2.x and
            # 3.x still raise after this function returns, apparently because
            # the strict encoder is applied to the string returned here.
            replacement = replace_surrogate_encode(offending, exc)
        else:
            raise exc
    except NotASurrogateError:
        # The helpers could not translate the slice: report the original error.
        raise exc
    return (replacement, exc.end)
145-
146-
147-
class NotASurrogateError(Exception):
    """Raised by the surrogate replacement helpers for a value outside the
    ranges they translate; surrogateescape_handler catches it and re-raises
    the original codec error instead."""
149-
150-
151-
def replace_surrogate_encode(mystring, exc):
    """
    Translate escaped surrogate characters back to the values they stand for.

    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    :param mystring: the characters the encoder could not encode
    :param exc: the original encoding error; raised as is when *mystring*
        contains a character that cannot be translated
    :return: string in which every U+DCxx character is replaced by chr(0xxx)
    :raise: *exc* for any character outside U+DC00..U+DCFF
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # Only U+DC00..U+DCFF can be mapped back to a value in 0x00..0xFF.
        # Anything else (ordinary characters as well as the other surrogates
        # U+D800..U+DBFF and U+DD00..U+DFFF) fails with the original
        # exception; U+D800..U+DBFF used to fall through to chr() with a
        # negative argument and surfaced as an unrelated ValueError.
        if not 0xDC00 <= code <= 0xDCFF:
            raise exc
        # U+DC00..U+DC7F is still accepted, as before, although the original
        # author questioned whether that is a good idea.
        decoded.append(chr(code - 0xDC00))
    return str().join(decoded)
178-
179-
180-
def replace_surrogate_decode(mybytes):
    """
    Returns a (unicode) string in which every byte 0x80..0xFF of *mybytes* is
    replaced by the character U+DC80..U+DCFF and every byte 0x00..0x7F by the
    character with the same code.

    :param mybytes: iterable yielding ints or single-character strings
    :raise NotASurrogateError: for an item whose code is above 0xFF
    """
    # On Python 2 chr() only accepts range(256), so the U+DCxx characters
    # have to be built with unichr(); on Python 3 unichr does not exist and
    # chr() covers the whole Unicode range.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    decoded = []
    for ch in mybytes:
        # We may be parsing newbytes (in which case ch is an int) or a native
        # str on Py2
        code = ch if isinstance(ch, int) else ord(ch)
        if 0x80 <= code <= 0xFF:
            decoded.append(to_char(0xDC00 + code))
        elif code <= 0x7F:
            decoded.append(to_char(code))
        else:
            raise NotASurrogateError
    return str().join(decoded)
203-
204-
205-
def encodefilename(fn):
    """
    Encode the text *fn* to bytes using FS_ENCODING, turning the escaped
    characters U+DC80..U+DCFF back into the bytes 0x80..0xFF.

    The 'ascii' and 'utf-8' encodings are handled by hand; every other
    encoding is delegated to ``fn.encode(FS_ENCODING, FS_ERRORS)``.

    :raise UnicodeEncodeError: for a character that is neither encodable nor
        an escaped byte
    """
    # Single bytes are built with bytes(bytearray((code,))): on Python 2
    # bytes is str, so bytes((code,)) would yield the tuple's repr
    # (e.g. '(65,)') rather than one byte.
    if FS_ENCODING == 'ascii':
        # ASCII encoder of Python 2 expects that the error handler returns a
        # Unicode string encodable to ASCII, whereas our surrogateescape error
        # handler has to return bytes in 0x80-0xFF range.
        encoded = []
        for index, ch in enumerate(fn):
            code = ord(ch)
            if code < 128:
                ch = bytes(bytearray((code,)))
            elif 0xDC80 <= code <= 0xDCFF:
                ch = bytes(bytearray((code - 0xDC00,)))
            else:
                raise UnicodeEncodeError(FS_ENCODING,
                                         fn, index, index+1,
                                         'ordinal not in range(128)')
            encoded.append(ch)
        return bytes().join(encoded)
    elif FS_ENCODING == 'utf-8':
        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
        # doesn't go through our error handler
        encoded = []
        for index, ch in enumerate(fn):
            code = ord(ch)
            if 0xD800 <= code <= 0xDFFF:
                if 0xDC80 <= code <= 0xDCFF:
                    ch = bytes(bytearray((code - 0xDC00,)))
                    encoded.append(ch)
                else:
                    raise UnicodeEncodeError(
                        FS_ENCODING,
                        fn, index, index+1, 'surrogates not allowed')
            else:
                ch_utf8 = ch.encode('utf-8')
                encoded.append(ch_utf8)
        return bytes().join(encoded)
    return fn.encode(FS_ENCODING, FS_ERRORS)
242-
243-
def decodefilename(fn):
    """Decode *fn* using FS_ENCODING with the FS_ERRORS error handler."""
    decoded = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded
245-
246-
# Active filesystem encoding, plus what appear to be a sample raw name and
# its escaped text form (left over from the upstream module's self-test).
FS_ENCODING = 'ascii'
fn = b('[abc\xff]')
encoded = u('[abc\udcff]')
# Alternative settings kept for reference:
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


# Normalize the filesystem encoding name through the codec registry.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name
254-
255-
256-
def register_surrogateescape():
    """
    Registers the surrogateescape error handler on Python 2 (only).

    Does nothing when ``PY3`` is true or when an error handler is already
    registered under the FS_ERRORS name.
    """
    if not PY3:
        try:
            codecs.lookup_error(FS_ERRORS)
        except LookupError:
            # No handler under that name yet: install the pure-Python one.
            codecs.register_error(FS_ERRORS, surrogateescape_handler)
266-
267-
268-
# Probe at import time: try a decode that names the "surrogateescape" error
# handler, and fall back to registering the pure-Python handler if that
# attempt raises.
try:
    b"100644 \x9f\0aaa".decode(defenc, "surrogateescape")
except Exception:
    register_surrogateescape()

0 commit comments

Comments
 (0)