[pypy-commit] pypy default: Indirect creation of function unicode_encode_unicode_escape(),
amauryfa
noreply at buildbot.pypy.org
Mon Sep 24 23:45:47 CEST 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch:
Changeset: r57523:ece825856859
Date: 2012-09-23 18:08 +0200
http://bitbucket.org/pypy/pypy/changeset/ece825856859/
Log: Indirect creation of function unicode_encode_unicode_escape(), will
make sense in python3 which needs different variations of the same
function.
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -1202,74 +1202,82 @@
return builder.build(), pos
-def unicode_encode_unicode_escape(s, size, errors, errorhandler=None, quotes=False):
- # errorhandler is not used: this function cannot cause Unicode errors
- result = StringBuilder(size)
+def make_unicode_escape_function():
+ # Python3 has two similar escape functions: One to implement
+ # encode('unicode_escape') and which outputs bytes, and unicode.__repr__
+ # which outputs unicode. They cannot share RPython code, so we generate
+ # them with the template below.
+ # Python2 does not really need this, but it reduces diffs between branches.
+ def unicode_escape(s, size, errors, errorhandler=None, quotes=False):
+ # errorhandler is not used: this function cannot cause Unicode errors
+ result = StringBuilder(size)
- if quotes:
- if s.find(u'\'') != -1 and s.find(u'\"') == -1:
- quote = ord('\"')
- result.append('u"')
+ if quotes:
+ if s.find(u'\'') != -1 and s.find(u'\"') == -1:
+ quote = ord('\"')
+ result.append('u"')
+ else:
+ quote = ord('\'')
+ result.append('u\'')
else:
- quote = ord('\'')
- result.append('u\'')
- else:
- quote = 0
+ quote = 0
- if size == 0:
- return ''
+ if size == 0:
+ return ''
- pos = 0
- while pos < size:
- ch = s[pos]
- oc = ord(ch)
+ pos = 0
+ while pos < size:
+ ch = s[pos]
+ oc = ord(ch)
- # Escape quotes
- if quotes and (oc == quote or ch == '\\'):
- result.append('\\')
- result.append(chr(oc))
- pos += 1
- continue
-
- # The following logic is enabled only if MAXUNICODE == 0xffff, or
- # for testing on top of a host CPython where sys.maxunicode == 0xffff
- if ((MAXUNICODE < 65536 or
- (not we_are_translated() and sys.maxunicode < 65536))
- and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
- # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
- pos += 1
- oc2 = ord(s[pos])
-
- if 0xDC00 <= oc2 <= 0xDFFF:
- ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
- raw_unicode_escape_helper(result, ucs)
+ # Escape quotes
+ if quotes and (oc == quote or ch == '\\'):
+ result.append('\\')
+ result.append(chr(oc))
pos += 1
continue
- # Fall through: isolated surrogates are copied as-is
- pos -= 1
- # Map special whitespace to '\t', \n', '\r'
- if ch == '\t':
- result.append('\\t')
- elif ch == '\n':
- result.append('\\n')
- elif ch == '\r':
- result.append('\\r')
- elif ch == '\\':
- result.append('\\\\')
+ # The following logic is enabled only if MAXUNICODE == 0xffff, or
+ # for testing on top of a host Python where sys.maxunicode == 0xffff
+ if ((MAXUNICODE < 65536 or
+ (not we_are_translated() and sys.maxunicode < 65536))
+ and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
+ # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
+ pos += 1
+ oc2 = ord(s[pos])
- # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
- elif oc < 32 or oc >= 0x7F:
- raw_unicode_escape_helper(result, oc)
+ if 0xDC00 <= oc2 <= 0xDFFF:
+ ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
+ raw_unicode_escape_helper(result, ucs)
+ pos += 1
+ continue
+ # Fall through: isolated surrogates are copied as-is
+ pos -= 1
- # Copy everything else as-is
- else:
- result.append(chr(oc))
- pos += 1
+ # Map special whitespace to '\t', \n', '\r'
+ if ch == '\t':
+ result.append('\\t')
+ elif ch == '\n':
+ result.append('\\n')
+ elif ch == '\r':
+ result.append('\\r')
+ elif ch == '\\':
+ result.append('\\\\')
- if quotes:
- result.append(chr(quote))
- return result.build()
+ # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
+ elif oc < 32 or oc >= 0x7F:
+ raw_unicode_escape_helper(result, oc)
+
+ # Copy everything else as-is
+ else:
+ result.append(chr(oc))
+ pos += 1
+
+ if quotes:
+ result.append(chr(quote))
+ return result.build()
+ return unicode_escape
+unicode_encode_unicode_escape = make_unicode_escape_function()
# ____________________________________________________________
# Raw unicode escape
More information about the pypy-commit
mailing list