[Python-checkins] cpython (2.7): Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
serhiy.storchaka
python-checkins at python.org
Thu Oct 3 11:13:03 CEST 2013
http://hg.python.org/cpython/rev/5e8de100f708
changeset: 85936:5e8de100f708
branch: 2.7
parent: 85933:43064ded64cb
user: Serhiy Storchaka <storchaka at gmail.com>
date: Thu Oct 03 12:08:22 2013 +0300
summary:
Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
files:
Lib/lib2to3/fixes/fix_unicode.py | 32 +++++++++++++---
Lib/lib2to3/tests/test_fixers.py | 37 ++++++++++++++++++++
Misc/NEWS | 2 +
3 files changed, 64 insertions(+), 7 deletions(-)
diff --git a/Lib/lib2to3/fixes/fix_unicode.py b/Lib/lib2to3/fixes/fix_unicode.py
--- a/Lib/lib2to3/fixes/fix_unicode.py
+++ b/Lib/lib2to3/fixes/fix_unicode.py
@@ -1,25 +1,43 @@
-"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
+r"""Fixer for unicode.
+
+* Changes unicode to str and unichr to chr.
+
+* If "...\u..." is not a unicode literal, change it into "...\\u...".
+
+* Changes u"..." into "...".
"""
-import re
from ..pgen2 import token
from .. import fixer_base
_mapping = {u"unichr" : u"chr", u"unicode" : u"str"}
-_literal_re = re.compile(ur"[uU][rR]?[\'\"]")
class FixUnicode(fixer_base.BaseFix):
BM_compatible = True
PATTERN = "STRING | 'unicode' | 'unichr'"
+ def start_tree(self, tree, filename):
+ super(FixUnicode, self).start_tree(tree, filename)
+ self.unicode_literals = 'unicode_literals' in tree.future_features
+
def transform(self, node, results):
if node.type == token.NAME:
new = node.clone()
new.value = _mapping[node.value]
return new
elif node.type == token.STRING:
- if _literal_re.match(node.value):
- new = node.clone()
- new.value = new.value[1:]
- return new
+ val = node.value
+ if (not self.unicode_literals and val[0] in u'rR\'"' and
+ u'\\' in val):
+ val = ur'\\'.join([
+ v.replace(u'\\u', ur'\\u').replace(u'\\U', ur'\\U')
+ for v in val.split(ur'\\')
+ ])
+ if val[0] in u'uU':
+ val = val[1:]
+ if val == node.value:
+ return node
+ new = node.clone()
+ new.value = val
+ return new
diff --git a/Lib/lib2to3/tests/test_fixers.py b/Lib/lib2to3/tests/test_fixers.py
--- a/Lib/lib2to3/tests/test_fixers.py
+++ b/Lib/lib2to3/tests/test_fixers.py
@@ -2824,6 +2824,43 @@
a = """R'''x''' """
self.check(b, a)
+ def test_native_literal_escape_u(self):
+ b = """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = """'\\\\\\\\u20ac\\\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
+ b = """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = """r'\\\\\\\\u20ac\\\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
+ def test_bytes_literal_escape_u(self):
+ b = """b'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = """b'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
+ b = """br'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = """br'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
+ def test_unicode_literal_escape_u(self):
+ b = """u'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
+ b = """ur'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
+ def test_native_unicode_literal_escape_u(self):
+ f = 'from __future__ import unicode_literals\n'
+ b = f + """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = f + """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
+ b = f + """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ a = f + """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+ self.check(b, a)
+
class Test_callable(FixerTestCase):
fixer = "callable"
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,8 @@
Library
-------
+- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
+
- Issue #19137: The pprint module now correctly formats empty set and frozenset
and instances of set and frozenset subclasses.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list