[pypy-svn] r75742 - in pypy/trunk/pypy: interpreter/pyparser interpreter/pyparser/test module/_codecs/test

Thu Jul 1 23:19:39 CEST 2010

Author: afa
Date: Thu Jul  1 23:19:38 2010
New Revision: 75742

Modified:
   pypy/trunk/pypy/interpreter/pyparser/parsestring.py
   pypy/trunk/pypy/interpreter/pyparser/test/test_parsestring.py
   pypy/trunk/pypy/module/_codecs/test/test_codecs.py
Log:
Fixed issue531: emulate CPython's oddities when parsing octal escapes in string literals

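Examples: '\9' == ('\\' + '9') (9 is not an octal digit, so the backslash is kept),
and '\400' == chr(0) (the octal value is masked to 8 bits, so 0400 == 256 wraps to 0).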

Noticed by santagada.


Modified: pypy/trunk/pypy/interpreter/pyparser/parsestring.py
==============================================================================

--- pypy/trunk/pypy/interpreter/pyparser/parsestring.py	(original)
+++ pypy/trunk/pypy/interpreter/pyparser/parsestring.py	Thu Jul  1 23:19:38 2010
@@ -160,11 +160,17 @@
             span = ps
             span += (span < end) and (s[span] in '01234567')
             span += (span < end) and (s[span] in '01234567')
-            lis.append(chr(int(s[prevps : span], 8)))
+            octal = s[prevps : span]
+            # emulate a strange wrap-around behavior of CPython:
+            # \400 is the same as \000 because 0400 == 256
+            num = int(octal, 8) & 0xFF
+            lis.append(chr(num))
             ps = span
         elif ch == 'x':
             if ps+2 <= end and isxdigit(s[ps]) and isxdigit(s[ps + 1]):
-                lis.append(chr(int(s[ps : ps + 2], 16)))
+                hexa = s[ps : ps + 2]
+                num = int(hexa, 16)
+                lis.append(chr(num))
                 ps += 2
             else:
                 raise_app_valueerror(space, 'invalid \\x escape')

Modified: pypy/trunk/pypy/interpreter/pyparser/test/test_parsestring.py
==============================================================================
--- pypy/trunk/pypy/interpreter/pyparser/test/test_parsestring.py	(original)
+++ pypy/trunk/pypy/interpreter/pyparser/test/test_parsestring.py	Thu Jul  1 23:19:38 2010
@@ -22,6 +22,15 @@
         s = r'"\123"'
         w_ret = parsestring.parsestr(space, None, s)
         assert space.str_w(w_ret) == chr(0123)
+        s = r'"\400"'
+        w_ret = parsestring.parsestr(space, None, s)
+        assert space.str_w(w_ret) == chr(0)
+        s = r'"\9"'
+        w_ret = parsestring.parsestr(space, None, s)
+        assert space.str_w(w_ret) == '\\9'
+        s = r'"\08"'
+        w_ret = parsestring.parsestr(space, None, s)
+        assert space.str_w(w_ret) == chr(0) + '8'
         s = r'"\x"'
         space.raises_w(space.w_ValueError, parsestring.parsestr, space, None, s)
         s = r'"\x7"'

Modified: pypy/trunk/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/test/test_codecs.py	(original)
+++ pypy/trunk/pypy/module/_codecs/test/test_codecs.py	Thu Jul  1 23:19:38 2010
@@ -271,7 +271,6 @@
         assert u"\u0663".encode("raw-unicode-escape") == "\u0663"
 
     def test_escape_decode(self):
-        
         test = 'a\n\\b\x00c\td\u2045'.encode('string_escape')
         assert test.decode('string_escape') =='a\n\\b\x00c\td\u2045'
         assert '\\077'.decode('string_escape') == '?'
@@ -279,6 +278,14 @@
         assert '\\253'.decode('string_escape') == chr(0253)
         assert '\\312'.decode('string_escape') == chr(0312)
 
+    def test_escape_decode_wrap_around(self):
+        assert '\\400'.decode('string_escape') == chr(0)
+
+    def test_escape_decode_ignore_invalid(self):
+        assert '\\9'.decode('string_escape') == '\\9'
+        assert '\\01'.decode('string_escape') == chr(01)
+        assert '\\0f'.decode('string_escape') == chr(0) + 'f'
+        assert '\\08'.decode('string_escape') == chr(0) + '8'
 
     def test_decode_utf8_different_case(self):
         constant = u"a"