[pypy-svn] r16357 - in pypy/dist/pypy/module/_codecs: . test

Wed Aug 24 12:21:29 CEST 2005

Author: ale
Date: Wed Aug 24 12:21:28 2005
New Revision: 16357

Modified:
   pypy/dist/pypy/module/_codecs/app_codecs.py
   pypy/dist/pypy/module/_codecs/test/test_codecs.py
Log:
Added check for trailing backslash. Detected in test_pickle.

Modified: pypy/dist/pypy/module/_codecs/app_codecs.py
==============================================================================

--- pypy/dist/pypy/module/_codecs/app_codecs.py	(original)
+++ pypy/dist/pypy/module/_codecs/app_codecs.py	Wed Aug 24 12:21:28 2005
@@ -311,36 +311,39 @@
         
         if data[i] == '\\':
             i += 1
-            if data[i] == '\\':
-                res += '\\'
-            elif data[i] == 'n':
-                res += '\n'
-            elif data[i] == 't':
-                res += '\t'
-            elif data[i] == 'r':
-                res += '\r'
-            elif data[i] == 'b':
-                res += '\b'
-            elif data[i] == '\'':
-                res += '\''
-            elif data[i] == '\"':
-                res += '\"'
-            elif data[i] == 'f':
-                res += '\f'
-            elif data[i] == 'a':
-                res += '\a'
-            elif data[i] == 'v':
-                res += '\v'
-            elif '0' <= data[i] <= '9':
-                # emulate a strange wrap-around behavior of CPython:
-                # \400 is the same as \000 because 0400 == 256
-                octal = data[i:i+3]
-                res += chr(int(octal,8) & 0xFF)
-                i += 2
-            elif data[i] == 'x':
-                hexa = data[i+1:i+3]
-                res += chr(int(hexa,16))
-                i += 2
+            if i >= l:
+                raise ValueError("Trailing \\ in string")
+            else:
+                if data[i] == '\\':
+                    res += '\\'
+                elif data[i] == 'n':
+                    res += '\n'
+                elif data[i] == 't':
+                    res += '\t'
+                elif data[i] == 'r':
+                    res += '\r'
+                elif data[i] == 'b':
+                    res += '\b'
+                elif data[i] == '\'':
+                    res += '\''
+                elif data[i] == '\"':
+                    res += '\"'
+                elif data[i] == 'f':
+                    res += '\f'
+                elif data[i] == 'a':
+                    res += '\a'
+                elif data[i] == 'v':
+                    res += '\v'
+                elif '0' <= data[i] <= '9':
+                    # emulate a strange wrap-around behavior of CPython:
+                    # \400 is the same as \000 because 0400 == 256
+                    octal = data[i:i+3]
+                    res += chr(int(octal,8) & 0xFF)
+                    i += 2
+                elif data[i] == 'x':
+                    hexa = data[i+1:i+3]
+                    res += chr(int(hexa,16))
+                    i += 2
         else:
             res += data[i]
         i += 1

Modified: pypy/dist/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/dist/pypy/module/_codecs/test/test_codecs.py	(original)
+++ pypy/dist/pypy/module/_codecs/test/test_codecs.py	Wed Aug 24 12:21:28 2005
@@ -1,6 +1,172 @@
 import autopath
 
 class AppTestCodecs:
+
+    def test_indexerror(self):
+        test =   "\\"     # trailing backslash
+             
+        raises (ValueError, test.decode,'string-escape')
+
+    def test_insecure_pickle(self):
+        import pickle
+        insecure = ["abc", "2 + 2", # not quoted
+                    #"'abc' + 'def'", # not a single quoted string
+                    "'abc", # quote is not closed
+                    "'abc\"", # open quote and close quote don't match
+                    "'abc'   ?", # junk after close quote
+                    "'\\'", # trailing backslash
+                    # some tests of the quoting rules
+                    #"'abc\"\''",
+                    #"'\\\\a\'\'\'\\\'\\\\\''",
+                    ]
+        for s in insecure:
+            buf = "S" + s + "\012p0\012."
+            print s
+            raises (ValueError, pickle.loads, buf)
+
+    def test_partial_utf8(self):
+        class Queue(object):
+            """     
+            queue: write bytes at one end, read bytes from the other end
+            """ 
+            def __init__(self):
+                self._buffer = ""
+            
+            def write(self, chars):
+                self._buffer += chars
+    
+            def read(self, size=-1):
+                if size<0:
+                    s = self._buffer
+                    self._buffer = ""
+                    return s
+                else: 
+                    s = self._buffer[:size]
+                    self._buffer = self._buffer[size:]
+                    return s
+        def check_partial(encoding, input, partialresults):
+            import codecs
+            
+            # get a StreamReader for the encoding and feed the bytestring version
+            # of input to the reader byte by byte. Read every available from
+            # the StreamReader and check that the results equal the appropriate
+            # entries from partialresults.
+            q = Queue()
+            r = codecs.getreader(encoding)(q)
+            result = u""
+            for (c, partialresult) in zip(input.encode(encoding), partialresults):
+                q.write(c)
+                result += r.read()
+                assert result == partialresult
+            # check that there's nothing left in the buffers
+            assert  r.read() == u"" 
+            assert  r.bytebuffer ==  "" 
+            assert  r.charbuffer == u""
+        encoding = 'utf-8'
+        check_partial(encoding, 
+                u"\x00\xff\u07ff\u0800\uffff",
+                [
+                    u"\x00",
+                    u"\x00",
+                    u"\x00\xff",
+                    u"\x00\xff",
+                    u"\x00\xff\u07ff",
+                    u"\x00\xff\u07ff",
+                    u"\x00\xff\u07ff",
+                    u"\x00\xff\u07ff\u0800",
+                    u"\x00\xff\u07ff\u0800",
+                    u"\x00\xff\u07ff\u0800",
+                    u"\x00\xff\u07ff\u0800\uffff",
+                ]
+            )
+
+    def test_partial_utf16(self):
+        class Queue(object):
+            """     
+            queue: write bytes at one end, read bytes from the other end
+            """ 
+            def __init__(self):
+                self._buffer = ""
+            
+            def write(self, chars):
+                self._buffer += chars
+    
+            def read(self, size=-1):
+                if size<0:
+                    s = self._buffer
+                    self._buffer = ""
+                    return s
+                else: 
+                    s = self._buffer[:size]
+                    self._buffer = self._buffer[size:]
+                    return s
+        def check_partial(encoding, input, partialresults):
+            import codecs
+            
+            # get a StreamReader for the encoding and feed the bytestring version
+            # of input to the reader byte by byte. Read every available from
+            # the StreamReader and check that the results equal the appropriate
+            # entries from partialresults.
+            q = Queue()
+            r = codecs.getreader(encoding)(q)
+            result = u""
+            for (c, partialresult) in zip(input.encode(encoding), partialresults):
+                q.write(c)
+                result += r.read()
+                assert result == partialresult
+            # check that there's nothing left in the buffers
+            assert  r.read() == u"" 
+            assert  r.bytebuffer ==  "" 
+            assert  r.charbuffer == u""
+        encoding = 'utf-16'
+        check_partial(encoding, 
+                u"\x00\xff\u0100\uffff",
+                [
+                    u"", # first byte of BOM read
+                    u"", # second byte of BOM read => byteorder known
+                    u"",
+                    u"\x00",
+                    u"\x00",
+                    u"\x00\xff",
+                    u"\x00\xff",
+                    u"\x00\xff\u0100",
+                    u"\x00\xff\u0100",
+                    u"\x00\xff\u0100\uffff",
+                ])
+    def test_bug1098990_a(self):
+        import codecs, StringIO
+        self.encoding = 'utf-8'
+        s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
+        s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
+        s3 = u"next line.\r\n"
+       
+        s = (s1+s2+s3).encode(self.encoding)
+        stream = StringIO.StringIO(s)
+        reader = codecs.getreader(self.encoding)(stream)
+        assert reader.readline() == s1
+        assert reader.readline() == s2
+        assert reader.readline() == s3
+        assert reader.readline() == u""
+
+    def test_bug1098990_b(self):
+        import codecs, StringIO
+        self.encoding = 'utf-8'
+        s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
+        s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
+        s3 = u"stillokay:bbbbxx\r\n"
+        s4 = u"broken!!!!badbad\r\n"
+        s5 = u"againokay.\r\n"
+
+        s = (s1+s2+s3+s4+s5).encode(self.encoding)
+        stream = StringIO.StringIO(s)
+        reader = codecs.getreader(self.encoding)(stream)
+        assert reader.readline() == s1
+        assert reader.readline() == s2
+        assert reader.readline() == s3
+        assert reader.readline() == s4
+        assert reader.readline() == s5
+        assert reader.readline() == u""    
+    
     def test_seek_utf16le(self):
         # all codecs should be able to encode these
         import codecs, StringIO
@@ -10,9 +176,7 @@
         for t in xrange(5):
             # Test that calling seek resets the internal codec state and buffers
             reader.seek(0, 0)
-            print "before"
             line = reader.readline()
-            print "after",line
             assert s[:len(line)] == line