[pypy-svn] r61035 - in pypy/trunk/pypy/lib: . app_test

Fri Jan 16 14:52:35 CET 2009

Author: arigo
Date: Fri Jan 16 14:52:34 2009
New Revision: 61035

Modified:
   pypy/trunk/pypy/lib/app_test/test_binascii.py
   pypy/trunk/pypy/lib/binascii.py
Log:
Replace the old b2a_qp() with a port of the C implementation.
This allows a new test to pass.


Modified: pypy/trunk/pypy/lib/app_test/test_binascii.py
==============================================================================

--- pypy/trunk/pypy/lib/app_test/test_binascii.py	(original)
+++ pypy/trunk/pypy/lib/app_test/test_binascii.py	Fri Jan 16 14:52:34 2009
@@ -147,6 +147,12 @@
         f('')
     binascii.crc_hqx('', 0)
 
+def test_qp_bug_case():  # b2a_qp line-length boundaries around 76/77 input characters
+    assert binascii.b2a_qp('y'*77, False, False) == 'y'*75 + '=\nyy'  # soft break after 75 chars
+    assert binascii.b2a_qp(' '*77, False, False) == ' '*75 + '=\n =20'  # final space is quoted
+    assert binascii.b2a_qp('y'*76, False, False) == 'y'*76  # 76 chars fit without a break
+    assert binascii.b2a_qp(' '*76, False, False) == ' '*75 + '=\n=20'  # quoted space moves to a new line
+
 def test_wrong_padding():
     s = 'CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ'
     raises(binascii.Error, binascii.a2b_base64, s)

Modified: pypy/trunk/pypy/lib/binascii.py
==============================================================================
--- pypy/trunk/pypy/lib/binascii.py	(original)
+++ pypy/trunk/pypy/lib/binascii.py	Fri Jan 16 14:52:34 2009
@@ -254,70 +254,75 @@
             inp += 1
     return ''.join(odata)
 
-def b2a_qp(s, quotetabs=False, istext=True, header=False):
+def b2a_qp(data, quotetabs=False, istext=True, header=False):
     """quotetabs=True means that tab and space characters are always
        quoted.
        istext=False means that \r and \n are treated as regular characters
        header=True encodes space characters with '_' and requires
        real '_' characters to be quoted.
     """
-    crlf = s.find('\r\n')
-    lf = s.find('\n')
-    linebreak = None
-    if crlf >= 0 and crlf <= lf:
-        linebreak = '\r\n'
-    elif lf > 0:
-        linebreak = '\n'
-    
-    # if linebreak and linebreak == '\r\n':
-    # The above is more efficient for files with \n linebreaks,
-    # but fails badly on files with mixed linebreak encoding
-    if linebreak:
-        s = s.replace('\r\n', '\n')
-    else:
-        linebreak = '\n'
-
-    lines = s.split('\n')
+    MAXLINESIZE = 76  # maximum length of an encoded output line
 
-    soft_lbr = '=' + linebreak
-    result = []
-    for line in lines:
-        charlist = []
-        count = 0
-        for c in line:
-            # Don't quote
-            if '!' <= c <= '<' or '>' <= c <= '^' or '`' <= c <= '~' or (
-                c == '_' and not header) or (c in '\n\r' and istext):
-                if count >= 75:
-                    charlist.append(soft_lbr)
-                    count = 0
-                charlist.append(c)
-                count += 1
-            elif not quotetabs and c in '\t ':
-                if count >= 72:
-                    charlist.append(soft_lbr)
-                    count = 0
-
-                if count >= 71: # Quote
-                    count += 3
-                    charlist.append('=' + two_hex_digits(ord(c)))
-                else: # Don't quote
-                    if c == ' ' and header:
-                        charlist.append('_')
-                    else:
-                        charlist.append(c)
-                    count += 1
-            else: # Quote
-                if count >= 72:
-                    charlist.append(soft_lbr)
-                    count = 0
-                count += 3
-                charlist.append('=' + two_hex_digits(ord(c)))
-        if charlist and charlist[-1] in '\t ':
-            # Whitespace at end of line has to be quoted
-            charlist[-1] = '=' + two_hex_digits(ord(charlist[-1]))
-        result.append(''.join(charlist))
-    return linebreak.join(result)
+    # See if this string is using CRLF line ends
+    lf = data.find('\n')
+    crlf = lf > 0 and data[lf-1] == '\r'  # decided by the first '\n' only
+
+    inp = 0  # index of the next input character to encode
+    linelen = 0  # number of characters on the current output line
+    odata = []  # output fragments, joined once at the end
+    while inp < len(data):
+        c = data[inp]
+        if (c > '~' or
+            c == '=' or
+            (header and c == '_') or
+            (c == '.' and linelen == 0 and (inp + 1 == len(data) or  # check end of data before reading data[inp+1]
+                                            data[inp+1] == '\n' or
+                                            data[inp+1] == '\r')) or
+            (not istext and (c == '\r' or c == '\n')) or
+            ((c == '\t' or c == ' ') and (inp + 1 == len(data))) or
+            (c <= ' ' and c != '\r' and c != '\n' and
+             (quotetabs or (not quotetabs and (c != '\t' and c != ' '))))):
+            linelen += 3  # the quoted form appended below is counted as 3 characters
+            if linelen >= MAXLINESIZE:
+                odata.append('=')  # soft line break before the quoted character
+                if crlf: odata.append('\r')
+                odata.append('\n')
+                linelen = 3
+            odata.append('=' + two_hex_digits(ord(c)))
+            inp += 1
+        else:
+            if (istext and
+                (c == '\n' or (inp+1 < len(data) and c == '\r' and
+                               data[inp+1] == '\n'))):
+                linelen = 0  # hard line break: a new output line starts
+                # Protect against whitespace on end of line
+                if (len(odata) > 0 and
+                    (odata[-1] == ' ' or odata[-1] == '\t')):
+                    ch = ord(odata[-1])
+                    odata[-1] = '='
+                    odata.append(two_hex_digits(ch))
+
+                if crlf: odata.append('\r')
+                odata.append('\n')
+                if c == '\r':
+                    inp += 2  # consume both characters of the '\r\n' pair
+                else:
+                    inp += 1
+            else:
+                if (inp + 1 < len(data) and
+                    data[inp+1] != '\n' and
+                    (linelen + 1) >= MAXLINESIZE):
+                    odata.append('=')  # soft line break before a literal character
+                    if crlf: odata.append('\r')
+                    odata.append('\n')
+                    linelen = 0
+
+                linelen += 1
+                if header and c == ' ':
+                    c = '_'  # header mode writes a space as '_'
+                odata.append(c)
+                inp += 1
+    return ''.join(odata)
 
 hex_numbers = '0123456789ABCDEF'
 def hex(n):