[Python-checkins] r86934 - in python/branches/py3k: Doc/library/codecs.rst Doc/library/stdtypes.rst Lib/codecs.py Lib/encodings/aliases.py Lib/encodings/base64_codec.py Lib/encodings/bz2_codec.py Lib/encodings/hex_codec.py Lib/encodings/quopri_codec.py Lib/encodings/rot_13.py Lib/encodings/uu_codec.py Lib/encodings/zlib_codec.py Lib/test/test_bytes.py Lib/test/test_codecs.py Misc/NEWS Objects/bytearrayobject.c Objects/bytesobject.c Objects/unicodeobject.c

georg.brandl python-checkins at python.org
Thu Dec 2 19:06:51 CET 2010


Author: georg.brandl
Date: Thu Dec  2 19:06:51 2010
New Revision: 86934

Log:
#7475: add transform() and untransform() methods to bytes, bytearray and str, and add back the codecs from Python 2 that can be used with them.
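
For illustration, a quick sketch of the new interface.  This only runs on an
interpreter built from this revision of the py3k branch, since the methods and
codec aliases are added here:

    # Round-trip some binary data through the re-added bytes-to-bytes codecs.
    blob = bytes(range(256))

    encoded = blob.transform("base64")        # bytes -> bytes, MIME base64
    assert isinstance(encoded, bytes)
    assert encoded.untransform("base64") == blob

    packed = blob.transform("zlib")           # bytes -> bytes, zlib compression
    assert packed.untransform("zlib") == blob

    # str.transform()/str.untransform() do the same for string-to-string
    # codecs such as rot_13.
    assert "Python".transform("rot13").untransform("rot13") == "Python"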

Added:
   python/branches/py3k/Lib/encodings/base64_codec.py   (contents, props changed)
   python/branches/py3k/Lib/encodings/bz2_codec.py   (contents, props changed)
   python/branches/py3k/Lib/encodings/hex_codec.py   (contents, props changed)
   python/branches/py3k/Lib/encodings/quopri_codec.py   (contents, props changed)
   python/branches/py3k/Lib/encodings/rot_13.py   (contents, props changed)
   python/branches/py3k/Lib/encodings/uu_codec.py   (contents, props changed)
   python/branches/py3k/Lib/encodings/zlib_codec.py   (contents, props changed)
Modified:
   python/branches/py3k/Doc/library/codecs.rst
   python/branches/py3k/Doc/library/stdtypes.rst
   python/branches/py3k/Lib/codecs.py
   python/branches/py3k/Lib/encodings/aliases.py
   python/branches/py3k/Lib/test/test_bytes.py
   python/branches/py3k/Lib/test/test_codecs.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Objects/bytearrayobject.c
   python/branches/py3k/Objects/bytesobject.c
   python/branches/py3k/Objects/unicodeobject.c

Modified: python/branches/py3k/Doc/library/codecs.rst
==============================================================================
--- python/branches/py3k/Doc/library/codecs.rst	(original)
+++ python/branches/py3k/Doc/library/codecs.rst	Thu Dec  2 19:06:51 2010
@@ -1165,6 +1165,46 @@
 |                    |         | operand                   |
 +--------------------+---------+---------------------------+
 
+The following codecs provide bytes-to-bytes mappings.  They can be used with
+:meth:`bytes.transform` and :meth:`bytes.untransform`.
+
++--------------------+---------------------------+---------------------------+
+| Codec              | Aliases                   | Purpose                   |
++====================+===========================+===========================+
+| base64_codec       | base64, base-64           | Convert operand to MIME   |
+|                    |                           | base64                    |
++--------------------+---------------------------+---------------------------+
+| bz2_codec          | bz2                       | Compress the operand      |
+|                    |                           | using bz2                 |
++--------------------+---------------------------+---------------------------+
+| hex_codec          | hex                       | Convert operand to        |
+|                    |                           | hexadecimal               |
+|                    |                           | representation, with two  |
+|                    |                           | digits per byte           |
++--------------------+---------------------------+---------------------------+
+| quopri_codec       | quopri, quoted-printable, | Convert operand to MIME   |
+|                    | quotedprintable           | quoted printable          |
++--------------------+---------------------------+---------------------------+
+| uu_codec           | uu                        | Convert the operand using |
+|                    |                           | uuencode                  |
++--------------------+---------------------------+---------------------------+
+| zlib_codec         | zip, zlib                 | Compress the operand      |
+|                    |                           | using gzip                |
++--------------------+---------------------------+---------------------------+
+
+The following codecs provide string-to-string mappings.  They can be used with
+:meth:`str.transform` and :meth:`str.untransform`.
+
++--------------------+---------------------------+---------------------------+
+| Codec              | Aliases                   | Purpose                   |
++====================+===========================+===========================+
+| rot_13             | rot13                     | Returns the Caesar-cypher |
+|                    |                           | encryption of the operand |
++--------------------+---------------------------+---------------------------+
+
+.. versionadded:: 3.2
+   bytes-to-bytes and string-to-string codecs.
+
 
 :mod:`encodings.idna` --- Internationalized Domain Names in Applications
 ------------------------------------------------------------------------

Modified: python/branches/py3k/Doc/library/stdtypes.rst
==============================================================================
--- python/branches/py3k/Doc/library/stdtypes.rst	(original)
+++ python/branches/py3k/Doc/library/stdtypes.rst	Thu Dec  2 19:06:51 2010
@@ -1352,6 +1352,19 @@
         "They're Bill's Friends."
 
 
+.. method:: str.transform(encoding, errors='strict')
+
+   Return an encoded version of the string.  In contrast to :meth:`encode`, this
+   method works with codecs that provide string-to-string mappings, and not
+   string-to-bytes mappings.  :meth:`transform` therefore returns a string
+   object.
+
+   The codecs that can be used with this method are listed in
+   :ref:`standard-encodings`.
+
+   .. versionadded:: 3.2
+
+
 .. method:: str.translate(map)
 
    Return a copy of the *s* where all characters have been mapped through the
@@ -1369,6 +1382,14 @@
       example).
 
 
+.. method:: str.untransform(encoding, errors='strict')
+
+   Return a decoded version of the string.  This provides the reverse operation
+   of :meth:`transform`.
+
+   .. versionadded:: 3.2
+
+
 .. method:: str.upper()
 
    Return a copy of the string converted to uppercase.
@@ -1800,6 +1821,20 @@
 The maketrans and translate methods differ in semantics from the versions
 available on strings:
 
+.. method:: bytes.transform(encoding, errors='strict')
+            bytearray.transform(encoding, errors='strict')
+
+   Return an encoded version of the bytes object.  In contrast to
+   :meth:`encode`, this method works with codecs that provide bytes-to-bytes
+   mappings, and not string-to-bytes mappings.  :meth:`transform` therefore
+   returns a bytes or bytearray object.
+
+   The codecs that can be used with this method are listed in
+   :ref:`standard-encodings`.
+
+   .. versionadded:: 3.2
+
+
 .. method:: bytes.translate(table[, delete])
             bytearray.translate(table[, delete])
 
@@ -1817,6 +1852,15 @@
       b'rd ths shrt txt'
 
 
+.. method:: bytes.untransform(encoding, errors='strict')
+            bytearray.untransform(encoding, errors='strict')
+
+   Return a decoded version of the bytes object.  This provides the reverse
+   operation of :meth:`transform`.
+
+   .. versionadded:: 3.2
+
+
 .. staticmethod:: bytes.maketrans(from, to)
                   bytearray.maketrans(from, to)
 

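To make the encode()/transform() distinction documented above concrete, another
small sketch (again assuming an interpreter with this patch applied):

    s = "Hello world"
    # str.encode() goes through a str-to-bytes codec and returns bytes ...
    assert isinstance(s.encode("utf-8"), bytes)
    # ... while str.transform() uses a str-to-str codec and returns str.
    assert s.transform("rot13") == "Uryyb jbeyq"
    assert isinstance(s.transform("rot13"), str)
    assert s.transform("rot13").untransform("rot13") == s
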
Modified: python/branches/py3k/Lib/codecs.py
==============================================================================
--- python/branches/py3k/Lib/codecs.py	(original)
+++ python/branches/py3k/Lib/codecs.py	Thu Dec  2 19:06:51 2010
@@ -396,6 +396,8 @@
 
 class StreamReader(Codec):
 
+    charbuffertype = str
+
     def __init__(self, stream, errors='strict'):
 
         """ Creates a StreamReader instance.
@@ -417,9 +419,8 @@
         self.stream = stream
         self.errors = errors
         self.bytebuffer = b""
-        # For str->str decoding this will stay a str
-        # For str->unicode decoding the first read will promote it to unicode
-        self.charbuffer = ""
+        self._empty_charbuffer = self.charbuffertype()
+        self.charbuffer = self._empty_charbuffer
         self.linebuffer = None
 
     def decode(self, input, errors='strict'):
@@ -455,7 +456,7 @@
         """
         # If we have lines cached, first merge them back into characters
         if self.linebuffer:
-            self.charbuffer = "".join(self.linebuffer)
+            self.charbuffer = self._empty_charbuffer.join(self.linebuffer)
             self.linebuffer = None
 
         # read until we get the required number of characters (if available)
@@ -498,7 +499,7 @@
         if chars < 0:
             # Return everything we've got
             result = self.charbuffer
-            self.charbuffer = ""
+            self.charbuffer = self._empty_charbuffer
         else:
             # Return the first chars characters
             result = self.charbuffer[:chars]
@@ -529,7 +530,7 @@
             return line
 
         readsize = size or 72
-        line = ""
+        line = self._empty_charbuffer
         # If size is given, we call read() only once
         while True:
             data = self.read(readsize, firstline=True)
@@ -537,7 +538,8 @@
                 # If we're at a "\r" read one extra character (which might
                 # be a "\n") to get a proper line ending. If the stream is
                 # temporarily exhausted we return the wrong line ending.
-                if data.endswith("\r"):
+                if (isinstance(data, str) and data.endswith("\r")) or \
+                   (isinstance(data, bytes) and data.endswith(b"\r")):
                     data += self.read(size=1, chars=1)
 
             line += data
@@ -563,7 +565,8 @@
                 line0withoutend = lines[0].splitlines(False)[0]
                 if line0withend != line0withoutend: # We really have a line end
                     # Put the rest back together and keep it until the next call
-                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
+                    self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
+                                      self.charbuffer
                     if keepends:
                         line = line0withend
                     else:
@@ -574,7 +577,7 @@
                 if line and not keepends:
                     line = line.splitlines(False)[0]
                 break
-            if readsize<8000:
+            if readsize < 8000:
                 readsize *= 2
         return line
 
@@ -603,7 +606,7 @@
 
         """
         self.bytebuffer = b""
-        self.charbuffer = ""
+        self.charbuffer = self._empty_charbuffer
         self.linebuffer = None
 
     def seek(self, offset, whence=0):

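The new charbuffertype hook is what lets the bytes-to-bytes codecs below reuse
the generic StreamReader machinery: their StreamReader subclasses simply set
charbuffertype = bytes.  For example, reading hex-encoded data from a byte
stream:

    import codecs, io

    raw = io.BytesIO(b"68656c6c6f")            # hex encoding of b"hello"
    reader = codecs.getreader("hex_codec")(raw)
    assert reader.read() == b"hello"
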
Modified: python/branches/py3k/Lib/encodings/aliases.py
==============================================================================
--- python/branches/py3k/Lib/encodings/aliases.py	(original)
+++ python/branches/py3k/Lib/encodings/aliases.py	Thu Dec  2 19:06:51 2010
@@ -33,9 +33,9 @@
     'us'                 : 'ascii',
     'us_ascii'           : 'ascii',
 
-    ## base64_codec codec
-    #'base64'             : 'base64_codec',
-    #'base_64'            : 'base64_codec',
+    # base64_codec codec
+    'base64'             : 'base64_codec',
+    'base_64'            : 'base64_codec',
 
     # big5 codec
     'big5_tw'            : 'big5',
@@ -45,8 +45,8 @@
     'big5_hkscs'         : 'big5hkscs',
     'hkscs'              : 'big5hkscs',
 
-    ## bz2_codec codec
-    #'bz2'                : 'bz2_codec',
+    # bz2_codec codec
+    'bz2'                : 'bz2_codec',
 
     # cp037 codec
     '037'                : 'cp037',
@@ -248,8 +248,8 @@
     'cp936'              : 'gbk',
     'ms936'              : 'gbk',
 
-    ## hex_codec codec
-    #'hex'                : 'hex_codec',
+    # hex_codec codec
+    'hex'                : 'hex_codec',
 
     # hp_roman8 codec
     'roman8'             : 'hp_roman8',
@@ -450,13 +450,13 @@
     'cp154'              : 'ptcp154',
     'cyrillic_asian'     : 'ptcp154',
 
-    ## quopri_codec codec
-    #'quopri'             : 'quopri_codec',
-    #'quoted_printable'   : 'quopri_codec',
-    #'quotedprintable'    : 'quopri_codec',
+    # quopri_codec codec
+    'quopri'             : 'quopri_codec',
+    'quoted_printable'   : 'quopri_codec',
+    'quotedprintable'    : 'quopri_codec',
 
-    ## rot_13 codec
-    #'rot13'              : 'rot_13',
+    # rot_13 codec
+    'rot13'              : 'rot_13',
 
     # shift_jis codec
     'csshiftjis'         : 'shift_jis',
@@ -518,12 +518,12 @@
     'utf8_ucs2'          : 'utf_8',
     'utf8_ucs4'          : 'utf_8',
 
-    ## uu_codec codec
-    #'uu'                 : 'uu_codec',
+    # uu_codec codec
+    'uu'                 : 'uu_codec',
 
-    ## zlib_codec codec
-    #'zip'                : 'zlib_codec',
-    #'zlib'               : 'zlib_codec',
+    # zlib_codec codec
+    'zip'                : 'zlib_codec',
+    'zlib'               : 'zlib_codec',
 
     # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
     'x_mac_japanese'      : 'shift_jis',

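With these aliases enabled, several spellings resolve to the same codec,
because codec lookup lower-cases the name and maps hyphens and spaces to
underscores before consulting this table:

    import codecs

    for name in ("base64_codec", "base64", "base-64", "BASE_64"):
        assert codecs.lookup(name).name == "base64"
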
Added: python/branches/py3k/Lib/encodings/base64_codec.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/encodings/base64_codec.py	Thu Dec  2 19:06:51 2010
@@ -0,0 +1,55 @@
+"""Python 'base64_codec' Codec - base64 content transfer encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal at lemburg.com).
+"""
+
+import codecs
+import base64
+
+### Codec APIs
+
+def base64_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (base64.encodestring(input), len(input))
+
+def base64_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (base64.decodestring(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return base64_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return base64_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        assert self.errors == 'strict'
+        return base64.encodestring(input)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        assert self.errors == 'strict'
+        return base64.decodestring(input)
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='base64',
+        encode=base64_encode,
+        decode=base64_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )

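As with every stateless codec, base64_encode()/base64_decode() return an
(output, length_consumed) pair, where the length refers to the input.  Going
through the codec registry:

    import codecs

    encode = codecs.getencoder("base64_codec")
    decode = codecs.getdecoder("base64_codec")

    out, consumed = encode(b"spam & eggs")
    assert consumed == len(b"spam & eggs")
    assert decode(out)[0] == b"spam & eggs"
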
Added: python/branches/py3k/Lib/encodings/bz2_codec.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/encodings/bz2_codec.py	Thu Dec  2 19:06:51 2010
@@ -0,0 +1,77 @@
+"""Python 'bz2_codec' Codec - bz2 compression encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Adapted by Raymond Hettinger from zlib_codec.py which was written
+by Marc-Andre Lemburg (mal at lemburg.com).
+"""
+
+import codecs
+import bz2 # this codec needs the optional bz2 module !
+
+### Codec APIs
+
+def bz2_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (bz2.compress(input), len(input))
+
+def bz2_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (bz2.decompress(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return bz2_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return bz2_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.compressobj = bz2.BZ2Compressor()
+
+    def encode(self, input, final=False):
+        if final:
+            c = self.compressobj.compress(input)
+            return c + self.compressobj.flush()
+        else:
+            return self.compressobj.compress(input)
+
+    def reset(self):
+        self.compressobj = bz2.BZ2Compressor()
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.decompressobj = bz2.BZ2Decompressor()
+
+    def decode(self, input, final=False):
+        try:
+            return self.decompressobj.decompress(input)
+        except EOFError:
+            return b''
+
+    def reset(self):
+        self.decompressobj = bz2.BZ2Decompressor()
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name="bz2",
+        encode=bz2_encode,
+        decode=bz2_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )

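The incremental classes wrap BZ2Compressor/BZ2Decompressor, so large payloads
can be fed in chunks; the compressor is flushed when final=True is passed
(this naturally requires the optional bz2 module):

    import codecs

    enc = codecs.getincrementalencoder("bz2_codec")()
    chunks = [enc.encode(b"x" * 1000), enc.encode(b"y" * 1000, final=True)]
    compressed = b"".join(chunks)

    dec = codecs.getincrementaldecoder("bz2_codec")()
    assert dec.decode(compressed, final=True) == b"x" * 1000 + b"y" * 1000
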
Added: python/branches/py3k/Lib/encodings/hex_codec.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/encodings/hex_codec.py	Thu Dec  2 19:06:51 2010
@@ -0,0 +1,55 @@
+"""Python 'hex_codec' Codec - 2-digit hex content transfer encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal at lemburg.com).
+"""
+
+import codecs
+import binascii
+
+### Codec APIs
+
+def hex_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (binascii.b2a_hex(input), len(input))
+
+def hex_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (binascii.a2b_hex(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return hex_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return hex_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        assert self.errors == 'strict'
+        return binascii.b2a_hex(input)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        assert self.errors == 'strict'
+        return binascii.a2b_hex(input)
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='hex',
+        encode=hex_encode,
+        decode=hex_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )

Added: python/branches/py3k/Lib/encodings/quopri_codec.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/encodings/quopri_codec.py	Thu Dec  2 19:06:51 2010
@@ -0,0 +1,56 @@
+"""Codec for quoted-printable encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+"""
+
+import codecs
+import quopri
+from io import BytesIO
+
+def quopri_encode(input, errors='strict'):
+    assert errors == 'strict'
+    f = BytesIO(input)
+    g = BytesIO()
+    quopri.encode(f, g, 1)
+    return (g.getvalue(), len(input))
+
+def quopri_decode(input, errors='strict'):
+    assert errors == 'strict'
+    f = BytesIO(input)
+    g = BytesIO()
+    quopri.decode(f, g)
+    return (g.getvalue(), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return quopri_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return quopri_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        return quopri_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        return quopri_decode(input, self.errors)[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+# encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='quopri',
+        encode=quopri_encode,
+        decode=quopri_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )

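A quick quoted-printable round trip: printable ASCII passes through, anything
else becomes an =XX escape:

    import codecs

    encode = codecs.getencoder("quopri_codec")
    decode = codecs.getdecoder("quopri_codec")

    body = b"caf\xc3\xa9"
    wire = encode(body)[0]                     # -> b'caf=C3=A9'
    assert decode(wire)[0] == body
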
Added: python/branches/py3k/Lib/encodings/rot_13.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/encodings/rot_13.py	Thu Dec  2 19:06:51 2010
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+""" Python Character Mapping Codec for ROT13.
+
+This codec de/encodes from str to str and is therefore usable with
+str.transform() and str.untransform().
+
+Written by Marc-Andre Lemburg (mal at lemburg.com).
+"""
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return (input.translate(rot13_map), len(input))
+
+    def decode(self, input, errors='strict'):
+        return (input.translate(rot13_map), len(input))
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        return input.translate(rot13_map)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        return input.translate(rot13_map)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='rot-13',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
+
+### Map
+
+rot13_map = codecs.make_identity_dict(range(256))
+rot13_map.update({
+   0x0041: 0x004e,
+   0x0042: 0x004f,
+   0x0043: 0x0050,
+   0x0044: 0x0051,
+   0x0045: 0x0052,
+   0x0046: 0x0053,
+   0x0047: 0x0054,
+   0x0048: 0x0055,
+   0x0049: 0x0056,
+   0x004a: 0x0057,
+   0x004b: 0x0058,
+   0x004c: 0x0059,
+   0x004d: 0x005a,
+   0x004e: 0x0041,
+   0x004f: 0x0042,
+   0x0050: 0x0043,
+   0x0051: 0x0044,
+   0x0052: 0x0045,
+   0x0053: 0x0046,
+   0x0054: 0x0047,
+   0x0055: 0x0048,
+   0x0056: 0x0049,
+   0x0057: 0x004a,
+   0x0058: 0x004b,
+   0x0059: 0x004c,
+   0x005a: 0x004d,
+   0x0061: 0x006e,
+   0x0062: 0x006f,
+   0x0063: 0x0070,
+   0x0064: 0x0071,
+   0x0065: 0x0072,
+   0x0066: 0x0073,
+   0x0067: 0x0074,
+   0x0068: 0x0075,
+   0x0069: 0x0076,
+   0x006a: 0x0077,
+   0x006b: 0x0078,
+   0x006c: 0x0079,
+   0x006d: 0x007a,
+   0x006e: 0x0061,
+   0x006f: 0x0062,
+   0x0070: 0x0063,
+   0x0071: 0x0064,
+   0x0072: 0x0065,
+   0x0073: 0x0066,
+   0x0074: 0x0067,
+   0x0075: 0x0068,
+   0x0076: 0x0069,
+   0x0077: 0x006a,
+   0x0078: 0x006b,
+   0x0079: 0x006c,
+   0x007a: 0x006d,
+})
+
+### Filter API
+
+def rot13(infile, outfile):
+    outfile.write(infile.read().transform('rot-13'))
+
+if __name__ == '__main__':
+    import sys
+    rot13(sys.stdin, sys.stdout)

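The codec registers itself under the name 'rot-13', and the alias table above
adds 'rot13'.  It is a plain translation table (identity except for A-Z/a-z,
rotated by 13 places), so applying it twice is the identity:

    import codecs

    encode = codecs.getencoder("rot-13")
    assert encode("Hello, World!")[0] == "Uryyb, Jbeyq!"
    assert encode(encode("Hello, World!")[0])[0] == "Hello, World!"
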
Added: python/branches/py3k/Lib/encodings/uu_codec.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/encodings/uu_codec.py	Thu Dec  2 19:06:51 2010
@@ -0,0 +1,99 @@
+"""Python 'uu_codec' Codec - UU content transfer encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal at lemburg.com). Some details were
+adapted from uu.py which was written by Lance Ellinghouse and
+modified by Jack Jansen and Fredrik Lundh.
+"""
+
+import codecs
+import binascii
+from io import BytesIO
+
+### Codec APIs
+
+def uu_encode(input, errors='strict', filename='<data>', mode=0o666):
+    assert errors == 'strict'
+    infile = BytesIO(input)
+    outfile = BytesIO()
+    read = infile.read
+    write = outfile.write
+
+    # Encode
+    write(('begin %o %s\n' % (mode & 0o777, filename)).encode('ascii'))
+    chunk = read(45)
+    while chunk:
+        write(binascii.b2a_uu(chunk))
+        chunk = read(45)
+    write(b' \nend\n')
+
+    return (outfile.getvalue(), len(input))
+
+def uu_decode(input, errors='strict'):
+    assert errors == 'strict'
+    infile = BytesIO(input)
+    outfile = BytesIO()
+    readline = infile.readline
+    write = outfile.write
+
+    # Find start of encoded data
+    while 1:
+        s = readline()
+        if not s:
+            raise ValueError('Missing "begin" line in input data')
+        if s[:5] == b'begin':
+            break
+
+    # Decode
+    while True:
+        s = readline()
+        if not s or s == b'end\n':
+            break
+        try:
+            data = binascii.a2b_uu(s)
+        except binascii.Error as v:
+            # Workaround for broken uuencoders by /Fredrik Lundh
+            nbytes = (((s[0]-32) & 63) * 4 + 5) // 3
+            data = binascii.a2b_uu(s[:nbytes])
+            #sys.stderr.write("Warning: %s\n" % str(v))
+        write(data)
+    if not s:
+        raise ValueError('Truncated input data')
+
+    return (outfile.getvalue(), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return uu_encode(input, errors)
+
+    def decode(self, input, errors='strict'):
+        return uu_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        return uu_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        return uu_decode(input, self.errors)[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='uu',
+        encode=uu_encode,
+        decode=uu_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

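The uu codec produces a complete uuencoded document, including the begin/end
framing with the default filename '<data>' and mode 0o666:

    import codecs

    wire = codecs.getencoder("uu_codec")(b"uuencode me")[0]
    assert wire.startswith(b"begin 666 <data>")
    assert wire.endswith(b"end\n")
    assert codecs.getdecoder("uu_codec")(wire)[0] == b"uuencode me"
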
Added: python/branches/py3k/Lib/encodings/zlib_codec.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/encodings/zlib_codec.py	Thu Dec  2 19:06:51 2010
@@ -0,0 +1,77 @@
+"""Python 'zlib_codec' Codec - zlib compression encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal at lemburg.com).
+"""
+
+import codecs
+import zlib # this codec needs the optional zlib module !
+
+### Codec APIs
+
+def zlib_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (zlib.compress(input), len(input))
+
+def zlib_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (zlib.decompress(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return zlib_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return zlib_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.compressobj = zlib.compressobj()
+
+    def encode(self, input, final=False):
+        if final:
+            c = self.compressobj.compress(input)
+            return c + self.compressobj.flush()
+        else:
+            return self.compressobj.compress(input)
+
+    def reset(self):
+        self.compressobj = zlib.compressobj()
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.decompressobj = zlib.decompressobj()
+
+    def decode(self, input, final=False):
+        if final:
+            c = self.decompressobj.decompress(input)
+            return c + self.decompressobj.flush()
+        else:
+            return self.decompressobj.decompress(input)
+
+    def reset(self):
+        self.decompressobj = zlib.decompressobj()
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='zlib',
+        encode=zlib_encode,
+        decode=zlib_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

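Note that the StreamWriter compresses each write() call independently with
zlib.compress(), so a payload is best handed over in a single write:

    import codecs, io, zlib

    buf = io.BytesIO()
    writer = codecs.getwriter("zlib_codec")(buf)
    writer.write(b"needle " * 200)             # one call, one zlib stream

    compressed = buf.getvalue()
    assert len(compressed) < len(b"needle " * 200)
    assert zlib.decompress(compressed) == b"needle " * 200
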
Modified: python/branches/py3k/Lib/test/test_bytes.py
==============================================================================
--- python/branches/py3k/Lib/test/test_bytes.py	(original)
+++ python/branches/py3k/Lib/test/test_bytes.py	Thu Dec  2 19:06:51 2010
@@ -207,6 +207,11 @@
         self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
                          "Hello world\n")
 
+    def test_transform(self):
+        b1 = self.type2test(range(256))
+        b2 = b1.transform("base64").untransform("base64")
+        self.assertEqual(b2, b1)
+
     def test_from_int(self):
         b = self.type2test(0)
         self.assertEqual(b, self.type2test())

Modified: python/branches/py3k/Lib/test/test_codecs.py
==============================================================================
--- python/branches/py3k/Lib/test/test_codecs.py	(original)
+++ python/branches/py3k/Lib/test/test_codecs.py	Thu Dec  2 19:06:51 2010
@@ -1659,6 +1659,67 @@
                 self.assertEqual(f.read(), data * 2)
 
 
+bytes_transform_encodings = [
+    "base64_codec",
+    "uu_codec",
+    "quopri_codec",
+    "hex_codec",
+]
+try:
+    import zlib
+except ImportError:
+    pass
+else:
+    bytes_transform_encodings.append("zlib_codec")
+try:
+    import bz2
+except ImportError:
+    pass
+else:
+    bytes_transform_encodings.append("bz2_codec")
+
+class TransformCodecTest(unittest.TestCase):
+    def test_basics(self):
+        binput = bytes(range(256))
+        ainput = bytearray(binput)
+        for encoding in bytes_transform_encodings:
+            # generic codecs interface
+            (o, size) = codecs.getencoder(encoding)(binput)
+            self.assertEqual(size, len(binput))
+            (i, size) = codecs.getdecoder(encoding)(o)
+            self.assertEqual(size, len(o))
+            self.assertEqual(i, binput)
+
+            # transform interface
+            boutput = binput.transform(encoding)
+            aoutput = ainput.transform(encoding)
+            self.assertEqual(boutput, aoutput)
+            self.assertIsInstance(boutput, bytes)
+            self.assertIsInstance(aoutput, bytearray)
+            bback = boutput.untransform(encoding)
+            aback = aoutput.untransform(encoding)
+            self.assertEqual(bback, aback)
+            self.assertEqual(bback, binput)
+            self.assertIsInstance(bback, bytes)
+            self.assertIsInstance(aback, bytearray)
+
+    def test_read(self):
+        for encoding in bytes_transform_encodings:
+            sin = b"\x80".transform(encoding)
+            reader = codecs.getreader(encoding)(io.BytesIO(sin))
+            sout = reader.read()
+            self.assertEqual(sout, b"\x80")
+
+    def test_readline(self):
+        for encoding in bytes_transform_encodings:
+            if encoding in ['uu_codec', 'zlib_codec']:
+                continue
+            sin = b"\x80".transform(encoding)
+            reader = codecs.getreader(encoding)(io.BytesIO(sin))
+            sout = reader.readline()
+            self.assertEqual(sout, b"\x80")
+
+
 def test_main():
     support.run_unittest(
         UTF32Test,
@@ -1686,6 +1747,7 @@
         TypesTest,
         SurrogateEscapeTest,
         BomTest,
+        TransformCodecTest,
     )
 
 

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Thu Dec  2 19:06:51 2010
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #7475: Added transform() and untransform() methods to both bytes
+  and string types.  They can be used to access those codecs providing
+  bytes-to-bytes and string-to-string mappings.
+
 - Issue #8685: Speed up set difference ``a - b`` when source set ``a`` is
   much larger than operand ``b``.  Patch by Andrew Bennetts.
 

Modified: python/branches/py3k/Objects/bytearrayobject.c
==============================================================================
--- python/branches/py3k/Objects/bytearrayobject.c	(original)
+++ python/branches/py3k/Objects/bytearrayobject.c	Thu Dec  2 19:06:51 2010
@@ -2488,6 +2488,75 @@
     return PyUnicode_FromEncodedObject(self, encoding, errors);
 }
 
+PyDoc_STRVAR(transform__doc__,
+"B.transform(encoding, errors='strict') -> bytearray\n\
+\n\
+Transform B using the codec registered for encoding. errors may be given\n\
+to set a different error handling scheme.");
+
+static PyObject *
+bytearray_transform(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    const char *encoding = NULL;
+    const char *errors = NULL;
+    static char *kwlist[] = {"encoding", "errors", 0};
+    PyObject *v, *w;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
+                                     kwlist, &encoding, &errors))
+        return NULL;
+
+    v = PyCodec_Encode(self, encoding, errors);
+    if (v == NULL)
+        return NULL;
+    if (!PyBytes_Check(v)) {
+        PyErr_Format(PyExc_TypeError,
+                     "encoder did not return a bytes object (type=%.400s)",
+                     Py_TYPE(v)->tp_name);
+        Py_DECREF(v);
+        return NULL;
+    }
+    w = PyByteArray_FromStringAndSize(PyBytes_AS_STRING(v),
+                                      PyBytes_GET_SIZE(v));
+    Py_DECREF(v);
+    return w;
+}
+
+
+PyDoc_STRVAR(untransform__doc__,
+"B.untransform(encoding, errors='strict') -> bytearray\n\
+\n\
+Reverse-transform B using the codec registered for encoding. errors may\n\
+be given to set a different error handling scheme.");
+
+static PyObject *
+bytearray_untransform(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    const char *encoding = NULL;
+    const char *errors = NULL;
+    static char *kwlist[] = {"encoding", "errors", 0};
+    PyObject *v, *w;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
+                                     kwlist, &encoding, &errors))
+        return NULL;
+
+    v = PyCodec_Decode(self, encoding, errors);
+    if (v == NULL)
+        return NULL;
+    if (!PyBytes_Check(v)) {
+        PyErr_Format(PyExc_TypeError,
+                     "decoder did not return a bytes object (type=%.400s)",
+                     Py_TYPE(v)->tp_name);
+        Py_DECREF(v);
+        return NULL;
+    }
+    w = PyByteArray_FromStringAndSize(PyBytes_AS_STRING(v),
+                                      PyBytes_GET_SIZE(v));
+    Py_DECREF(v);
+    return w;
+}
+
 PyDoc_STRVAR(alloc_doc,
 "B.__alloc__() -> int\n\
 \n\
@@ -2782,8 +2851,12 @@
     {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
      _Py_swapcase__doc__},
     {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
+    {"transform", (PyCFunction)bytearray_transform, METH_VARARGS | METH_KEYWORDS,
+     transform__doc__},
     {"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
      translate__doc__},
+    {"untransform", (PyCFunction)bytearray_untransform, METH_VARARGS | METH_KEYWORDS,
+     untransform__doc__},
     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
     {NULL}

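Since PyCodec_Encode()/PyCodec_Decode() on a bytearray yield a bytes object,
the bytearray methods above copy the result into a fresh bytearray, so the
receiver's type is preserved (sketch; again requires an interpreter with this
patch):

    data = bytearray(b"attack at dawn")
    out = data.transform("hex")
    assert isinstance(out, bytearray)
    assert out.untransform("hex") == data
    assert isinstance(b"attack at dawn".transform("hex"), bytes)
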
Modified: python/branches/py3k/Objects/bytesobject.c
==============================================================================
--- python/branches/py3k/Objects/bytesobject.c	(original)
+++ python/branches/py3k/Objects/bytesobject.c	Thu Dec  2 19:06:51 2010
@@ -2312,6 +2312,68 @@
     return PyUnicode_FromEncodedObject(self, encoding, errors);
 }
 
+PyDoc_STRVAR(transform__doc__,
+"B.transform(encoding, errors='strict') -> bytes\n\
+\n\
+Transform B using the codec registered for encoding. errors may be given\n\
+to set a different error handling scheme.");
+
+static PyObject *
+bytes_transform(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    const char *encoding = NULL;
+    const char *errors = NULL;
+    static char *kwlist[] = {"encoding", "errors", 0};
+    PyObject *v;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
+                                     kwlist, &encoding, &errors))
+        return NULL;
+
+    v = PyCodec_Encode(self, encoding, errors);
+    if (v == NULL)
+        return NULL;
+    if (!PyBytes_Check(v)) {
+        PyErr_Format(PyExc_TypeError,
+                     "encoder did not return a bytes object (type=%.400s)",
+                     Py_TYPE(v)->tp_name);
+        Py_DECREF(v);
+        return NULL;
+    }
+    return v;
+}
+
+
+PyDoc_STRVAR(untransform__doc__,
+"B.untransform(encoding, errors='strict') -> bytes\n\
+\n\
+Reverse-transform B using the codec registered for encoding. errors may\n\
+be given to set a different error handling scheme.");
+
+static PyObject *
+bytes_untransform(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    const char *encoding = NULL;
+    const char *errors = NULL;
+    static char *kwlist[] = {"encoding", "errors", 0};
+    PyObject *v;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
+                                     kwlist, &encoding, &errors))
+        return NULL;
+
+    v = PyCodec_Decode(self, encoding, errors);
+    if (v == NULL)
+        return NULL;
+    if (!PyBytes_Check(v)) {
+        PyErr_Format(PyExc_TypeError,
+                     "decoder did not return a bytes object (type=%.400s)",
+                     Py_TYPE(v)->tp_name);
+        Py_DECREF(v);
+        return NULL;
+    }
+    return v;
+}
 
 PyDoc_STRVAR(splitlines__doc__,
 "B.splitlines([keepends]) -> list of lines\n\
@@ -2475,8 +2537,10 @@
     {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
      _Py_swapcase__doc__},
     {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
+    {"transform", (PyCFunction)bytes_transform, METH_VARARGS | METH_KEYWORDS, transform__doc__},
     {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
      translate__doc__},
+    {"untransform", (PyCFunction)bytes_untransform, METH_VARARGS | METH_KEYWORDS, untransform__doc__},
     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
     {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Thu Dec  2 19:06:51 2010
@@ -7432,6 +7432,7 @@
     v = PyUnicode_AsEncodedString((PyObject *)self, encoding, errors);
     if (v == NULL)
         goto onError;
+    /* XXX this check is redundant */
     if (!PyBytes_Check(v)) {
         PyErr_Format(PyExc_TypeError,
                      "encoder did not return a bytes object "
@@ -7446,6 +7447,44 @@
     return NULL;
 }
 
+PyDoc_STRVAR(transform__doc__,
+             "S.transform(encoding, errors='strict') -> str\n\
+\n\
+Transform S using the codec registered for encoding. errors may be given\n\
+to set a different error handling scheme.");
+
+static PyObject *
+unicode_transform(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
+{
+    static char *kwlist[] = {"encoding", "errors", 0};
+    char *encoding = NULL;
+    char *errors = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
+                                     kwlist, &encoding, &errors))
+        return NULL;
+    return PyUnicode_AsEncodedUnicode((PyObject *)self, encoding, errors);
+}
+
+PyDoc_STRVAR(untransform__doc__,
+             "S.untransform(encoding, errors='strict') -> str\n\
+\n\
+Reverse-transform S using the codec registered for encoding. errors may be\n\
+given to set a different error handling scheme.");
+
+static PyObject *
+unicode_untransform(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
+{
+    static char *kwlist[] = {"encoding", "errors", 0};
+    char *encoding = NULL;
+    char *errors = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
+                                     kwlist, &encoding, &errors))
+        return NULL;
+    return PyUnicode_AsDecodedUnicode((PyObject *)self, encoding, errors);
+}
+
 PyDoc_STRVAR(expandtabs__doc__,
              "S.expandtabs([tabsize]) -> str\n\
 \n\
@@ -9091,7 +9130,8 @@
     /* Order is according to common usage: often used methods should
        appear first, since lookup is done sequentially. */
 
-    {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
+    {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS,
+     encode__doc__},
     {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
     {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
     {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
@@ -9136,6 +9176,10 @@
     {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
     {"maketrans", (PyCFunction) unicode_maketrans,
      METH_VARARGS | METH_STATIC, maketrans__doc__},
+    {"transform", (PyCFunction) unicode_transform, METH_VARARGS | METH_KEYWORDS,
+     transform__doc__},
+    {"untransform", (PyCFunction) unicode_untransform, METH_VARARGS | METH_KEYWORDS,
+     untransform__doc__},
     {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
 #if 0
     {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},

