[Python-checkins] r54078 - sandbox/trunk/pep3101/README.txt sandbox/trunk/pep3101/StringFormat.py

Fri Mar 2 12:42:10 CET 2007

Author: eric.smith
Date: Fri Mar  2 12:42:08 2007
New Revision: 54078

Added:
   sandbox/trunk/pep3101/StringFormat.py   (contents, props changed)
Modified:
   sandbox/trunk/pep3101/README.txt
Log:
Added StringFormat.py, which is Talin's original pure Python implementation.  Only for historical interest.

Modified: sandbox/trunk/pep3101/README.txt
==============================================================================

--- sandbox/trunk/pep3101/README.txt	(original)
+++ sandbox/trunk/pep3101/README.txt	Fri Mar  2 12:42:08 2007
@@ -42,6 +42,9 @@
       Python versions.
     - setup.py  -- Use "build" option to make the extension module
     - test_simpleformat.py  -- initial unittests
+    - StringFormat.py -- Talin's original implementation in Python.
+      This is only for historical interest: it doesn't exactly match
+      the PEP or C implementation.
 
 Todo:
 

Added: sandbox/trunk/pep3101/StringFormat.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/StringFormat.py	Fri Mar  2 12:42:08 2007
@@ -0,0 +1,442 @@
+# Python string formatting
+
+from math import log
+try:
+    import locale
+except:
+    locale = None
+try:
+    import fpformat
+except:
+    fpformat = None
+
+# Except for errors in the format string.
+class FormatError(StandardError):
+    pass
+
+strict_format_errors = False
+
+class ConversionTypes:
+    Binary      = 'b'   # Base-2
+    Character   = 'c'   # Print as character
+    Decimal     = 'd'   # Decimal integer
+    Exponent    = 'e'   # Exponential notation
+    ExponentUC  = 'E'   # Exponential notation with upper case 'E'
+    Fixed       = 'f'   # Fixed-point
+    FixedUC     = 'F'   # Fixed-point with upper case
+    General     = 'g'   # General number notation
+    GeneralUC   = 'G'   # General number notation with upper case 'E'
+    Number      = 'n'   # Number in locale-specific format
+    Octal       = 'o'   # Octal
+    Repr        = 'r'   # In repr() format
+    String      = 's'   # Convert using str()
+    Hex         = 'x'   # Base 16
+    HexUC       = 'X'   # Base 16 upper case
+    Percentage  = '%'   # As percentage
+
+ConversionTypes.All = set(ConversionTypes.__dict__.values())
+
+# Parse the standard conversion spec. Note that I don't use
+# regex here because I'm trying to eliminate external dependencies
+# as much as possible.
+def parse_std_conversion(spec):
+    length = None
+    precision = None
+    ctype = None
+    align = None
+    fill_char = None
+    sign = None
+
+    index = 0
+    spec_len = len(spec)
+
+    # If the second char is an alignment token,
+    # then parse the fill char
+    if spec_len >=2 and spec[ 1 ] in '<>=^':
+        fill_char = spec[ 0 ]
+        align = spec[ 1 ]
+        index = 2
+    # Otherwise, parse the alignment token
+    elif spec_len >= 1 and spec[ 0 ] in '<>=^':
+        align = spec[ 0 ]
+        index = 1
+
+    # Parse the various sign options
+    if index < spec_len and spec[ index ] in ' +-(':
+        sign = spec_len[ index ]
+        index += 1
+        if index < spec_len and spec[ index ] == ')':
+            index += 1
+
+    # The special case for 0-padding (backwards compat)
+    if fill_char == None and index < spec_len and spec[ index ] == '0':
+        fill_char = '0'
+        if align == None:
+            align = '='
+        index += 1
+
+    # Parse field width
+    saveindex = index
+    while index < spec_len and spec[index].isdigit():
+        index += 1
+
+    if index > saveindex:
+        length = int(spec[saveindex : index])
+
+    # Parse field precision
+    if index < spec_len and spec[index] == '.':
+        index += 1
+        saveindex = index
+        while index < spec_len and spec[index].isdigit():
+            index += 1
+        if index > saveindex:
+            precision = int(spec[saveindex:index])
+
+    # Finally, parse the type field
+    remaining = spec_len - index
+    if remaining > 1:
+        return None     # Invalid conversion spec
+
+    if remaining == 1:
+        ctype = spec[index]
+        if ctype not in ConversionTypes.All:
+            return None
+
+    return (fill_char, align, sign, length, precision, ctype)
+
+# Convert to int, and split into sign part and magnitude part
+def to_int(val):
+    val = int(val)
+    if val < 0: return '-', -val
+    return '+', val
+
+# Convert to float, and split into sign part and magnitude part
+def to_float(val):
+    val = float(val)
+    if val < 0: return '-', -val
+    return '+', val
+
+# Pure python implementation of the C printf 'e' format specificer
+def sci(val,precision,letter='e'):
+    # Split into sign and magnitude (not really needed for formatting
+    # since we already did this part. Mainly here in case 'sci'
+    # ever gets split out as an independent function.)
+    sign = ''
+    if val < 0:
+        sign = '-'
+        val = -val
+
+    # Calculate the exponent
+    exp = int(floor(log(val,10)))
+
+    # Normalize the value
+    val *= 10**-exp
+
+    # If the value is exactly an integer, then we don't want to
+    # print *any* decimal digits, regardless of precision
+    if val == floor(val):
+        val = int(val)
+    else:
+        # Otherwise, round it based on precision
+        val = round(val,precision)
+        # The rounding operation might have increased the
+        # number to where it is no longer normalized, if so
+        # then adjust the exponent.
+        if val >= 10.0:
+            exp += 1
+            val = val * 0.1
+
+    # Convert the exponent to a string using only str().
+    # The existing C printf always prints at least 2 digits.
+    esign = '+'
+    if exp < 0:
+        exp = -exp
+        esign = '-'
+    if exp < 10: exp = '0' + str(exp)
+    else: exp = str(exp)
+
+    # The final result
+    return sign + str(val) + letter + esign + exp
+
+# The standard formatter
+def format_builtin_type(value, spec):
+
+    # Parse the conversion spec
+    conversion = parse_std_conversion(spec)
+    if conversion is None:
+        raise FormatError("Invalid conversion spec: " + spec)
+
+    # Unpack the conversion spec
+    fill_char, align, sign_char, length, precision, ctype = conversion
+
+    # Choose a default conversion type
+    if ctype == None:
+        if isinstance(value, int) or isinstance(value, long):
+            ctype = ConversionTypes.Decimal
+        elif isinstance(value, float):
+            ctype = ConversionTypes.General
+        else:
+            ctype = ConversionTypes.String
+
+    sign = None
+
+    # Conversion types that resolve to other types
+    if ctype == ConversionTypes.Percentage:
+        ctype = ConversionTypes.Fixed
+        value = float(value) * 100.0
+
+    if ctype == ConversionTypes.Binary:
+        result = ''
+        sign, value = to_int(value)
+        while value:
+            if value & 1: result = '1' + result
+            else: result = '0' + result
+            value >>= 1
+        if len(result) == 0:
+            result = '0'
+    elif ctype == ConversionTypes.Octal:
+        sign, value = to_int(value)
+        result = oct(value)
+    elif ctype == ConversionTypes.Hex:
+        sign, value = to_int(value)
+        result = hex(value)
+    elif ctype == ConversionTypes.HexUC:
+        sign, value = to_int(value)
+        result = hex(value).upper()
+    elif ctype == ConversionTypes.Character:
+        result = chr(int( value) )
+    elif ctype == ConversionTypes.Decimal:
+        sign, value = to_int(value)
+        result = str(value)
+    elif ctype == ConversionTypes.Fixed or ctype == ConversionTypes.FixedUC:
+        sign, value = to_float(value)
+        if fpformat and precision is not None:
+            result = fpformat.fix(value, precision)
+        else:
+            result = str(value)
+    elif ctype == ConversionTypes.General or ctype == ConversionTypes.GeneralUC:
+        #Same as "e" if exponent is less than -4 or greater than precision, "f" otherwise.
+        sign, value = to_float(value)
+        if fpformat and precision is not None:
+            if value < 0.0001 or value > 10**precision:
+                result = fpformat.sci(value, precision)
+            else:
+                result = fpformat.fix(value, precision)
+            if ctype == ConversionTypes.GeneralUC:
+                result = result.upper()
+        else:
+            result = str(value)
+    elif ctype == ConversionTypes.Exponent or ctype == ConversionTypes.ExponentUC:
+        sign, value = to_float(value)
+        if precision is None: precision = 5 # Duh, I dunno
+        result = sci(value, precision, ctype)
+    elif ctype == ConversionTypes.Number:
+        sign, value = to_float(value)
+        if locale:
+            # For some reason, this is not working the way I would
+            # expect
+            result = locale.format("%f", float( value) )
+        else:
+            result = str(value)
+    elif ctype == ConversionTypes.String:
+        result = str(value)
+    elif ctype == ConversionTypes.Repr:
+        result = repr(value)
+
+    # Handle the sign logic
+    prefix = ''
+    suffix = ''
+    if sign == '-':
+        if sign_char == '(': prefix, suffix = '(', ')'
+        else: prefix = '-'
+    elif sign == '+':
+        if sign_char == '+': prefix = '+'
+        elif sign_char == ' ': prefix = ' '
+
+    # Handle the padding logic
+    if length is not None:
+        padding = length - len(result) - len(prefix) - len(suffix)
+        if padding > 0:
+            if align == '>' or align == '^':
+                return fill_char * padding + prefix + result + suffix
+            elif align == '='
+                return prefix + fill_char * padding + result + suffix
+            else:
+                return prefix + result + suffix + fill_char * padding
+
+    return prefix + result + suffix
+
+def cformat(template, format_hook, args, kwargs):
+    # Using array types since we're going to be growing
+    # a lot.
+    from array import array
+    array_type = 'c'
+
+    # Use unicode array if the original string is unicode.
+    if isinstance(template, unicode): array_type = 'u'
+    buffer = array(array_type)
+
+    # Track which arguments actuallly got used
+    unused_args = set(kwargs.keys())
+    unused_args.update(range(0, len(args)))
+
+    # Inner function to format a field from a value and
+    # conversion spec. Most details missing.
+    def format_field(value, cspec):
+
+        # See if there's a hook
+        if format_hook:
+            v = format_hook(value, cspec)
+            if v is not None:
+                return str(v)
+
+        # See if there's a __format__ method
+        elif hasattr(value, '__format__'):
+            return value.__format__(cspec)
+
+        # Default formatting
+        return format_builtin_type(value, cspec)
+
+    # Parse a field specification. Returns True if it was a valid
+    # field, False if it was merely an escaped brace. (We do it
+    # this way to avoid lookahead.)
+    def parse_field(buffer):
+
+        # A separate array for the field spec.
+        fieldspec = array(array_type)
+
+        # Consume from the template iterator.
+        for index, ch in template_iter:
+            # A sub-field. We just interpret it like a normal field,
+            # and append to the fieldspec.
+            if ch == '{':
+                # If the very first character is an open brace, then
+                # assume its an escaped (doubled) brace.
+                if len(fieldspec) == 0:
+                    return False
+
+                # Here's where we catch that doubled brace
+                if not parse_field(fieldspec):
+                    buffer.extend('{')
+                    return True
+
+            # End of field. Now interpret it.
+            elif ch == '}':
+                # Convert the array to string or uni
+                if array_type == 'u':
+                  fieldspec = fieldspec.tosunicode()
+                else:
+                  fieldspec = fieldspec.tostring()
+
+                # Check for conversion spec
+                name = fieldspec
+                conversion = ''
+                parts = fieldspec.split(':', 1)
+                if len(parts) > 1:
+                    name, conversion = parts
+
+                try:
+                    first_time = True
+                    # Split the field name into subfields
+                    for namepart in name.split('.'):
+                        # Split that part by open bracket chars
+                        keyparts = namepart.split('[')
+                        # The first part is just a bare name
+                        key = keyparts[0]
+
+                        # Empty strings are not allowed as field names
+                        if key == '':
+                            raise FormatError("empty field name at char " + str(index))
+
+                        # The first name in the sequence is used to index
+                        # the args/kwargs arrays. Subsequent names are used
+                        # on the result of the previous operation.
+                        if first_time:
+                            first_time = False
+
+                            # Attempt to coerce key to integer
+                            try:
+                                key = int(key)
+                                value = args[key]
+                            except ValueError:
+                                # Keyword args are strings, not uni (so far)
+                                value = kwargs[key]
+
+                            # If we got no exception, then remove from
+                            # unused args
+                            unused_args.remove(key)
+                        else:
+                            # This is not the first time, so get
+                            # an attribute
+                            value = getattr(value, key)
+
+                        # Now process any bracket expressions which followed
+                        # the first part.
+                        for key in keyparts[1:]:
+                            endbracket = key.find(']')
+                            if endbracket < 0 or endbracket != len(key) - 1:
+                                raise FormatError("Invalid field syntax at position " + str(index))
+
+                            # Strip off the closing bracket and try to coerce to int
+                            key = key[:-1]
+                            try:
+                                key = int(key)
+                            except ValueError:
+                                pass
+
+                            # Get the attribute
+                            value = value[key]
+
+                except (AttributeError,KeyError,IndexError), e:
+                    if strict_format_errors: raise
+                    buffer.extend('?' + e.__class__.__name__ + '?')
+                    return True
+
+                buffer.extend(format_field(value, conversion))
+                return True
+            else:
+                fieldspec.append(ch)
+
+        raise FormatError("unmatched open brace at position " + str(index))
+
+    # Construct an iterator from the template
+    template_iter = enumerate(template)
+    prev = None
+    for index, ch in template_iter:
+        if prev == '}':
+            if ch != '}':
+                raise FormatError("unmatched close brace")
+            else:
+                buffer.append('}')
+                prev = None
+                continue
+
+        if ch == '{':
+            # It's a field
+            if not parse_field(buffer):
+                buffer.extend('{')
+        elif ch != '}':
+            buffer.append(ch)
+        prev = ch
+
+    if prev == '}':
+        raise FormatError("unmatched close brace")
+
+    # Complain about unused args
+    if unused_args and strict_format_errors:
+        raise FormatError(
+            "Unused arguments: "
+            + ",".join(str(x) for x in unused_args))
+
+    # Convert the array to its proper type
+    if isinstance(template, unicode):
+        return buffer.tounicode()
+    else:
+        return buffer.tostring()
+
+def format(template, *args, **kwargs):
+    return cformat(template, None, args, kwargs)
+
+if __name__ == '__main__':
+    print format("This is a test of }} {0:x} {x} {y[3]} {2[2]} {1:5n}{{",
+        1000, 200000, 'grag', x='hex', y=[1,2,3]);