[Jython-checkins] jython (merge default -> default): Merge of formatting work to trunk

jeff.allen jython-checkins at python.org
Sun Jun 8 14:13:11 CEST 2014


http://hg.python.org/jython/rev/6cee6fef06f0
changeset:   7287:6cee6fef06f0
parent:      7278:9cd9ab75eade
parent:      7286:234d1492dde4
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Jun 08 10:03:49 2014 +0100
summary:
  Merge of formatting work to trunk

files:
  Lib/test/test_builtin.py                                    |   13 +-
  Lib/test/test_format.py                                     |  473 +++--
  Lib/test/test_format_jy.py                                  |   69 +-
  Lib/test/test_types.py                                      |  122 +-
  Lib/test/test_unicode.py                                    |    7 +-
  src/org/python/core/PyComplex.java                          |  112 +-
  src/org/python/core/PyFloat.java                            |   77 +-
  src/org/python/core/PyInteger.java                          |  349 +---
  src/org/python/core/PyLong.java                             |  178 +-
  src/org/python/core/PyString.java                           |  811 +++------
  src/org/python/core/__builtin__.java                        |   20 +-
  src/org/python/core/stringlib/FloatFormatter.java           |   69 +-
  src/org/python/core/stringlib/IntegerFormatter.java         |  779 +++++++++
  src/org/python/core/stringlib/InternalFormat.java           |  360 +++-
  src/org/python/core/stringlib/InternalFormatSpec.java       |   88 -
  src/org/python/core/stringlib/InternalFormatSpecParser.java |  118 -
  src/org/python/core/stringlib/TextFormatter.java            |  119 +
  tests/java/org/python/core/StringFormatTest.java            |  264 ++-
  18 files changed, 2424 insertions(+), 1604 deletions(-)


diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -361,8 +361,7 @@
         self.assertEqual(eval('a', g, m), 12)
         self.assertRaises(NameError, eval, 'b', g, m)
         self.assertEqual(eval('dir()', g, m), list('xyz'))
-        if not is_jython: #FIXME #1861
-            self.assertEqual(eval('globals()', g, m), g)
+        self.assertEqual(eval('globals()', g, m), g)
         self.assertEqual(eval('locals()', g, m), m)
 
         # Jython allows arbitrary mappings for globals
@@ -386,8 +385,7 @@
         self.assertEqual(eval('a', g, d), 12)
         self.assertRaises(NameError, eval, 'b', g, d)
         self.assertEqual(eval('dir()', g, d), list('xyz'))
-        if not is_jython: #FIXME #1861
-            self.assertEqual(eval('globals()', g, d), g)
+        self.assertEqual(eval('globals()', g, d), g)
         self.assertEqual(eval('locals()', g, d), d)
 
         # Verify locals stores (used by list comps)
@@ -1320,7 +1318,6 @@
         self.assertRaises(TypeError, round, t)
         self.assertRaises(TypeError, round, t, 0)
 
-    @unittest.skipIf(is_jython, "FIXME #1861: not working in Jython")
     def test_round_large(self):
         # Issue #1869: integral floats should remain unchanged
         self.assertEqual(round(5e15-1), 5e15-1)
@@ -1387,7 +1384,6 @@
         b = 2
         return vars()
 
-    @unittest.skipIf(is_jython, "FIXME #1861: not working in Jython")
     def test_vars(self):
         self.assertEqual(set(vars()), set(dir()))
         import sys
@@ -1491,9 +1487,8 @@
             self.assertEqual(format(DerivedFromSimple2(10), 'abcdef'),
                              '10abcdef')
 
-        if not is_jython: #FIXME #1861 check again when __format__ works better.
-            class_test(*classes_new())
-            class_test(*classes_classic())
+        class_test(*classes_new())
+        class_test(*classes_classic())
 
         def empty_format_spec(value):
             # test that:
diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -1,14 +1,17 @@
-from test.test_support import verbose, have_unicode, TestFailed, is_jython
 import sys
+from test.test_support import verbose, have_unicode, TestFailed
+from test.test_support import is_jython
+import test.test_support as test_support
+import unittest
+
+maxsize = test_support.MAX_Py_ssize_t
 
 # test string formatting operator (I am not sure if this is being tested
 # elsewhere but, surely, some of the given cases are *not* tested because
 # they crash python)
 # test on unicode strings as well
 
-overflowok = 1
-
-def testformat(formatstr, args, output=None):
+def testformat(formatstr, args, output=None, limit=None, overflowok=False):
     if verbose:
         if output:
             print "%s %% %s =? %s ..." %\
@@ -23,231 +26,289 @@
         if verbose:
             print 'overflow (this is fine)'
     else:
-        if output and result != output:
+        if output and limit is None and result != output:
             if verbose:
                 print 'no'
-            print "%s %% %s == %s != %s" %\
-                (repr(formatstr), repr(args), repr(result), repr(output))
+            raise AssertionError("%r %% %r == %r != %r" %
+                                (formatstr, args, result, output))
+        # when 'limit' is specified, it determines how many characters
+        # must match exactly; lengths must always match.
+        # ex: limit=5, '12345678' matches '12345___'
+        # (mainly for floating point format tests for which an exact match
+        # can't be guaranteed due to rounding and representation errors)
+        elif output and limit is not None and (
+                len(result)!=len(output) or result[:limit]!=output[:limit]):
+            if verbose:
+                print 'no'
+            print "%s %% %s == %s != %s" % \
+                  (repr(formatstr), repr(args), repr(result), repr(output))
         else:
             if verbose:
                 print 'yes'
 
-def testboth(formatstr, *args):
-    testformat(formatstr, *args)
+
+def testboth(formatstr, *args, **kwargs):
+    testformat(formatstr, *args, **kwargs)
     if have_unicode:
-        testformat(unicode(formatstr), *args)
+        testformat(unicode(formatstr), *args, **kwargs)
 
 
-testboth("%.1d", (1,), "1")
-testboth("%.*d", (sys.maxint,1))  # expect overflow
-testboth("%.100d", (1,), '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001')
-testboth("%#.117x", (1,), '0x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001')
-testboth("%#.118x", (1,), '0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001')
+class FormatTest(unittest.TestCase):
+    def test_format(self):
+        testboth("%.1d", (1,), "1")
+        testboth("%.*d", (sys.maxint,1), overflowok=True)  # expect overflow
+        testboth("%.100d", (1,), '00000000000000000000000000000000000000'
+                 '000000000000000000000000000000000000000000000000000000'
+                 '00000001', overflowok=True)
+        testboth("%#.117x", (1,), '0x00000000000000000000000000000000000'
+                 '000000000000000000000000000000000000000000000000000000'
+                 '0000000000000000000000000001',
+                 overflowok=True)
+        testboth("%#.118x", (1,), '0x00000000000000000000000000000000000'
+                 '000000000000000000000000000000000000000000000000000000'
+                 '00000000000000000000000000001',
+                 overflowok=True)
 
-testboth("%f", (1.0,), "1.000000")
-# these are trying to test the limits of the internal magic-number-length
-# formatting buffer, if that number changes then these tests are less
-# effective
-testboth("%#.*g", (109, -1.e+49/3.))
-testboth("%#.*g", (110, -1.e+49/3.))
-testboth("%#.*g", (110, -1.e+100/3.))
+        testboth("%f", (1.0,), "1.000000")
+        # these are trying to test the limits of the internal magic-number-length
+        # formatting buffer, if that number changes then these tests are less
+        # effective
+        testboth("%#.*g", (109, -1.e+49/3.))
+        testboth("%#.*g", (110, -1.e+49/3.))
+        testboth("%#.*g", (110, -1.e+100/3.))
 
-# test some ridiculously large precision, expect overflow
-testboth('%12.*f', (123456, 1.0))
+        # test some ridiculously large precision, expect overflow
+        # ... Jython remains consistent with the original comment.
+        testboth('%12.*f', (123456, 1.0), overflowok=is_jython)
 
-# Formatting of long integers. Overflow is not ok
-overflowok = 0
-testboth("%x", 10L, "a")
-testboth("%x", 100000000000L, "174876e800")
-testboth("%o", 10L, "12")
-testboth("%o", 100000000000L, "1351035564000")
-testboth("%d", 10L, "10")
-testboth("%d", 100000000000L, "100000000000")
+        # check for internal overflow validation on length of precision
+        # these tests should no longer cause overflow in Python
+        # 2.7/3.1 and later.
+        testboth("%#.*g", (110, -1.e+100/3.))
+        testboth("%#.*G", (110, -1.e+100/3.))
+        testboth("%#.*f", (110, -1.e+100/3.))
+        testboth("%#.*F", (110, -1.e+100/3.))
 
-big = 123456789012345678901234567890L
-testboth("%d", big, "123456789012345678901234567890")
-testboth("%d", -big, "-123456789012345678901234567890")
-testboth("%5d", -big, "-123456789012345678901234567890")
-testboth("%31d", -big, "-123456789012345678901234567890")
-testboth("%32d", -big, " -123456789012345678901234567890")
-testboth("%-32d", -big, "-123456789012345678901234567890 ")
-testboth("%032d", -big, "-0123456789012345678901234567890")
-testboth("%-032d", -big, "-123456789012345678901234567890 ")
-testboth("%034d", -big, "-000123456789012345678901234567890")
-testboth("%034d", big, "0000123456789012345678901234567890")
-testboth("%0+34d", big, "+000123456789012345678901234567890")
-testboth("%+34d", big, "   +123456789012345678901234567890")
-testboth("%34d", big, "    123456789012345678901234567890")
-testboth("%.2d", big, "123456789012345678901234567890")
-testboth("%.30d", big, "123456789012345678901234567890")
-testboth("%.31d", big, "0123456789012345678901234567890")
-testboth("%32.31d", big, " 0123456789012345678901234567890")
+        # Formatting of long integers. Overflow is not ok
+        testboth("%x", 10L, "a")
+        testboth("%x", 100000000000L, "174876e800")
+        testboth("%o", 10L, "12")
+        testboth("%o", 100000000000L, "1351035564000")
+        testboth("%d", 10L, "10")
+        testboth("%d", 100000000000L, "100000000000")
 
-big = 0x1234567890abcdef12345L  # 21 hex digits
-testboth("%x", big, "1234567890abcdef12345")
-testboth("%x", -big, "-1234567890abcdef12345")
-testboth("%5x", -big, "-1234567890abcdef12345")
-testboth("%22x", -big, "-1234567890abcdef12345")
-testboth("%23x", -big, " -1234567890abcdef12345")
-testboth("%-23x", -big, "-1234567890abcdef12345 ")
-testboth("%023x", -big, "-01234567890abcdef12345")
-testboth("%-023x", -big, "-1234567890abcdef12345 ")
-testboth("%025x", -big, "-0001234567890abcdef12345")
-testboth("%025x", big, "00001234567890abcdef12345")
-testboth("%0+25x", big, "+0001234567890abcdef12345")
-testboth("%+25x", big, "   +1234567890abcdef12345")
-testboth("%25x", big, "    1234567890abcdef12345")
-testboth("%.2x", big, "1234567890abcdef12345")
-testboth("%.21x", big, "1234567890abcdef12345")
-testboth("%.22x", big, "01234567890abcdef12345")
-testboth("%23.22x", big, " 01234567890abcdef12345")
-testboth("%-23.22x", big, "01234567890abcdef12345 ")
-testboth("%X", big, "1234567890ABCDEF12345")
-testboth("%#X", big, "0X1234567890ABCDEF12345")
-testboth("%#x", big, "0x1234567890abcdef12345")
-testboth("%#x", -big, "-0x1234567890abcdef12345")
-testboth("%#.23x", -big, "-0x001234567890abcdef12345")
-testboth("%#+.23x", big, "+0x001234567890abcdef12345")
-testboth("%# .23x", big, " 0x001234567890abcdef12345")
-testboth("%#+.23X", big, "+0X001234567890ABCDEF12345")
-testboth("%#-+.23X", big, "+0X001234567890ABCDEF12345")
-testboth("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
-testboth("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
-testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
-# next one gets two leading zeroes from precision, and another from the
-# 0 flag and the width
-testboth("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
-# same, except no 0 flag
-testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+        big = 123456789012345678901234567890L
+        testboth("%d", big, "123456789012345678901234567890")
+        testboth("%d", -big, "-123456789012345678901234567890")
+        testboth("%5d", -big, "-123456789012345678901234567890")
+        testboth("%31d", -big, "-123456789012345678901234567890")
+        testboth("%32d", -big, " -123456789012345678901234567890")
+        testboth("%-32d", -big, "-123456789012345678901234567890 ")
+        testboth("%032d", -big, "-0123456789012345678901234567890")
+        testboth("%-032d", -big, "-123456789012345678901234567890 ")
+        testboth("%034d", -big, "-000123456789012345678901234567890")
+        testboth("%034d", big, "0000123456789012345678901234567890")
+        testboth("%0+34d", big, "+000123456789012345678901234567890")
+        testboth("%+34d", big, "   +123456789012345678901234567890")
+        testboth("%34d", big, "    123456789012345678901234567890")
+        testboth("%.2d", big, "123456789012345678901234567890")
+        testboth("%.30d", big, "123456789012345678901234567890")
+        testboth("%.31d", big, "0123456789012345678901234567890")
+        testboth("%32.31d", big, " 0123456789012345678901234567890")
+        testboth("%d", float(big), "123456________________________", 6)
 
-big = 012345670123456701234567012345670L  # 32 octal digits
-testboth("%o", big, "12345670123456701234567012345670")
-testboth("%o", -big, "-12345670123456701234567012345670")
-testboth("%5o", -big, "-12345670123456701234567012345670")
-testboth("%33o", -big, "-12345670123456701234567012345670")
-testboth("%34o", -big, " -12345670123456701234567012345670")
-testboth("%-34o", -big, "-12345670123456701234567012345670 ")
-testboth("%034o", -big, "-012345670123456701234567012345670")
-testboth("%-034o", -big, "-12345670123456701234567012345670 ")
-testboth("%036o", -big, "-00012345670123456701234567012345670")
-testboth("%036o", big, "000012345670123456701234567012345670")
-testboth("%0+36o", big, "+00012345670123456701234567012345670")
-testboth("%+36o", big, "   +12345670123456701234567012345670")
-testboth("%36o", big, "    12345670123456701234567012345670")
-testboth("%.2o", big, "12345670123456701234567012345670")
-testboth("%.32o", big, "12345670123456701234567012345670")
-testboth("%.33o", big, "012345670123456701234567012345670")
-testboth("%34.33o", big, " 012345670123456701234567012345670")
-testboth("%-34.33o", big, "012345670123456701234567012345670 ")
-testboth("%o", big, "12345670123456701234567012345670")
-testboth("%#o", big, "012345670123456701234567012345670")
-testboth("%#o", -big, "-012345670123456701234567012345670")
-testboth("%#.34o", -big, "-0012345670123456701234567012345670")
-testboth("%#+.34o", big, "+0012345670123456701234567012345670")
-testboth("%# .34o", big, " 0012345670123456701234567012345670")
-testboth("%#+.34o", big, "+0012345670123456701234567012345670")
-testboth("%#-+.34o", big, "+0012345670123456701234567012345670")
-testboth("%#-+37.34o", big, "+0012345670123456701234567012345670  ")
-testboth("%#+37.34o", big, "  +0012345670123456701234567012345670")
-# next one gets one leading zero from precision
-testboth("%.33o", big, "012345670123456701234567012345670")
-# base marker shouldn't change that, since "0" is redundant
-testboth("%#.33o", big, "012345670123456701234567012345670")
-# but reduce precision, and base marker should add a zero
-testboth("%#.32o", big, "012345670123456701234567012345670")
-# one leading zero from precision, and another from "0" flag & width
-testboth("%034.33o", big, "0012345670123456701234567012345670")
-# base marker shouldn't change that
-testboth("%0#34.33o", big, "0012345670123456701234567012345670")
+        big = 0x1234567890abcdef12345L  # 21 hex digits
+        testboth("%x", big, "1234567890abcdef12345")
+        testboth("%x", -big, "-1234567890abcdef12345")
+        testboth("%5x", -big, "-1234567890abcdef12345")
+        testboth("%22x", -big, "-1234567890abcdef12345")
+        testboth("%23x", -big, " -1234567890abcdef12345")
+        testboth("%-23x", -big, "-1234567890abcdef12345 ")
+        testboth("%023x", -big, "-01234567890abcdef12345")
+        testboth("%-023x", -big, "-1234567890abcdef12345 ")
+        testboth("%025x", -big, "-0001234567890abcdef12345")
+        testboth("%025x", big, "00001234567890abcdef12345")
+        testboth("%0+25x", big, "+0001234567890abcdef12345")
+        testboth("%+25x", big, "   +1234567890abcdef12345")
+        testboth("%25x", big, "    1234567890abcdef12345")
+        testboth("%.2x", big, "1234567890abcdef12345")
+        testboth("%.21x", big, "1234567890abcdef12345")
+        testboth("%.22x", big, "01234567890abcdef12345")
+        testboth("%23.22x", big, " 01234567890abcdef12345")
+        testboth("%-23.22x", big, "01234567890abcdef12345 ")
+        testboth("%X", big, "1234567890ABCDEF12345")
+        testboth("%#X", big, "0X1234567890ABCDEF12345")
+        testboth("%#x", big, "0x1234567890abcdef12345")
+        testboth("%#x", -big, "-0x1234567890abcdef12345")
+        testboth("%#.23x", -big, "-0x001234567890abcdef12345")
+        testboth("%#+.23x", big, "+0x001234567890abcdef12345")
+        testboth("%# .23x", big, " 0x001234567890abcdef12345")
+        testboth("%#+.23X", big, "+0X001234567890ABCDEF12345")
+        testboth("%#-+.23X", big, "+0X001234567890ABCDEF12345")
+        testboth("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
+        testboth("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
+        testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+        # next one gets two leading zeroes from precision, and another from the
+        # 0 flag and the width
+        testboth("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
+        # same, except no 0 flag
+        testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+        testboth("%x", float(big), "123456_______________", 6)
 
-# Some small ints, in both Python int and long flavors).
-testboth("%d", 42, "42")
-testboth("%d", -42, "-42")
-testboth("%d", 42L, "42")
-testboth("%d", -42L, "-42")
-testboth("%#x", 1, "0x1")
-testboth("%#x", 1L, "0x1")
-testboth("%#X", 1, "0X1")
-testboth("%#X", 1L, "0X1")
-testboth("%#o", 1, "01")
-testboth("%#o", 1L, "01")
-testboth("%#o", 0, "0")
-testboth("%#o", 0L, "0")
-testboth("%o", 0, "0")
-testboth("%o", 0L, "0")
-testboth("%d", 0, "0")
-testboth("%d", 0L, "0")
-testboth("%#x", 0, "0x0")
-testboth("%#x", 0L, "0x0")
-testboth("%#X", 0, "0X0")
-testboth("%#X", 0L, "0X0")
+        big = 012345670123456701234567012345670L  # 32 octal digits
+        testboth("%o", big, "12345670123456701234567012345670")
+        testboth("%o", -big, "-12345670123456701234567012345670")
+        testboth("%5o", -big, "-12345670123456701234567012345670")
+        testboth("%33o", -big, "-12345670123456701234567012345670")
+        testboth("%34o", -big, " -12345670123456701234567012345670")
+        testboth("%-34o", -big, "-12345670123456701234567012345670 ")
+        testboth("%034o", -big, "-012345670123456701234567012345670")
+        testboth("%-034o", -big, "-12345670123456701234567012345670 ")
+        testboth("%036o", -big, "-00012345670123456701234567012345670")
+        testboth("%036o", big, "000012345670123456701234567012345670")
+        testboth("%0+36o", big, "+00012345670123456701234567012345670")
+        testboth("%+36o", big, "   +12345670123456701234567012345670")
+        testboth("%36o", big, "    12345670123456701234567012345670")
+        testboth("%.2o", big, "12345670123456701234567012345670")
+        testboth("%.32o", big, "12345670123456701234567012345670")
+        testboth("%.33o", big, "012345670123456701234567012345670")
+        testboth("%34.33o", big, " 012345670123456701234567012345670")
+        testboth("%-34.33o", big, "012345670123456701234567012345670 ")
+        testboth("%o", big, "12345670123456701234567012345670")
+        testboth("%#o", big, "012345670123456701234567012345670")
+        testboth("%#o", -big, "-012345670123456701234567012345670")
+        testboth("%#.34o", -big, "-0012345670123456701234567012345670")
+        testboth("%#+.34o", big, "+0012345670123456701234567012345670")
+        testboth("%# .34o", big, " 0012345670123456701234567012345670")
+        testboth("%#+.34o", big, "+0012345670123456701234567012345670")
+        testboth("%#-+.34o", big, "+0012345670123456701234567012345670")
+        testboth("%#-+37.34o", big, "+0012345670123456701234567012345670  ")
+        testboth("%#+37.34o", big, "  +0012345670123456701234567012345670")
+        # next one gets one leading zero from precision
+        testboth("%.33o", big, "012345670123456701234567012345670")
+        # base marker shouldn't change that, since "0" is redundant
+        testboth("%#.33o", big, "012345670123456701234567012345670")
+        # but reduce precision, and base marker should add a zero
+        testboth("%#.32o", big, "012345670123456701234567012345670")
+        # one leading zero from precision, and another from "0" flag & width
+        testboth("%034.33o", big, "0012345670123456701234567012345670")
+        # base marker shouldn't change that
+        testboth("%0#34.33o", big, "0012345670123456701234567012345670")
+        testboth("%o", float(big), "123456__________________________", 6)
 
-testboth("%x", 0x42, "42")
-testboth("%x", -0x42, "-42")
-testboth("%x", 0x42L, "42")
-testboth("%x", -0x42L, "-42")
+        # Some small ints (in both Python int and long flavors).
+        testboth("%d", 42, "42")
+        testboth("%d", -42, "-42")
+        testboth("%d", 42L, "42")
+        testboth("%d", -42L, "-42")
+        testboth("%d", 42.0, "42")
+        testboth("%#x", 1, "0x1")
+        testboth("%#x", 1L, "0x1")
+        testboth("%#X", 1, "0X1")
+        testboth("%#X", 1L, "0X1")
+        testboth("%#x", 1.0, "0x1")
+        testboth("%#o", 1, "01")
+        testboth("%#o", 1L, "01")
+        testboth("%#o", 0, "0")
+        testboth("%#o", 0L, "0")
+        testboth("%o", 0, "0")
+        testboth("%o", 0L, "0")
+        testboth("%d", 0, "0")
+        testboth("%d", 0L, "0")
+        testboth("%#x", 0, "0x0")
+        testboth("%#x", 0L, "0x0")
+        testboth("%#X", 0, "0X0")
+        testboth("%#X", 0L, "0X0")
 
-testboth("%o", 042, "42")
-testboth("%o", -042, "-42")
-testboth("%o", 042L, "42")
-testboth("%o", -042L, "-42")
+        testboth("%x", 0x42, "42")
+        testboth("%x", -0x42, "-42")
+        testboth("%x", 0x42L, "42")
+        testboth("%x", -0x42L, "-42")
+        testboth("%x", float(0x42), "42")
 
-# Test exception for unknown format characters
-if verbose:
-    print 'Testing exceptions'
+        testboth("%o", 042, "42")
+        testboth("%o", -042, "-42")
+        testboth("%o", 042L, "42")
+        testboth("%o", -042L, "-42")
+        testboth("%o", float(042), "42")
 
-def test_exc(formatstr, args, exception, excmsg):
-    try:
-        testformat(formatstr, args)
-    except exception, exc:
-        if str(exc) == excmsg:
-            if verbose:
-                print "yes"
-        else:
-            if verbose: print 'no'
-            print 'Unexpected ', exception, ':', repr(str(exc))
-    except:
-        if verbose: print 'no'
-        print 'Unexpected exception'
-        raise
-    else:
-        raise TestFailed, 'did not get expected exception: %s' % excmsg
+        # alternate float formatting
+        testformat('%g', 1.1, '1.1')
+        testformat('%#g', 1.1, '1.10000')
 
-test_exc('abc %a', 1, ValueError,
-         "unsupported format character 'a' (0x61) at index 5")
-if have_unicode:
-    test_exc(unicode('abc %\u3000','raw-unicode-escape'), 1, ValueError,
-             "unsupported format character '?' (0x3000) at index 5")
+        # Regression test for http://bugs.python.org/issue15516.
+        class IntFails(object):
+            def __int__(self):
+                raise TestFailed
+            def __long__(self):
+                return 0
 
-test_exc('%d', '1', TypeError, "int argument required")
-test_exc('%g', '1', TypeError, "float argument required")
-test_exc('no format', '1', TypeError,
-         "not all arguments converted during string formatting")
-test_exc('no format', u'1', TypeError,
-         "not all arguments converted during string formatting")
-test_exc(u'no format', '1', TypeError,
-         "not all arguments converted during string formatting")
-test_exc(u'no format', u'1', TypeError,
-         "not all arguments converted during string formatting")
+        fst = IntFails()
+        testformat("%x", fst, '0')
 
-# for Jython, do we really need to support this? what's the use case
-# here!  the problem in a nutshell is that it changes __oct__, __hex__
-# such that they don't return a string, but later on the exception
-# will occur anyway. so seems like a lot of work for no value
+        # Test exception for unknown format characters
+        if verbose:
+            print 'Testing exceptions'
 
-# class Foobar(long):
-#     def __oct__(self):
-#         # Returning a non-string should not blow up.
-#         return self + 1
+        def test_exc(formatstr, args, exception, excmsg):
+            try:
+                testformat(formatstr, args)
+            except exception, exc:
+                if str(exc) == excmsg:
+                    if verbose:
+                        print "yes"
+                else:
+                    if verbose: print 'no'
+                    print 'Unexpected ', exception, ':', repr(str(exc))
+            except:
+                if verbose: print 'no'
+                print 'Unexpected exception'
+                raise
+            else:
+                raise TestFailed, 'did not get expected exception: %s' % excmsg
 
-#test_exc('%o', Foobar(), TypeError,
-#         "expected string or Unicode object, long found")
+        test_exc('abc %a', 1, ValueError,
+                 "unsupported format character 'a' (0x61) at index 5")
+        if have_unicode:
+            test_exc(unicode('abc %\u3000','raw-unicode-escape'), 1, ValueError,
+                     "unsupported format character '?' (0x3000) at index 5")
 
-if sys.maxint == 2**31-1 and not is_jython:
-    # crashes 2.2.1 and earlier:
-    try:
-        "%*d"%(sys.maxint, -127)
-    except MemoryError:
-        pass
-    else:
-        raise TestFailed, '"%*d"%(sys.maxint, -127) should fail'
+        test_exc('%d', '1', TypeError, "%d format: a number is required, not str")
+        test_exc('%g', '1', TypeError, "float argument required, not str")
+        test_exc('no format', '1', TypeError,
+                 "not all arguments converted during string formatting")
+        test_exc('no format', u'1', TypeError,
+                 "not all arguments converted during string formatting")
+        test_exc(u'no format', '1', TypeError,
+                 "not all arguments converted during string formatting")
+        test_exc(u'no format', u'1', TypeError,
+                 "not all arguments converted during string formatting")
+
+        # For Jython, we do not support this use case. The test aims at the
+        # use of __oct__ within %o formatting of long. (Or __hex__ within %x
+        # formatting?) CPython does this for long (not int) and has dropped
+        # the idea again by v3. Jython's %o and %x are likewise direct.
+        class Foobar(long):
+            def __oct__(self):
+                # Returning a non-string should not blow up.
+                return self + 1
+
+        if not is_jython :
+            test_exc('%o', Foobar(), TypeError,
+                     "expected string or Unicode object, long found")
+
+        if maxsize == 2**31-1:
+            # crashes 2.2.1 and earlier:
+            try:
+                "%*d"%(maxsize, -127)
+            except MemoryError:
+                pass
+            else:
+                raise TestFailed, '"%*d"%(maxsize, -127) should fail'
+
+def test_main():
+    test_support.run_unittest(FormatTest)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Lib/test/test_format_jy.py b/Lib/test/test_format_jy.py
--- a/Lib/test/test_format_jy.py
+++ b/Lib/test/test_format_jy.py
@@ -5,8 +5,9 @@
 from test import test_support
 import unittest
 
-class FormatTestCase(unittest.TestCase):
-    # Tests that %d converts values for custom classes implementing __int__
+class FormatSubclass(unittest.TestCase):
+    # Custom __int__ and __float__ should be respected by %-formatting
+
     def test_int_conversion_support(self):
         class Foo(object):
             def __init__(self, x): self.x = x
@@ -21,9 +22,71 @@
             def __float__(self): return self. x
         self.assertEqual('1.0', '%.1f' % Foo(1.0))
 
+class FormatUnicodeBase(unittest.TestCase):
+
+    # Test padding non-BMP result
+    def test_pad_string(self):
+        self.padcheck(u"architect")
+        self.padcheck(u'a\U00010001cde')
+
+class FormatUnicodeClassic(FormatUnicodeBase):
+    # Check using %-formatting
+
+    def padcheck(self, s):
+        self.assertEqual(10, len('%10.4s' % s))
+        self.assertEqual(u' '*6 + s[0:4], '%10.4s' % s)
+        self.assertEqual(u' '*6 + s[0:4], '% 10.4s' % s)
+        self.assertEqual(u' '*6 + s[0:4], '%010.4s' % s)
+        self.assertEqual(s[0:3] + u' '*5, '%-8.3s' % s)
+
+class FormatUnicodeModern(FormatUnicodeBase):
+    # Check using __format__
+
+    def padcheck(self, s):
+        self.assertEqual(10, len(format(s, '10.4s')))
+        self.assertEqual(s[0:3] + u' '*7, format(s, '10.3s'))
+        self.assertEqual(s[0:3] + u'~'*7, format(s, '~<10.3s'))
+        self.assertEqual(s[0:3] + u'~'*7, format(s, '~<10.3'))
+        self.assertEqual(u' '*6 + s[0:4], format(s, '>10.4s'))
+        self.assertEqual(u'*'*6 + s[0:4], format(s, '*>10.4s'))
+        self.assertEqual(u'*'*6 + s[0:4], format(s, '*>10.4'))
+
+
+class FormatMisc(unittest.TestCase):
+    # Odd tests Jython used to fail
+
+    def test_mixtures(self) :
+        # Check formatting to a common buffer in PyString
+        result = 'The cube of 0.5 -0.866j is -1 to 0.01%.'
+        self.assertEqual(result, 'The %s of %.3g -%.3fj is -%d to %.2f%%.' %
+                          ('cube', 0.5, 0.866, 1, 0.01))
+        self.assertEqual(result, 'The %s of %.3g %.3fj is %d to %.2f%%.' %
+                          ('cube', 0.5, -0.866, -1, 0.01))
+        self.assertEqual(result, 'The%5s of%4.3g%7.3fj is%3d to%5.2f%%.' %
+                          ('cube', 0.5, -0.866, -1, 0.01))
+        self.assertEqual(result, 'The %-5.4sof %-4.3g%.3fj is %-3dto %.4g%%.' %
+                          ('cubensis', 0.5, -0.866, -1, 0.01))
+
+    def test_percent_padded(self) :
+        self.assertEqual('%hello', '%%%s' % 'hello')
+        self.assertEqual(u'     %hello', '%6%%s' % u'hello')
+        self.assertEqual(u'%     hello', u'%-6%%s' % 'hello')
+
+        self.assertEqual('     %', '%6%' % ())
+        self.assertEqual('     %', '%06%' % ())
+        self.assertEqual('   %', '%*%' % 4)
+        self.assertEqual('%     ', '%-6%' % ())
+        self.assertEqual('%     ', '%-06%' % ())
+        self.assertEqual('%   ', '%*%' % -4)
+
 
 def test_main():
-    test_support.run_unittest(FormatTestCase)
+    test_support.run_unittest(
+            FormatSubclass,
+            FormatUnicodeClassic,
+            FormatUnicodeModern,
+            FormatMisc,
+    )
 
 if __name__ == '__main__':
     test_main()
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -1,7 +1,9 @@
 # Python test set -- part 6, built-in types
 
 from test.test_support import run_unittest, have_unicode, run_with_locale, \
-                              check_py3k_warnings, is_jython
+                              check_py3k_warnings
+from test.test_support import is_jython
+
 import unittest
 import sys
 import locale
@@ -90,7 +92,6 @@
         if float(1) == 1.0 and float(-1) == -1.0 and float(0) == 0.0: pass
         else: self.fail('float() does not work properly')
 
-    @unittest.skipIf(is_jython, "FIXME: not working")
     def test_float_to_string(self):
         def test(f, result):
             self.assertEqual(f.__format__('e'), result)
@@ -407,8 +408,7 @@
         test(-123456, "#012X", '-0X00001E240')
 
         # issue 5782, commas with no specifier type
-        #FIXME: not working.
-        #test(1234, '010,', '00,001,234')
+        test(1234, '010,', '00,001,234')
 
         # make sure these are errors
 
@@ -424,21 +424,19 @@
         self.assertRaises(ValueError, 3 .__format__, ",c")
 
         # ensure that only int and float type specifiers work
-        #FIXME: not working.
-        #for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
-        #                    [chr(x) for x in range(ord('A'), ord('Z')+1)]):
-        #    if not format_spec in 'bcdoxXeEfFgGn%':
-        #        self.assertRaises(ValueError, 0 .__format__, format_spec)
-        #        self.assertRaises(ValueError, 1 .__format__, format_spec)
-        #        self.assertRaises(ValueError, (-1) .__format__, format_spec)
+        for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
+                            [chr(x) for x in range(ord('A'), ord('Z')+1)]):
+            if not format_spec in 'bcdoxXeEfFgGn%':
+                self.assertRaises(ValueError, 0 .__format__, format_spec)
+                self.assertRaises(ValueError, 1 .__format__, format_spec)
+                self.assertRaises(ValueError, (-1) .__format__, format_spec)
 
         # ensure that float type specifiers work; format converts
         #  the int to a float
-        #FIXME: not working.
-        #for format_spec in 'eEfFgG%':
-        #    for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
-        #        self.assertEqual(value.__format__(format_spec),
-        #                         float(value).__format__(format_spec))
+        for format_spec in 'eEfFgG%':
+            for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
+                self.assertEqual(value.__format__(format_spec),
+                                 float(value).__format__(format_spec))
 
         # Issue 6902
         test(123456, "0<20", '12345600000000000000')
@@ -534,23 +532,20 @@
         self.assertRaises(ValueError, 1L .__format__, "#+5x")
         self.assertRaises(ValueError, 1L .__format__, "+5#x")
 
-        #FIXME: this section broken in Jython.
         # ensure that only int and float type specifiers work
-        #for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
-        #                    [chr(x) for x in range(ord('A'), ord('Z')+1)]):
-        #    if not format_spec in 'bcdoxXeEfFgGn%':
-        #        self.assertRaises(ValueError, 0L .__format__, format_spec)
-        #        self.assertRaises(ValueError, 1L .__format__, format_spec)
-        #        self.assertRaises(ValueError, (-1L) .__format__, format_spec)
+        for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
+                            [chr(x) for x in range(ord('A'), ord('Z')+1)]):
+            if not format_spec in 'bcdoxXeEfFgGn%':
+                self.assertRaises(ValueError, 0L .__format__, format_spec)
+                self.assertRaises(ValueError, 1L .__format__, format_spec)
+                self.assertRaises(ValueError, (-1L) .__format__, format_spec)
 
         # ensure that float type specifiers work; format converts
         #  the long to a float
-
-        #FIXME: this section broken in Jython.
-        #for format_spec in 'eEfFgG%':
-        #    for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
-        #        self.assertEqual(value.__format__(format_spec),
-        #                         float(value).__format__(format_spec))
+        for format_spec in 'eEfFgG%':
+            for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
+                self.assertEqual(value.__format__(format_spec),
+                                 float(value).__format__(format_spec))
         # Issue 6902
         test(123456L, "0<20", '12345600000000000000')
         test(123456L, "1<20", '12345611111111111111')
@@ -562,7 +557,6 @@
         test(123456L, "1=20", '11111111111111123456')
         test(123456L, "*=20", '**************123456')
 
-    @unittest.skipIf(is_jython, "FIXME: not working")
     @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
     def test_float__format__locale(self):
         # test locale support for __format__ code 'n'
@@ -576,13 +570,12 @@
     def test_int__format__locale(self):
         # test locale support for __format__ code 'n' for integers
 
-        #FIXME: not working in Jython.
-        #x = 123456789012345678901234567890
-        #for i in range(0, 30):
-        #    self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
+        x = 123456789012345678901234567890
+        for i in range(0, 30):
+            self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
 
-        #    # move to the next integer to test
-        #    x = x // 10
+            # move to the next integer to test
+            x = x // 10
 
         rfmt = ">20n"
         lfmt = "<20n"
@@ -661,30 +654,25 @@
         # a totaly empty format specifier means something else.
         # So, just use a sign flag
         test(1e200, '+g', '+1e+200')
-        #FIXME: not working.
-        #test(1e200, '+', '+1e+200')
+        test(1e200, '+', '+1e+200')
         test(1.1e200, '+g', '+1.1e+200')
-        #FIXME: not working.
-        ##test(1.1e200, '+', '+1.1e+200')
+        test(1.1e200, '+', '+1.1e+200')
 
         test(1.1e200, '+g', '+1.1e+200')
-        #FIXME: not working.
-        #test(1.1e200, '+', '+1.1e+200')
+        test(1.1e200, '+', '+1.1e+200')
 
         # 0 padding
         test(1234., '010f', '1234.000000')
         test(1234., '011f', '1234.000000')
         test(1234., '012f', '01234.000000')
         test(-1234., '011f', '-1234.000000')
-        #FIXME: not working.
-        #test(-1234., '012f', '-1234.000000')
-        #test(-1234., '013f', '-01234.000000')
-        #test(-1234.12341234, '013f', '-01234.123412')
-        #test(-123456.12341234, '011.2f', '-0123456.12')
+        test(-1234., '012f', '-1234.000000')
+        test(-1234., '013f', '-01234.000000')
+        test(-1234.12341234, '013f', '-01234.123412')
+        test(-123456.12341234, '011.2f', '-0123456.12')
 
         # issue 5782, commas with no specifier type
-        #FIXME: not working.
-        #test(1.2, '010,.2', '0,000,001.2')
+        test(1.2, '010,.2', '0,000,001.2')
 
         # 0 padding with commas
         test(1234., '011,f', '1,234.000000')
@@ -692,13 +680,12 @@
         test(1234., '013,f', '01,234.000000')
         test(-1234., '012,f', '-1,234.000000')
         test(-1234., '013,f', '-1,234.000000')
-        #FIXME: not working.
-        #test(-1234., '014,f', '-01,234.000000')
-        #test(-12345., '015,f', '-012,345.000000')
-        #test(-123456., '016,f', '-0,123,456.000000')
-        #test(-123456., '017,f', '-0,123,456.000000')
-        #test(-123456.12341234, '017,f', '-0,123,456.123412')
-        #test(-123456.12341234, '013,.2f', '-0,123,456.12')
+        test(-1234., '014,f', '-01,234.000000')
+        test(-12345., '015,f', '-012,345.000000')
+        test(-123456., '016,f', '-0,123,456.000000')
+        test(-123456., '017,f', '-0,123,456.000000')
+        test(-123456.12341234, '017,f', '-0,123,456.123412')
+        test(-123456.12341234, '013,.2f', '-0,123,456.12')
 
          # % formatting
         test(-1.0, '%', '-100.000000%')
@@ -721,23 +708,20 @@
                 self.assertRaises(ValueError, format, -1e-100, format_spec)
 
         # Alternate formatting is not supported
-        #FIXME: not working.
-        ##self.assertRaises(ValueError, format, 0.0, '#')
+        self.assertRaises(ValueError, format, 0.0, '#')
         self.assertRaises(ValueError, format, 0.0, '#20f')
 
         # Issue 6902
-        #FIXME: not working.
-        #test(12345.6, "0<20", '12345.60000000000000')
-        #test(12345.6, "1<20", '12345.61111111111111')
-        #test(12345.6, "*<20", '12345.6*************')
-        #test(12345.6, "0>20", '000000000000012345.6')
-        #test(12345.6, "1>20", '111111111111112345.6')
-        #test(12345.6, "*>20", '*************12345.6')
-        #test(12345.6, "0=20", '000000000000012345.6')
-        #test(12345.6, "1=20", '111111111111112345.6')
-        #test(12345.6, "*=20", '*************12345.6')
+        test(12345.6, "0<20", '12345.60000000000000')
+        test(12345.6, "1<20", '12345.61111111111111')
+        test(12345.6, "*<20", '12345.6*************')
+        test(12345.6, "0>20", '000000000000012345.6')
+        test(12345.6, "1>20", '111111111111112345.6')
+        test(12345.6, "*>20", '*************12345.6')
+        test(12345.6, "0=20", '000000000000012345.6')
+        test(12345.6, "1=20", '111111111111112345.6')
+        test(12345.6, "*=20", '*************12345.6')
 
-    @unittest.skipIf(is_jython, "FIXME: not working")
     def test_format_spec_errors(self):
         # int, float, and string all share the same format spec
         # mini-language parser.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -357,13 +357,12 @@
         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
-        if not sys.platform.startswith('java'):
-            self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
+        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
         self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
         self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')
 
-        # self.assertEqual(u'%c' % 0x1234, u'\u1234')
-        # self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
+        self.assertEqual(u'%c' % 0x1234, u'\u1234')
+        self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
 
         # formatting jobs delegated from the string implementation:
         self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
diff --git a/src/org/python/core/PyComplex.java b/src/org/python/core/PyComplex.java
--- a/src/org/python/core/PyComplex.java
+++ b/src/org/python/core/PyComplex.java
@@ -4,6 +4,7 @@
 
 import org.python.core.stringlib.FloatFormatter;
 import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
 import org.python.core.stringlib.InternalFormat.Spec;
 import org.python.expose.ExposedGet;
 import org.python.expose.ExposedMethod;
@@ -174,7 +175,9 @@
      * @return formatted value
      */
     private String formatComplex(Spec spec) {
-        FloatFormatter f = new FloatFormatter(spec, 2, 3); // Two elements + "(j)".length
+        int size = 2 * FloatFormatter.size(spec) + 3;  // 2 floats + "(j)"
+        FloatFormatter f = new FloatFormatter(new StringBuilder(size), spec);
+        f.setBytes(true);
         // Even in r-format, complex strips *all* the trailing zeros.
         f.setMinFracDigits(0);
         if (Double.doubleToLongBits(real) == 0L) {
@@ -816,42 +819,87 @@
 
     @ExposedMethod(doc = BuiltinDocs.complex___format___doc)
     final PyObject complex___format__(PyObject formatSpec) {
-        if (!(formatSpec instanceof PyString)) {
-            throw Py.TypeError("__format__ requires str or unicode");
+
+        // Parse the specification
+        Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+
+        // fromText will have thrown if formatSpec is not a PyString (including PyUnicode)
+        PyString formatSpecStr = (PyString)formatSpec;
+        String result;
+
+        // Validate the specification and detect the special case for none-format
+        switch (checkSpecification(spec)) {
+
+            case 0: // None-format
+                // In none-format, we take the default type and precision from __str__.
+                spec = spec.withDefaults(SPEC_STR);
+                // And then we use the __str__ mechanism to get parentheses or real 0 elision.
+                result = formatComplex(spec);
+                break;
+
+            case 1: // Floating-point formats
+                // In any other format, defaults are those commonly used for numeric formats.
+                spec = spec.withDefaults(Spec.NUMERIC);
+                int size = 2 * FloatFormatter.size(spec) + 1; // 2 floats + "j"
+                FloatFormatter f = new FloatFormatter(new StringBuilder(size), spec);
+                f.setBytes(!(formatSpecStr instanceof PyUnicode));
+                // Convert both parts as per specification
+                f.format(real).format(imag, "+").append('j');
+                result = f.pad().getResult();
+                break;
+
+            default: // The type code was not recognised
+                throw Formatter.unknownFormat(spec.type, "complex");
         }
 
-        PyString formatSpecStr = (PyString)formatSpec;
-        String result;
-        try {
-            String specString = formatSpecStr.getString();
-            Spec spec = InternalFormat.fromText(specString);
-            if (spec.type != Spec.NONE && "efgEFGn%".indexOf(spec.type) < 0) {
-                throw FloatFormatter.unknownFormat(spec.type, "complex");
-            } else if (spec.alternate) {
-                throw FloatFormatter.alternateFormNotAllowed("complex");
-            } else if (spec.fill == '0') {
-                throw FloatFormatter.zeroPaddingNotAllowed("complex");
-            } else if (spec.align == '=') {
-                throw FloatFormatter.alignmentNotAllowed('=', "complex");
-            } else {
-                if (spec.type == Spec.NONE) {
-                    // In none-format, we take the default type and precision from __str__.
-                    spec = spec.withDefaults(SPEC_STR);
-                    // And then we use the __str__ mechanism to get parentheses or real 0 elision.
-                    result = formatComplex(spec);
+        // Wrap the result in the same type as the format string
+        return formatSpecStr.createInstance(result);
+    }
+
+    /**
+     * Validate a parsed specification, for <code>PyComplex</code>, returning 0 if it is a valid
+     * none-format specification, 1 if it is a valid float specification, and some other value if it
+     * is not a valid type. If it has any other faults (e.g. alternate form was specified) the
+     * method raises a descriptive exception.
+     *
+     * @param spec a parsed PEP-3101 format specification.
+     * @return 0, 1, or other value for none-format, a float format, or incorrect type.
+     * @throws PyException(ValueError) if the specification is faulty.
+     */
+    @SuppressWarnings("fallthrough")
+    private static int checkSpecification(Spec spec) {
+
+        // Slight differences between format types
+        switch (spec.type) {
+
+            case 'n':
+                if (spec.grouping) {
+                    throw Formatter.notAllowed("Grouping", "complex", spec.type);
+                }
+                // Fall through
+
+            case Spec.NONE:
+            case 'e':
+            case 'f':
+            case 'g':
+            case 'E':
+            case 'F':
+            case 'G':
+                // Check for disallowed parts of the specification
+                if (spec.alternate) {
+                    throw FloatFormatter.alternateFormNotAllowed("complex");
+                } else if (spec.fill == '0') {
+                    throw FloatFormatter.zeroPaddingNotAllowed("complex");
+                } else if (spec.align == '=') {
+                    throw FloatFormatter.alignmentNotAllowed('=', "complex");
                 } else {
-                    // In any other format, defaults are those commonly used for numeric formats.
-                    spec = spec.withDefaults(Spec.NUMERIC);
-                    FloatFormatter f = new FloatFormatter(spec, 2, 1);// 2 floats + "j"
-                    // Convert both parts as per specification
-                    f.format(real).format(imag, "+").append('j');
-                    result = f.pad().getResult();
+                    return (spec.type == Spec.NONE) ? 0 : 1;
                 }
-            }
-        } catch (IllegalArgumentException e) {
-            throw Py.ValueError(e.getMessage());    // XXX Can this be reached?
+
+            default:
+                // spec.type is invalid for complex
+                return 2;
         }
-        return formatSpecStr.createInstance(result);
     }
 
     @Override
diff --git a/src/org/python/core/PyFloat.java b/src/org/python/core/PyFloat.java
--- a/src/org/python/core/PyFloat.java
+++ b/src/org/python/core/PyFloat.java
@@ -7,6 +7,7 @@
 
 import org.python.core.stringlib.FloatFormatter;
 import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
 import org.python.core.stringlib.InternalFormat.Spec;
 import org.python.expose.ExposedClassMethod;
 import org.python.expose.ExposedGet;
@@ -911,32 +912,66 @@
 
     @ExposedMethod(doc = BuiltinDocs.float___format___doc)
     final PyObject float___format__(PyObject formatSpec) {
-        if (!(formatSpec instanceof PyString)) {
-            throw Py.TypeError("__format__ requires str or unicode");
+
+        // Parse the specification
+        Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+
+        // Get a formatter for the specification
+        FloatFormatter f = prepareFormatter(spec);
+
+        if (f != null) {
+            // Bytes mode if formatSpec argument is not unicode.
+            f.setBytes(!(formatSpec instanceof PyUnicode));
+            // Convert as per specification.
+            f.format(value);
+            // Return a result that has the same type (str or unicode) as the formatSpec argument.
+            return f.pad().getPyResult();
+
+        } else {
+            // The type code was not recognised in prepareFormatter
+            throw Formatter.unknownFormat(spec.type, "float");
         }
+    }
 
-        PyString formatSpecStr = (PyString)formatSpec;
-        String result;
-        try {
-            String specString = formatSpecStr.getString();
-            Spec spec = InternalFormat.fromText(specString);
-            if (spec.type!=Spec.NONE && "efgEFGn%".indexOf(spec.type) < 0) {
-                throw FloatFormatter.unknownFormat(spec.type, "float");
-            } else if (spec.alternate) {
-                throw FloatFormatter.alternateFormNotAllowed("float");
-            } else {
+    /**
+     * Common code for PyFloat, {@link PyInteger} and {@link PyLong} to prepare a {@link FloatFormatter} from a parsed specification.
+     * The object returned has format method {@link FloatFormatter#format(double)}.
+     *
+     * @param spec a parsed PEP-3101 format specification.
+     * @return a formatter ready to use, or null if the type is not a floating point format type.
+     * @throws PyException(ValueError) if the specification is faulty.
+     */
+    @SuppressWarnings("fallthrough")
+    static FloatFormatter prepareFormatter(Spec spec) {
+
+        // Slight differences between format types
+        switch (spec.type) {
+
+            case 'n':
+                if (spec.grouping) {
+                    throw Formatter.notAllowed("Grouping", "float", spec.type);
+                }
+                // Fall through
+
+            case Spec.NONE:
+            case 'e':
+            case 'f':
+            case 'g':
+            case 'E':
+            case 'F':
+            case 'G':
+            case '%':
+                // Check for disallowed parts of the specification
+                if (spec.alternate) {
+                    throw FloatFormatter.alternateFormNotAllowed("float");
+                }
                 // spec may be incomplete. The defaults are those commonly used for numeric formats.
                 spec = spec.withDefaults(Spec.NUMERIC);
-                // Get a formatter for the spec.
-                FloatFormatter f = new FloatFormatter(spec);
-                // Convert as per specification.
-                f.format(value).pad();
-                result = f.getResult();
-            }
-        } catch (IllegalArgumentException e) {
-            throw Py.ValueError(e.getMessage());    // XXX Can this be reached?
+                return new FloatFormatter(spec);
+
+            default:
+                return null;
         }
-        return formatSpecStr.createInstance(result);
     }
 
     @ExposedMethod(doc = BuiltinDocs.float_as_integer_ratio_doc)
diff --git a/src/org/python/core/PyInteger.java b/src/org/python/core/PyInteger.java
--- a/src/org/python/core/PyInteger.java
+++ b/src/org/python/core/PyInteger.java
@@ -4,11 +4,12 @@
 
 import java.io.Serializable;
 import java.math.BigInteger;
-import java.text.NumberFormat;
-import java.util.Locale;
 
-import org.python.core.stringlib.InternalFormatSpec;
-import org.python.core.stringlib.InternalFormatSpecParser;
+import org.python.core.stringlib.FloatFormatter;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
+import org.python.core.stringlib.InternalFormat.Spec;
 import org.python.expose.ExposedGet;
 import org.python.expose.ExposedMethod;
 import org.python.expose.ExposedNew;
@@ -953,11 +954,8 @@
 
     @ExposedMethod(doc = BuiltinDocs.int___oct___doc)
     final PyString int___oct__() {
-        if (getValue() < 0) {
-            return new PyString("-0" + Integer.toString(getValue() * -1, 8));
-        } else {
-            return new PyString("0" + Integer.toString(getValue(), 8));
-        }
+        // Use the prepared format specifier for octal.
+        return formatImpl(IntegerFormatter.OCT);
     }
 
     @Override
@@ -967,11 +965,21 @@
 
     @ExposedMethod(doc = BuiltinDocs.int___hex___doc)
     final PyString int___hex__() {
-        if (getValue() < 0) {
-            return new PyString("-0x" + Integer.toString(getValue() * -1, 16));
-        } else {
-            return new PyString("0x" + Integer.toString(getValue(), 16));
-        }
+        // Use the prepared format specifier for hexadecimal.
+        return formatImpl(IntegerFormatter.HEX);
+    }
+
+    /**
+     * Common code used by the number-base conversion methods __oct__ and __hex__.
+     *
+     * @param spec prepared format-specifier.
+     * @return converted value of this object
+     */
+    private PyString formatImpl(Spec spec) {
+        // Traditional formatter (%-format) because #o means "-0123" not "-0o123".
+        IntegerFormatter f = new IntegerFormatter.Traditional(spec);
+        f.format(value);
+        return new PyString(f.getResult());
     }
 
     @ExposedMethod(doc = BuiltinDocs.int___getnewargs___doc)
@@ -1015,256 +1023,87 @@
 
     @ExposedMethod(doc = BuiltinDocs.int___format___doc)
     final PyObject int___format__(PyObject formatSpec) {
-        return formatImpl(getValue(), formatSpec);
-    }
 
-    static PyObject formatImpl(Object value, PyObject formatSpec) {
-        if (!(formatSpec instanceof PyString)) {
-            throw Py.TypeError("__format__ requires str or unicode");
+        // Parse the specification
+        Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+        InternalFormat.Formatter f;
+
+        // Try to make an integer formatter from the specification
+        IntegerFormatter fi = PyInteger.prepareFormatter(spec);
+        if (fi != null) {
+            // Bytes mode if formatSpec argument is not unicode.
+            fi.setBytes(!(formatSpec instanceof PyUnicode));
+            // Convert as per specification.
+            fi.format(value);
+            f = fi;
+
+        } else {
+            // Try to make a float formatter from the specification
+            FloatFormatter ff = PyFloat.prepareFormatter(spec);
+            if (ff != null) {
+                // Bytes mode if formatSpec argument is not unicode.
+                ff.setBytes(!(formatSpec instanceof PyUnicode));
+                // Convert as per specification.
+                ff.format(value);
+                f = ff;
+
+            } else {
+                // The type code was not recognised in either prepareFormatter
+                throw Formatter.unknownFormat(spec.type, "integer");
+            }
         }
 
-        PyString formatSpecStr = (PyString)formatSpec;
-        String result;
-        try {
-            String specString = formatSpecStr.getString();
-            InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
-            result = formatIntOrLong(value, spec);
-        } catch (IllegalArgumentException e) {
-            throw Py.ValueError(e.getMessage());
-        }
-        return formatSpecStr.createInstance(result);
+        // Return a result that has the same type (str or unicode) as the formatSpec argument.
+        return f.pad().getPyResult();
     }
 
     /**
-     * Formats an integer or long number according to a PEP-3101 format specification.
+     * Common code for PyInteger and PyLong to prepare an IntegerFormatter. This object has an
+     * overloaded format method {@link IntegerFormatter#format(int)} and
+     * {@link IntegerFormatter#format(BigInteger)} to support the two types.
      *
-     * @param value Integer or BigInteger object specifying the value to format.
-     * @param spec parsed PEP-3101 format specification.
-     * @return result of the formatting.
+     * @param spec a parsed PEP-3101 format specification.
+     * @return a formatter ready to use, or null if the type is not an integer format type.
+     * @throws PyException(ValueError) if the specification is faulty.
      */
-    public static String formatIntOrLong(Object value, InternalFormatSpec spec) {
-        if (spec.precision != -1) {
-            throw new IllegalArgumentException("Precision not allowed in integer format specifier");
+    @SuppressWarnings("fallthrough")
+    static IntegerFormatter prepareFormatter(Spec spec) throws PyException {
+
+        // Slight differences between format types
+        switch (spec.type) {
+            case 'c':
+                // Character data: specific prohibitions.
+                if (Spec.specified(spec.sign)) {
+                    throw IntegerFormatter.signNotAllowed("integer", spec.type);
+                } else if (spec.alternate) {
+                    throw IntegerFormatter.alternateFormNotAllowed("integer", spec.type);
+                }
+                // Fall through
+
+            case 'x':
+            case 'X':
+            case 'o':
+            case 'b':
+            case 'n':
+                if (spec.grouping) {
+                    throw IntegerFormatter.notAllowed("Grouping", "integer", spec.type);
+                }
+                // Fall through
+
+            case Spec.NONE:
+            case 'd':
+                // Check for disallowed parts of the specification
+                if (Spec.specified(spec.precision)) {
+                    throw IntegerFormatter.precisionNotAllowed("integer");
+                }
+                // spec may be incomplete. The defaults are those commonly used for numeric formats.
+                spec = spec.withDefaults(Spec.NUMERIC);
+                // Get a formatter for the spec.
+                return new IntegerFormatter(spec);
+
+            default:
+                return null;
         }
-
-        int sign;
-        if (value instanceof Integer) {
-            int intValue = (Integer)value;
-            sign = intValue < 0 ? -1 : intValue == 0 ? 0 : 1;
-        } else {
-            sign = ((BigInteger)value).signum();
-        }
-
-        String strValue;
-        String strPrefix = "";
-        String strSign = "";
-
-        if (spec.type == 'c') {
-            if (spec.sign != '\0') {
-                throw new IllegalArgumentException("Sign not allowed with integer format "
-                        + "specifier 'c'");
-            }
-            if (value instanceof Integer) {
-                int intValue = (Integer)value;
-                if (intValue > 0xffff) {
-                    throw new IllegalArgumentException("%c arg not in range(0x10000)");
-                }
-                strValue = Character.toString((char)intValue);
-            } else {
-                BigInteger bigInt = (BigInteger)value;
-                if (bigInt.intValue() > 0xffff || bigInt.bitCount() > 16) {
-                    throw new IllegalArgumentException("%c arg not in range(0x10000)");
-                }
-                strValue = Character.toString((char)bigInt.intValue());
-            }
-        } else {
-            int radix = 10;
-            if (spec.type == 'o') {
-                radix = 8;
-            } else if (spec.type == 'x' || spec.type == 'X') {
-                radix = 16;
-            } else if (spec.type == 'b') {
-                radix = 2;
-            }
-
-            if (spec.type == 'n') {
-                strValue = NumberFormat.getNumberInstance().format(value);
-            } else if (spec.thousands_separators) {
-                NumberFormat format = NumberFormat.getNumberInstance(Locale.US);
-                format.setGroupingUsed(true);
-                strValue = format.format(value);
-            } else if (value instanceof BigInteger) {
-                switch (radix) {
-                    case 2:
-                        strValue = toBinString((BigInteger)value);
-                        break;
-                    case 8:
-                        strValue = toOctString((BigInteger)value);
-                        break;
-                    case 16:
-                        strValue = toHexString((BigInteger)value);
-                        break;
-                    default:
-                        // General case (v.slow in known implementations up to Java 7).
-                        strValue = ((BigInteger)value).toString(radix);
-                        break;
-                }
-            } else {
-                strValue = Integer.toString((Integer)value, radix);
-            }
-
-            if (spec.alternate) {
-                switch (radix) {
-                    case 2:
-                        strPrefix = "0b";
-                        break;
-                    case 8:
-                        strPrefix = "0o";
-                        break;
-                    case 16:
-                        strPrefix = "0x";
-                        break;
-                }
-
-                if (sign < 0) {
-                    assert (strValue.startsWith("-"));
-                    strSign = "-";
-                    strValue = strValue.substring(1);
-                }
-            }
-
-            if (spec.type == 'X') {
-                strPrefix = strPrefix.toUpperCase();
-                strValue = strValue.toUpperCase();
-            }
-
-            if (sign >= 0) {
-                switch (spec.sign) {
-                    case '+':
-                    case ' ':
-                        strSign = Character.toString(spec.sign);
-                        break;
-                }
-            }
-        }
-
-        if (spec.align == '=' && (spec.sign == '-' || spec.sign == '+' || spec.sign == ' ')) {
-            assert (strSign.length() == 1);
-            return strSign + strPrefix + spec.pad(strValue, '>', 1 + strPrefix.length());
-        }
-
-        if (spec.fill_char == 0) {
-            return spec.pad(strSign + strPrefix + strValue, '>', 0);
-        }
-
-        return strSign + strPrefix + spec.pad(strValue, '>', strSign.length() + strPrefix.length());
-    }
-
-    /**
-     * A more efficient algorithm for generating a hexadecimal representation of a byte array.
-     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
-     * consequently, is implemented using expensive mathematical operations.
-     *
-     * @param value the value to generate a hexadecimal string from
-     * @return the hexadecimal representation of value, with "-" sign prepended if necessary
-     */
-    static final String toHexString(BigInteger value) {
-        int signum = value.signum();
-
-        // obvious shortcut
-        if (signum == 0) {
-            return "0";
-        }
-
-        // we want to work in absolute numeric value (negative sign is added afterward)
-        byte[] input = value.abs().toByteArray();
-        StringBuilder sb = new StringBuilder(input.length * 2);
-
-        int b;
-        for (int i = 0; i < input.length; i++) {
-            b = input[i] & 0xFF;
-            sb.append(LOOKUP.charAt(b >> 4));
-            sb.append(LOOKUP.charAt(b & 0x0F));
-        }
-
-        // before returning the char array as string, remove leading zeroes, but not the last one
-        String result = sb.toString().replaceFirst("^0+(?!$)", "");
-        return signum < 0 ? "-" + result : result;
-    }
-
-    /**
-     * A more efficient algorithm for generating an octal representation of a byte array.
-     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
-     * consequently, is implemented using expensive mathematical operations.
-     *
-     * @param value the value to generate an octal string from
-     * @return the octal representation of value, with "-" sign prepended if necessary
-     */
-    static final String toOctString(BigInteger value) {
-        int signum = value.signum();
-
-        // obvious shortcut
-        if (signum == 0) {
-            return "0";
-        }
-
-        byte[] input = value.abs().toByteArray();
-        if (input.length < 3) {
-            return value.toString(8);
-        }
-
-        StringBuilder sb = new StringBuilder(input.length * 3);
-
-        // working backwards, three bytes at a time
-        int threebytes;
-        int trip1, trip2, trip3;    // most, middle, and least significant bytes in the triplet
-        for (int i = input.length - 1; i >= 0; i -= 3) {
-            trip3 = input[i] & 0xFF;
-            trip2 = ((i - 1) >= 0) ? (input[i - 1] & 0xFF) : 0x00;
-            trip1 = ((i - 2) >= 0) ? (input[i - 2] & 0xFF) : 0x00;
-            threebytes = trip3 | (trip2 << 8) | (trip1 << 16);
-
-            // convert the three-byte value into an eight-character octal string
-            for (int j = 0; j < 8; j++) {
-                sb.append(LOOKUP.charAt((threebytes >> (j * 3)) & 0x000007));
-            }
-        }
-
-        String result = sb.reverse().toString().replaceFirst("^0+(?!%)", "");
-        return signum < 0 ? "-" + result : result;
-    }
-
-    /**
-     * A more efficient algorithm for generating a binary representation of a byte array.
-     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
-     * consequently, is implemented using expensive mathematical operations.
-     *
-     * @param value the value to generate a binary string from
-     * @return the binary representation of value, with "-" sign prepended if necessary
-     */
-    static final String toBinString(BigInteger value) {
-        int signum = value.signum();
-
-        // obvious shortcut
-        if (signum == 0) {
-            return "0";
-        }
-
-        // we want to work in absolute numeric value (negative sign is added afterward)
-        byte[] input = value.abs().toByteArray();
-        StringBuilder sb = new StringBuilder(value.bitCount());
-
-        int b;
-        for (int i = 0; i < input.length; i++) {
-            b = input[i] & 0xFF;
-            for (int bit = 7; bit >= 0; bit--) {
-                sb.append(((b >> bit) & 0x1) > 0 ? "1" : "0");
-            }
-        }
-
-        // before returning the char array as string, remove leading zeroes, but not the last one
-        String result = sb.toString().replaceFirst("^0+(?!$)", "");
-        return signum < 0 ? "-" + result : result;
     }
 
     @Override
diff --git a/src/org/python/core/PyLong.java b/src/org/python/core/PyLong.java
--- a/src/org/python/core/PyLong.java
+++ b/src/org/python/core/PyLong.java
@@ -1,13 +1,17 @@
-/*
- * Copyright (c) Corporation for National Research Initiatives
- * Copyright (c) Jython Developers
- */
+// Copyright (c) Corporation for National Research Initiatives
+// Copyright (c) Jython Developers
+
 package org.python.core;
 
 import java.io.Serializable;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 
+import org.python.core.stringlib.FloatFormatter;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
+import org.python.core.stringlib.InternalFormat.Spec;
 import org.python.expose.ExposedGet;
 import org.python.expose.ExposedMethod;
 import org.python.expose.ExposedNew;
@@ -24,8 +28,8 @@
 
     public static final BigInteger MIN_LONG = BigInteger.valueOf(Long.MIN_VALUE);
     public static final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE);
-    public static final BigInteger MAX_ULONG =
-            BigInteger.valueOf(1).shiftLeft(64).subtract(BigInteger.valueOf(1));
+    public static final BigInteger MAX_ULONG = BigInteger.valueOf(1).shiftLeft(64)
+            .subtract(BigInteger.valueOf(1));
 
     /** @deprecated Use MIN_INT instead. */
     @Deprecated
@@ -66,7 +70,7 @@
 
     @ExposedNew
     public static PyObject long___new__(PyNewWrapper new_, boolean init, PyType subtype,
-                                        PyObject[] args, String[] keywords) {
+            PyObject[] args, String[] keywords) {
         if (new_.for_type != subtype) {
             return longSubtypeNew(new_, init, subtype, args, keywords);
         }
@@ -74,7 +78,7 @@
         ArgParser ap = new ArgParser("long", args, keywords, new String[] {"x", "base"}, 0);
         PyObject x = ap.getPyObject(0, null);
         if (x != null && x.getJavaProxy() instanceof BigInteger) {
-            return new PyLong((BigInteger) x.getJavaProxy());
+            return new PyLong((BigInteger)x.getJavaProxy());
         }
         int base = ap.getInt(1, -909);
 
@@ -87,7 +91,7 @@
         if (!(x instanceof PyString)) {
             throw Py.TypeError("long: can't convert non-string with explicit base");
         }
-        return ((PyString) x).atol(base);
+        return ((PyString)x).atol(base);
     }
 
     /**
@@ -108,8 +112,9 @@
                 if (!pye2.match(Py.AttributeError)) {
                     throw pye2;
                 }
-                throw Py.TypeError(
-                    String.format("long() argument must be a string or a number, not '%.200s'", x.getType().fastGetName()));
+                throw Py.TypeError(String.format(
+                        "long() argument must be a string or a number, not '%.200s'", x.getType()
+                                .fastGetName()));
             }
         }
     }
@@ -123,7 +128,7 @@
             PyObject i = integral.invoke("__int__");
             if (!(i instanceof PyInteger) && !(i instanceof PyLong)) {
                 throw Py.TypeError(String.format("__trunc__ returned non-Integral (type %.200s)",
-                                                 integral.getType().fastGetName()));
+                        integral.getType().fastGetName()));
             }
             return i;
         }
@@ -133,24 +138,22 @@
     /**
      * Wimpy, slow approach to new calls for subtypes of long.
      *
-     * First creates a regular long from whatever arguments we got, then allocates a
-     * subtype instance and initializes it from the regular long. The regular long is then
-     * thrown away.
+     * First creates a regular long from whatever arguments we got, then allocates a subtype
+     * instance and initializes it from the regular long. The regular long is then thrown away.
      */
     private static PyObject longSubtypeNew(PyNewWrapper new_, boolean init, PyType subtype,
-                                           PyObject[] args, String[] keywords) {
+            PyObject[] args, String[] keywords) {
         PyObject tmp = long___new__(new_, init, TYPE, args, keywords);
         if (tmp instanceof PyInteger) {
-            int intValue = ((PyInteger) tmp).getValue();
+            int intValue = ((PyInteger)tmp).getValue();
             return new PyLongDerived(subtype, BigInteger.valueOf(intValue));
         } else {
-            return new PyLongDerived(subtype, ((PyLong) tmp).getValue());
+            return new PyLongDerived(subtype, ((PyLong)tmp).getValue());
         }
     }
 
     /**
-     * Convert a double to BigInteger, raising an OverflowError if
-     * infinite.
+     * Convert a double to BigInteger, raising an OverflowError if infinite.
      */
     private static BigInteger toBigInteger(double value) {
         if (Double.isInfinite(value)) {
@@ -249,7 +252,7 @@
     }
 
     public double scaledDoubleValue(int[] exp) {
-        return scaledDoubleValue(getValue(),exp);
+        return scaledDoubleValue(getValue(), exp);
     }
 
     public long getLong(long min, long max) {
@@ -273,14 +276,14 @@
 
     @Override
     public int asInt(int index) {
-        return (int) getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
-                             "long int too large to convert to int");
+        return (int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
+                "long int too large to convert to int");
     }
 
     @Override
     public int asInt() {
-        return (int) getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
-                             "long int too large to convert to int");
+        return (int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
+                "long int too large to convert to int");
     }
 
     @Override
@@ -292,13 +295,13 @@
     public Object __tojava__(Class<?> c) {
         try {
             if (c == Byte.TYPE || c == Byte.class) {
-                return new Byte((byte) getLong(Byte.MIN_VALUE, Byte.MAX_VALUE));
+                return new Byte((byte)getLong(Byte.MIN_VALUE, Byte.MAX_VALUE));
             }
             if (c == Short.TYPE || c == Short.class) {
-                return new Short((short) getLong(Short.MIN_VALUE, Short.MAX_VALUE));
+                return new Short((short)getLong(Short.MIN_VALUE, Short.MAX_VALUE));
             }
             if (c == Integer.TYPE || c == Integer.class) {
-                return new Integer((int) getLong(Integer.MIN_VALUE, Integer.MAX_VALUE));
+                return new Integer((int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE));
             }
             if (c == Long.TYPE || c == Long.class) {
                 return new Long(getLong(Long.MIN_VALUE, Long.MAX_VALUE));
@@ -307,7 +310,7 @@
                 return __float__().__tojava__(c);
             }
             if (c == BigInteger.class || c == Number.class || c == Object.class
-                || c == Serializable.class) {
+                    || c == Serializable.class) {
                 return getValue();
             }
         } catch (PyException e) {
@@ -340,14 +343,14 @@
     }
 
     /**
-     * Coercion logic for long. Implemented as a final method to avoid
-     * invocation of virtual methods from the exposed coerce.
+     * Coercion logic for long. Implemented as a final method to avoid invocation of virtual methods
+     * from the exposed coerce.
      */
     final Object long___coerce_ex__(PyObject other) {
         if (other instanceof PyLong) {
             return other;
         } else if (other instanceof PyInteger) {
-            return Py.newLong(((PyInteger) other).getValue());
+            return Py.newLong(((PyInteger)other).getValue());
         } else {
             return Py.None;
         }
@@ -359,9 +362,9 @@
 
     private static final BigInteger coerce(PyObject other) {
         if (other instanceof PyLong) {
-            return ((PyLong) other).getValue();
+            return ((PyLong)other).getValue();
         } else if (other instanceof PyInteger) {
-            return BigInteger.valueOf(((PyInteger) other).getValue());
+            return BigInteger.valueOf(((PyInteger)other).getValue());
         } else {
             throw Py.TypeError("xxx");
         }
@@ -421,7 +424,7 @@
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___mul___doc)
     final PyObject long___mul__(PyObject right) {
         if (right instanceof PySequence) {
-            return ((PySequence) right).repeat(coerceInt(this));
+            return ((PySequence)right).repeat(coerceInt(this));
         }
 
         if (!canCoerce(right)) {
@@ -438,7 +441,7 @@
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rmul___doc)
     final PyObject long___rmul__(PyObject left) {
         if (left instanceof PySequence) {
-            return ((PySequence) left).repeat(coerceInt(this));
+            return ((PySequence)left).repeat(coerceInt(this));
         }
         if (!canCoerce(left)) {
             return null;
@@ -479,7 +482,7 @@
         if (Options.division_warning > 0) {
             Py.warning(Py.DeprecationWarning, "classic long division");
         }
-        return Py.newLong(divide( getValue(), coerce(right)));
+        return Py.newLong(divide(getValue(), coerce(right)));
     }
 
     @Override
@@ -508,7 +511,7 @@
         if (!canCoerce(right)) {
             return null;
         }
-        return Py.newLong(divide( getValue(), coerce(right)));
+        return Py.newLong(divide(getValue(), coerce(right)));
     }
 
     @Override
@@ -564,7 +567,7 @@
         if (!canCoerce(right)) {
             return null;
         }
-        return true_divide( this.getValue(), coerce(right));
+        return true_divide(this.getValue(), coerce(right));
     }
 
     @Override
@@ -595,7 +598,7 @@
             return null;
         }
         BigInteger rightv = coerce(right);
-        return Py.newLong(modulo(getValue(),rightv, divide(getValue(),rightv)));
+        return Py.newLong(modulo(getValue(), rightv, divide(getValue(), rightv)));
     }
 
     @Override
@@ -624,8 +627,8 @@
         }
         BigInteger rightv = coerce(right);
 
-        BigInteger xdivy = divide(getValue(),rightv);
-        return new PyTuple(Py.newLong(xdivy), Py.newLong(modulo(getValue(),rightv, xdivy)));
+        BigInteger xdivy = divide(getValue(), rightv);
+        return new PyTuple(Py.newLong(xdivy), Py.newLong(modulo(getValue(), rightv, xdivy)));
     }
 
     @Override
@@ -650,7 +653,7 @@
     }
 
     @ExposedMethod(type = MethodType.BINARY, defaults = {"null"},
-                   doc = BuiltinDocs.long___pow___doc)
+            doc = BuiltinDocs.long___pow___doc)
     final PyObject long___pow__(PyObject right, PyObject modulo) {
         if (!canCoerce(right)) {
             return null;
@@ -659,7 +662,7 @@
         if (modulo != null && !canCoerce(right)) {
             return null;
         }
-        return _pow( getValue(), coerce(right), modulo, this, right);
+        return _pow(getValue(), coerce(right), modulo, this, right);
     }
 
     @Override
@@ -676,7 +679,7 @@
     }
 
     public static PyObject _pow(BigInteger value, BigInteger y, PyObject modulo, PyObject left,
-                                PyObject right) {
+            PyObject right) {
         if (y.compareTo(BigInteger.ZERO) < 0) {
             if (value.compareTo(BigInteger.ZERO) != 0) {
                 return left.__float__().__pow__(right, modulo);
@@ -700,32 +703,32 @@
             }
 
             if (z.compareTo(BigInteger.valueOf(0)) <= 0) {
-                // Handle negative modulo's specially
-                /*if (z.compareTo(BigInteger.valueOf(0)) == 0) {
-                    throw Py.ValueError("pow(x, y, z) with z == 0");
-                }*/
+                // Handle negative modulo specially
+                // if (z.compareTo(BigInteger.valueOf(0)) == 0) {
+                // throw Py.ValueError("pow(x, y, z) with z == 0");
+                // }
                 y = value.modPow(y, z.negate());
                 if (y.compareTo(BigInteger.valueOf(0)) > 0) {
                     return Py.newLong(z.add(y));
                 } else {
                     return Py.newLong(y);
                 }
-                //return __pow__(right).__mod__(modulo);
+                // return __pow__(right).__mod__(modulo);
             } else {
                 // XXX: 1.1 no longer supported so review this.
                 // This is buggy in SUN's jdk1.1.5
                 // Extra __mod__ improves things slightly
                 return Py.newLong(value.modPow(y, z));
-                //return __pow__(right).__mod__(modulo);
+                // return __pow__(right).__mod__(modulo);
             }
         }
     }
 
     private static final int coerceInt(PyObject other) {
         if (other instanceof PyLong) {
-            return ((PyLong) other).asInt();
+            return ((PyLong)other).asInt();
         } else if (other instanceof PyInteger) {
-            return ((PyInteger) other).getValue();
+            return ((PyInteger)other).getValue();
         } else {
             throw Py.TypeError("xxx");
         }
@@ -915,7 +918,8 @@
 
     @ExposedMethod(doc = BuiltinDocs.long___int___doc)
     final PyObject long___int__() {
-        if (getValue().compareTo(PyInteger.MAX_INT) <= 0 && getValue().compareTo(PyInteger.MIN_INT) >= 0) {
+        if (getValue().compareTo(PyInteger.MAX_INT) <= 0
+                && getValue().compareTo(PyInteger.MIN_INT) >= 0) {
             return Py.newInteger(getValue().intValue());
         }
         return long___long__();
@@ -977,14 +981,8 @@
 
     @ExposedMethod(doc = BuiltinDocs.long___oct___doc)
     final PyString long___oct__() {
-        String s = PyInteger.toOctString(getValue());
-        if (s.startsWith("-")) {
-            return new PyString("-0" + s.substring(1, s.length()) + "L");
-        } else if (s.startsWith("0")) {
-            return new PyString(s + "L");
-        } else {
-            return new PyString("0" + s + "L");
-        }
+        // Use the prepared format specifier for octal.
+        return formatImpl(IntegerFormatter.OCT);
     }
 
     @Override
@@ -994,12 +992,21 @@
 
     @ExposedMethod(doc = BuiltinDocs.long___hex___doc)
     final PyString long___hex__() {
-        String s = PyInteger.toHexString(getValue());
-        if (s.startsWith("-")) {
-            return new PyString("-0x" + s.substring(1, s.length()) + "L");
-        } else {
-            return new PyString("0x" + s + "L");
-        }
+        // Use the prepared format specifier for hexadecimal.
+        return formatImpl(IntegerFormatter.HEX);
+    }
+
+    /**
+     * Common code used by the number-base conversion methods __oct__ and __hex__.
+     *
+     * @param spec prepared format-specifier.
+     * @return converted value of this object
+     */
+    private PyString formatImpl(Spec spec) {
+        // Traditional formatter (%-format) because #o means "-0123" not "-0o123".
+        IntegerFormatter f = new IntegerFormatter.Traditional(spec);
+        f.format(value).append('L');
+        return new PyString(f.getResult());
     }
 
     @ExposedMethod(doc = BuiltinDocs.long___str___doc)
@@ -1058,7 +1065,38 @@
 
     @ExposedMethod(doc = BuiltinDocs.long___format___doc)
     final PyObject long___format__(PyObject formatSpec) {
-        return PyInteger.formatImpl(getValue(), formatSpec);
+
+        // Parse the specification
+        Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+        InternalFormat.Formatter f;
+
+        // Try to make an integer formatter from the specification
+        IntegerFormatter fi = PyInteger.prepareFormatter(spec);
+        if (fi != null) {
+            // Bytes mode if formatSpec argument is not unicode.
+            fi.setBytes(!(formatSpec instanceof PyUnicode));
+            // Convert as per specification.
+            fi.format(value);
+            f = fi;
+
+        } else {
+            // Try to make a float formatter from the specification
+            FloatFormatter ff = PyFloat.prepareFormatter(spec);
+            if (ff != null) {
+                // Bytes mode if formatSpec argument is not unicode.
+                ff.setBytes(!(formatSpec instanceof PyUnicode));
+                // Convert as per specification.
+                ff.format(value.doubleValue());
+                f = ff;
+
+            } else {
+                // The type code was not recognised in either prepareFormatter
+                throw Formatter.unknownFormat(spec.type, "integer");
+            }
+        }
+
+        // Return a result that has the same type (str or unicode) as the formatSpec argument.
+        return f.pad().getPyResult();
     }
 
     @Override
@@ -1076,7 +1114,7 @@
             }
             return tooLow ? Integer.MIN_VALUE : Integer.MAX_VALUE;
         }
-        return (int) getValue().longValue();
+        return (int)getValue().longValue();
     }
 
     @Override
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -9,10 +9,12 @@
 import org.python.core.buffer.SimpleStringBuffer;
 import org.python.core.stringlib.FieldNameIterator;
 import org.python.core.stringlib.FloatFormatter;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
 import org.python.core.stringlib.InternalFormat.Spec;
-import org.python.core.stringlib.InternalFormatSpec;
-import org.python.core.stringlib.InternalFormatSpecParser;
 import org.python.core.stringlib.MarkupIterator;
+import org.python.core.stringlib.TextFormatter;
 import org.python.core.util.StringUtil;
 import org.python.expose.ExposedMethod;
 import org.python.expose.ExposedNew;
@@ -3897,50 +3899,68 @@
 
     @ExposedMethod(doc = BuiltinDocs.str___format___doc)
     final PyObject str___format__(PyObject formatSpec) {
-        if (!(formatSpec instanceof PyString)) {
-            throw Py.TypeError("__format__ requires str or unicode");
+
+        // Parse the specification
+        Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+
+        // Get a formatter for the specification
+        TextFormatter f = prepareFormatter(spec);
+        if (f == null) {
+            // The type code was not recognised
+            throw Formatter.unknownFormat(spec.type, "string");
         }
 
-        PyString formatSpecStr = (PyString)formatSpec;
-        String result;
-        try {
-            String specString = formatSpecStr.getString();
-            InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
-            result = formatString(getString(), spec);
-        } catch (IllegalArgumentException e) {
-            throw Py.ValueError(e.getMessage());
+        // Bytes mode if neither this nor formatSpec argument is Unicode.
+        boolean unicode = this instanceof PyUnicode || formatSpec instanceof PyUnicode;
+        f.setBytes(!unicode);
+
+        // Convert as per specification.
+        f.format(getString());
+
+        // Return unicode if either this or the formatSpec argument is unicode, otherwise str.
+        return f.pad().getPyResult();
+    }
+
+    /**
+     * Common code for {@link PyString} and {@link PyUnicode} to prepare a {@link TextFormatter}
+     * from a parsed specification. The object returned has format method
+     * {@link TextFormatter#format(String)} that treats its argument as UTF-16 encoded unicode (not
+     * just <code>char</code>s). That method will format its argument ( <code>str</code> or
+     * <code>unicode</code>) according to the PEP 3101 formatting specification supplied here. This
+     * would be used during <code>text.__format__(".5s")</code> or
+     * <code>"{:.5s}".format(text)</code> where <code>text</code> is this Python string.
+     *
+     * @param spec a parsed PEP-3101 format specification.
+     * @return a formatter ready to use, or null if the type is not a string format type.
+     * @throws PyException(ValueError) if the specification is faulty.
+     */
+    @SuppressWarnings("fallthrough")
+    static TextFormatter prepareFormatter(Spec spec) throws PyException {
+        // Slight differences between format types
+        switch (spec.type) {
+
+            case Spec.NONE:
+            case 's':
+                // Check for disallowed parts of the specification
+                if (spec.grouping) {
+                    throw Formatter.notAllowed("Grouping", "string", spec.type);
+                } else if (Spec.specified(spec.sign)) {
+                    throw Formatter.signNotAllowed("string", '\0');
+                } else if (spec.alternate) {
+                    throw Formatter.alternateFormNotAllowed("string");
+                } else if (spec.align == '=') {
+                    throw Formatter.alignmentNotAllowed('=', "string");
+                }
+                // spec may be incomplete. The defaults are those commonly used for string formats.
+                spec = spec.withDefaults(Spec.STRING);
+                // Get a formatter for the specification
+                return new TextFormatter(spec);
+
+            default:
+                // The type code was not recognised
+                return null;
         }
-        return formatSpecStr.createInstance(result);
-    }
-
-    /**
-     * Format the given text according to a parsed PEP 3101 formatting specification, as during
-     * <code>text.__format__(format_spec)</code> or <code>"{:s}".format(text)</code> where
-     * <code>text</code> is a Python string.
-     *
-     * @param text to format
-     * @param spec the parsed PEP 3101 formatting specification
-     * @return the result of the formatting
-     */
-    public static String formatString(String text, InternalFormatSpec spec) {
-        if (spec.sign != '\0') {
-            throw new IllegalArgumentException("Sign not allowed in string format specifier");
-        }
-        if (spec.alternate) {
-            throw new IllegalArgumentException(
-                    "Alternate form (#) not allowed in string format specifier");
-        }
-        if (spec.align == '=') {
-            throw new IllegalArgumentException(
-                    "'=' alignment not allowed in string format specifier");
-        }
-        if (spec.precision >= 0 && text.length() > spec.precision) {
-            text = text.substring(0, spec.precision);
-        }
-        return spec.pad(text, '<', 0);
-    }
-
-    /* arguments' conversion helper */
+    }
 
     @Override
     public String asString(int index) throws PyObject.ConversionException {
@@ -4005,10 +4025,6 @@
     String format;
     /** Where the output is built. */
     StringBuilder buffer;
-    /** Remembers that the value currently converted is negative */
-    boolean negative;
-    /** Precision from format specification. */
-    int precision;
     /**
      * Index into args of argument currently being worked, or special values indicating -1: a single
      * item that has not yet been used, -2: a single item that has already been used, -3: a mapping.
@@ -4017,7 +4033,7 @@
     /** Arguments supplied to {@link #format(PyObject)} method. */
     PyObject args;
     /** Indicate a <code>PyUnicode</code> result is expected. */
-    boolean unicodeCoercion;
+    boolean needUnicode;
 
     final char pop() {
         try {
@@ -4053,7 +4069,7 @@
     public StringFormatter(String format, boolean unicodeCoercion) {
         index = 0;
         this.format = format;
-        this.unicodeCoercion = unicodeCoercion;
+        this.needUnicode = unicodeCoercion;
         buffer = new StringBuilder(format.length() + 100);
     }
 
@@ -4106,211 +4122,129 @@
         }
     }
 
-    private void checkPrecision(String type) {
-        if (precision > 250) {
-            // A magic number. Larger than in CPython.
-            throw Py.OverflowError("formatted " + type + " is too long (precision too long?)");
-        }
-
-    }
-
     /**
-     * Format the argument interpreted as a long, using the argument's <code>__str__</code>,
-     * <code>__oct__</code>, or <code>__hex__</code> method according to <code>type</code>. If v is
-     * being treated as signed, the sign of v is transferred to {@link #negative} and the absolute
-     * value is converted. The <code>altFlag</code> argument controls the appearance of a "0x" or
-     * "0X" prefix in the hex case, or a "0" prefix in the octal case. The hexadecimal case, the
-     * case of characters and digits will match the type ('x' meaning lowercase, 'X' meaning
-     * uppercase).
+     * Return the argument as either a {@link PyInteger} or a {@link PyLong} according to its
+     * <code>__int__</code> method, or its <code>__long__</code> method. If the argument has neither
+     * method, or both raise an exception, we return the argument itself. The caller must check the
+     * return type.
      *
      * @param arg to convert
-     * @param type one of 'o' for octal, 'x' or 'X' for hex, anything else calls
-     *            <code>arg.__str__</code>.
-     * @param altFlag if true there will be a prefix
-     * @return converted value as <code>String</code>
+     * @return PyInteger or PyLong if possible
      */
-    private String formatLong(PyObject arg, char type, boolean altFlag) {
-        // Convert using the appropriate type
-        // XXX Results in behaviour divergent from CPython when any of the methods is overridden.
-        PyString argAsString;
-        switch (type) {
-            case 'o':
-                argAsString = arg.__oct__();
-                break;
-            case 'x':
-            case 'X':
-                argAsString = arg.__hex__();
-                break;
-            default:
-                argAsString = arg.__str__();
-                break;
-        }
-
-        checkPrecision("long");
-        String s = argAsString.toString();
-        int end = s.length();
-        int ptr = 0;
-
-        // In the hex case, the __hex__ return starts 0x
-        // XXX (we assume, perhaps falsely)
-        int numnondigits = 0;
-        if (type == 'x' || type == 'X') {
-            numnondigits = 2;
-        }
-
-        // Strip a "long" indicator
-        if (s.endsWith("L")) {
-            end--;
-        }
-
-        // Strip a possible sign to member negative
-        negative = s.charAt(0) == '-';
-        if (negative) {
-            ptr++;
-        }
-
-        // The formatted number is s[ptr:end] and starts with numnondigits non-digits.
-        int numdigits = end - numnondigits - ptr;
-        if (!altFlag) {
-            // We should have no "base tag" '0' or "0x" on the front.
-            switch (type) {
-                case 'o':
-                    // Strip the '0'
-                    if (numdigits > 1) {
-                        ++ptr;
-                        --numdigits;
-                    }
-                    break;
-                case 'x':
-                case 'X':
-                    // Strip the "0x"
-                    ptr += 2;
-                    numnondigits -= 2;
-                    break;
-            }
-        }
-
-        // If necessary, add leading zeros to the numerical digits part.
-        if (precision > numdigits) {
-            // Recompose the formatted number in this buffer
-            StringBuilder buf = new StringBuilder();
-            // The base indicator prefix
-            for (int i = 0; i < numnondigits; ++i) {
-                buf.append(s.charAt(ptr++));
-            }
-            // The extra zeros
-            for (int i = 0; i < precision - numdigits; i++) {
-                buf.append('0');
-            }
-            // The previously known digits
-            for (int i = 0; i < numdigits; i++) {
-                buf.append(s.charAt(ptr++));
-            }
-            s = buf.toString();
-        } else if (end < s.length() || ptr > 0) {
-            // It's only necessary to extract the formatted number from s
-            s = s.substring(ptr, end);
-        }
-
-        // And finally, deal with the case, so it matches x or X.
-        switch (type) {
-            case 'X':
-                s = s.toUpperCase();
-                break;
-        }
-        return s;
-    }
-
-    /**
-     * Formats arg as an integer, with the specified radix. The integer value is obtained from the
-     * result of <code>arg.__int__()</code>. <code>type</code> and <code>altFlag</code> are passed
-     * to {@link #formatLong(PyObject, char, boolean)} in case the result is a PyLong.
-     *
-     * @param arg to convert
-     * @param radix in which to express <code>arg</code>
-     * @param unsigned true if required to interpret a 32-bit integer as unsigned ('u' legacy?).
-     * @param type of conversion ('d', 'o', 'x', or 'X')
-     * @param altFlag '#' present in format (causes "0x" prefix in hex, and '0' prefix in octal)
-     * @return string form of the value
-     */
-    private String formatInteger(PyObject arg, int radix, boolean unsigned, char type,
-            boolean altFlag) {
-        PyObject argAsInt;
+    private PyObject asNumber(PyObject arg) {
         if (arg instanceof PyInteger || arg instanceof PyLong) {
-            argAsInt = arg;
+            // arg is already acceptable
+            return arg;
+
         } else {
-            // use __int__ to get an int (or long)
-            if (arg instanceof PyFloat) {
-                // safe to call __int__:
-                argAsInt = arg.__int__();
+            // use __int__ or __long__ to get an int (or long)
+            if (arg.getClass() == PyFloat.class) {
+                // A common case where it is safe to return arg.__int__()
+                return arg.__int__();
+
             } else {
-                // We can't simply call arg.__int__() because PyString implements
-                // it without exposing it to python (i.e, str instances has no
-                // __int__ attribute). So, we would support strings as arguments
-                // for %d format, which is forbidden by CPython tests (on
-                // test_format.py).
+                /*
+                 * In general, we can't simply call arg.__int__() because PyString implements it
+                 * without exposing it to python (str has no __int__). This would make str
+                 * acceptable to integer format specifiers, which is forbidden by CPython tests
+                 * (test_format.py). PyString implements __int__ perhaps only to help the int
+                 * constructor. Maybe that was a bad idea?
+                 */
                 try {
-                    argAsInt = arg.__getattr__("__int__").__call__();
+                    // Result is the result of arg.__int__() if that works
+                    return arg.__getattr__("__int__").__call__();
                 } catch (PyException e) {
-                    // XXX: Swallow custom AttributeError throws from __int__ methods
-                    // No better alternative for the moment
-                    if (e.match(Py.AttributeError)) {
-                        throw Py.TypeError("int argument required");
-                    }
-                    throw e;
+                    // Swallow the exception
+                }
+
+                // Try again with arg.__long__()
+                try {
+                    // Result is the result of arg.__long__() if that works
+                    return arg.__getattr__("__long__").__call__();
+                } catch (PyException e) {
+                    // No __long__ defined (at Python level)
+                    return arg;
                 }
             }
         }
-        if (argAsInt instanceof PyInteger) {
-            // This call does not provide the prefix and will be lowercase.
-            return formatInteger(((PyInteger)argAsInt).getValue(), radix, unsigned);
-        } else { // must be a PyLong (as per __int__ contract)
-            // This call provides the base prefix and case-matches with 'x' or 'X'.
-            return formatLong(argAsInt, type, altFlag);
-        }
     }
 
     /**
-     * Convert a 32-bit integer (as from a {@link PyInteger}) to characters, signed or unsigned. The
-     * values is presented in a <code>long</code>. The string result is left-padded with zeros to
-     * the stated {@link #precision}. If v is being treated as signed, the sign of v is transferred
-     * to {@link #negative} and the absolute value is converted. Otherwise (unsigned case)
-     * <code>0x100000000L + v</code> is converted. This method does not provide the '0' or "0x"
-     * prefix, just the padded digit string.
+     * Return the argument as a {@link PyFloat} according to its <code>__float__</code> method. If
+     * the argument has no such method, or it raises an exception, we return the argument itself.
+     * The caller must check the return type.
      *
-     * @param v value to convert
-     * @param radix of conversion
-     * @param unsigned if should be treated as unsigned
-     * @return string form
+     * @param arg to convert
+     * @return PyFloat if possible
      */
-    private String formatInteger(long v, int radix, boolean unsigned) {
-        checkPrecision("integer");
-        if (unsigned) {
-            // If the high bit was set, this will have been sign-extended: correct that.
-            if (v < 0) {
-                v = 0x100000000l + v;
-            }
+    private PyObject asFloat(PyObject arg) {
+
+        if (arg instanceof PyFloat) {
+            // arg is already acceptable
+            return arg;
+
         } else {
-            // If the high bit was set, the sign extension was correct, but we need sign + abs(v).
-            if (v < 0) {
-                negative = true;
-                v = -v;
+            // use __float__ to get a float.
+            if (arg.getClass() == PyFloat.class) {
+                // A common case where it is safe to return arg.__float__()
+                return arg.__float__();
+
+            } else {
+                /*
+                 * In general, we can't simply call arg.__float__() because PyString implements it
+                 * without exposing it to python (str has no __float__). This would make str
+                 * acceptable to float format specifiers, which is forbidden by CPython tests
+                 * (test_format.py). PyString implements __float__ perhaps only to help the float
+                 * constructor. Maybe that was a bad idea?
+                 */
+                try {
+                    // Result is the result of arg.__float__() if that works
+                    return arg.__getattr__("__float__").__call__();
+                } catch (PyException e) {
+                    // No __float__ defined (at Python level)
+                    return arg;
+                }
             }
         }
-        // Use the method in java.lang.Long (lowercase, no prefix)
-        String s = Long.toString(v, radix);
-        // But zero pad to the requested precision
-        while (s.length() < precision) {
-            s = "0" + s;
-        }
-        return s;
-    }
-
-    private double asDouble(PyObject obj) {
-        try {
-            return obj.asDouble();
-        } catch (PyException pye) {
-            throw !pye.match(Py.TypeError) ? pye : Py.TypeError("float argument required");
+    }
+
+    /**
+     * Return the argument as either a {@link PyString} or a {@link PyUnicode}, and set the
+     * {@link #needUnicode} member accordingly. If we already know we are building a Unicode string
+     * (<code>needUnicode==true</code>), then any argument that is not already a
+     * <code>PyUnicode</code> will be converted by calling its <code>__unicode__</code> method.
+     * Conversely, if we are not yet building a Unicode string (<code>needUnicode==false</code> ),
+     * then a PyString will pass unchanged, a <code>PyUnicode</code> will switch us to Unicode mode
+     * (<code>needUnicode=true</code>), and any other type will be converted by calling its
+     * <code>__str__</code> method, which will return a <code>PyString</code>, or possibly a
+     * <code>PyUnicode</code>, which will switch us to Unicode mode.
+     *
+     * @param arg to convert
+     * @return PyString or PyUnicode equivalent
+     */
+    private PyString asText(PyObject arg) {
+
+        if (arg instanceof PyUnicode) {
+            // arg is already acceptable.
+            needUnicode = true;
+            return (PyUnicode)arg;
+
+        } else if (needUnicode) {
+            // The string being built is unicode, so we need that version of the arg.
+            return arg.__unicode__();
+
+        } else if (arg instanceof PyString) {
+            // The string being built is not unicode, so arg is already acceptable.
+            return (PyString)arg;
+
+        } else {
+            // The string being built is not unicode, so use __str__ to get a PyString.
+            PyString s = arg.__str__();
+            // But __str__ might return PyUnicode, and we have to notice that.
+            if (s instanceof PyUnicode) {
+                needUnicode = true;
+            }
+            return s;
         }
     }
 
@@ -4325,7 +4259,7 @@
     public PyString format(PyObject args) {
         PyObject dict = null;
         this.args = args;
-        boolean needUnicode = unicodeCoercion;
+
         if (args instanceof PyTuple) {
             // We will simply work through the tuple elements
             argIndex = 0;
@@ -4341,16 +4275,6 @@
 
         while (index < format.length()) {
 
-            // Attributes to be parsed from the next format specifier
-            boolean ljustFlag = false;
-            boolean signFlag = false;
-            boolean blankFlag = false;
-            boolean altFlag = false;
-            boolean zeroFlag = false;
-
-            int width = -1;
-            precision = -1;
-
             // Read one character from the format string
             char c = pop();
             if (c != '%') {
@@ -4360,6 +4284,14 @@
 
             // It's a %, so the beginning of a conversion specifier. Parse it.
 
+            // Attributes to be parsed from the next format specifier
+            boolean altFlag = false;
+            char sign = Spec.NONE;
+            char fill = ' ';
+            char align = '>';
+            int width = Spec.UNSPECIFIED;
+            int precision = Spec.UNSPECIFIED;
+
             // A conversion specifier contains the following components, in this order:
             // + The '%' character, which marks the start of the specifier.
             // + Mapping key (optional), consisting of a parenthesised sequence of characters.
@@ -4399,19 +4331,22 @@
             while (true) {
                 switch (c = pop()) {
                     case '-':
-                        ljustFlag = true;
+                        align = '<';
                         continue;
                     case '+':
-                        signFlag = true;
+                        sign = '+';
                         continue;
                     case ' ':
-                        blankFlag = true;
+                        if (!Spec.specified(sign)) {
+                            // Blank sign only wins if '+' not specified.
+                            sign = ' ';
+                        }
                         continue;
                     case '#':
                         altFlag = true;
                         continue;
                     case '0':
-                        zeroFlag = true;
+                        fill = '0';
                         continue;
                 }
                 break;
@@ -4428,7 +4363,7 @@
             width = getNumber();
             if (width < 0) {
                 width = -width;
-                ljustFlag = true;
+                align = '<';
             }
 
             /*
@@ -4451,284 +4386,149 @@
                 c = pop();
             }
 
-            // c is now the conversion type.
-            if (c == '%') {
-                // It was just a percent sign after all
-                buffer.append(c);
-                continue;
+            /*
+             * As a function of the conversion type (currently in c) override some of the formatting
+             * flags we read from the format specification.
+             */
+            switch (c) {
+                case 's':
+                case 'r':
+                case 'c':
+                case '%':
+                    // These have string-like results: fill, if needed, is always blank.
+                    fill = ' ';
+                    break;
+
+                default:
+                    if (fill == '0' && align == '>') {
+                        // Zero-fill comes after the sign in right-justification.
+                        align = '=';
+                    } else {
+                        // If left-justifying, the fill is always blank.
+                        fill = ' ';
+                    }
             }
 
             /*
+             * Encode as an InternalFormat.Spec. The values in the constructor always have specified
+             * values, except for sign, width and precision.
+             */
+            Spec spec = new Spec(fill, align, sign, altFlag, width, false, precision, c);
+
+            /*
              * Process argument according to format specification decoded from the string. It is
-             * important we don't read the argumnent from the list until this point because of the
+             * important we don't read the argument from the list until this point because of the
              * possibility that width and precision were specified via the argument list.
              */
-            PyObject arg = getarg();
-            String string = null;
-            negative = false;
-
-            // Independent of type, decide the padding character based on decoded flags.
-            char fill = ' ';
-            if (zeroFlag) {
-                fill = '0';
-            } else {
-                fill = ' ';
-            }
-
-            // Perform the type-specific formatting
-            switch (c) {
-
-                case 's':
-                    // String (converts any Python object using str()).
-                    if (arg instanceof PyUnicode) {
-                        needUnicode = true;
+
+            // Depending on the type of conversion, we use one of these formatters:
+            FloatFormatter ff;
+            IntegerFormatter fi;
+            TextFormatter ft;
+            Formatter f; // = ff, fi or ft, whichever we actually use.
+
+            switch (spec.type) {
+
+                case 's': // String: converts any object using __str__(), __unicode__() ...
+                case 'r': // ... or repr().
+                    PyObject arg = getarg();
+
+                    // Get hold of the actual object to display (may set needUnicode)
+                    PyString argAsString = asText(spec.type == 's' ? arg : arg.__repr__());
+                    // Format the str/unicode form of the argument using this Spec.
+                    f = ft = new TextFormatter(buffer, spec);
+                    ft.setBytes(!needUnicode);
+                    ft.format(argAsString.getString());
+                    break;
+
+                case 'd': // All integer formats (+case for X).
+                case 'o':
+                case 'x':
+                case 'X':
+                case 'c': // Single character (accepts integer or single character string).
+                case 'u': // Obsolete type identical to 'd'.
+                case 'i': // Compatibility with scanf().
+
+                    // Format the argument using this Spec.
+                    f = fi = new IntegerFormatter.Traditional(buffer, spec);
+                    // If not producing PyUnicode, disallow codes >255.
+                    fi.setBytes(!needUnicode);
+
+                    arg = getarg();
+
+                    if (arg instanceof PyString && spec.type == 'c') {
+                        if (arg.__len__() != 1) {
+                            throw Py.TypeError("%c requires int or char");
+                        } else {
+                            if (!needUnicode && arg instanceof PyUnicode) {
+                                // Change of mind forced by encountering unicode object.
+                                needUnicode = true;
+                                fi.setBytes(false);
+                            }
+                            fi.format(((PyString)arg).getString().codePointAt(0));
+                        }
+
+                    } else {
+                        // Note various types accepted here as long as they have an __int__ method.
+                        PyObject argAsNumber = asNumber(arg);
+
+                        // We have to check what we got back.
+                        if (argAsNumber instanceof PyInteger) {
+                            fi.format(((PyInteger)argAsNumber).getValue());
+                        } else if (argAsNumber instanceof PyLong) {
+                            fi.format(((PyLong)argAsNumber).getValue());
+                        } else {
+                            // It couldn't be converted, raise the error here
+                            throw Py.TypeError("%" + spec.type
+                                    + " format: a number is required, not "
+                                    + arg.getType().fastGetName());
+                        }
                     }
-                    // fall through ...
-
-                case 'r':
-                    // String (converts any Python object using repr()).
-                    fill = ' ';
-                    if (c == 's') {
-                        if (needUnicode) {
-                            string = arg.__unicode__().toString();
-                        } else {
-                            string = arg.__str__().toString();
-                        }
-                    } else {
-                        string = arg.__repr__().toString();
-                    }
-                    if (precision >= 0 && string.length() > precision) {
-                        string = string.substring(0, precision);
-                    }
 
                     break;
 
-                case 'i':
-                case 'd':
-                    // Signed integer decimal. Note floats accepted.
-                    if (arg instanceof PyLong) {
-                        string = formatLong(arg, c, altFlag);
-                    } else {
-                        string = formatInteger(arg, 10, false, c, altFlag);
-                    }
-                    break;
-
-                case 'u':
-                    // Obsolete type – it is identical to 'd'. (Why not identical here?)
-                    if (arg instanceof PyLong) {
-                        string = formatLong(arg, c, altFlag);
-                    } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
-                        string = formatInteger(arg, 10, false, c, altFlag);
-                    } else {
-                        throw Py.TypeError("int argument required");
-                    }
-                    break;
-
-                case 'o':
-                    // Signed octal value. Note floats accepted.
-                    if (arg instanceof PyLong) {
-                        // This call provides the base prefix '0' if altFlag.
-                        string = formatLong(arg, c, altFlag);
-                    } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
-                        // This call does not provide the '0' prefix and will be lowercase ...
-                        // ... except where arg.__int__ returns PyLong, then it's like formatLong.
-                        string = formatInteger(arg, 8, false, c, altFlag);
-                        if (altFlag && string.charAt(0) != '0') {
-                            string = "0" + string;
-                        }
-                    } else {
-                        throw Py.TypeError("int argument required");
-                    }
-                    break;
-
-                case 'x':
-                    // Signed hexadecimal (lowercase). Note floats accepted.
-                    if (arg instanceof PyLong) {
-                        // This call provides the base prefix "0x" if altFlag and case-matches c.
-                        string = formatLong(arg, c, altFlag);
-                    } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
-                        // This call does not provide the "0x" prefix and will be lowercase.
-                        // ... except where arg.__int__ returns PyLong, then it's like formatLong.
-                        string = formatInteger(arg, 16, false, c, altFlag);
-                        string = string.toLowerCase();
-                        if (altFlag) {
-                            string = "0x" + string;
-                        }
-                    } else {
-                        throw Py.TypeError("int argument required");
-                    }
-                    break;
-
-                case 'X':
-                    // Signed hexadecimal (uppercase). Note floats accepted.
-                    if (arg instanceof PyLong) {
-                        // This call provides the base prefix "0x" if altFlag and case-matches c.
-                        string = formatLong(arg, c, altFlag);
-                    } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
-                        // This call does not provide the "0x" prefix and will be lowercase.
-                        // ... except where arg.__int__ returns PyLong, then it's like formatLong.
-                        string = formatInteger(arg, 16, false, c, altFlag);
-                        string = string.toUpperCase();
-                        if (altFlag) {
-                            string = "0X" + string;
-                        }
-                    } else {
-                        throw Py.TypeError("int argument required");
-                    }
-                    break;
-
-                case 'e':
+                case 'e': // All floating point formats (+case).
                 case 'E':
                 case 'f':
                 case 'F':
                 case 'g':
                 case 'G':
-                    // All floating point formats (+case).
-
-                    // Convert the flags (local variables) to the form needed in the Spec object.
-                    char align = ljustFlag ? '<' : '>';
-                    char sign = signFlag ? '+' : (blankFlag ? ' ' : Spec.NONE);
-                    int w = Spec.UNSPECIFIED;
-                    Spec spec = new Spec(fill, align, sign, altFlag, w, false, precision, c);
 
                     // Format using this Spec the double form of the argument.
-                    FloatFormatter f = new FloatFormatter(spec);
-                    double v = asDouble(arg);
-                    f.format(v);
-                    string = f.getResult();
-
-                    // Suppress subsequent attempts to insert a correct sign, done already.
-                    signFlag = blankFlag = negative = false;
+                    f = ff = new FloatFormatter(buffer, spec);
+                    ff.setBytes(!needUnicode);
+
+                    // Note various types accepted here as long as they have a __float__ method.
+                    arg = getarg();
+                    PyObject argAsFloat = asFloat(arg);
+
+                    // We have to check what we got back.
+                    if (argAsFloat instanceof PyFloat) {
+                        ff.format(((PyFloat)argAsFloat).getValue());
+                    } else {
+                        // It couldn't be converted, raise the error here
+                        throw Py.TypeError("float argument required, not "
+                                + arg.getType().fastGetName());
+                    }
+
                     break;
 
-                case 'c':
-                    // Single character (accepts integer or single character string).
-                    fill = ' ';
-                    if (arg instanceof PyString) {
-                        string = ((PyString)arg).toString();
-                        if (string.length() != 1) {
-                            throw Py.TypeError("%c requires int or char");
-                        }
-                        if (arg instanceof PyUnicode) {
-                            needUnicode = true;
-                        }
-                        break;
-                    }
-
-                    // arg is not a str (or unicode)
-                    int val;
-                    try {
-                        // Explicitly __int__ so we can look for an AttributeError (which is
-                        // less invasive to mask than a TypeError)
-                        val = arg.__int__().asInt();
-                    } catch (PyException e) {
-                        if (e.match(Py.AttributeError)) {
-                            throw Py.TypeError("%c requires int or char");
-                        }
-                        throw e;
-                    }
-                    // Range check, according to ultimate type of result as presentl;y known.
-                    if (!needUnicode) {
-                        if (val < 0) {
-                            throw Py.OverflowError("unsigned byte integer is less than minimum");
-                        } else if (val > 255) {
-                            throw Py.OverflowError("unsigned byte integer is greater than maximum");
-                        }
-                    } else if (val < 0 || val > PySystemState.maxunicode) {
-                        throw Py.OverflowError("%c arg not in range(0x110000) (wide Python build)");
-                    }
-                    string = new String(new int[] {val}, 0, 1);
+                case '%': // Percent symbol, but surprisingly, padded.
+
+                    // We use an integer formatter.
+                    f = fi = new IntegerFormatter.Traditional(buffer, spec);
+                    fi.setBytes(!needUnicode);
+                    fi.format('%');
                     break;
 
                 default:
                     throw Py.ValueError("unsupported format character '"
-                            + codecs.encode(Py.newString(c), null, "replace") + "' (0x"
-                            + Integer.toHexString(c) + ") at index " + (index - 1));
+                            + codecs.encode(Py.newString(spec.type), null, "replace") + "' (0x"
+                            + Integer.toHexString(spec.type) + ") at index " + (index - 1));
             }
 
-            /*
-             * We have now dealt with the translation of the (absolute value of the) argument, in
-             * variable string[]. In the next sections we deal with sign, padding and base prefix.
-             */
-            int length = string.length();
-            int skip = 0;
-
-            // Decide how to represent the sign according to format and actual sign of argument.
-            String signString = null;
-            if (negative) {
-                signString = "-";
-            } else {
-                if (signFlag) {
-                    signString = "+";
-                } else if (blankFlag) {
-                    signString = " ";
-                }
-            }
-
-            // The width (from here on) will be the remaining width on the line.
-            if (width < length) {
-                width = length;
-            }
-
-            // Insert the sign in the buffer and adjust the width.
-            if (signString != null) {
-                if (fill != ' ') {
-                    // When the fill is not space, the sign comes before the fill.
-                    buffer.append(signString);
-                }
-                // Adjust width for sign.
-                if (width > length) {
-                    width--;
-                }
-            }
-
-            // Insert base prefix used with alternate mode for hexadecimal.
-            if (altFlag && (c == 'x' || c == 'X')) {
-                if (fill != ' ') {
-                    // When the fill is not space, this base prefix comes before the fill.
-                    buffer.append('0');
-                    buffer.append(c);
-                    skip += 2;
-                }
-                // Adjust width for base prefix.
-                width -= 2;
-                if (width < 0) {
-                    width = 0;
-                }
-                length -= 2;
-            }
-
-            // Fill on the left of the item.
-            if (width > length && !ljustFlag) {
-                do {
-                    buffer.append(fill);
-                } while (--width > length);
-            }
-
-            // If the fill is spaces, we will have deferred the sign and hex base prefix
-            if (fill == ' ') {
-                if (signString != null) {
-                    buffer.append(signString);
-                }
-                if (altFlag && (c == 'x' || c == 'X')) {
-                    buffer.append('0');
-                    buffer.append(c);
-                    skip += 2;
-                }
-            }
-
-            // Now append the converted argument.
-            if (skip > 0) {
-                // The string contains a hex-prefix, but we have already inserted one.
-                buffer.append(string.substring(skip));
-            } else {
-                buffer.append(string);
-            }
-
-            // If this hasn't filled the space required, add right-padding.
-            while (--width >= length) {
-                buffer.append(' ');
-            }
+            // Pad the result as specified (in-place, in the buffer).
+            f.pad();
         }
 
         /*
@@ -4743,10 +4543,7 @@
         }
 
         // Return the final buffer contents as a str or unicode as appropriate.
-        if (needUnicode) {
-            return new PyUnicode(buffer);
-        }
-        return new PyString(buffer);
+        return needUnicode ? new PyUnicode(buffer) : new PyString(buffer);
     }
 
 }
diff --git a/src/org/python/core/__builtin__.java b/src/org/python/core/__builtin__.java
--- a/src/org/python/core/__builtin__.java
+++ b/src/org/python/core/__builtin__.java
@@ -4,20 +4,20 @@
  */
 package org.python.core;
 
-import java.io.EOFException;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.nio.ByteBuffer;
 import java.util.Iterator;
 import java.util.Map;
 
 import org.python.antlr.base.mod;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Spec;
 import org.python.core.util.ExtraMath;
 import org.python.core.util.RelativeFile;
-import org.python.core.util.StringUtil;
 import org.python.modules._functools._functools;
 
 class BuiltinFunctions extends PyBuiltinFunctionSet {
@@ -768,7 +768,7 @@
     /**
      * Built-in Python function ord() applicable to the string-like types <code>str</code>,
      * <code>bytearray</code>, <code>unicode</code>.
-     * 
+     *
      * @param c string-like object of length 1
      * @return ordinal value of character or byte value in
      * @throws PyException (TypeError) if not a string-like type
@@ -1245,10 +1245,10 @@
         PyObject[] args;
         if (level < 0) {
         	// for backward compatibility provide only 4 arguments
-        	args = new PyObject[] {Py.newString(name), globals, locals, 
+        	args = new PyObject[] {Py.newString(name), globals, locals,
         			fromlist};
         } else {
-        	args = new PyObject[] {Py.newString(name), globals, locals, 
+        	args = new PyObject[] {Py.newString(name), globals, locals,
         			fromlist, Py.newInteger(level)};
         }
         PyObject module = __import__.__call__(args);
@@ -1469,7 +1469,7 @@
                 endObject = useUnicode ? Py.newUnicode(end) : Py.newString(end);
             }
 
-            out.print(values, sepObject, endObject); 
+            out.print(values, sepObject, endObject);
         }
         return Py.None;
     }
@@ -1774,10 +1774,6 @@
     public PyObject __call__(PyObject args[], String kwds[]) {
         ArgParser ap = new ArgParser("bin", args, kwds, new String[] {"number"}, 1);
         ap.noKeywords();
-        PyObject number = ap.getPyObject(0);
-
-        //XXX: this could be made more efficient by using a binary only formatter
-        //     instead of using generic formatting.
-        return number.__format__(new PyString("#b"));
+        return IntegerFormatter.bin(ap.getPyObject(0));
     }
 }
diff --git a/src/org/python/core/stringlib/FloatFormatter.java b/src/org/python/core/stringlib/FloatFormatter.java
--- a/src/org/python/core/stringlib/FloatFormatter.java
+++ b/src/org/python/core/stringlib/FloatFormatter.java
@@ -18,6 +18,10 @@
     /** The rounding mode dominant in the formatter. */
     static final RoundingMode ROUND_PY = RoundingMode.HALF_EVEN;
 
+    /** Limit the size of results. */
+    // No-one needs more than log10(Double.MAX_VALUE) - log2(Double.MIN_VALUE) = 1383 digits.
+    static final int MAX_PRECISION = 1400;
+
     /** If it contains no decimal point, this length is zero, and 1 otherwise. */
     private int lenPoint;
     /** The length of the fractional part, right of the decimal point. */
@@ -30,26 +34,14 @@
     private int minFracDigits;
 
     /**
-     * Construct the formatter from a specification. A reference is held to this specification, but
-     * it will not be modified by the actions of this class.
+     * Construct the formatter from a client-supplied buffer, to which the result will be appended,
+     * and a specification. Sets {@link #mark} to the end of the buffer.
      *
+     * @param result destination buffer
      * @param spec parsed conversion specification
      */
-    public FloatFormatter(Spec spec) {
-        // Space for result is based on padded width, or precision, whole part & furniture.
-        this(spec, 1, 0);
-    }
-
-    /**
-     * Construct the formatter from a specification and an explicit initial buffer capacity. A
-     * reference is held to this specification, but it will not be modified by the actions of this
-     * class.
-     *
-     * @param spec parsed conversion specification
-     * @param width expected for the formatted result
-     */
-    public FloatFormatter(Spec spec, int width) {
-        super(spec, width);
+    public FloatFormatter(StringBuilder result, Spec spec) {
+        super(result, spec);
         if (spec.alternate) {
             // Alternate form means do not trim the zero fractional digits.
             minFracDigits = -1;
@@ -66,20 +58,26 @@
     }
 
     /**
-     * Construct the formatter from a specification and two extra hints about the initial buffer
-     * capacity. A reference is held to this specification, but it will not be modified by the
-     * actions of this class.
+     * Construct the formatter from a specification, allocating a buffer internally for the result.
      *
      * @param spec parsed conversion specification
-     * @param count of elements likely to be formatted
-     * @param margin for elements formatted only once
      */
-    public FloatFormatter(Spec spec, int count, int margin) {
-        /*
-         * Rule of thumb used here: in e format w = (p-1) + len("+1.e+300") = p+7; in f format w = p
-         * + len("1,000,000.") = p+10. If we're wrong, the result will have to grow. No big deal.
-         */
-        this(spec, Math.max(spec.width + 1, count * (spec.precision + 10) + margin));
+    public FloatFormatter(Spec spec) {
+        this(new StringBuilder(size(spec)), spec);
+    }
+
+    /**
+     * Recommend a buffer size for a given specification, assuming one float is converted. This will
+     * be a "right" answer for e and g-format, and for f-format with values up to 9,999,999.
+     *
+     * @param spec parsed conversion specification
+     */
+    public static int size(Spec spec) {
+        // Rule of thumb used here (no right answer):
+        // in e format each float occupies: (p-1) + len("+1.e+300") = p+7;
+        // in f format each float occupies: p + len("1,000,000.%") = p+11;
+        // or an explicit (minimum) width may be given, with one overshoot possible.
+        return Math.max(spec.width + 1, spec.getPrecision(6) + 11);
     }
 
     /**
@@ -160,12 +158,19 @@
         // Precision defaults to 6 (or 12 for none-format)
         int precision = spec.getPrecision(Spec.specified(spec.type) ? 6 : 12);
 
+        // Guard against excessive result precision
+        // XXX Possibly better raised before result is allocated/sized.
+        if (precision > MAX_PRECISION) {
+            throw precisionTooLarge("float");
+        }
+
         /*
          * By default, the prefix of a positive number is "", but the format specifier may override
          * it, and the built-in type complex needs to override the format.
          */
-        if (positivePrefix == null && Spec.specified(spec.sign) && spec.sign != '-') {
-            positivePrefix = Character.toString(spec.sign);
+        char sign = spec.sign;
+        if (positivePrefix == null && Spec.specified(sign) && sign != '-') {
+            positivePrefix = Character.toString(sign);
         }
 
         // Different process for each format type, ignoring case for now.
@@ -905,8 +910,8 @@
     }
 
     /**
-     * Return the index in {@link #result} of the first letter. helper for {@link #uppercase()} and
-     * {@link #getExponent()}
+     * Return the index in {@link #result} of the first letter. This is a helper for
+     * {@link #uppercase()} and {@link #getExponent()}
      */
     private int indexOfMarker() {
         return start + lenSign + lenWhole + lenPoint + lenFraction;
diff --git a/src/org/python/core/stringlib/IntegerFormatter.java b/src/org/python/core/stringlib/IntegerFormatter.java
new file mode 100644
--- /dev/null
+++ b/src/org/python/core/stringlib/IntegerFormatter.java
@@ -0,0 +1,779 @@
+// Copyright (c) Jython Developers
+package org.python.core.stringlib;
+
+import java.math.BigInteger;
+
+import org.python.core.Py;
+import org.python.core.PyInteger;
+import org.python.core.PyLong;
+import org.python.core.PyObject;
+import org.python.core.PyString;
+import org.python.core.PySystemState;
+import org.python.core.stringlib.InternalFormat.Spec;
+
+/**
+ * A class that provides the implementation of integer formatting. In a limited way, it acts like a
+ * StringBuilder to which text and one or more numbers may be appended, formatted according to the
+ * format specifier supplied at construction. These are ephemeral objects that are not, on their
+ * own, thread safe.
+ */
+public class IntegerFormatter extends InternalFormat.Formatter {
+
+    /**
+     * Construct the formatter from a client-supplied buffer, to which the result will be appended,
+     * and a specification. Sets {@link #mark} to the end of the buffer.
+     *
+     * @param result destination buffer
+     * @param spec parsed conversion specification
+     */
+    public IntegerFormatter(StringBuilder result, Spec spec) {
+        super(result, spec);
+    }
+
+    /**
+     * Construct the formatter from a specification, allocating a buffer internally for the result.
+     *
+     * @param spec parsed conversion specification
+     */
+    public IntegerFormatter(Spec spec) {
+        // Rule of thumb: big enough for 32-bit binary with base indicator 0b
+        this(new StringBuilder(34), spec);
+    }
+
+    /*
+     * Re-implement the text appends so they return the right type.
+     */
+    @Override
+    public IntegerFormatter append(char c) {
+        super.append(c);
+        return this;
+    }
+
+    @Override
+    public IntegerFormatter append(CharSequence csq) {
+        super.append(csq);
+        return this;
+    }
+
+    @Override
+    public IntegerFormatter append(CharSequence csq, int start, int end) //
+            throws IndexOutOfBoundsException {
+        super.append(csq, start, end);
+        return this;
+    }
+
+    /**
+     * Format a {@link BigInteger}, which is the implementation type of Jython <code>long</code>,
+     * according to the specification represented by this <code>IntegerFormatter</code>. The
+     * conversion type, and flags for grouping or base prefix are dealt with here. At the point this
+     * is used, we know the {@link #spec} is one of the integer types.
+     *
+     * @param value to convert
+     * @return this object
+     */
+    @SuppressWarnings("fallthrough")
+    public IntegerFormatter format(BigInteger value) {
+        try {
+            // Different process for each format type.
+            switch (spec.type) {
+                case 'd':
+                case Spec.NONE:
+                case 'u':
+                case 'i':
+                    // None format or d-format: decimal
+                    format_d(value);
+                    break;
+
+                case 'x':
+                    // hexadecimal.
+                    format_x(value, false);
+                    break;
+
+                case 'X':
+                    // HEXADECIMAL!
+                    format_x(value, true);
+                    break;
+
+                case 'o':
+                    // Octal.
+                    format_o(value);
+                    break;
+
+                case 'b':
+                    // Binary.
+                    format_b(value);
+                    break;
+
+                case 'c':
+                    // Character.
+                    format_c(value);
+                    break;
+
+                case 'n':
+                    // Locale-sensitive version of d-format should be here.
+                    format_d(value);
+                    break;
+
+                default:
+                    // Should never get here, since this was checked in caller.
+                    throw unknownFormat(spec.type, "long");
+            }
+
+            // If required to, group the whole-part digits.
+            if (spec.grouping) {
+                groupDigits(3, ',');
+            }
+
+            return this;
+
+        } catch (OutOfMemoryError eme) {
+            // Most probably due to excessive precision.
+            throw precisionTooLarge("long");
+        }
+    }
+
+    /**
+     * Format the value as decimal (into {@link #result}). The option for mandatory sign is dealt
+     * with by reference to the format specification.
+     *
+     * @param value to convert
+     */
+    void format_d(BigInteger value) {
+        String number;
+        if (value.signum() < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(null);
+            number = value.negate().toString();
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(null);
+            number = value.toString();
+        }
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as hexadecimal (into {@link #result}), with the option of using upper-case
+     * or lower-case letters. The options for mandatory sign and for the presence of a base-prefix
+     * "0x" or "0X" are dealt with by reference to the format specification.
+     *
+     * @param value to convert
+     * @param upper if the hexadecimal should be upper case
+     */
+    void format_x(BigInteger value, boolean upper) {
+        String base = upper ? "0X" : "0x";
+        String number;
+        if (value.signum() < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(base);
+            number = toHexString(value.negate());
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(base);
+            number = toHexString(value);
+        }
+        // Append to result, case-shifted if necessary.
+        if (upper) {
+            number = number.toUpperCase();
+        }
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as octal (into {@link #result}). The options for mandatory sign and for the
+     * presence of a base-prefix "0o" are dealt with by reference to the format specification.
+     *
+     * @param value to convert
+     */
+    void format_o(BigInteger value) {
+        String base = "0o";
+        String number;
+        if (value.signum() < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(base);
+            number = toOctalString(value.negate());
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(base);
+            number = toOctalString(value);
+        }
+        // Append to result.
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as binary (into {@link #result}). The options for mandatory sign and for the
+     * presence of a base-prefix "0b" are dealt with by reference to the format specification.
+     *
+     * @param value to convert
+     */
+    void format_b(BigInteger value) {
+        String base = "0b";
+        String number;
+        if (value.signum() < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(base);
+            number = toBinaryString(value.negate());
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(base);
+            number = toBinaryString(value);
+        }
+        // Append to result.
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as a character (into {@link #result}).
+     *
+     * @param value to convert
+     */
+    void format_c(BigInteger value) {
+        // Limit is 256 if we're formatting for byte output, unicode range otherwise.
+        BigInteger limit = bytes ? LIMIT_BYTE : LIMIT_UNICODE;
+        if (value.signum() < 0 || value.compareTo(limit) >= 0) {
+            throw Py.OverflowError("%c arg not in range(0x" + toHexString(limit) + ")");
+        } else {
+            result.appendCodePoint(value.intValue());
+        }
+    }
+
+    // Limits used in format_c(BigInteger)
+    private static final BigInteger LIMIT_UNICODE = BigInteger
+            .valueOf(PySystemState.maxunicode + 1);
+    private static final BigInteger LIMIT_BYTE = BigInteger.valueOf(256);
+
+    /**
+     * Format an integer according to the specification represented by this
+     * <code>IntegerFormatter</code>. The conversion type, and flags for grouping or base prefix are
+     * dealt with here. At the point this is used, we know the {@link #spec} is one of the integer
+     * types.
+     *
+     * @param value to convert
+     * @return this object
+     */
+    @SuppressWarnings("fallthrough")
+    public IntegerFormatter format(int value) {
+        try {
+            // Scratch all instance variables and start = result.length().
+            setStart();
+
+            // Different process for each format type.
+            switch (spec.type) {
+                case 'd':
+                case Spec.NONE:
+                case 'u':
+                case 'i':
+                    // None format or d-format: decimal
+                    format_d(value);
+                    break;
+
+                case 'x':
+                    // hexadecimal.
+                    format_x(value, false);
+                    break;
+
+                case 'X':
+                    // HEXADECIMAL!
+                    format_x(value, true);
+                    break;
+
+                case 'o':
+                    // Octal.
+                    format_o(value);
+                    break;
+
+                case 'b':
+                    // Binary.
+                    format_b(value);
+                    break;
+
+                case 'c':
+                case '%':
+                    // Character.
+                    format_c(value);
+                    break;
+
+                case 'n':
+                    // Locale-sensitive version of d-format should be here.
+                    format_d(value);
+                    break;
+
+                default:
+                    // Should never get here, since this was checked in caller.
+                    throw unknownFormat(spec.type, "integer");
+            }
+
+            // If required to, group the whole-part digits.
+            if (spec.grouping) {
+                groupDigits(3, ',');
+            }
+
+            return this;
+        } catch (OutOfMemoryError eme) {
+            // Most probably due to excessive precision.
+            throw precisionTooLarge("integer");
+        }
+    }
+
+    /**
+     * Format the value as decimal (into {@link #result}). The option for mandatory sign is dealt
+     * with by reference to the format specification.
+     *
+     * @param value to convert
+     */
+    void format_d(int value) {
+        String number;
+        if (value < 0) {
+            // Negative: sign first, then magnitude as a long (-Integer.MIN_VALUE overflows int).
+            negativeSign(null);
+            number = Long.toString(-(long)value);
+        } else {
+            // Positive (or zero): optional sign from the specification, then the digits.
+            positiveSign(null);
+            number = Integer.toString(value);
+        }
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as hexadecimal (into {@link #result}), with the option of using upper-case
+     * or lower-case letters. The options for mandatory sign and for the presence of a base-prefix
+     * "0x" or "0X" are dealt with by reference to the format specification.
+     *
+     * @param value to convert
+     * @param upper if the hexadecimal should be upper case
+     */
+    void format_x(int value, boolean upper) {
+        String base = upper ? "0X" : "0x";
+        String number;
+        if (value < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(base);
+            number = Integer.toHexString(-value);
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(base);
+            number = Integer.toHexString(value);
+        }
+        // Append to result, case-shifted if necessary.
+        if (upper) {
+            number = number.toUpperCase();
+        }
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as octal (into {@link #result}). The options for mandatory sign and for the
+     * presence of a base-prefix "0o" are dealt with by reference to the format specification.
+     *
+     * @param value to convert
+     */
+    void format_o(int value) {
+        String base = "0o";
+        String number;
+        if (value < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(base);
+            number = Integer.toOctalString(-value);
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(base);
+            number = Integer.toOctalString(value);
+        }
+        // Append to result.
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as binary (into {@link #result}). The options for mandatory sign and for the
+     * presence of a base-prefix "0b" are dealt with by reference to the format specification.
+     *
+     * @param value to convert
+     */
+    void format_b(int value) {
+        String base = "0b";
+        String number;
+        if (value < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(base);
+            number = Integer.toBinaryString(-value);
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(base);
+            number = Integer.toBinaryString(value);
+        }
+        // Append to result.
+        appendNumber(number);
+    }
+
+    /**
+     * Format the value as a character (into {@link #result}).
+     *
+     * @param value to convert
+     */
+    void format_c(int value) {
+        // Limit is 256 if we're formatting for byte output, unicode range otherwise.
+        int limit = bytes ? 256 : PySystemState.maxunicode + 1;
+        if (value < 0 || value >= limit) {
+            throw Py.OverflowError("%c arg not in range(0x" + Integer.toHexString(limit) + ")");
+        } else {
+            result.appendCodePoint(value);
+        }
+    }
+
+    /**
+     * Append to {@link #result} buffer a sign (if one is specified for positive numbers) and, in
+     * alternate mode, the base marker provided. The sign and base marker are together considered to
+     * be the "sign" of the converted number, spanned by {@link #lenSign}. This is relevant when we
+     * come to insert padding.
+     *
+     * @param base marker "0x" or "0X" for hex, "0o" for octal, "0b" for binary, "" or
+     *            <code>null</code> for decimal.
+     */
+    final void positiveSign(String base) {
+        // Does the format specify a sign for positive values?
+        char sign = spec.sign;
+        if (Spec.specified(sign) && sign != '-') {
+            append(sign);
+            lenSign = 1;
+        }
+        // Does the format call for a base prefix?
+        if (base != null && spec.alternate) {
+            append(base);
+            lenSign += base.length();
+        }
+    }
+
+    /**
+     * Append to {@link #result} buffer a minus sign and, in alternate mode, the base marker
+     * provided. The sign and base marker are together considered to be the "sign" of the converted
+     * number, spanned by {@link #lenSign}. This is relevant when we come to insert padding.
+     *
+     * @param base marker: "0x" or "0X" (hex), "0o" (octal), "0b" (binary), null or "" (decimal).
+     */
+    final void negativeSign(String base) {
+        // Insert a minus sign unconditionally.
+        append('-');
+        lenSign = 1;
+        // Does the format call for a base prefix?
+        if (base != null && spec.alternate) {
+            append(base);
+            lenSign += base.length();
+        }
+    }
+
+    /**
+     * Append a string (number) to {@link #result} and set {@link #lenWhole} to its length.
+     *
+     * @param number to append
+     */
+    void appendNumber(String number) {
+        lenWhole = number.length();
+        append(number);
+    }
+
+    // For hex-conversion by lookup
+    private static final String LOOKUP = "0123456789abcdef";
+
+    /**
+     * A more efficient algorithm for generating a hexadecimal representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate a hexadecimal string from
+     * @return the hexadecimal representation of value, with "-" sign prepended if necessary
+     */
+    private static final String toHexString(BigInteger value) {
+        int signum = value.signum();
+
+        // obvious shortcut
+        if (signum == 0) {
+            return "0";
+        }
+
+        // we want to work in absolute numeric value (negative sign is added afterward)
+        byte[] input = value.abs().toByteArray();
+        StringBuilder sb = new StringBuilder(input.length * 2);
+
+        int b;
+        for (int i = 0; i < input.length; i++) {
+            b = input[i] & 0xFF;
+            sb.append(LOOKUP.charAt(b >> 4));
+            sb.append(LOOKUP.charAt(b & 0x0F));
+        }
+
+        // before returning the char array as string, remove leading zeroes, but not the last one
+        String result = sb.toString().replaceFirst("^0+(?!$)", "");
+        return signum < 0 ? "-" + result : result;
+    }
+
+    /**
+     * A more efficient algorithm for generating an octal representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate an octal string from
+     * @return the octal representation of value, with "-" sign prepended if necessary
+     */
+    private static final String toOctalString(BigInteger value) {
+        int signum = value.signum();
+
+        // Zero is dealt with here, so the digits built below always include a non-zero one.
+        if (signum == 0) {
+            return "0";
+        }
+
+        byte[] input = value.abs().toByteArray();
+        if (input.length < 3) {
+            return value.toString(8);
+        }
+
+        StringBuilder sb = new StringBuilder(input.length * 3);
+
+        // working backwards, three bytes at a time (digits are appended least significant first)
+        int threebytes;
+        int trip1, trip2, trip3;    // most, middle, and least significant bytes in the triplet
+        for (int i = input.length - 1; i >= 0; i -= 3) {
+            trip3 = input[i] & 0xFF;
+            trip2 = ((i - 1) >= 0) ? (input[i - 1] & 0xFF) : 0x00;
+            trip1 = ((i - 2) >= 0) ? (input[i - 2] & 0xFF) : 0x00;
+            threebytes = trip3 | (trip2 << 8) | (trip1 << 16);
+
+            // convert the three-byte value into an eight-character octal string
+            for (int j = 0; j < 8; j++) {
+                sb.append(LOOKUP.charAt((threebytes >> (j * 3)) & 0x000007));
+            }
+        }
+
+        String result = sb.reverse().toString().replaceFirst("^0+(?!$)", "");
+        return signum < 0 ? "-" + result : result;
+    }
+
+    /**
+     * A more efficient algorithm for generating a binary representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate a binary string from
+     * @return the binary representation of value, with "-" sign prepended if necessary
+     */
+    private static final String toBinaryString(BigInteger value) {
+        int signum = value.signum();
+
+        // Zero is dealt with here, so the digits built below always include a "1".
+        if (signum == 0) {
+            return "0";
+        }
+
+        // Work on the magnitude (sign is added afterward); each byte yields eight binary digits.
+        byte[] input = value.abs().toByteArray();
+        StringBuilder sb = new StringBuilder(input.length * 8);
+
+        int b;
+        for (int i = 0; i < input.length; i++) {
+            b = input[i] & 0xFF;
+            for (int bit = 7; bit >= 0; bit--) {
+                sb.append(((b >> bit) & 0x1) > 0 ? "1" : "0");
+            }
+        }
+
+        // before returning the char array as string, remove leading zeroes, but not the last one
+        String result = sb.toString().replaceFirst("^0+(?!$)", "");
+        return signum < 0 ? "-" + result : result;
+    }
+
+    /** Format specification used by bin(). */
+    public static final Spec BIN = InternalFormat.fromText("#b");
+
+    /** Format specification used by oct(). */
+    public static final Spec OCT = InternalFormat.fromText("#o");
+
+    /** Format specification used by hex(). */
+    public static final Spec HEX = InternalFormat.fromText("#x");
+
+    /**
+     * Convert the object to binary according to the conventions of Python built-in
+     * <code>bin()</code>. The object's __index__ method is called, and is responsible for raising
+     * the appropriate error (which the base {@link PyObject#__index__()} does).
+     *
+     * @param number to convert
+     * @return PyString converted result
+     */
+    // Follow this pattern in Python 3, where objects no longer have __hex__, __oct__ members.
+    public static PyString bin(PyObject number) {
+        return formatNumber(number, BIN);
+    }
+
+    /**
+     * Convert the object according to the conventions of Python built-in <code>hex()</code>, or
+     * <code>oct()</code>. The object's <code>__index__</code> method is called, and is responsible
+     * for raising the appropriate error (which the base {@link PyObject#__index__()} does).
+     *
+     * @param number to convert
+     * @return PyString converted result
+     */
+    public static PyString formatNumber(PyObject number, Spec spec) {
+        number = number.__index__();
+        IntegerFormatter f = new IntegerFormatter(spec);
+        if (number instanceof PyInteger) {
+            f.format(((PyInteger)number).getValue());
+        } else {
+            f.format(((PyLong)number).getValue());
+        }
+        return new PyString(f.getResult());
+    }
+
+    /**
+     * A minor variation on {@link IntegerFormatter} to handle "traditional" %-formatting. The
+     * difference is in support for <code>spec.precision</code>, the formatting of octal in
+     * "alternate" mode (0 and 0123, not 0o0 and 0o123), and in c-format (in the error logic).
+     */
+    public static class Traditional extends IntegerFormatter {
+
+        /**
+         * Construct the formatter from a client-supplied buffer, to which the result will be
+         * appended, and a specification. Sets {@link #mark} to the end of the buffer.
+         *
+         * @param result destination buffer
+         * @param spec parsed conversion specification
+         */
+        public Traditional(StringBuilder result, Spec spec) {
+            super(result, spec);
+        }
+
+        /**
+         * Construct the formatter from a specification, allocating a buffer internally for the
+         * result.
+         *
+         * @param spec parsed conversion specification
+         */
+        public Traditional(Spec spec) {
+            this(new StringBuilder(), spec);
+        }
+
+        /**
+         * Format the value as octal (into {@link #result}). The options for mandatory sign and for
+         * the presence of a base-prefix "0" are dealt with by reference to the format
+         * specification.
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_o(BigInteger value) {
+            String number;
+            int signum = value.signum();
+            if (signum < 0) {
+                // Negative value: deal with sign and base, and convert magnitude.
+                negativeSign(null);
+                number = toOctalString(value.negate());
+            } else {
+                // Positive value: deal with sign, base and magnitude.
+                positiveSign(null);
+                number = toOctalString(value);
+            }
+            // Append to result.
+            appendOctalNumber(number);
+        }
+
+        /**
+         * Format the value as a character (into {@link #result}).
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_c(BigInteger value) {
+            if (value.signum() < 0) {
+                throw Py.OverflowError("unsigned byte integer is less than minimum");
+            } else {
+                // Limit is 256 if we're formatting for byte output, unicode range otherwise.
+                BigInteger limit = bytes ? LIMIT_BYTE : LIMIT_UNICODE;
+                if (value.compareTo(limit) >= 0) {
+                    throw Py.OverflowError("unsigned byte integer is greater than maximum");
+                } else {
+                    result.appendCodePoint(value.intValue());
+                }
+            }
+        }
+
+        /**
+         * Format the value as octal (into {@link #result}). The options for mandatory sign and for
+         * the presence of a base-prefix "0" are dealt with by reference to the format
+         * specification.
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_o(int value) {
+            String number;
+            if (value < 0) {
+                // Negative value: deal with sign and convert magnitude.
+                negativeSign(null);
+                number = Integer.toOctalString(-value);
+            } else {
+                // Positive value: deal with sign, base and magnitude.
+                positiveSign(null);
+                number = Integer.toOctalString(value);
+            }
+            // Append to result.
+            appendOctalNumber(number);
+        }
+
+        /**
+         * Format the value as a character (into {@link #result}).
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_c(int value) {
+            if (value < 0) {
+                throw Py.OverflowError("unsigned byte integer is less than minimum");
+            } else {
+                // Limit is 256 if we're formatting for byte output, unicode range otherwise.
+                int limit = bytes ? 256 : PySystemState.maxunicode + 1;
+                if (value >= limit) {
+                    throw Py.OverflowError("unsigned byte integer is greater than maximum");
+                } else {
+                    result.appendCodePoint(value);
+                }
+            }
+        }
+
+        /**
+         * Append a string (number) to {@link #result}, but insert leading zeros first in order
+         * that, on return, the whole-part length #lenWhole should be no less than the precision.
+         *
+         * @param number to append
+         */
+        @Override
+        void appendNumber(String number) {
+            int n, p = spec.getPrecision(0);
+            for (n = number.length(); n < p; n++) {
+                result.append('0');
+            }
+            lenWhole = n;
+            append(number);
+        }
+
+        /**
+         * Append a string (number) to {@link #result}, but insert leading zeros first in order
+         * that, on return, the whole-part length #lenWhole should be no less than the precision.
+         * Octal numbers must begin with a zero if <code>spec.alternate==true</code>, so if the
+         * number passed in does not start with a zero, at least one will be inserted.
+         *
+         * @param number to append
+         */
+        void appendOctalNumber(String number) {
+            int n = number.length(), p = spec.getPrecision(0);
+            if (spec.alternate && number.charAt(0) != '0' && n >= p) {
+                p = n + 1;
+            }
+            for (; n < p; n++) {
+                result.append('0');
+            }
+            lenWhole = n;
+            append(number);
+        }
+
+    }
+}
diff --git a/src/org/python/core/stringlib/InternalFormat.java b/src/org/python/core/stringlib/InternalFormat.java
--- a/src/org/python/core/stringlib/InternalFormat.java
+++ b/src/org/python/core/stringlib/InternalFormat.java
@@ -3,6 +3,9 @@
 
 import org.python.core.Py;
 import org.python.core.PyException;
+import org.python.core.PyObject;
+import org.python.core.PyString;
+import org.python.core.PyUnicode;
 
 public class InternalFormat {
 
@@ -14,7 +17,25 @@
      */
     public static Spec fromText(String text) {
         Parser parser = new Parser(text);
-        return parser.parse();
+        try {
+            return parser.parse();
+        } catch (IllegalArgumentException e) {
+            throw Py.ValueError(e.getMessage());
+        }
+    }
+
+    /**
+     * Create a {@link Spec} object by parsing a format specification, supplied as an object.
+     *
+     * @param text to parse
+     * @return parsed equivalent to text
+     */
+    public static Spec fromText(PyObject text, String method) {
+        if (text instanceof PyString) {
+            return fromText(((PyString)text).getString());
+        } else {
+            throw Py.TypeError(method + " requires str or unicode");
+        }
     }
 
     /**
@@ -30,23 +51,72 @@
         /** The (partial) result. */
         protected StringBuilder result;
 
-        /** The number we are working on floats at the end of the result, and starts here. */
+        /**
+         * Signals the client's intention to make a PyString (or other byte-like) interpretation of
+         * {@link #result}, rather than a PyUnicode one.
+         */
+        protected boolean bytes;
+
+        /** The start of the formatted data for padding purposes, <={@link #start} */
+        protected int mark;
+        /** The latest number we are working on floats at the end of the result, and starts here. */
         protected int start;
-        /** If it contains no sign, this length is zero, and 1 otherwise. */
+        /** If it contains no sign, this length is zero, and >0 otherwise. */
         protected int lenSign;
         /** The length of the whole part (to left of the decimal point or exponent) */
         protected int lenWhole;
 
         /**
-         * Construct the formatter from a specification and initial buffer capacity. A reference is
-         * held to this specification, but it will not be modified by the actions of this class.
+         * Construct the formatter from a client-supplied buffer and a specification. Sets
+         * {@link #mark} and {@link #start} to the end of the buffer. The new formatted object will
+         * therefore be appended there and, when the time comes, padding will be applied to (just)
+         * the new text.
+         *
+         * @param result destination buffer
+         * @param spec parsed conversion specification
+         */
+        public Formatter(StringBuilder result, Spec spec) {
+            this.spec = spec;
+            this.result = result;
+            this.start = this.mark = result.length();
+        }
+
+        /**
+         * Construct the formatter from a specification and initial buffer capacity. Sets
+         * {@link #mark} to the end of the buffer.
          *
          * @param spec parsed conversion specification
          * @param width of buffer initially
          */
         public Formatter(Spec spec, int width) {
-            this.spec = spec;
-            result = new StringBuilder(width);
+            this(new StringBuilder(width), spec);
+        }
+
+        /**
+         * Signals the client's intention to make a PyString (or other byte-like) interpretation of
+         * {@link #result}, rather than a PyUnicode one. Only formatters that could produce
+         * characters >255 are affected by this (e.g. c-format). Idiom:
+         *
+         * <pre>
+         * MyFormatter f = new MyFormatter( InternalFormat.fromText(formatSpec) );
+         * f.setBytes(!(formatSpec instanceof PyUnicode));
+         * // ... formatting work
+         * return f.getPyResult();
+         * </pre>
+         *
+         * @param bytes true to signal the intention to make a byte-like interpretation
+         */
+        public void setBytes(boolean bytes) {
+            this.bytes = bytes;
+        }
+
+        /**
+         * Whether initialised for a byte-like interpretation.
+         *
+         * @return bytes attribute
+         */
+        public boolean isBytes() {
+            return bytes;
         }
 
         /**
@@ -58,6 +128,22 @@
             return result.toString();
         }
 
+        /**
+         * Convenience method to return the current result of the formatting, as a
+         * <code>PyObject</code>, either {@link PyString} or {@link PyUnicode} according to
+         * {@link #bytes}.
+         *
+         * @return formatted result
+         */
+        public PyString getPyResult() {
+            String r = getResult();
+            if (bytes) {
+                return new PyString(r);
+            } else {
+                return new PyUnicode(r);
+            }
+        }
+
         /*
          * Implement Appendable interface by delegation to the result buffer.
          *
@@ -84,21 +170,28 @@
 
         /**
          * Clear the instance variables describing the latest object in {@link #result}, ready to
-         * receive a new number
+         * receive a new one: sets {@link #start} and calls {@link #reset()}. This is necessary when
+         * a <code>Formatter</code> is to be re-used. Note that this leaves {@link #mark} where it
+         * is. In the core, we need this to support <code>complex</code>: two floats in the same
+         * format, but padded as a unit.
          */
         public void setStart() {
-            // Mark the end of the buffer as the start of the current object and reset all.
+            // The new value will float at the current end of the result buffer.
             start = result.length();
-            // Clear the variable describing the latest number in result.
-            reset();
+            // If anything has been added since construction, reset all state.
+            if (start > mark) {
+                // Clear the variable describing the latest number in result.
+                reset();
+            }
         }
 
         /**
          * Clear the instance variables describing the latest object in {@link #result}, ready to
-         * receive a new one.
+         * receive a new one. This is called from {@link #setStart()}. Subclasses override this
+         * method and call {@link #setStart()} at the start of their format method.
          */
         protected void reset() {
-            // Clear the variable describing the latest object in result.
+            // Clear the variables describing the latest object in result.
             lenSign = lenWhole = 0;
         }
 
@@ -215,19 +308,19 @@
         }
 
         /**
-         * Pad the result so far (defined as the entire contents of {@link #result}) using the
-         * alignment, target width and fill character defined in {@link #spec}. The action of
-         * padding will increase the overall length of the result to the target width, if that is
-         * greater than the current length.
+         * Pad the result so far (defined as the contents of {@link #result} from {@link #mark} to
+         * the end) using the alignment, target width and fill character defined in {@link #spec}.
+         * The action of padding will increase the length of this segment to the target width, if
+         * that is greater than the current length.
          * <p>
          * When the padding method has decided that that it needs to add n padding characters, it
-         * will affect {@link #start} or {@link #lenSign} as follows.
+         * will affect {@link #start} or {@link #lenWhole} as follows.
          * <table border style>
          * <tr>
          * <th>align</th>
          * <th>meaning</th>
          * <th>start</th>
-         * <th>lenSign</th>
+         * <th>lenWhole</th>
          * <th>result.length()</th>
          * </tr>
          * <tr>
@@ -259,69 +352,79 @@
          * <td>+n</td>
          * </tr>
          * </table>
-         * Note that we may have converted more than one value into the result buffer (for example
-         * when formatting a complex number). The pointer <code>start</code> is at the start of the
-         * last number converted. Padding with zeros, and the "pad after sign" mode, will produce a
-         * result you probably don't want. It is up to the client to disallow this (which
-         * <code>complex</code> does).
+         * Note that in the "pad after sign" mode, only the last number into the buffer receives the
+         * padding. This padding gets incorporated into the whole part of the number. (In other
+         * modes, the padding is around <code>result[mark:]</code>.) When this would not be
+         * appropriate, it is up to the client to disallow this (which <code>complex</code> does).
          *
-         * @return this object
+         * @return this Formatter object
          */
         public Formatter pad() {
+            // We'll need this many pad characters (if>0). Note Spec.UNDEFINED<0.
+            int n = spec.width - (result.length() - mark);
+            if (n > 0) {
+                pad(mark, n);
+            }
+            return this;
+        }
 
-            // We'll need this many pad characters (if>0). Note Spec.UNDEFINED<0.
-            int n = spec.width - result.length();
-            if (n > 0) {
+        /**
+         * Pad the last result (defined as the contents of {@link #result} from argument
+         * <code>leftIndex</code> to the end) with <code>n</code> repetitions of the fill
+         * character defined in {@link #spec}, distributed according to <code>spec.align</code>.
+         * The value of <code>leftIndex</code> is only used if the alignment is '>' (fill on the
+         * left) or '^' (fill on both sides). The values of the critical lengths (lenWhole,
+         * lenSign, etc.) are not affected, because we assume that <code>leftIndex <= </code>
+         * {@link #start}.
+         *
+         * @param leftIndex the index in result at which to insert left-fill characters.
+         * @param n number of fill characters to insert.
+         */
+        protected void pad(int leftIndex, int n) {
+            char align = spec.getAlign('>'); // Right for numbers (strings will supply '<' align)
+            char fill = spec.getFill(' ');
 
-                char align = spec.getAlign('>'); // Right for numbers (wrong for strings)
-                char fill = spec.getFill(' ');
+            // Start by assuming padding is all leading ('>' case or '=')
+            int leading = n;
 
-                // Start by assuming padding is all leading ('>' case or '=')
-                int leading = n;
+            // Split the total padding according to the alignment
+            if (align == '^') {
+                // Half the padding before
+                leading = n / 2;
+            } else if (align == '<') {
+                // All the padding after
+                leading = 0;
+            }
 
-                // Split the total padding according to the alignment
-                if (align == '^') {
-                    // Half the padding before
-                    leading = n / 2;
-                } else if (align == '<') {
-                    // All the padding after
-                    leading = 0;
+            // All padding that is not leading is trailing
+            int trailing = n - leading;
+
+            // Insert the leading space
+            if (leading > 0) {
+                if (align == '=') {
+                    // Incorporate into the (latest) whole part
+                    leftIndex = start + lenSign;
+                    lenWhole += leading;
+                } else {
+                    // Default is to insert at the stated leftIndex <= start.
+                    start += leading;
                 }
-
-                // All padding that is not leading is trailing
-                int trailing = n - leading;
-
-                // Insert the leading space
-                if (leading > 0) {
-                    int pos;
-                    if (align == '=') {
-                        // Incorporate into the (latest) whole part
-                        pos = start + lenSign;
-                        lenWhole += leading;
-                    } else {
-                        // Insert at the very beginning (not start) by default.
-                        pos = 0;
-                        start += leading;
-                    }
-                    makeSpaceAt(pos, leading);
-                    for (int i = 0; i < leading; i++) {
-                        result.setCharAt(pos + i, fill);
-                    }
-                }
-
-                // Append the trailing space
-                for (int i = 0; i < trailing; i++) {
-                    result.append(fill);
-                }
-
-                // Check for special case
-                if (align == '=' && fill == '0' && spec.grouping) {
-                    // We must extend the grouping separator into the padding
-                    zeroPadAfterSignWithGroupingFixup(3, ',');
+                makeSpaceAt(leftIndex, leading);
+                for (int i = 0; i < leading; i++) {
+                    result.setCharAt(leftIndex + i, fill);
                 }
             }
 
-            return this;
+            // Append the trailing space
+            for (int i = 0; i < trailing; i++) {
+                result.append(fill);
+            }
+
+            // Check for special case
+            if (align == '=' && fill == '0' && spec.grouping) {
+                // We must extend the grouping separator into the padding
+                zeroPadAfterSignWithGroupingFixup(3, ',');
+            }
         }
 
         /**
@@ -345,7 +448,7 @@
          * </pre>
          *
          * The padding has increased the overall length of the result to the target width. About one
-         * in three call to this method adds one to the width, because the whole part cannot start
+         * in three calls to this method adds one to the width, because the whole part cannot start
          * with a comma.
          *
          * <pre>
@@ -355,9 +458,6 @@
          * '-<b>0</b>,000,000,001,200,000,000.0000'
          * </pre>
          *
-         * Insert grouping characters (conventionally commas) into the whole part of the number.
-         * {@link #lenWhole} will increase correspondingly.
-         *
          * @param groupSize normally 3.
          * @param comma or some other character to use as a separator.
          */
@@ -386,10 +486,9 @@
                  * Suppose the format call was format(-12e8, "0=30,.4f"). At the beginning, we had
                  * something like this in result: . [-|000000000001,200,000,000|.|0000||]
                  *
-                 * And now, result looks like this: [-|0000,000,001,200,000,000|.|0000||] in which
-                 * the first zero is wrong as it stands, nor can it just be over-written with a
-                 * comma. We have to insert another zero, even though this makes the result longer
-                 * than we were given.
+                 * And now, result looks like this: [-|,000,000,001,200,000,000|.|0000||] in which
+                 * the first comma is wrong, but so would be a zero. We have to insert another zero,
+                 * even though this makes the result longer than we were asked for.
                  */
                 result.insert(firstZero, '0');
                 lenWhole += 1;
@@ -418,7 +517,19 @@
          * @return exception to throw
          */
         public static PyException alternateFormNotAllowed(String forType) {
-            return notAllowed("Alternate form (#)", forType);
+            return alternateFormNotAllowed(forType, '\0');
+        }
+
+        /**
+         * Convenience method returning a {@link Py#ValueError} reporting that alternate form is not
+         * allowed in a format specifier for the named type and specified type code.
+         *
+         * @param forType the type it was found applied to
+         * @param code the formatting code (or '\0' not to mention one)
+         * @return exception to throw
+         */
+        public static PyException alternateFormNotAllowed(String forType, char code) {
+            return notAllowed("Alternate form (#)", forType, code);
         }
 
         /**
@@ -430,7 +541,30 @@
          * @return exception to throw
          */
         public static PyException alignmentNotAllowed(char align, String forType) {
-            return notAllowed("'" + align + "' alignment flag", forType);
+            return notAllowed("'" + align + "' alignment flag", forType, '\0');
+        }
+
+        /**
+         * Convenience method returning a {@link Py#ValueError} reporting that specifying a sign is
+         * not allowed in a format specifier for the named type.
+         *
+         * @param forType the type it was found applied to
+         * @param code the formatting code (or '\0' not to mention one)
+         * @return exception to throw
+         */
+        public static PyException signNotAllowed(String forType, char code) {
+            return notAllowed("Sign", forType, code);
+        }
+
+        /**
+         * Convenience method returning a {@link Py#ValueError} reporting that specifying a
+         * precision is not allowed in a format specifier for the named type.
+         *
+         * @param forType the type it was found applied to
+         * @return exception to throw
+         */
+        public static PyException precisionNotAllowed(String forType) {
+            return notAllowed("Precision", forType, '\0');
         }
 
         /**
@@ -441,22 +575,63 @@
          * @return exception to throw
          */
         public static PyException zeroPaddingNotAllowed(String forType) {
-            return notAllowed("Zero padding", forType);
+            return notAllowed("Zero padding", forType, '\0');
         }
 
         /**
          * Convenience method returning a {@link Py#ValueError} reporting that some format specifier
-         * feature is not allowed for the named type.
+         * feature is not allowed for the named data type.
          *
-         * @param particularOutrage committed in the present case
-         * @param forType the type it where it is an outrage
+         * @param outrage committed in the present case
+         * @param forType the data type (e.g. "integer") for which it is an outrage
          * @return exception to throw
          */
-        protected static PyException notAllowed(String particularOutrage, String forType) {
-            String msg = particularOutrage + " is not allowed in " + forType + " format specifier";
+        public static PyException notAllowed(String outrage, String forType) {
+            return notAllowed(outrage, forType, '\0');
+        }
+
+        /**
+         * Convenience method returning a {@link Py#ValueError} reporting that some format specifier
+         * feature is not allowed for the named format code and data type. Produces a message like:
+         * <p>
+         * <code>outrage+" not allowed with "+forType+" format specifier '"+code+"'"</code>
+         * <p>
+         * <code>outrage+" not allowed in "+forType+" format specifier"</code>
+         *
+         * @param outrage committed in the present case
+         * @param forType the data type (e.g. "integer") for which it is an outrage
+         * @param code the formatting code for which it is an outrage (or '\0' not to mention one)
+         * @return exception to throw
+         */
+        public static PyException notAllowed(String outrage, String forType, char code) {
+            // Try really hard to be like CPython
+            String codeAsString, withOrIn;
+            if (code == 0) {
+                withOrIn = "in ";
+                codeAsString = "";
+            } else {
+                withOrIn = "with ";
+                codeAsString = " '" + code + "'";
+            }
+            String msg =
+                    outrage + " not allowed " + withOrIn + forType + " format specifier"
+                            + codeAsString;
             return Py.ValueError(msg);
         }
 
+        /**
+         * Convenience method returning a {@link Py#OverflowError} reporting:
+         * <p>
+         * <code>"formatted "+type+" is too long (precision too large?)"</code>
+         *
+         * @param type of formatting ("integer", "float")
+         * @return exception to throw
+         */
+        public static PyException precisionTooLarge(String type) {
+            String msg = "formatted " + type + " is too long (precision too large?)";
+            return Py.OverflowError(msg);
+        }
+
     }
 
     /**
@@ -636,6 +811,12 @@
                 false, Spec.UNSPECIFIED, Spec.NONE);
 
         /**
+         * Defaults applicable to string types. Equivalent to " <"
+         */
+        public static final Spec STRING = new Spec(' ', '<', Spec.NONE, false, Spec.UNSPECIFIED,
+                false, Spec.UNSPECIFIED, Spec.NONE);
+
+        /**
          * Constructor offering just precision and type.
          *
          * <pre>
@@ -775,11 +956,6 @@
                 throw new IllegalArgumentException("Invalid conversion specification");
             }
 
-            // Restrict grouping to known formats. (Mirrors CPython, but misplaced?)
-            if (grouping && "defgEG%F\0".indexOf(type) == -1) {
-                throw new IllegalArgumentException("Cannot specify ',' with '" + type + "'.");
-            }
-
             // Create a specification
             return new Spec(fill, align, sign, alternate, width, grouping, precision, type);
         }
diff --git a/src/org/python/core/stringlib/InternalFormatSpec.java b/src/org/python/core/stringlib/InternalFormatSpec.java
deleted file mode 100644
--- a/src/org/python/core/stringlib/InternalFormatSpec.java
+++ /dev/null
@@ -1,88 +0,0 @@
-package org.python.core.stringlib;
-
-/**
- * Parsed PEP-3101 format specification of a single field. This class holds the several attributes
- * that might be decoded from a format specifier. It provides a method
- * {@link #pad(String, char, int)} for adjusting a string using those attributes related to padding
- * to a string assumed to be the result of formatting to the given precision.
- * <p>
- * This structure is returned by {@link InternalFormatSpecParser#parse()} and having public members
- * is freely used by {@link InternalFormatSpecParser}, and the __format__ methods of client object
- * types.
- * <p>
- * The fields correspond to the elements of a format specification. The grammar of a format
- * specification is:
- *
- * <pre>
- * [[fill]align][sign][#][0][width][,][.precision][type]
- * </pre>
- */
-public final class InternalFormatSpec {
-
-    /** The fill specified in the grammar. */
-    public char fill_char;
-    /** Alignment indicator is 0, or one of {<code>'<', '^', '>', '='</code> . */
-    public char align;
-    /** The alternative format flag '#' was given. */
-    public boolean alternate;
-    /** Sign-handling flag, one of <code>'+'</code>, <code>'-'</code>, or <code>' '</code>. */
-    public char sign;
-    /** Width to which to pad the resault in {@link #pad(String, char, int)}. */
-    public int width = -1;
-    /** Insert the grouping separator (which in Python always indicates a group-size of 3). */
-    public boolean thousands_separators;
-    /** Precision decoded from the format. */
-    public int precision = -1;
-    /** Type key from the format. */
-    public char type;
-
-    /**
-     * Pad value, using {@link #fill_char} (or <code>' '</code>) before and after, to {@link #width}
-     * <code>-leaveWidth</code>, aligned according to {@link #align} (or according to
-     * <code>defaultAlign</code>).
-     *
-     * @param value to pad
-     * @param defaultAlign to use if <code>this.align</code>=0 (one of <code>'<'</code>,
-     *            <code>'^'</code>, <code>'>'</code>, or <code>'='</code>).
-     * @param leaveWidth to reduce effective <code>this.width</code> by
-     * @return padded value
-     */
-    public String pad(String value, char defaultAlign, int leaveWidth) {
-
-        // We'll need this many pad characters (if>0)
-        int remaining = width - value.length() - leaveWidth;
-        if (remaining <= 0) {
-            return value;
-        }
-
-        // Use this.align or defaultAlign
-        int useAlign = align;
-        if (useAlign == 0) {
-            useAlign = defaultAlign;
-        }
-
-        // By default all padding is leading padding ('<' case or '=')
-        int leading = remaining;
-        if (useAlign == '^') {
-            // Half the padding before
-            leading = remaining / 2;
-        } else if (useAlign == '<') {
-            // All the padding after
-            leading = 0;
-        }
-
-        // Now build the result
-        StringBuilder result = new StringBuilder();
-        char fill = fill_char != 0 ? fill_char : ' ';
-
-        for (int i = 0; i < leading; i++) { // before
-            result.append(fill);
-        }
-        result.append(value);
-        for (int i = 0; i < remaining - leading; i++) { // after
-            result.append(fill);
-        }
-
-        return result.toString();
-    }
-}
diff --git a/src/org/python/core/stringlib/InternalFormatSpecParser.java b/src/org/python/core/stringlib/InternalFormatSpecParser.java
deleted file mode 100644
--- a/src/org/python/core/stringlib/InternalFormatSpecParser.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package org.python.core.stringlib;
-
-/**
- * Parser for PEP-3101 field format specifications. This class provides a {@link #parse()} method
- * that translates the format specification into an <code>InternalFormatSpec</code> object.
- */
-public class InternalFormatSpecParser {
-
-    private String spec;
-    private int index;
-
-    /**
-     * Constructor simply holds the specification streang ahead of the {@link #parse()} operation.
-     *
-     * @param spec format specifier to parse (e.g. "<+12.3f")
-     */
-    public InternalFormatSpecParser(String spec) {
-        this.spec = spec;
-        this.index = 0;
-    }
-
-    private static boolean isAlign(char c) {
-        switch (c) {
-            case '<':
-            case '>':
-            case '=':
-            case '^':
-                return true;
-            default:
-                return false;
-        }
-    }
-
-    /**
-     * Parse the specification with which this object was initialised into an
-     * {@link InternalFormatSpec}, which is an object encapsulating the format for use by formatting
-     * methods. This parser deals only with the format specifiers themselves, as accepted by the
-     * <code>__format__</code> method of a type, or the <code>format()</code> built-in, not format
-     * strings in general as accepted by <code>str.format()</code>. A typical idiom is:
-     *
-     * <pre>
-     * InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
-     * </pre>
-     *
-     * @return the <code>InternalFormatSpec</code> equivalent to the constructor argument
-     */
-    /*
-     * This method is the equivalent of CPython's parse_internal_render_format_spec() in
-     * ~/Objects/stringlib/formatter.h.
-     */
-    // XXX Better encapsulated as a constructor of InternalFormatSpec?
-    public InternalFormatSpec parse() {
-        InternalFormatSpec result = new InternalFormatSpec();
-        if (spec.length() >= 1 && isAlign(spec.charAt(0))) {
-            result.align = spec.charAt(index);
-            index++;
-        } else if (spec.length() >= 2 && isAlign(spec.charAt(1))) {
-            result.fill_char = spec.charAt(0);
-            result.align = spec.charAt(1);
-            index += 2;
-        }
-        if (isAt("+- ")) {
-            result.sign = spec.charAt(index);
-            index++;
-        }
-        if (isAt("#")) {
-            result.alternate = true;
-            index++;
-        }
-        if (result.fill_char == '\0' && isAt("0")) {
-            result.fill_char = '0';
-            if (result.align == '\0') {
-                result.align = '=';
-            }
-            index++;
-        }
-        result.width = getInteger();
-        if (isAt(",")) {
-            result.thousands_separators = true;
-            index++;
-        }
-        if (isAt(".")) {
-            index++;
-            result.precision = getInteger();
-            if (result.precision == -1) {
-                throw new IllegalArgumentException("Format specifier missing precision");
-            }
-        }
-        if (index < spec.length()) {
-            result.type = spec.charAt(index);
-            if (index + 1 != spec.length()) {
-                throw new IllegalArgumentException("Invalid conversion specification");
-            }
-        }
-        if (result.thousands_separators && "defgEG%F\0".indexOf(result.type) == -1) {
-            throw new IllegalArgumentException("Cannot specify ',' with '" + result.type + "'.");
-        }
-        return result;
-    }
-
-    private int getInteger() {
-        int value = 0;
-        boolean empty = true;
-        while (index < spec.length() && spec.charAt(index) >= '0' && spec.charAt(index) <= '9') {
-            value = value * 10 + spec.charAt(index) - '0';
-            index++;
-            empty = false;
-        }
-        if (empty) {
-            return -1;
-        }
-        return value;
-    }
-
-    private boolean isAt(String chars) {
-        return index < spec.length() && chars.indexOf(spec.charAt(index)) >= 0;
-    }
-}
diff --git a/src/org/python/core/stringlib/TextFormatter.java b/src/org/python/core/stringlib/TextFormatter.java
new file mode 100644
--- /dev/null
+++ b/src/org/python/core/stringlib/TextFormatter.java
@@ -0,0 +1,119 @@
+// Copyright (c) Jython Developers
+package org.python.core.stringlib;
+
+import org.python.core.stringlib.InternalFormat.Formatter;
+import org.python.core.stringlib.InternalFormat.Spec;
+
+/**
+ * A class that provides the implementation of <code>str</code> and <code>unicode</code> formatting.
+ * In a limited way, it acts like a StringBuilder to which text is appended, formatted according to
+ * the format specifier supplied at construction. These are ephemeral objects that are not, on
+ * their own, thread safe.
+ */
+public class TextFormatter extends InternalFormat.Formatter {
+
+    /**
+     * Construct the formatter from a client-supplied buffer, to which the result will be appended,
+     * and a specification. Sets {@link #mark} to the end of the buffer.
+     *
+     * @param result destination buffer
+     * @param spec parsed conversion specification
+     */
+    public TextFormatter(StringBuilder result, Spec spec) {
+        super(result, spec);
+    }
+
+    /**
+     * Construct the formatter from a specification, allocating a buffer internally for the result.
+     *
+     * @param spec parsed conversion specification
+     */
+    public TextFormatter(Spec spec) {
+        this(new StringBuilder(), spec);
+    }
+
+    /*
+     * Re-implement the text appends so they return the right type.
+     */
+    @Override
+    public TextFormatter append(char c) {
+        super.append(c);
+        return this;
+    }
+
+    @Override
+    public TextFormatter append(CharSequence csq) {
+        super.append(csq);
+        return this;
+    }
+
+    @Override
+    public TextFormatter append(CharSequence csq, int start, int end) //
+            throws IndexOutOfBoundsException {
+        super.append(csq, start, end);
+        return this;
+    }
+
+    /**
+     * Format the given <code>String</code> into the <code>result</code> buffer. Largely, this is a
+     * matter of copying the value of the argument, but a subtlety arises when the string contains
+     * supplementary (non-BMP) Unicode characters, which are represented as surrogate pairs. The
+     * precision specified in the format relates to a count of Unicode characters (code points), not
+     * Java <code>char</code>s. The method deals with this correctly, essentially by not counting
+     * the high-surrogates in the allowance. The final value of {@link #lenWhole} counts the UTF-16
+     * units added.
+     *
+     * @param value to format
+     * @return this <code>TextFormatter</code> object
+     */
+    public TextFormatter format(String value) {
+
+        // Scratch all instance variables and start = result.length().
+        setStart();
+
+        int p = spec.precision, n = value.length();
+
+        if (Spec.specified(p) && p < n) {
+            // Estimate the space for the converted result (preempt multiple re-allocation)
+            int space = Math.max(spec.width, p);
+            result.ensureCapacity(result.length() + space + (bytes ? 0 : space / 4));
+            /*
+             * A precision p was specified less than the length: we may have to truncate. Note we
+             * compared p with the UTF-16 length, even though it is the code point length that
+             * matters. But the code point length cannot be greater than n.
+             */
+            int count = 0;
+            while (count < p) {
+                // count is the number of UTF-16 chars.
+                char c = value.charAt(count++);
+                result.append(c);
+                // A high-surrogate will always be followed by a low, so doesn't count.
+                if (Character.isHighSurrogate(c) && p < n) {
+                    // Accomplish "not counting" by bumping the limit p, within the array bounds.
+                    p += 1;
+                }
+            }
+            // Record the UTF-16 count as the length in buffer
+            lenWhole = count;
+
+        } else {
+            // We definitely don't need to truncate. Append the whole string.
+            lenWhole = n;
+            result.append(value);
+        }
+
+        return this;
+    }
+
+    // Variant to deal with supplementary characters: other formatters don't produce them.
+    @Override
+    public TextFormatter pad() {
+        // We'll need this many pad characters (if>0). Note Spec.UNDEFINED<0.
+        int n = spec.width - result.codePointCount(mark, result.length());
+        if (n > 0) {
+            pad(mark, n);
+        }
+        return this;
+    }
+
+}
diff --git a/tests/java/org/python/core/StringFormatTest.java b/tests/java/org/python/core/StringFormatTest.java
--- a/tests/java/org/python/core/StringFormatTest.java
+++ b/tests/java/org/python/core/StringFormatTest.java
@@ -1,43 +1,58 @@
 package org.python.core;
 
+import java.math.BigInteger;
+
 import junit.framework.TestCase;
+
 import org.python.core.stringlib.FieldNameIterator;
-import org.python.core.stringlib.InternalFormatSpec;
-import org.python.core.stringlib.InternalFormatSpecParser;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
 import org.python.core.stringlib.MarkupIterator;
+import org.python.core.stringlib.TextFormatter;
+import org.python.core.stringlib.InternalFormat.Spec;
+import org.python.util.PythonInterpreter;
 
 /**
  * Tests for internal bits and pieces of string.format implementation.
  */
 public class StringFormatTest extends TestCase {
+
+    /** Exception-raising seems to need the interpreter to be initialised **/
+    PythonInterpreter interp = new PythonInterpreter();
+
     public void testInternalFormatSpec() {
-        InternalFormatSpec spec = new InternalFormatSpecParser("x").parse();
+        InternalFormat.Spec spec;
+        spec = InternalFormat.fromText("x");
+        assertFalse(Spec.specified(spec.align));
+        assertFalse(Spec.specified(spec.fill));
+        assertFalse(Spec.specified(spec.width));
+        assertFalse(Spec.specified(spec.precision));
         assertEquals('x', spec.type);
 
-        spec = new InternalFormatSpecParser("<x").parse();
+        spec = InternalFormat.fromText("<x");
         assertEquals('<', spec.align);
         assertEquals('x', spec.type);
 
-        spec = new InternalFormatSpecParser("~<x").parse();
-        assertEquals('~', spec.fill_char);
+        spec = InternalFormat.fromText("~<x");
+        assertEquals('~', spec.fill);
         assertEquals('<', spec.align);
         assertEquals('x', spec.type);
 
-        spec = new InternalFormatSpecParser("+x").parse();
+        spec = InternalFormat.fromText("+x");
         assertEquals('+', spec.sign);
         assertEquals('x', spec.type);
 
-        spec = new InternalFormatSpecParser("#x").parse();
+        spec = InternalFormat.fromText("#x");
         assertEquals(true, spec.alternate);
 
-        spec = new InternalFormatSpecParser("0x").parse();
+        spec = InternalFormat.fromText("0x");
         assertEquals('=', spec.align);
-        assertEquals('0', spec.fill_char);
+        assertEquals('0', spec.fill);
 
-        spec = new InternalFormatSpecParser("123x").parse();
+        spec = InternalFormat.fromText("123x");
         assertEquals(123, spec.width);
 
-        spec = new InternalFormatSpecParser("123.456x").parse();
+        spec = InternalFormat.fromText("123.456x");
         assertEquals(123, spec.width);
         assertEquals(456, spec.precision);
 
@@ -45,105 +60,182 @@
 
         assertParseError("123xx", "Invalid conversion specification");
 
-        spec = new InternalFormatSpecParser("").parse();
-        assertEquals(0, spec.type);
+        spec = InternalFormat.fromText("");
+        assertEquals(Spec.NONE, spec.type);
     }
 
     private void assertParseError(String spec, String expected) {
         String error = null;
         try {
-            new InternalFormatSpecParser(spec).parse();
-        } catch (IllegalArgumentException e) {
-            error = e.getMessage();
+            InternalFormat.fromText(spec);
+        } catch (PyException e) {
+            assertEquals(Py.ValueError, e.type);
+            error = e.value.toString();
         }
         assertEquals(expected, error);
     }
 
-    public void testFormatIntOrLong() {
-        InternalFormatSpec spec = new InternalFormatSpec();
-        spec.type = 'd';
-        assertEquals("123", PyInteger.formatIntOrLong(123, spec));
-        spec.type = 'o';
-        assertEquals("173", PyInteger.formatIntOrLong(123, spec));
-        spec.type = 'x';
-        assertEquals("7b", PyInteger.formatIntOrLong(123, spec));
-        spec.type = 'X';
-        assertEquals("7B", PyInteger.formatIntOrLong(123, spec));
-        spec.type = 'b';
-        assertEquals("1111011", PyInteger.formatIntOrLong(123, spec));
+    /**
+     * Test the IntegerFormatter returned by {@link PyInteger#prepareFormatter}. This is based on the
+     * original <code>testFormatIntOrLong</code> which tested <code>PyInteger.formatIntOrLong</code>.
+     */
+    public void testPrepareFormatter() {
+        int v = 123;
+        IntegerFormatter f;
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("d"));
+        assertEquals("123", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("o"));
+        assertEquals("173", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("x"));
+        assertEquals("7b", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("X"));
+        assertEquals("7B", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("b"));
+        assertEquals("1111011", f.format(v).pad().getResult());
 
-        spec.thousands_separators = true;
-        spec.type = 'd';
-        assertEquals("1,234", PyInteger.formatIntOrLong(1234, spec));
-        spec.thousands_separators = false;
+        int v2 = 1234567890;
+        f = PyInteger.prepareFormatter(InternalFormat.fromText(",d"));
+        assertEquals("1,234,567,890", f.format(v2).pad().getResult());
 
-        spec.alternate = true;
-        spec.type = 'o';
-        assertEquals("0o173", PyInteger.formatIntOrLong(123, spec));
-        spec.type = 'X';
-        assertEquals("0X7B", PyInteger.formatIntOrLong(123, spec));
-        spec.alternate = false;
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("#o"));
+        assertEquals("0o173", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("#X"));
+        assertEquals("0X7B", f.format(v).pad().getResult());
 
-        spec.type = 'c';
-        assertEquals("{", PyInteger.formatIntOrLong(123, spec));
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+        assertEquals("{", f.format(v).pad().getResult());
 
-        spec.type = 'd';
-        spec.sign = '+';
-        assertEquals("+123", PyInteger.formatIntOrLong(123, spec));
-        spec.sign = ' ';
-        assertEquals(" 123", PyInteger.formatIntOrLong(123, spec));
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("+d"));
+        assertEquals("+123", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText(" d"));
+        assertEquals(" 123", f.format(v).pad().getResult());
 
-        spec.sign = 0;
-        spec.width = 5;
-        assertEquals("  123", PyInteger.formatIntOrLong(123, spec));
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("5"));
+        assertEquals("  123", f.format(v).pad().getResult());
 
-        spec.align = '^';
-        spec.width = 6;
-        assertEquals(" 123  ", PyInteger.formatIntOrLong(123, spec));
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("^6"));
+        assertEquals(" 123  ", f.format(v).pad().getResult());
 
-        spec.align = '<';
-        spec.width = 5;
-        spec.fill_char = '~';
-        assertEquals("123~~", PyInteger.formatIntOrLong(123, spec));
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("~<5"));
+        assertEquals("123~~", f.format(v).pad().getResult());
 
-        spec.align = '=';
-        spec.width = 6;
-        spec.fill_char = '0';
-        spec.sign = '+';
-        assertEquals("+00123", PyInteger.formatIntOrLong(123, spec));
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("0=+6"));
+        assertEquals("+00123", f.format(v).pad().getResult());
 
-        spec.precision = 1;
-        assertFormatError(123, spec, "Precision not allowed in integer format specifier");
+        assertValueError("0=+6.1", "Precision not allowed in integer format specifier");
+        assertValueError("+c", "Sign not allowed with integer format specifier 'c'");
 
-        spec.precision = -1;
-        spec.sign = '+';
-        spec.type = 'c';
-        assertFormatError(123, spec, "Sign not allowed with integer format specifier 'c'");
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+        f.setBytes(true);
+        assertOverflowError(256, f, "%c arg not in range(0x100)");
+        assertOverflowError(-1, f, "%c arg not in range(0x100)");
+        assertOverflowError(0x110000, f, "%c arg not in range(0x100)");
 
-        spec.sign = 0;
-        assertFormatError(0x11111, spec, "%c arg not in range(0x10000)");
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+        assertOverflowError(0x110000, f, "%c arg not in range(0x110000)");
+        assertOverflowError(-1, f, "%c arg not in range(0x110000)");
+    }
+
+    /**
+     * Test the IntegerFormatter returned by {@link PyInteger#prepareFormatter} with BigInteger values. This is
+     * based on the original <code>testFormatIntOrLong</code> which tested <code>PyInteger.formatIntOrLong</code>.
+     */
+    public void testPrepareFormatterLong() {
+        BigInteger v = BigInteger.valueOf(123);
+        IntegerFormatter f;
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("d"));
+        assertEquals("123", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("o"));
+        assertEquals("173", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("x"));
+        assertEquals("7b", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("X"));
+        assertEquals("7B", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("b"));
+        assertEquals("1111011", f.format(v).pad().getResult());
+
+        BigInteger v2 = BigInteger.valueOf(1234567890);
+        f = PyInteger.prepareFormatter(InternalFormat.fromText(",d"));
+        assertEquals("1,234,567,890", f.format(v2).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("#o"));
+        assertEquals("0o173", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("#X"));
+        assertEquals("0X7B", f.format(v).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+        assertEquals("{", f.format(v).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("+d"));
+        assertEquals("+123", f.format(v).pad().getResult());
+        f = PyInteger.prepareFormatter(InternalFormat.fromText(" d"));
+        assertEquals(" 123", f.format(v).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("5"));
+        assertEquals("  123", f.format(v).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("^6"));
+        assertEquals(" 123  ", f.format(v).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("~<5"));
+        assertEquals("123~~", f.format(v).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("0=+6"));
+        assertEquals("+00123", f.format(v).pad().getResult());
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+        f.setBytes(true);
+        assertOverflowError(BigInteger.valueOf(256), f, "%c arg not in range(0x100)");
+        assertOverflowError(BigInteger.valueOf(-1), f, "%c arg not in range(0x100)");
+        assertOverflowError(BigInteger.valueOf(0x110000), f, "%c arg not in range(0x100)");
+
+        f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+        assertOverflowError(BigInteger.valueOf(0x110000), f, "%c arg not in range(0x110000)");
+        assertOverflowError(BigInteger.valueOf(-1), f, "%c arg not in range(0x110000)");
+    }
+
+    private void assertValueError(String formatSpec, String expected) {
+        try {
+            IntegerFormatter f = PyInteger.prepareFormatter(InternalFormat.fromText(formatSpec));
+            // f.format(123).pad().getResult();
+            fail("ValueError not thrown, expected: " + expected);
+        } catch (PyException pye) {
+            assertEquals(expected, pye.value.toString());
+        }
+    }
+
+    private void assertOverflowError(int v, IntegerFormatter f, String expected) {
+        // Test with Java int for PyInteger
+        try {
+            f.format(v).pad().getResult();
+            fail("OverflowError not thrown, expected: " + expected);
+        } catch (PyException pye) {
+            assertEquals(expected, pye.value.toString());
+        }
+    }
+
+    private void assertOverflowError(BigInteger v, IntegerFormatter f, String expected) {
+        // Test with BigInteger for PyLong
+        try {
+            f.format(v).pad().getResult();
+            fail("OverflowError not thrown, expected: " + expected);
+        } catch (PyException pye) {
+            assertEquals(expected, pye.value.toString());
+        }
     }
 
     public void testFormatString() {
-        InternalFormatSpec spec = new InternalFormatSpec();
-        assertEquals("abc", PyString.formatString("abc", spec));
+        String v = "abc";
+        TextFormatter f;
+        f = PyString.prepareFormatter(InternalFormat.fromText(""));
+        assertEquals("abc", f.format(v).pad().getResult());
 
-        spec.precision = 3;
-        assertEquals("abc", PyString.formatString("abcdef", spec));
+        String v2 = "abcdef";
+        f = PyString.prepareFormatter(InternalFormat.fromText(".3"));
+        assertEquals("abc", f.format(v2).pad().getResult());
 
-        spec.precision = -1;
-        spec.width = 6;
-        assertEquals("abc   ", PyString.formatString("abc", spec));
-    }
-
-    private void assertFormatError(int value, InternalFormatSpec spec, String expected) {
-        String error = null;
-        try {
-            PyInteger.formatIntOrLong(value, spec);
-        } catch (IllegalArgumentException e) {
-            error = e.getMessage();
-        }
-        assertEquals(expected, error);
+        f = PyString.prepareFormatter(InternalFormat.fromText("6"));
+        assertEquals("abc   ", f.format(v).pad().getResult());
     }
 
     public void testMarkupIterator() {

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list