[Jython-checkins] jython (merge default -> default): Merge of formatting work to trunk
jeff.allen
jython-checkins at python.org
Sun Jun 8 14:13:11 CEST 2014
http://hg.python.org/jython/rev/6cee6fef06f0
changeset: 7287:6cee6fef06f0
parent: 7278:9cd9ab75eade
parent: 7286:234d1492dde4
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Jun 08 10:03:49 2014 +0100
summary:
Merge of formatting work to trunk
files:
Lib/test/test_builtin.py | 13 +-
Lib/test/test_format.py | 473 +++--
Lib/test/test_format_jy.py | 69 +-
Lib/test/test_types.py | 122 +-
Lib/test/test_unicode.py | 7 +-
src/org/python/core/PyComplex.java | 112 +-
src/org/python/core/PyFloat.java | 77 +-
src/org/python/core/PyInteger.java | 349 +---
src/org/python/core/PyLong.java | 178 +-
src/org/python/core/PyString.java | 811 +++------
src/org/python/core/__builtin__.java | 20 +-
src/org/python/core/stringlib/FloatFormatter.java | 69 +-
src/org/python/core/stringlib/IntegerFormatter.java | 779 +++++++++
src/org/python/core/stringlib/InternalFormat.java | 360 +++-
src/org/python/core/stringlib/InternalFormatSpec.java | 88 -
src/org/python/core/stringlib/InternalFormatSpecParser.java | 118 -
src/org/python/core/stringlib/TextFormatter.java | 119 +
tests/java/org/python/core/StringFormatTest.java | 264 ++-
18 files changed, 2424 insertions(+), 1604 deletions(-)
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -361,8 +361,7 @@
self.assertEqual(eval('a', g, m), 12)
self.assertRaises(NameError, eval, 'b', g, m)
self.assertEqual(eval('dir()', g, m), list('xyz'))
- if not is_jython: #FIXME #1861
- self.assertEqual(eval('globals()', g, m), g)
+ self.assertEqual(eval('globals()', g, m), g)
self.assertEqual(eval('locals()', g, m), m)
# Jython allows arbitrary mappings for globals
@@ -386,8 +385,7 @@
self.assertEqual(eval('a', g, d), 12)
self.assertRaises(NameError, eval, 'b', g, d)
self.assertEqual(eval('dir()', g, d), list('xyz'))
- if not is_jython: #FIXME #1861
- self.assertEqual(eval('globals()', g, d), g)
+ self.assertEqual(eval('globals()', g, d), g)
self.assertEqual(eval('locals()', g, d), d)
# Verify locals stores (used by list comps)
@@ -1320,7 +1318,6 @@
self.assertRaises(TypeError, round, t)
self.assertRaises(TypeError, round, t, 0)
- @unittest.skipIf(is_jython, "FIXME #1861: not working in Jython")
def test_round_large(self):
# Issue #1869: integral floats should remain unchanged
self.assertEqual(round(5e15-1), 5e15-1)
@@ -1387,7 +1384,6 @@
b = 2
return vars()
- @unittest.skipIf(is_jython, "FIXME #1861: not working in Jython")
def test_vars(self):
self.assertEqual(set(vars()), set(dir()))
import sys
@@ -1491,9 +1487,8 @@
self.assertEqual(format(DerivedFromSimple2(10), 'abcdef'),
'10abcdef')
- if not is_jython: #FIXME #1861 check again when __format__ works better.
- class_test(*classes_new())
- class_test(*classes_classic())
+ class_test(*classes_new())
+ class_test(*classes_classic())
def empty_format_spec(value):
# test that:
diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -1,14 +1,17 @@
-from test.test_support import verbose, have_unicode, TestFailed, is_jython
import sys
+from test.test_support import verbose, have_unicode, TestFailed
+from test.test_support import is_jython
+import test.test_support as test_support
+import unittest
+
+maxsize = test_support.MAX_Py_ssize_t
# test string formatting operator (I am not sure if this is being tested
# elsewhere but, surely, some of the given cases are *not* tested because
# they crash python)
# test on unicode strings as well
-overflowok = 1
-
-def testformat(formatstr, args, output=None):
+def testformat(formatstr, args, output=None, limit=None, overflowok=False):
if verbose:
if output:
print "%s %% %s =? %s ..." %\
@@ -23,231 +26,289 @@
if verbose:
print 'overflow (this is fine)'
else:
- if output and result != output:
+ if output and limit is None and result != output:
if verbose:
print 'no'
- print "%s %% %s == %s != %s" %\
- (repr(formatstr), repr(args), repr(result), repr(output))
+ raise AssertionError("%r %% %r == %r != %r" %
+ (formatstr, args, result, output))
+ # when 'limit' is specified, it determines how many characters
+ # must match exactly; lengths must always match.
+ # ex: limit=5, '12345678' matches '12345___'
+ # (mainly for floating point format tests for which an exact match
+ # can't be guaranteed due to rounding and representation errors)
+ elif output and limit is not None and (
+ len(result)!=len(output) or result[:limit]!=output[:limit]):
+ if verbose:
+ print 'no'
+ print "%s %% %s == %s != %s" % \
+ (repr(formatstr), repr(args), repr(result), repr(output))
else:
if verbose:
print 'yes'
-def testboth(formatstr, *args):
- testformat(formatstr, *args)
+
+def testboth(formatstr, *args, **kwargs):
+ testformat(formatstr, *args, **kwargs)
if have_unicode:
- testformat(unicode(formatstr), *args)
+ testformat(unicode(formatstr), *args, **kwargs)
-testboth("%.1d", (1,), "1")
-testboth("%.*d", (sys.maxint,1)) # expect overflow
-testboth("%.100d", (1,), '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001')
-testboth("%#.117x", (1,), '0x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001')
-testboth("%#.118x", (1,), '0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001')
+class FormatTest(unittest.TestCase):
+ def test_format(self):
+ testboth("%.1d", (1,), "1")
+ testboth("%.*d", (sys.maxint,1), overflowok=True) # expect overflow
+ testboth("%.100d", (1,), '00000000000000000000000000000000000000'
+ '000000000000000000000000000000000000000000000000000000'
+ '00000001', overflowok=True)
+ testboth("%#.117x", (1,), '0x00000000000000000000000000000000000'
+ '000000000000000000000000000000000000000000000000000000'
+ '0000000000000000000000000001',
+ overflowok=True)
+ testboth("%#.118x", (1,), '0x00000000000000000000000000000000000'
+ '000000000000000000000000000000000000000000000000000000'
+ '00000000000000000000000000001',
+ overflowok=True)
-testboth("%f", (1.0,), "1.000000")
-# these are trying to test the limits of the internal magic-number-length
-# formatting buffer, if that number changes then these tests are less
-# effective
-testboth("%#.*g", (109, -1.e+49/3.))
-testboth("%#.*g", (110, -1.e+49/3.))
-testboth("%#.*g", (110, -1.e+100/3.))
+ testboth("%f", (1.0,), "1.000000")
+ # these are trying to test the limits of the internal magic-number-length
+ # formatting buffer, if that number changes then these tests are less
+ # effective
+ testboth("%#.*g", (109, -1.e+49/3.))
+ testboth("%#.*g", (110, -1.e+49/3.))
+ testboth("%#.*g", (110, -1.e+100/3.))
-# test some ridiculously large precision, expect overflow
-testboth('%12.*f', (123456, 1.0))
+ # test some ridiculously large precision, expect overflow
+ # ... Jython remains consistent with the original comment.
+ testboth('%12.*f', (123456, 1.0), overflowok=is_jython)
-# Formatting of long integers. Overflow is not ok
-overflowok = 0
-testboth("%x", 10L, "a")
-testboth("%x", 100000000000L, "174876e800")
-testboth("%o", 10L, "12")
-testboth("%o", 100000000000L, "1351035564000")
-testboth("%d", 10L, "10")
-testboth("%d", 100000000000L, "100000000000")
+ # check for internal overflow validation on length of precision
+ # these tests should no longer cause overflow in Python
+ # 2.7/3.1 and later.
+ testboth("%#.*g", (110, -1.e+100/3.))
+ testboth("%#.*G", (110, -1.e+100/3.))
+ testboth("%#.*f", (110, -1.e+100/3.))
+ testboth("%#.*F", (110, -1.e+100/3.))
-big = 123456789012345678901234567890L
-testboth("%d", big, "123456789012345678901234567890")
-testboth("%d", -big, "-123456789012345678901234567890")
-testboth("%5d", -big, "-123456789012345678901234567890")
-testboth("%31d", -big, "-123456789012345678901234567890")
-testboth("%32d", -big, " -123456789012345678901234567890")
-testboth("%-32d", -big, "-123456789012345678901234567890 ")
-testboth("%032d", -big, "-0123456789012345678901234567890")
-testboth("%-032d", -big, "-123456789012345678901234567890 ")
-testboth("%034d", -big, "-000123456789012345678901234567890")
-testboth("%034d", big, "0000123456789012345678901234567890")
-testboth("%0+34d", big, "+000123456789012345678901234567890")
-testboth("%+34d", big, " +123456789012345678901234567890")
-testboth("%34d", big, " 123456789012345678901234567890")
-testboth("%.2d", big, "123456789012345678901234567890")
-testboth("%.30d", big, "123456789012345678901234567890")
-testboth("%.31d", big, "0123456789012345678901234567890")
-testboth("%32.31d", big, " 0123456789012345678901234567890")
+ # Formatting of long integers. Overflow is not ok
+ testboth("%x", 10L, "a")
+ testboth("%x", 100000000000L, "174876e800")
+ testboth("%o", 10L, "12")
+ testboth("%o", 100000000000L, "1351035564000")
+ testboth("%d", 10L, "10")
+ testboth("%d", 100000000000L, "100000000000")
-big = 0x1234567890abcdef12345L # 21 hex digits
-testboth("%x", big, "1234567890abcdef12345")
-testboth("%x", -big, "-1234567890abcdef12345")
-testboth("%5x", -big, "-1234567890abcdef12345")
-testboth("%22x", -big, "-1234567890abcdef12345")
-testboth("%23x", -big, " -1234567890abcdef12345")
-testboth("%-23x", -big, "-1234567890abcdef12345 ")
-testboth("%023x", -big, "-01234567890abcdef12345")
-testboth("%-023x", -big, "-1234567890abcdef12345 ")
-testboth("%025x", -big, "-0001234567890abcdef12345")
-testboth("%025x", big, "00001234567890abcdef12345")
-testboth("%0+25x", big, "+0001234567890abcdef12345")
-testboth("%+25x", big, " +1234567890abcdef12345")
-testboth("%25x", big, " 1234567890abcdef12345")
-testboth("%.2x", big, "1234567890abcdef12345")
-testboth("%.21x", big, "1234567890abcdef12345")
-testboth("%.22x", big, "01234567890abcdef12345")
-testboth("%23.22x", big, " 01234567890abcdef12345")
-testboth("%-23.22x", big, "01234567890abcdef12345 ")
-testboth("%X", big, "1234567890ABCDEF12345")
-testboth("%#X", big, "0X1234567890ABCDEF12345")
-testboth("%#x", big, "0x1234567890abcdef12345")
-testboth("%#x", -big, "-0x1234567890abcdef12345")
-testboth("%#.23x", -big, "-0x001234567890abcdef12345")
-testboth("%#+.23x", big, "+0x001234567890abcdef12345")
-testboth("%# .23x", big, " 0x001234567890abcdef12345")
-testboth("%#+.23X", big, "+0X001234567890ABCDEF12345")
-testboth("%#-+.23X", big, "+0X001234567890ABCDEF12345")
-testboth("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
-testboth("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
-testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
-# next one gets two leading zeroes from precision, and another from the
-# 0 flag and the width
-testboth("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
-# same, except no 0 flag
-testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+ big = 123456789012345678901234567890L
+ testboth("%d", big, "123456789012345678901234567890")
+ testboth("%d", -big, "-123456789012345678901234567890")
+ testboth("%5d", -big, "-123456789012345678901234567890")
+ testboth("%31d", -big, "-123456789012345678901234567890")
+ testboth("%32d", -big, " -123456789012345678901234567890")
+ testboth("%-32d", -big, "-123456789012345678901234567890 ")
+ testboth("%032d", -big, "-0123456789012345678901234567890")
+ testboth("%-032d", -big, "-123456789012345678901234567890 ")
+ testboth("%034d", -big, "-000123456789012345678901234567890")
+ testboth("%034d", big, "0000123456789012345678901234567890")
+ testboth("%0+34d", big, "+000123456789012345678901234567890")
+ testboth("%+34d", big, " +123456789012345678901234567890")
+ testboth("%34d", big, " 123456789012345678901234567890")
+ testboth("%.2d", big, "123456789012345678901234567890")
+ testboth("%.30d", big, "123456789012345678901234567890")
+ testboth("%.31d", big, "0123456789012345678901234567890")
+ testboth("%32.31d", big, " 0123456789012345678901234567890")
+ testboth("%d", float(big), "123456________________________", 6)
-big = 012345670123456701234567012345670L # 32 octal digits
-testboth("%o", big, "12345670123456701234567012345670")
-testboth("%o", -big, "-12345670123456701234567012345670")
-testboth("%5o", -big, "-12345670123456701234567012345670")
-testboth("%33o", -big, "-12345670123456701234567012345670")
-testboth("%34o", -big, " -12345670123456701234567012345670")
-testboth("%-34o", -big, "-12345670123456701234567012345670 ")
-testboth("%034o", -big, "-012345670123456701234567012345670")
-testboth("%-034o", -big, "-12345670123456701234567012345670 ")
-testboth("%036o", -big, "-00012345670123456701234567012345670")
-testboth("%036o", big, "000012345670123456701234567012345670")
-testboth("%0+36o", big, "+00012345670123456701234567012345670")
-testboth("%+36o", big, " +12345670123456701234567012345670")
-testboth("%36o", big, " 12345670123456701234567012345670")
-testboth("%.2o", big, "12345670123456701234567012345670")
-testboth("%.32o", big, "12345670123456701234567012345670")
-testboth("%.33o", big, "012345670123456701234567012345670")
-testboth("%34.33o", big, " 012345670123456701234567012345670")
-testboth("%-34.33o", big, "012345670123456701234567012345670 ")
-testboth("%o", big, "12345670123456701234567012345670")
-testboth("%#o", big, "012345670123456701234567012345670")
-testboth("%#o", -big, "-012345670123456701234567012345670")
-testboth("%#.34o", -big, "-0012345670123456701234567012345670")
-testboth("%#+.34o", big, "+0012345670123456701234567012345670")
-testboth("%# .34o", big, " 0012345670123456701234567012345670")
-testboth("%#+.34o", big, "+0012345670123456701234567012345670")
-testboth("%#-+.34o", big, "+0012345670123456701234567012345670")
-testboth("%#-+37.34o", big, "+0012345670123456701234567012345670 ")
-testboth("%#+37.34o", big, " +0012345670123456701234567012345670")
-# next one gets one leading zero from precision
-testboth("%.33o", big, "012345670123456701234567012345670")
-# base marker shouldn't change that, since "0" is redundant
-testboth("%#.33o", big, "012345670123456701234567012345670")
-# but reduce precision, and base marker should add a zero
-testboth("%#.32o", big, "012345670123456701234567012345670")
-# one leading zero from precision, and another from "0" flag & width
-testboth("%034.33o", big, "0012345670123456701234567012345670")
-# base marker shouldn't change that
-testboth("%0#34.33o", big, "0012345670123456701234567012345670")
+ big = 0x1234567890abcdef12345L # 21 hex digits
+ testboth("%x", big, "1234567890abcdef12345")
+ testboth("%x", -big, "-1234567890abcdef12345")
+ testboth("%5x", -big, "-1234567890abcdef12345")
+ testboth("%22x", -big, "-1234567890abcdef12345")
+ testboth("%23x", -big, " -1234567890abcdef12345")
+ testboth("%-23x", -big, "-1234567890abcdef12345 ")
+ testboth("%023x", -big, "-01234567890abcdef12345")
+ testboth("%-023x", -big, "-1234567890abcdef12345 ")
+ testboth("%025x", -big, "-0001234567890abcdef12345")
+ testboth("%025x", big, "00001234567890abcdef12345")
+ testboth("%0+25x", big, "+0001234567890abcdef12345")
+ testboth("%+25x", big, " +1234567890abcdef12345")
+ testboth("%25x", big, " 1234567890abcdef12345")
+ testboth("%.2x", big, "1234567890abcdef12345")
+ testboth("%.21x", big, "1234567890abcdef12345")
+ testboth("%.22x", big, "01234567890abcdef12345")
+ testboth("%23.22x", big, " 01234567890abcdef12345")
+ testboth("%-23.22x", big, "01234567890abcdef12345 ")
+ testboth("%X", big, "1234567890ABCDEF12345")
+ testboth("%#X", big, "0X1234567890ABCDEF12345")
+ testboth("%#x", big, "0x1234567890abcdef12345")
+ testboth("%#x", -big, "-0x1234567890abcdef12345")
+ testboth("%#.23x", -big, "-0x001234567890abcdef12345")
+ testboth("%#+.23x", big, "+0x001234567890abcdef12345")
+ testboth("%# .23x", big, " 0x001234567890abcdef12345")
+ testboth("%#+.23X", big, "+0X001234567890ABCDEF12345")
+ testboth("%#-+.23X", big, "+0X001234567890ABCDEF12345")
+ testboth("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
+ testboth("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
+ testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+ # next one gets two leading zeroes from precision, and another from the
+ # 0 flag and the width
+ testboth("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
+ # same, except no 0 flag
+ testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+ testboth("%x", float(big), "123456_______________", 6)
-# Some small ints, in both Python int and long flavors).
-testboth("%d", 42, "42")
-testboth("%d", -42, "-42")
-testboth("%d", 42L, "42")
-testboth("%d", -42L, "-42")
-testboth("%#x", 1, "0x1")
-testboth("%#x", 1L, "0x1")
-testboth("%#X", 1, "0X1")
-testboth("%#X", 1L, "0X1")
-testboth("%#o", 1, "01")
-testboth("%#o", 1L, "01")
-testboth("%#o", 0, "0")
-testboth("%#o", 0L, "0")
-testboth("%o", 0, "0")
-testboth("%o", 0L, "0")
-testboth("%d", 0, "0")
-testboth("%d", 0L, "0")
-testboth("%#x", 0, "0x0")
-testboth("%#x", 0L, "0x0")
-testboth("%#X", 0, "0X0")
-testboth("%#X", 0L, "0X0")
+ big = 012345670123456701234567012345670L # 32 octal digits
+ testboth("%o", big, "12345670123456701234567012345670")
+ testboth("%o", -big, "-12345670123456701234567012345670")
+ testboth("%5o", -big, "-12345670123456701234567012345670")
+ testboth("%33o", -big, "-12345670123456701234567012345670")
+ testboth("%34o", -big, " -12345670123456701234567012345670")
+ testboth("%-34o", -big, "-12345670123456701234567012345670 ")
+ testboth("%034o", -big, "-012345670123456701234567012345670")
+ testboth("%-034o", -big, "-12345670123456701234567012345670 ")
+ testboth("%036o", -big, "-00012345670123456701234567012345670")
+ testboth("%036o", big, "000012345670123456701234567012345670")
+ testboth("%0+36o", big, "+00012345670123456701234567012345670")
+ testboth("%+36o", big, " +12345670123456701234567012345670")
+ testboth("%36o", big, " 12345670123456701234567012345670")
+ testboth("%.2o", big, "12345670123456701234567012345670")
+ testboth("%.32o", big, "12345670123456701234567012345670")
+ testboth("%.33o", big, "012345670123456701234567012345670")
+ testboth("%34.33o", big, " 012345670123456701234567012345670")
+ testboth("%-34.33o", big, "012345670123456701234567012345670 ")
+ testboth("%o", big, "12345670123456701234567012345670")
+ testboth("%#o", big, "012345670123456701234567012345670")
+ testboth("%#o", -big, "-012345670123456701234567012345670")
+ testboth("%#.34o", -big, "-0012345670123456701234567012345670")
+ testboth("%#+.34o", big, "+0012345670123456701234567012345670")
+ testboth("%# .34o", big, " 0012345670123456701234567012345670")
+ testboth("%#+.34o", big, "+0012345670123456701234567012345670")
+ testboth("%#-+.34o", big, "+0012345670123456701234567012345670")
+ testboth("%#-+37.34o", big, "+0012345670123456701234567012345670 ")
+ testboth("%#+37.34o", big, " +0012345670123456701234567012345670")
+ # next one gets one leading zero from precision
+ testboth("%.33o", big, "012345670123456701234567012345670")
+ # base marker shouldn't change that, since "0" is redundant
+ testboth("%#.33o", big, "012345670123456701234567012345670")
+ # but reduce precision, and base marker should add a zero
+ testboth("%#.32o", big, "012345670123456701234567012345670")
+ # one leading zero from precision, and another from "0" flag & width
+ testboth("%034.33o", big, "0012345670123456701234567012345670")
+ # base marker shouldn't change that
+ testboth("%0#34.33o", big, "0012345670123456701234567012345670")
+ testboth("%o", float(big), "123456__________________________", 6)
-testboth("%x", 0x42, "42")
-testboth("%x", -0x42, "-42")
-testboth("%x", 0x42L, "42")
-testboth("%x", -0x42L, "-42")
+ # Some small ints (in both Python int and long flavors).
+ testboth("%d", 42, "42")
+ testboth("%d", -42, "-42")
+ testboth("%d", 42L, "42")
+ testboth("%d", -42L, "-42")
+ testboth("%d", 42.0, "42")
+ testboth("%#x", 1, "0x1")
+ testboth("%#x", 1L, "0x1")
+ testboth("%#X", 1, "0X1")
+ testboth("%#X", 1L, "0X1")
+ testboth("%#x", 1.0, "0x1")
+ testboth("%#o", 1, "01")
+ testboth("%#o", 1L, "01")
+ testboth("%#o", 0, "0")
+ testboth("%#o", 0L, "0")
+ testboth("%o", 0, "0")
+ testboth("%o", 0L, "0")
+ testboth("%d", 0, "0")
+ testboth("%d", 0L, "0")
+ testboth("%#x", 0, "0x0")
+ testboth("%#x", 0L, "0x0")
+ testboth("%#X", 0, "0X0")
+ testboth("%#X", 0L, "0X0")
-testboth("%o", 042, "42")
-testboth("%o", -042, "-42")
-testboth("%o", 042L, "42")
-testboth("%o", -042L, "-42")
+ testboth("%x", 0x42, "42")
+ testboth("%x", -0x42, "-42")
+ testboth("%x", 0x42L, "42")
+ testboth("%x", -0x42L, "-42")
+ testboth("%x", float(0x42), "42")
-# Test exception for unknown format characters
-if verbose:
- print 'Testing exceptions'
+ testboth("%o", 042, "42")
+ testboth("%o", -042, "-42")
+ testboth("%o", 042L, "42")
+ testboth("%o", -042L, "-42")
+ testboth("%o", float(042), "42")
-def test_exc(formatstr, args, exception, excmsg):
- try:
- testformat(formatstr, args)
- except exception, exc:
- if str(exc) == excmsg:
- if verbose:
- print "yes"
- else:
- if verbose: print 'no'
- print 'Unexpected ', exception, ':', repr(str(exc))
- except:
- if verbose: print 'no'
- print 'Unexpected exception'
- raise
- else:
- raise TestFailed, 'did not get expected exception: %s' % excmsg
+ # alternate float formatting
+ testformat('%g', 1.1, '1.1')
+ testformat('%#g', 1.1, '1.10000')
-test_exc('abc %a', 1, ValueError,
- "unsupported format character 'a' (0x61) at index 5")
-if have_unicode:
- test_exc(unicode('abc %\u3000','raw-unicode-escape'), 1, ValueError,
- "unsupported format character '?' (0x3000) at index 5")
+ # Regression test for http://bugs.python.org/issue15516.
+ class IntFails(object):
+ def __int__(self):
+ raise TestFailed
+ def __long__(self):
+ return 0
-test_exc('%d', '1', TypeError, "int argument required")
-test_exc('%g', '1', TypeError, "float argument required")
-test_exc('no format', '1', TypeError,
- "not all arguments converted during string formatting")
-test_exc('no format', u'1', TypeError,
- "not all arguments converted during string formatting")
-test_exc(u'no format', '1', TypeError,
- "not all arguments converted during string formatting")
-test_exc(u'no format', u'1', TypeError,
- "not all arguments converted during string formatting")
+ fst = IntFails()
+ testformat("%x", fst, '0')
-# for Jython, do we really need to support this? what's the use case
-# here! the problem in a nutshell is that it changes __oct__, __hex__
-# such that they don't return a string, but later on the exception
-# will occur anyway. so seems like a lot of work for no value
+ # Test exception for unknown format characters
+ if verbose:
+ print 'Testing exceptions'
-# class Foobar(long):
-# def __oct__(self):
-# # Returning a non-string should not blow up.
-# return self + 1
+ def test_exc(formatstr, args, exception, excmsg):
+ try:
+ testformat(formatstr, args)
+ except exception, exc:
+ if str(exc) == excmsg:
+ if verbose:
+ print "yes"
+ else:
+ if verbose: print 'no'
+ print 'Unexpected ', exception, ':', repr(str(exc))
+ except:
+ if verbose: print 'no'
+ print 'Unexpected exception'
+ raise
+ else:
+ raise TestFailed, 'did not get expected exception: %s' % excmsg
-#test_exc('%o', Foobar(), TypeError,
-# "expected string or Unicode object, long found")
+ test_exc('abc %a', 1, ValueError,
+ "unsupported format character 'a' (0x61) at index 5")
+ if have_unicode:
+ test_exc(unicode('abc %\u3000','raw-unicode-escape'), 1, ValueError,
+ "unsupported format character '?' (0x3000) at index 5")
-if sys.maxint == 2**31-1 and not is_jython:
- # crashes 2.2.1 and earlier:
- try:
- "%*d"%(sys.maxint, -127)
- except MemoryError:
- pass
- else:
- raise TestFailed, '"%*d"%(sys.maxint, -127) should fail'
+ test_exc('%d', '1', TypeError, "%d format: a number is required, not str")
+ test_exc('%g', '1', TypeError, "float argument required, not str")
+ test_exc('no format', '1', TypeError,
+ "not all arguments converted during string formatting")
+ test_exc('no format', u'1', TypeError,
+ "not all arguments converted during string formatting")
+ test_exc(u'no format', '1', TypeError,
+ "not all arguments converted during string formatting")
+ test_exc(u'no format', u'1', TypeError,
+ "not all arguments converted during string formatting")
+
+ # For Jython, we do not support this use case. The test aims at the
+ # use of __oct__ within %o formatting of long. (Or __hex__ within %x
+ # formatting?) CPython does this for long (not int) and has dropped
+ # the idea again by v3. Jython's %o and %x are likewise direct.
+ class Foobar(long):
+ def __oct__(self):
+ # Returning a non-string should not blow up.
+ return self + 1
+
+ if not is_jython :
+ test_exc('%o', Foobar(), TypeError,
+ "expected string or Unicode object, long found")
+
+ if maxsize == 2**31-1:
+ # crashes 2.2.1 and earlier:
+ try:
+ "%*d"%(maxsize, -127)
+ except MemoryError:
+ pass
+ else:
+ raise TestFailed, '"%*d"%(maxsize, -127) should fail'
+
+def test_main():
+ test_support.run_unittest(FormatTest)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/Lib/test/test_format_jy.py b/Lib/test/test_format_jy.py
--- a/Lib/test/test_format_jy.py
+++ b/Lib/test/test_format_jy.py
@@ -5,8 +5,9 @@
from test import test_support
import unittest
-class FormatTestCase(unittest.TestCase):
- # Tests that %d converts values for custom classes implementing __int__
+class FormatSubclass(unittest.TestCase):
+ # Custom __int__ and __float__ should be respected by %-formatting
+
def test_int_conversion_support(self):
class Foo(object):
def __init__(self, x): self.x = x
@@ -21,9 +22,71 @@
def __float__(self): return self. x
self.assertEqual('1.0', '%.1f' % Foo(1.0))
+class FormatUnicodeBase(unittest.TestCase):
+
+ # Test padding non-BMP result
+ def test_pad_string(self):
+ self.padcheck(u"architect")
+ self.padcheck(u'a\U00010001cde')
+
+class FormatUnicodeClassic(FormatUnicodeBase):
+ # Check using %-formatting
+
+ def padcheck(self, s):
+ self.assertEqual(10, len('%10.4s' % s))
+ self.assertEqual(u' '*6 + s[0:4], '%10.4s' % s)
+ self.assertEqual(u' '*6 + s[0:4], '% 10.4s' % s)
+ self.assertEqual(u' '*6 + s[0:4], '%010.4s' % s)
+ self.assertEqual(s[0:3] + u' '*5, '%-8.3s' % s)
+
+class FormatUnicodeModern(FormatUnicodeBase):
+ # Check using __format__
+
+ def padcheck(self, s):
+ self.assertEqual(10, len(format(s, '10.4s')))
+ self.assertEqual(s[0:3] + u' '*7, format(s, '10.3s'))
+ self.assertEqual(s[0:3] + u'~'*7, format(s, '~<10.3s'))
+ self.assertEqual(s[0:3] + u'~'*7, format(s, '~<10.3'))
+ self.assertEqual(u' '*6 + s[0:4], format(s, '>10.4s'))
+ self.assertEqual(u'*'*6 + s[0:4], format(s, '*>10.4s'))
+ self.assertEqual(u'*'*6 + s[0:4], format(s, '*>10.4'))
+
+
+class FormatMisc(unittest.TestCase):
+ # Odd tests Jython used to fail
+
+ def test_mixtures(self) :
+ # Check formatting to a common buffer in PyString
+ result = 'The cube of 0.5 -0.866j is -1 to 0.01%.'
+ self.assertEqual(result, 'The %s of %.3g -%.3fj is -%d to %.2f%%.' %
+ ('cube', 0.5, 0.866, 1, 0.01))
+ self.assertEqual(result, 'The %s of %.3g %.3fj is %d to %.2f%%.' %
+ ('cube', 0.5, -0.866, -1, 0.01))
+ self.assertEqual(result, 'The%5s of%4.3g%7.3fj is%3d to%5.2f%%.' %
+ ('cube', 0.5, -0.866, -1, 0.01))
+ self.assertEqual(result, 'The %-5.4sof %-4.3g%.3fj is %-3dto %.4g%%.' %
+ ('cubensis', 0.5, -0.866, -1, 0.01))
+
+ def test_percent_padded(self) :
+ self.assertEqual('%hello', '%%%s' % 'hello')
+ self.assertEqual(u' %hello', '%6%%s' % u'hello')
+ self.assertEqual(u'% hello', u'%-6%%s' % 'hello')
+
+ self.assertEqual(' %', '%6%' % ())
+ self.assertEqual(' %', '%06%' % ())
+ self.assertEqual(' %', '%*%' % 4)
+ self.assertEqual('% ', '%-6%' % ())
+ self.assertEqual('% ', '%-06%' % ())
+ self.assertEqual('% ', '%*%' % -4)
+
def test_main():
- test_support.run_unittest(FormatTestCase)
+ test_support.run_unittest(
+ FormatSubclass,
+ FormatUnicodeClassic,
+ FormatUnicodeModern,
+ FormatMisc,
+ )
if __name__ == '__main__':
test_main()
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -1,7 +1,9 @@
# Python test set -- part 6, built-in types
from test.test_support import run_unittest, have_unicode, run_with_locale, \
- check_py3k_warnings, is_jython
+ check_py3k_warnings
+from test.test_support import is_jython
+
import unittest
import sys
import locale
@@ -90,7 +92,6 @@
if float(1) == 1.0 and float(-1) == -1.0 and float(0) == 0.0: pass
else: self.fail('float() does not work properly')
- @unittest.skipIf(is_jython, "FIXME: not working")
def test_float_to_string(self):
def test(f, result):
self.assertEqual(f.__format__('e'), result)
@@ -407,8 +408,7 @@
test(-123456, "#012X", '-0X00001E240')
# issue 5782, commas with no specifier type
- #FIXME: not working.
- #test(1234, '010,', '00,001,234')
+ test(1234, '010,', '00,001,234')
# make sure these are errors
@@ -424,21 +424,19 @@
self.assertRaises(ValueError, 3 .__format__, ",c")
# ensure that only int and float type specifiers work
- #FIXME: not working.
- #for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
- # [chr(x) for x in range(ord('A'), ord('Z')+1)]):
- # if not format_spec in 'bcdoxXeEfFgGn%':
- # self.assertRaises(ValueError, 0 .__format__, format_spec)
- # self.assertRaises(ValueError, 1 .__format__, format_spec)
- # self.assertRaises(ValueError, (-1) .__format__, format_spec)
+ for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
+ [chr(x) for x in range(ord('A'), ord('Z')+1)]):
+ if not format_spec in 'bcdoxXeEfFgGn%':
+ self.assertRaises(ValueError, 0 .__format__, format_spec)
+ self.assertRaises(ValueError, 1 .__format__, format_spec)
+ self.assertRaises(ValueError, (-1) .__format__, format_spec)
# ensure that float type specifiers work; format converts
# the int to a float
- #FIXME: not working.
- #for format_spec in 'eEfFgG%':
- # for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
- # self.assertEqual(value.__format__(format_spec),
- # float(value).__format__(format_spec))
+ for format_spec in 'eEfFgG%':
+ for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
+ self.assertEqual(value.__format__(format_spec),
+ float(value).__format__(format_spec))
# Issue 6902
test(123456, "0<20", '12345600000000000000')
@@ -534,23 +532,20 @@
self.assertRaises(ValueError, 1L .__format__, "#+5x")
self.assertRaises(ValueError, 1L .__format__, "+5#x")
- #FIXME: this section broken in Jython.
# ensure that only int and float type specifiers work
- #for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
- # [chr(x) for x in range(ord('A'), ord('Z')+1)]):
- # if not format_spec in 'bcdoxXeEfFgGn%':
- # self.assertRaises(ValueError, 0L .__format__, format_spec)
- # self.assertRaises(ValueError, 1L .__format__, format_spec)
- # self.assertRaises(ValueError, (-1L) .__format__, format_spec)
+ for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
+ [chr(x) for x in range(ord('A'), ord('Z')+1)]):
+ if not format_spec in 'bcdoxXeEfFgGn%':
+ self.assertRaises(ValueError, 0L .__format__, format_spec)
+ self.assertRaises(ValueError, 1L .__format__, format_spec)
+ self.assertRaises(ValueError, (-1L) .__format__, format_spec)
# ensure that float type specifiers work; format converts
# the long to a float
-
- #FIXME: this section broken in Jython.
- #for format_spec in 'eEfFgG%':
- # for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
- # self.assertEqual(value.__format__(format_spec),
- # float(value).__format__(format_spec))
+ for format_spec in 'eEfFgG%':
+ for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
+ self.assertEqual(value.__format__(format_spec),
+ float(value).__format__(format_spec))
# Issue 6902
test(123456L, "0<20", '12345600000000000000')
test(123456L, "1<20", '12345611111111111111')
@@ -562,7 +557,6 @@
test(123456L, "1=20", '11111111111111123456')
test(123456L, "*=20", '**************123456')
- @unittest.skipIf(is_jython, "FIXME: not working")
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
def test_float__format__locale(self):
# test locale support for __format__ code 'n'
@@ -576,13 +570,12 @@
def test_int__format__locale(self):
# test locale support for __format__ code 'n' for integers
- #FIXME: not working in Jython.
- #x = 123456789012345678901234567890
- #for i in range(0, 30):
- # self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
+ x = 123456789012345678901234567890
+ for i in range(0, 30):
+ self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
- # # move to the next integer to test
- # x = x // 10
+ # move to the next integer to test
+ x = x // 10
rfmt = ">20n"
lfmt = "<20n"
@@ -661,30 +654,25 @@
# a totaly empty format specifier means something else.
# So, just use a sign flag
test(1e200, '+g', '+1e+200')
- #FIXME: not working.
- #test(1e200, '+', '+1e+200')
+ test(1e200, '+', '+1e+200')
test(1.1e200, '+g', '+1.1e+200')
- #FIXME: not working.
- ##test(1.1e200, '+', '+1.1e+200')
+ test(1.1e200, '+', '+1.1e+200')
test(1.1e200, '+g', '+1.1e+200')
- #FIXME: not working.
- #test(1.1e200, '+', '+1.1e+200')
+ test(1.1e200, '+', '+1.1e+200')
# 0 padding
test(1234., '010f', '1234.000000')
test(1234., '011f', '1234.000000')
test(1234., '012f', '01234.000000')
test(-1234., '011f', '-1234.000000')
- #FIXME: not working.
- #test(-1234., '012f', '-1234.000000')
- #test(-1234., '013f', '-01234.000000')
- #test(-1234.12341234, '013f', '-01234.123412')
- #test(-123456.12341234, '011.2f', '-0123456.12')
+ test(-1234., '012f', '-1234.000000')
+ test(-1234., '013f', '-01234.000000')
+ test(-1234.12341234, '013f', '-01234.123412')
+ test(-123456.12341234, '011.2f', '-0123456.12')
# issue 5782, commas with no specifier type
- #FIXME: not working.
- #test(1.2, '010,.2', '0,000,001.2')
+ test(1.2, '010,.2', '0,000,001.2')
# 0 padding with commas
test(1234., '011,f', '1,234.000000')
@@ -692,13 +680,12 @@
test(1234., '013,f', '01,234.000000')
test(-1234., '012,f', '-1,234.000000')
test(-1234., '013,f', '-1,234.000000')
- #FIXME: not working.
- #test(-1234., '014,f', '-01,234.000000')
- #test(-12345., '015,f', '-012,345.000000')
- #test(-123456., '016,f', '-0,123,456.000000')
- #test(-123456., '017,f', '-0,123,456.000000')
- #test(-123456.12341234, '017,f', '-0,123,456.123412')
- #test(-123456.12341234, '013,.2f', '-0,123,456.12')
+ test(-1234., '014,f', '-01,234.000000')
+ test(-12345., '015,f', '-012,345.000000')
+ test(-123456., '016,f', '-0,123,456.000000')
+ test(-123456., '017,f', '-0,123,456.000000')
+ test(-123456.12341234, '017,f', '-0,123,456.123412')
+ test(-123456.12341234, '013,.2f', '-0,123,456.12')
# % formatting
test(-1.0, '%', '-100.000000%')
@@ -721,23 +708,20 @@
self.assertRaises(ValueError, format, -1e-100, format_spec)
# Alternate formatting is not supported
- #FIXME: not working.
- ##self.assertRaises(ValueError, format, 0.0, '#')
+ self.assertRaises(ValueError, format, 0.0, '#')
self.assertRaises(ValueError, format, 0.0, '#20f')
# Issue 6902
- #FIXME: not working.
- #test(12345.6, "0<20", '12345.60000000000000')
- #test(12345.6, "1<20", '12345.61111111111111')
- #test(12345.6, "*<20", '12345.6*************')
- #test(12345.6, "0>20", '000000000000012345.6')
- #test(12345.6, "1>20", '111111111111112345.6')
- #test(12345.6, "*>20", '*************12345.6')
- #test(12345.6, "0=20", '000000000000012345.6')
- #test(12345.6, "1=20", '111111111111112345.6')
- #test(12345.6, "*=20", '*************12345.6')
+ test(12345.6, "0<20", '12345.60000000000000')
+ test(12345.6, "1<20", '12345.61111111111111')
+ test(12345.6, "*<20", '12345.6*************')
+ test(12345.6, "0>20", '000000000000012345.6')
+ test(12345.6, "1>20", '111111111111112345.6')
+ test(12345.6, "*>20", '*************12345.6')
+ test(12345.6, "0=20", '000000000000012345.6')
+ test(12345.6, "1=20", '111111111111112345.6')
+ test(12345.6, "*=20", '*************12345.6')
- @unittest.skipIf(is_jython, "FIXME: not working")
def test_format_spec_errors(self):
# int, float, and string all share the same format spec
# mini-language parser.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -357,13 +357,12 @@
self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000, 3.50')
self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000, 3.57')
self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
- if not sys.platform.startswith('java'):
- self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
+ self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')
- # self.assertEqual(u'%c' % 0x1234, u'\u1234')
- # self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
+ self.assertEqual(u'%c' % 0x1234, u'\u1234')
+ self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
# formatting jobs delegated from the string implementation:
self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
diff --git a/src/org/python/core/PyComplex.java b/src/org/python/core/PyComplex.java
--- a/src/org/python/core/PyComplex.java
+++ b/src/org/python/core/PyComplex.java
@@ -4,6 +4,7 @@
import org.python.core.stringlib.FloatFormatter;
import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
import org.python.core.stringlib.InternalFormat.Spec;
import org.python.expose.ExposedGet;
import org.python.expose.ExposedMethod;
@@ -174,7 +175,9 @@
* @return formatted value
*/
private String formatComplex(Spec spec) {
- FloatFormatter f = new FloatFormatter(spec, 2, 3); // Two elements + "(j)".length
+ int size = 2 * FloatFormatter.size(spec) + 3; // 2 floats + "(j)"
+ FloatFormatter f = new FloatFormatter(new StringBuilder(size), spec);
+ f.setBytes(true);
// Even in r-format, complex strips *all* the trailing zeros.
f.setMinFracDigits(0);
if (Double.doubleToLongBits(real) == 0L) {
@@ -816,42 +819,87 @@
@ExposedMethod(doc = BuiltinDocs.complex___format___doc)
final PyObject complex___format__(PyObject formatSpec) {
- if (!(formatSpec instanceof PyString)) {
- throw Py.TypeError("__format__ requires str or unicode");
+
+ // Parse the specification
+ Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+
+ // fromText will have thrown if formatSpec is not a PyString (including PyUnicode)
+ PyString formatSpecStr = (PyString)formatSpec;
+ String result;
+
+ // Validate the specification and detect the special case for none-format
+ switch (checkSpecification(spec)) {
+
+ case 0: // None-format
+ // In none-format, we take the default type and precision from __str__.
+ spec = spec.withDefaults(SPEC_STR);
+ // And then we use the __str__ mechanism to get parentheses or real 0 elision.
+ result = formatComplex(spec);
+ break;
+
+ case 1: // Floating-point formats
+ // In any other format, defaults are those commonly used for numeric formats.
+ spec = spec.withDefaults(Spec.NUMERIC);
+ int size = 2 * FloatFormatter.size(spec) + 1; // 2 floats + "j"
+ FloatFormatter f = new FloatFormatter(new StringBuilder(size), spec);
+ f.setBytes(!(formatSpecStr instanceof PyUnicode));
+ // Convert both parts as per specification
+ f.format(real).format(imag, "+").append('j');
+ result = f.pad().getResult();
+ break;
+
+ default: // The type code was not recognised
+ throw Formatter.unknownFormat(spec.type, "complex");
}
- PyString formatSpecStr = (PyString)formatSpec;
- String result;
- try {
- String specString = formatSpecStr.getString();
- Spec spec = InternalFormat.fromText(specString);
- if (spec.type != Spec.NONE && "efgEFGn%".indexOf(spec.type) < 0) {
- throw FloatFormatter.unknownFormat(spec.type, "complex");
- } else if (spec.alternate) {
- throw FloatFormatter.alternateFormNotAllowed("complex");
- } else if (spec.fill == '0') {
- throw FloatFormatter.zeroPaddingNotAllowed("complex");
- } else if (spec.align == '=') {
- throw FloatFormatter.alignmentNotAllowed('=', "complex");
- } else {
- if (spec.type == Spec.NONE) {
- // In none-format, we take the default type and precision from __str__.
- spec = spec.withDefaults(SPEC_STR);
- // And then we use the __str__ mechanism to get parentheses or real 0 elision.
- result = formatComplex(spec);
+ // Wrap the result in the same type as the format string
+ return formatSpecStr.createInstance(result);
+ }
+
+ /**
+ * Validate a parsed specification, for <code>PyComplex</code>, returning 0 if it is a valid
+ * none-format specification, 1 if it is a valid float specification, and some other value if it
+ * not a valid type. If it has any other faults (e.g. alternate form was specified) the method
+ * raises a descriptive exception.
+ *
+ * @param spec a parsed PEP-3101 format specification.
+ * @return 0, 1, or other value for none-format, a float format, or incorrect type.
+ * @throws PyException(ValueError) if the specification is faulty.
+ */
+ @SuppressWarnings("fallthrough")
+ private static int checkSpecification(Spec spec) {
+
+ // Slight differences between format types
+ switch (spec.type) {
+
+ case 'n':
+ if (spec.grouping) {
+ throw Formatter.notAllowed("Grouping", "complex", spec.type);
+ }
+ // Fall through
+
+ case Spec.NONE:
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'E':
+ case 'F':
+ case 'G':
+ // Check for disallowed parts of the specification
+ if (spec.alternate) {
+ throw FloatFormatter.alternateFormNotAllowed("complex");
+ } else if (spec.fill == '0') {
+ throw FloatFormatter.zeroPaddingNotAllowed("complex");
+ } else if (spec.align == '=') {
+ throw FloatFormatter.alignmentNotAllowed('=', "complex");
} else {
- // In any other format, defaults are those commonly used for numeric formats.
- spec = spec.withDefaults(Spec.NUMERIC);
- FloatFormatter f = new FloatFormatter(spec, 2, 1);// 2 floats + "j"
- // Convert both parts as per specification
- f.format(real).format(imag, "+").append('j');
- result = f.pad().getResult();
+ return (spec.type == Spec.NONE) ? 0 : 1;
}
- }
- } catch (IllegalArgumentException e) {
- throw Py.ValueError(e.getMessage()); // XXX Can this be reached?
+
+ default:
+ // spec.type is invalid for complex
+ return 2;
}
- return formatSpecStr.createInstance(result);
}
@Override
diff --git a/src/org/python/core/PyFloat.java b/src/org/python/core/PyFloat.java
--- a/src/org/python/core/PyFloat.java
+++ b/src/org/python/core/PyFloat.java
@@ -7,6 +7,7 @@
import org.python.core.stringlib.FloatFormatter;
import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
import org.python.core.stringlib.InternalFormat.Spec;
import org.python.expose.ExposedClassMethod;
import org.python.expose.ExposedGet;
@@ -911,32 +912,66 @@
@ExposedMethod(doc = BuiltinDocs.float___format___doc)
final PyObject float___format__(PyObject formatSpec) {
- if (!(formatSpec instanceof PyString)) {
- throw Py.TypeError("__format__ requires str or unicode");
+
+ // Parse the specification
+ Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+
+ // Get a formatter for the specification
+ FloatFormatter f = prepareFormatter(spec);
+
+ if (f != null) {
+ // Bytes mode if formatSpec argument is not unicode.
+ f.setBytes(!(formatSpec instanceof PyUnicode));
+ // Convert as per specification.
+ f.format(value);
+ // Return a result that has the same type (str or unicode) as the formatSpec argument.
+ return f.pad().getPyResult();
+
+ } else {
+ // The type code was not recognised in prepareFormatter
+ throw Formatter.unknownFormat(spec.type, "float");
}
+ }
- PyString formatSpecStr = (PyString)formatSpec;
- String result;
- try {
- String specString = formatSpecStr.getString();
- Spec spec = InternalFormat.fromText(specString);
- if (spec.type!=Spec.NONE && "efgEFGn%".indexOf(spec.type) < 0) {
- throw FloatFormatter.unknownFormat(spec.type, "float");
- } else if (spec.alternate) {
- throw FloatFormatter.alternateFormNotAllowed("float");
- } else {
+ /**
+ * Common code for PyFloat, {@link PyInteger} and {@link PyLong} to prepare a {@link FloatFormatter} from a parsed specification.
+ * The object returned has format method {@link FloatFormatter#format(double)}.
+ *
+ * @param spec a parsed PEP-3101 format specification.
+ * @return a formatter ready to use, or null if the type is not a floating point format type.
+ * @throws PyException(ValueError) if the specification is faulty.
+ */
+ @SuppressWarnings("fallthrough")
+ static FloatFormatter prepareFormatter(Spec spec) {
+
+ // Slight differences between format types
+ switch (spec.type) {
+
+ case 'n':
+ if (spec.grouping) {
+ throw Formatter.notAllowed("Grouping", "float", spec.type);
+ }
+ // Fall through
+
+ case Spec.NONE:
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'E':
+ case 'F':
+ case 'G':
+ case '%':
+ // Check for disallowed parts of the specification
+ if (spec.alternate) {
+ throw FloatFormatter.alternateFormNotAllowed("float");
+ }
// spec may be incomplete. The defaults are those commonly used for numeric formats.
spec = spec.withDefaults(Spec.NUMERIC);
- // Get a formatter for the spec.
- FloatFormatter f = new FloatFormatter(spec);
- // Convert as per specification.
- f.format(value).pad();
- result = f.getResult();
- }
- } catch (IllegalArgumentException e) {
- throw Py.ValueError(e.getMessage()); // XXX Can this be reached?
+ return new FloatFormatter(spec);
+
+ default:
+ return null;
}
- return formatSpecStr.createInstance(result);
}
@ExposedMethod(doc = BuiltinDocs.float_as_integer_ratio_doc)
diff --git a/src/org/python/core/PyInteger.java b/src/org/python/core/PyInteger.java
--- a/src/org/python/core/PyInteger.java
+++ b/src/org/python/core/PyInteger.java
@@ -4,11 +4,12 @@
import java.io.Serializable;
import java.math.BigInteger;
-import java.text.NumberFormat;
-import java.util.Locale;
-import org.python.core.stringlib.InternalFormatSpec;
-import org.python.core.stringlib.InternalFormatSpecParser;
+import org.python.core.stringlib.FloatFormatter;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
+import org.python.core.stringlib.InternalFormat.Spec;
import org.python.expose.ExposedGet;
import org.python.expose.ExposedMethod;
import org.python.expose.ExposedNew;
@@ -953,11 +954,8 @@
@ExposedMethod(doc = BuiltinDocs.int___oct___doc)
final PyString int___oct__() {
- if (getValue() < 0) {
- return new PyString("-0" + Integer.toString(getValue() * -1, 8));
- } else {
- return new PyString("0" + Integer.toString(getValue(), 8));
- }
+ // Use the prepared format specifier for octal.
+ return formatImpl(IntegerFormatter.OCT);
}
@Override
@@ -967,11 +965,21 @@
@ExposedMethod(doc = BuiltinDocs.int___hex___doc)
final PyString int___hex__() {
- if (getValue() < 0) {
- return new PyString("-0x" + Integer.toString(getValue() * -1, 16));
- } else {
- return new PyString("0x" + Integer.toString(getValue(), 16));
- }
+ // Use the prepared format specifier for hexadecimal.
+ return formatImpl(IntegerFormatter.HEX);
+ }
+
+ /**
+ * Common code used by the number-base conversion methods __oct__ and __hex__.
+ *
+ * @param spec prepared format-specifier.
+ * @return converted value of this object
+ */
+ private PyString formatImpl(Spec spec) {
+ // Traditional formatter (%-format) because #o means "-0123" not "-0o123".
+ IntegerFormatter f = new IntegerFormatter.Traditional(spec);
+ f.format(value);
+ return new PyString(f.getResult());
}
@ExposedMethod(doc = BuiltinDocs.int___getnewargs___doc)
@@ -1015,256 +1023,87 @@
@ExposedMethod(doc = BuiltinDocs.int___format___doc)
final PyObject int___format__(PyObject formatSpec) {
- return formatImpl(getValue(), formatSpec);
- }
- static PyObject formatImpl(Object value, PyObject formatSpec) {
- if (!(formatSpec instanceof PyString)) {
- throw Py.TypeError("__format__ requires str or unicode");
+ // Parse the specification
+ Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+ InternalFormat.Formatter f;
+
+ // Try to make an integer formatter from the specification
+ IntegerFormatter fi = PyInteger.prepareFormatter(spec);
+ if (fi != null) {
+ // Bytes mode if formatSpec argument is not unicode.
+ fi.setBytes(!(formatSpec instanceof PyUnicode));
+ // Convert as per specification.
+ fi.format(value);
+ f = fi;
+
+ } else {
+ // Try to make a float formatter from the specification
+ FloatFormatter ff = PyFloat.prepareFormatter(spec);
+ if (ff != null) {
+ // Bytes mode if formatSpec argument is not unicode.
+ ff.setBytes(!(formatSpec instanceof PyUnicode));
+ // Convert as per specification.
+ ff.format(value);
+ f = ff;
+
+ } else {
+ // The type code was not recognised in either prepareFormatter
+ throw Formatter.unknownFormat(spec.type, "integer");
+ }
}
- PyString formatSpecStr = (PyString)formatSpec;
- String result;
- try {
- String specString = formatSpecStr.getString();
- InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
- result = formatIntOrLong(value, spec);
- } catch (IllegalArgumentException e) {
- throw Py.ValueError(e.getMessage());
- }
- return formatSpecStr.createInstance(result);
+ // Return a result that has the same type (str or unicode) as the formatSpec argument.
+ return f.pad().getPyResult();
}
/**
- * Formats an integer or long number according to a PEP-3101 format specification.
+ * Common code for PyInteger and PyLong to prepare an IntegerFormatter. This object has an
+ * overloaded format method {@link IntegerFormatter#format(int)} and
+ * {@link IntegerFormatter#format(BigInteger)} to support the two types.
*
- * @param value Integer or BigInteger object specifying the value to format.
- * @param spec parsed PEP-3101 format specification.
- * @return result of the formatting.
+ * @param spec a parsed PEP-3101 format specification.
+ * @return a formatter ready to use, or null if the type is not an integer format type.
+ * @throws PyException(ValueError) if the specification is faulty.
*/
- public static String formatIntOrLong(Object value, InternalFormatSpec spec) {
- if (spec.precision != -1) {
- throw new IllegalArgumentException("Precision not allowed in integer format specifier");
+ @SuppressWarnings("fallthrough")
+ static IntegerFormatter prepareFormatter(Spec spec) throws PyException {
+
+ // Slight differences between format types
+ switch (spec.type) {
+ case 'c':
+ // Character data: specific prohibitions.
+ if (Spec.specified(spec.sign)) {
+ throw IntegerFormatter.signNotAllowed("integer", spec.type);
+ } else if (spec.alternate) {
+ throw IntegerFormatter.alternateFormNotAllowed("integer", spec.type);
+ }
+ // Fall through
+
+ case 'x':
+ case 'X':
+ case 'o':
+ case 'b':
+ case 'n':
+ if (spec.grouping) {
+ throw IntegerFormatter.notAllowed("Grouping", "integer", spec.type);
+ }
+ // Fall through
+
+ case Spec.NONE:
+ case 'd':
+ // Check for disallowed parts of the specification
+ if (Spec.specified(spec.precision)) {
+ throw IntegerFormatter.precisionNotAllowed("integer");
+ }
+ // spec may be incomplete. The defaults are those commonly used for numeric formats.
+ spec = spec.withDefaults(Spec.NUMERIC);
+ // Get a formatter for the spec.
+ return new IntegerFormatter(spec);
+
+ default:
+ return null;
}
-
- int sign;
- if (value instanceof Integer) {
- int intValue = (Integer)value;
- sign = intValue < 0 ? -1 : intValue == 0 ? 0 : 1;
- } else {
- sign = ((BigInteger)value).signum();
- }
-
- String strValue;
- String strPrefix = "";
- String strSign = "";
-
- if (spec.type == 'c') {
- if (spec.sign != '\0') {
- throw new IllegalArgumentException("Sign not allowed with integer format "
- + "specifier 'c'");
- }
- if (value instanceof Integer) {
- int intValue = (Integer)value;
- if (intValue > 0xffff) {
- throw new IllegalArgumentException("%c arg not in range(0x10000)");
- }
- strValue = Character.toString((char)intValue);
- } else {
- BigInteger bigInt = (BigInteger)value;
- if (bigInt.intValue() > 0xffff || bigInt.bitCount() > 16) {
- throw new IllegalArgumentException("%c arg not in range(0x10000)");
- }
- strValue = Character.toString((char)bigInt.intValue());
- }
- } else {
- int radix = 10;
- if (spec.type == 'o') {
- radix = 8;
- } else if (spec.type == 'x' || spec.type == 'X') {
- radix = 16;
- } else if (spec.type == 'b') {
- radix = 2;
- }
-
- if (spec.type == 'n') {
- strValue = NumberFormat.getNumberInstance().format(value);
- } else if (spec.thousands_separators) {
- NumberFormat format = NumberFormat.getNumberInstance(Locale.US);
- format.setGroupingUsed(true);
- strValue = format.format(value);
- } else if (value instanceof BigInteger) {
- switch (radix) {
- case 2:
- strValue = toBinString((BigInteger)value);
- break;
- case 8:
- strValue = toOctString((BigInteger)value);
- break;
- case 16:
- strValue = toHexString((BigInteger)value);
- break;
- default:
- // General case (v.slow in known implementations up to Java 7).
- strValue = ((BigInteger)value).toString(radix);
- break;
- }
- } else {
- strValue = Integer.toString((Integer)value, radix);
- }
-
- if (spec.alternate) {
- switch (radix) {
- case 2:
- strPrefix = "0b";
- break;
- case 8:
- strPrefix = "0o";
- break;
- case 16:
- strPrefix = "0x";
- break;
- }
-
- if (sign < 0) {
- assert (strValue.startsWith("-"));
- strSign = "-";
- strValue = strValue.substring(1);
- }
- }
-
- if (spec.type == 'X') {
- strPrefix = strPrefix.toUpperCase();
- strValue = strValue.toUpperCase();
- }
-
- if (sign >= 0) {
- switch (spec.sign) {
- case '+':
- case ' ':
- strSign = Character.toString(spec.sign);
- break;
- }
- }
- }
-
- if (spec.align == '=' && (spec.sign == '-' || spec.sign == '+' || spec.sign == ' ')) {
- assert (strSign.length() == 1);
- return strSign + strPrefix + spec.pad(strValue, '>', 1 + strPrefix.length());
- }
-
- if (spec.fill_char == 0) {
- return spec.pad(strSign + strPrefix + strValue, '>', 0);
- }
-
- return strSign + strPrefix + spec.pad(strValue, '>', strSign.length() + strPrefix.length());
- }
-
- /**
- * A more efficient algorithm for generating a hexadecimal representation of a byte array.
- * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
- * consequently, is implemented using expensive mathematical operations.
- *
- * @param value the value to generate a hexadecimal string from
- * @return the hexadecimal representation of value, with "-" sign prepended if necessary
- */
- static final String toHexString(BigInteger value) {
- int signum = value.signum();
-
- // obvious shortcut
- if (signum == 0) {
- return "0";
- }
-
- // we want to work in absolute numeric value (negative sign is added afterward)
- byte[] input = value.abs().toByteArray();
- StringBuilder sb = new StringBuilder(input.length * 2);
-
- int b;
- for (int i = 0; i < input.length; i++) {
- b = input[i] & 0xFF;
- sb.append(LOOKUP.charAt(b >> 4));
- sb.append(LOOKUP.charAt(b & 0x0F));
- }
-
- // before returning the char array as string, remove leading zeroes, but not the last one
- String result = sb.toString().replaceFirst("^0+(?!$)", "");
- return signum < 0 ? "-" + result : result;
- }
-
- /**
- * A more efficient algorithm for generating an octal representation of a byte array.
- * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
- * consequently, is implemented using expensive mathematical operations.
- *
- * @param value the value to generate an octal string from
- * @return the octal representation of value, with "-" sign prepended if necessary
- */
- static final String toOctString(BigInteger value) {
- int signum = value.signum();
-
- // obvious shortcut
- if (signum == 0) {
- return "0";
- }
-
- byte[] input = value.abs().toByteArray();
- if (input.length < 3) {
- return value.toString(8);
- }
-
- StringBuilder sb = new StringBuilder(input.length * 3);
-
- // working backwards, three bytes at a time
- int threebytes;
- int trip1, trip2, trip3; // most, middle, and least significant bytes in the triplet
- for (int i = input.length - 1; i >= 0; i -= 3) {
- trip3 = input[i] & 0xFF;
- trip2 = ((i - 1) >= 0) ? (input[i - 1] & 0xFF) : 0x00;
- trip1 = ((i - 2) >= 0) ? (input[i - 2] & 0xFF) : 0x00;
- threebytes = trip3 | (trip2 << 8) | (trip1 << 16);
-
- // convert the three-byte value into an eight-character octal string
- for (int j = 0; j < 8; j++) {
- sb.append(LOOKUP.charAt((threebytes >> (j * 3)) & 0x000007));
- }
- }
-
- String result = sb.reverse().toString().replaceFirst("^0+(?!%)", "");
- return signum < 0 ? "-" + result : result;
- }
-
- /**
- * A more efficient algorithm for generating a binary representation of a byte array.
- * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
- * consequently, is implemented using expensive mathematical operations.
- *
- * @param value the value to generate a binary string from
- * @return the binary representation of value, with "-" sign prepended if necessary
- */
- static final String toBinString(BigInteger value) {
- int signum = value.signum();
-
- // obvious shortcut
- if (signum == 0) {
- return "0";
- }
-
- // we want to work in absolute numeric value (negative sign is added afterward)
- byte[] input = value.abs().toByteArray();
- StringBuilder sb = new StringBuilder(value.bitCount());
-
- int b;
- for (int i = 0; i < input.length; i++) {
- b = input[i] & 0xFF;
- for (int bit = 7; bit >= 0; bit--) {
- sb.append(((b >> bit) & 0x1) > 0 ? "1" : "0");
- }
- }
-
- // before returning the char array as string, remove leading zeroes, but not the last one
- String result = sb.toString().replaceFirst("^0+(?!$)", "");
- return signum < 0 ? "-" + result : result;
}
@Override
diff --git a/src/org/python/core/PyLong.java b/src/org/python/core/PyLong.java
--- a/src/org/python/core/PyLong.java
+++ b/src/org/python/core/PyLong.java
@@ -1,13 +1,17 @@
-/*
- * Copyright (c) Corporation for National Research Initiatives
- * Copyright (c) Jython Developers
- */
+// Copyright (c) Corporation for National Research Initiatives
+// Copyright (c) Jython Developers
+
package org.python.core;
import java.io.Serializable;
import java.math.BigDecimal;
import java.math.BigInteger;
+import org.python.core.stringlib.FloatFormatter;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
+import org.python.core.stringlib.InternalFormat.Spec;
import org.python.expose.ExposedGet;
import org.python.expose.ExposedMethod;
import org.python.expose.ExposedNew;
@@ -24,8 +28,8 @@
public static final BigInteger MIN_LONG = BigInteger.valueOf(Long.MIN_VALUE);
public static final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE);
- public static final BigInteger MAX_ULONG =
- BigInteger.valueOf(1).shiftLeft(64).subtract(BigInteger.valueOf(1));
+ public static final BigInteger MAX_ULONG = BigInteger.valueOf(1).shiftLeft(64)
+ .subtract(BigInteger.valueOf(1));
/** @deprecated Use MIN_INT instead. */
@Deprecated
@@ -66,7 +70,7 @@
@ExposedNew
public static PyObject long___new__(PyNewWrapper new_, boolean init, PyType subtype,
- PyObject[] args, String[] keywords) {
+ PyObject[] args, String[] keywords) {
if (new_.for_type != subtype) {
return longSubtypeNew(new_, init, subtype, args, keywords);
}
@@ -74,7 +78,7 @@
ArgParser ap = new ArgParser("long", args, keywords, new String[] {"x", "base"}, 0);
PyObject x = ap.getPyObject(0, null);
if (x != null && x.getJavaProxy() instanceof BigInteger) {
- return new PyLong((BigInteger) x.getJavaProxy());
+ return new PyLong((BigInteger)x.getJavaProxy());
}
int base = ap.getInt(1, -909);
@@ -87,7 +91,7 @@
if (!(x instanceof PyString)) {
throw Py.TypeError("long: can't convert non-string with explicit base");
}
- return ((PyString) x).atol(base);
+ return ((PyString)x).atol(base);
}
/**
@@ -108,8 +112,9 @@
if (!pye2.match(Py.AttributeError)) {
throw pye2;
}
- throw Py.TypeError(
- String.format("long() argument must be a string or a number, not '%.200s'", x.getType().fastGetName()));
+ throw Py.TypeError(String.format(
+ "long() argument must be a string or a number, not '%.200s'", x.getType()
+ .fastGetName()));
}
}
}
@@ -123,7 +128,7 @@
PyObject i = integral.invoke("__int__");
if (!(i instanceof PyInteger) && !(i instanceof PyLong)) {
throw Py.TypeError(String.format("__trunc__ returned non-Integral (type %.200s)",
- integral.getType().fastGetName()));
+ integral.getType().fastGetName()));
}
return i;
}
@@ -133,24 +138,22 @@
/**
* Wimpy, slow approach to new calls for subtypes of long.
*
- * First creates a regular long from whatever arguments we got, then allocates a
- * subtype instance and initializes it from the regular long. The regular long is then
- * thrown away.
+ * First creates a regular long from whatever arguments we got, then allocates a subtype
+ * instance and initializes it from the regular long. The regular long is then thrown away.
*/
private static PyObject longSubtypeNew(PyNewWrapper new_, boolean init, PyType subtype,
- PyObject[] args, String[] keywords) {
+ PyObject[] args, String[] keywords) {
PyObject tmp = long___new__(new_, init, TYPE, args, keywords);
if (tmp instanceof PyInteger) {
- int intValue = ((PyInteger) tmp).getValue();
+ int intValue = ((PyInteger)tmp).getValue();
return new PyLongDerived(subtype, BigInteger.valueOf(intValue));
} else {
- return new PyLongDerived(subtype, ((PyLong) tmp).getValue());
+ return new PyLongDerived(subtype, ((PyLong)tmp).getValue());
}
}
/**
- * Convert a double to BigInteger, raising an OverflowError if
- * infinite.
+ * Convert a double to BigInteger, raising an OverflowError if infinite.
*/
private static BigInteger toBigInteger(double value) {
if (Double.isInfinite(value)) {
@@ -249,7 +252,7 @@
}
public double scaledDoubleValue(int[] exp) {
- return scaledDoubleValue(getValue(),exp);
+ return scaledDoubleValue(getValue(), exp);
}
public long getLong(long min, long max) {
@@ -273,14 +276,14 @@
@Override
public int asInt(int index) {
- return (int) getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
- "long int too large to convert to int");
+ return (int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
+ "long int too large to convert to int");
}
@Override
public int asInt() {
- return (int) getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
- "long int too large to convert to int");
+ return (int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE,
+ "long int too large to convert to int");
}
@Override
@@ -292,13 +295,13 @@
public Object __tojava__(Class<?> c) {
try {
if (c == Byte.TYPE || c == Byte.class) {
- return new Byte((byte) getLong(Byte.MIN_VALUE, Byte.MAX_VALUE));
+ return new Byte((byte)getLong(Byte.MIN_VALUE, Byte.MAX_VALUE));
}
if (c == Short.TYPE || c == Short.class) {
- return new Short((short) getLong(Short.MIN_VALUE, Short.MAX_VALUE));
+ return new Short((short)getLong(Short.MIN_VALUE, Short.MAX_VALUE));
}
if (c == Integer.TYPE || c == Integer.class) {
- return new Integer((int) getLong(Integer.MIN_VALUE, Integer.MAX_VALUE));
+ return new Integer((int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE));
}
if (c == Long.TYPE || c == Long.class) {
return new Long(getLong(Long.MIN_VALUE, Long.MAX_VALUE));
@@ -307,7 +310,7 @@
return __float__().__tojava__(c);
}
if (c == BigInteger.class || c == Number.class || c == Object.class
- || c == Serializable.class) {
+ || c == Serializable.class) {
return getValue();
}
} catch (PyException e) {
@@ -340,14 +343,14 @@
}
/**
- * Coercion logic for long. Implemented as a final method to avoid
- * invocation of virtual methods from the exposed coerce.
+ * Coercion logic for long. Implemented as a final method to avoid invocation of virtual methods
+ * from the exposed coerce.
*/
final Object long___coerce_ex__(PyObject other) {
if (other instanceof PyLong) {
return other;
} else if (other instanceof PyInteger) {
- return Py.newLong(((PyInteger) other).getValue());
+ return Py.newLong(((PyInteger)other).getValue());
} else {
return Py.None;
}
@@ -359,9 +362,9 @@
private static final BigInteger coerce(PyObject other) {
if (other instanceof PyLong) {
- return ((PyLong) other).getValue();
+ return ((PyLong)other).getValue();
} else if (other instanceof PyInteger) {
- return BigInteger.valueOf(((PyInteger) other).getValue());
+ return BigInteger.valueOf(((PyInteger)other).getValue());
} else {
throw Py.TypeError("xxx");
}
@@ -421,7 +424,7 @@
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___mul___doc)
final PyObject long___mul__(PyObject right) {
if (right instanceof PySequence) {
- return ((PySequence) right).repeat(coerceInt(this));
+ return ((PySequence)right).repeat(coerceInt(this));
}
if (!canCoerce(right)) {
@@ -438,7 +441,7 @@
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rmul___doc)
final PyObject long___rmul__(PyObject left) {
if (left instanceof PySequence) {
- return ((PySequence) left).repeat(coerceInt(this));
+ return ((PySequence)left).repeat(coerceInt(this));
}
if (!canCoerce(left)) {
return null;
@@ -479,7 +482,7 @@
if (Options.division_warning > 0) {
Py.warning(Py.DeprecationWarning, "classic long division");
}
- return Py.newLong(divide( getValue(), coerce(right)));
+ return Py.newLong(divide(getValue(), coerce(right)));
}
@Override
@@ -508,7 +511,7 @@
if (!canCoerce(right)) {
return null;
}
- return Py.newLong(divide( getValue(), coerce(right)));
+ return Py.newLong(divide(getValue(), coerce(right)));
}
@Override
@@ -564,7 +567,7 @@
if (!canCoerce(right)) {
return null;
}
- return true_divide( this.getValue(), coerce(right));
+ return true_divide(this.getValue(), coerce(right));
}
@Override
@@ -595,7 +598,7 @@
return null;
}
BigInteger rightv = coerce(right);
- return Py.newLong(modulo(getValue(),rightv, divide(getValue(),rightv)));
+ return Py.newLong(modulo(getValue(), rightv, divide(getValue(), rightv)));
}
@Override
@@ -624,8 +627,8 @@
}
BigInteger rightv = coerce(right);
- BigInteger xdivy = divide(getValue(),rightv);
- return new PyTuple(Py.newLong(xdivy), Py.newLong(modulo(getValue(),rightv, xdivy)));
+ BigInteger xdivy = divide(getValue(), rightv);
+ return new PyTuple(Py.newLong(xdivy), Py.newLong(modulo(getValue(), rightv, xdivy)));
}
@Override
@@ -650,7 +653,7 @@
}
@ExposedMethod(type = MethodType.BINARY, defaults = {"null"},
- doc = BuiltinDocs.long___pow___doc)
+ doc = BuiltinDocs.long___pow___doc)
final PyObject long___pow__(PyObject right, PyObject modulo) {
if (!canCoerce(right)) {
return null;
@@ -659,7 +662,7 @@
if (modulo != null && !canCoerce(right)) {
return null;
}
- return _pow( getValue(), coerce(right), modulo, this, right);
+ return _pow(getValue(), coerce(right), modulo, this, right);
}
@Override
@@ -676,7 +679,7 @@
}
public static PyObject _pow(BigInteger value, BigInteger y, PyObject modulo, PyObject left,
- PyObject right) {
+ PyObject right) {
if (y.compareTo(BigInteger.ZERO) < 0) {
if (value.compareTo(BigInteger.ZERO) != 0) {
return left.__float__().__pow__(right, modulo);
@@ -700,32 +703,32 @@
}
if (z.compareTo(BigInteger.valueOf(0)) <= 0) {
- // Handle negative modulo's specially
- /*if (z.compareTo(BigInteger.valueOf(0)) == 0) {
- throw Py.ValueError("pow(x, y, z) with z == 0");
- }*/
+ // Handle negative modulo specially
+ // if (z.compareTo(BigInteger.valueOf(0)) == 0) {
+ // throw Py.ValueError("pow(x, y, z) with z == 0");
+ // }
y = value.modPow(y, z.negate());
if (y.compareTo(BigInteger.valueOf(0)) > 0) {
return Py.newLong(z.add(y));
} else {
return Py.newLong(y);
}
- //return __pow__(right).__mod__(modulo);
+ // return __pow__(right).__mod__(modulo);
} else {
// XXX: 1.1 no longer supported so review this.
// This is buggy in SUN's jdk1.1.5
// Extra __mod__ improves things slightly
return Py.newLong(value.modPow(y, z));
- //return __pow__(right).__mod__(modulo);
+ // return __pow__(right).__mod__(modulo);
}
}
}
private static final int coerceInt(PyObject other) {
if (other instanceof PyLong) {
- return ((PyLong) other).asInt();
+ return ((PyLong)other).asInt();
} else if (other instanceof PyInteger) {
- return ((PyInteger) other).getValue();
+ return ((PyInteger)other).getValue();
} else {
throw Py.TypeError("xxx");
}
@@ -915,7 +918,8 @@
@ExposedMethod(doc = BuiltinDocs.long___int___doc)
final PyObject long___int__() {
- if (getValue().compareTo(PyInteger.MAX_INT) <= 0 && getValue().compareTo(PyInteger.MIN_INT) >= 0) {
+ if (getValue().compareTo(PyInteger.MAX_INT) <= 0
+ && getValue().compareTo(PyInteger.MIN_INT) >= 0) {
return Py.newInteger(getValue().intValue());
}
return long___long__();
@@ -977,14 +981,8 @@
@ExposedMethod(doc = BuiltinDocs.long___oct___doc)
final PyString long___oct__() {
- String s = PyInteger.toOctString(getValue());
- if (s.startsWith("-")) {
- return new PyString("-0" + s.substring(1, s.length()) + "L");
- } else if (s.startsWith("0")) {
- return new PyString(s + "L");
- } else {
- return new PyString("0" + s + "L");
- }
+ // Use the prepared format specifier for octal.
+ return formatImpl(IntegerFormatter.OCT);
}
@Override
@@ -994,12 +992,21 @@
@ExposedMethod(doc = BuiltinDocs.long___hex___doc)
final PyString long___hex__() {
- String s = PyInteger.toHexString(getValue());
- if (s.startsWith("-")) {
- return new PyString("-0x" + s.substring(1, s.length()) + "L");
- } else {
- return new PyString("0x" + s + "L");
- }
+ // Use the prepared format specifier for hexadecimal.
+ return formatImpl(IntegerFormatter.HEX);
+ }
+
+ /**
+ * Common code used by the number-base conversion methods __oct__ and __hex__.
+ *
+ * @param spec prepared format-specifier.
+ * @return converted value of this object
+ */
+ private PyString formatImpl(Spec spec) {
+ // Traditional formatter (%-format) because #o means "-0123" not "-0o123".
+ IntegerFormatter f = new IntegerFormatter.Traditional(spec);
+ f.format(value).append('L');
+ return new PyString(f.getResult());
}
@ExposedMethod(doc = BuiltinDocs.long___str___doc)
@@ -1058,7 +1065,38 @@
@ExposedMethod(doc = BuiltinDocs.long___format___doc)
final PyObject long___format__(PyObject formatSpec) {
- return PyInteger.formatImpl(getValue(), formatSpec);
+
+ // Parse the specification
+ Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+ InternalFormat.Formatter f;
+
+ // Try to make an integer formatter from the specification
+ IntegerFormatter fi = PyInteger.prepareFormatter(spec);
+ if (fi != null) {
+ // Bytes mode if formatSpec argument is not unicode.
+ fi.setBytes(!(formatSpec instanceof PyUnicode));
+ // Convert as per specification.
+ fi.format(value);
+ f = fi;
+
+ } else {
+ // Try to make a float formatter from the specification
+ FloatFormatter ff = PyFloat.prepareFormatter(spec);
+ if (ff != null) {
+ // Bytes mode if formatSpec argument is not unicode.
+ ff.setBytes(!(formatSpec instanceof PyUnicode));
+ // Convert as per specification.
+ ff.format(value.doubleValue());
+ f = ff;
+
+ } else {
+ // The type code was not recognised in either prepareFormatter
+ throw Formatter.unknownFormat(spec.type, "integer");
+ }
+ }
+
+ // Return a result that has the same type (str or unicode) as the formatSpec argument.
+ return f.pad().getPyResult();
}
@Override
@@ -1076,7 +1114,7 @@
}
return tooLow ? Integer.MIN_VALUE : Integer.MAX_VALUE;
}
- return (int) getValue().longValue();
+ return (int)getValue().longValue();
}
@Override
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -9,10 +9,12 @@
import org.python.core.buffer.SimpleStringBuffer;
import org.python.core.stringlib.FieldNameIterator;
import org.python.core.stringlib.FloatFormatter;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
import org.python.core.stringlib.InternalFormat.Spec;
-import org.python.core.stringlib.InternalFormatSpec;
-import org.python.core.stringlib.InternalFormatSpecParser;
import org.python.core.stringlib.MarkupIterator;
+import org.python.core.stringlib.TextFormatter;
import org.python.core.util.StringUtil;
import org.python.expose.ExposedMethod;
import org.python.expose.ExposedNew;
@@ -3897,50 +3899,68 @@
@ExposedMethod(doc = BuiltinDocs.str___format___doc)
final PyObject str___format__(PyObject formatSpec) {
- if (!(formatSpec instanceof PyString)) {
- throw Py.TypeError("__format__ requires str or unicode");
+
+ // Parse the specification
+ Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+
+ // Get a formatter for the specification
+ TextFormatter f = prepareFormatter(spec);
+ if (f == null) {
+ // The type code was not recognised
+ throw Formatter.unknownFormat(spec.type, "string");
}
- PyString formatSpecStr = (PyString)formatSpec;
- String result;
- try {
- String specString = formatSpecStr.getString();
- InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
- result = formatString(getString(), spec);
- } catch (IllegalArgumentException e) {
- throw Py.ValueError(e.getMessage());
+ // Bytes mode if neither this nor formatSpec argument is Unicode.
+ boolean unicode = this instanceof PyUnicode || formatSpec instanceof PyUnicode;
+ f.setBytes(!unicode);
+
+ // Convert as per specification.
+ f.format(getString());
+
+ // Return a result whose type (str or unicode) follows the bytes mode chosen above.
+ return f.pad().getPyResult();
+ }
+
+ /**
+ * Common code for {@link PyString} and {@link PyUnicode} to prepare a {@link TextFormatter}
+ * from a parsed specification. The object returned has format method
+ * {@link TextFormatter#format(String)} that treats its argument as UTF-16 encoded unicode (not
+ * just <code>char</code>s). That method will format its argument ( <code>str</code> or
+ * <code>unicode</code>) according to the PEP 3101 formatting specification supplied here. This
+ * would be used during <code>text.__format__(".5s")</code> or
+ * <code>"{:.5s}".format(text)</code> where <code>text</code> is this Python string.
+ *
+ * @param spec a parsed PEP-3101 format specification.
+ * @return a formatter ready to use, or null if the type is not a string format type.
+ * @throws PyException(ValueError) if the specification is faulty.
+ */
+ @SuppressWarnings("fallthrough")
+ static TextFormatter prepareFormatter(Spec spec) throws PyException {
+ // Slight differences between format types
+ switch (spec.type) {
+
+ case Spec.NONE:
+ case 's':
+ // Check for disallowed parts of the specification
+ if (spec.grouping) {
+ throw Formatter.notAllowed("Grouping", "string", spec.type);
+ } else if (Spec.specified(spec.sign)) {
+ throw Formatter.signNotAllowed("string", '\0');
+ } else if (spec.alternate) {
+ throw Formatter.alternateFormNotAllowed("string");
+ } else if (spec.align == '=') {
+ throw Formatter.alignmentNotAllowed('=', "string");
+ }
+ // spec may be incomplete. The defaults are those commonly used for string formats.
+ spec = spec.withDefaults(Spec.STRING);
+ // Get a formatter for the specification
+ return new TextFormatter(spec);
+
+ default:
+ // The type code was not recognised
+ return null;
}
- return formatSpecStr.createInstance(result);
- }
-
- /**
- * Format the given text according to a parsed PEP 3101 formatting specification, as during
- * <code>text.__format__(format_spec)</code> or <code>"{:s}".format(text)</code> where
- * <code>text</code> is a Python string.
- *
- * @param text to format
- * @param spec the parsed PEP 3101 formatting specification
- * @return the result of the formatting
- */
- public static String formatString(String text, InternalFormatSpec spec) {
- if (spec.sign != '\0') {
- throw new IllegalArgumentException("Sign not allowed in string format specifier");
- }
- if (spec.alternate) {
- throw new IllegalArgumentException(
- "Alternate form (#) not allowed in string format specifier");
- }
- if (spec.align == '=') {
- throw new IllegalArgumentException(
- "'=' alignment not allowed in string format specifier");
- }
- if (spec.precision >= 0 && text.length() > spec.precision) {
- text = text.substring(0, spec.precision);
- }
- return spec.pad(text, '<', 0);
- }
-
- /* arguments' conversion helper */
+ }
@Override
public String asString(int index) throws PyObject.ConversionException {
@@ -4005,10 +4025,6 @@
String format;
/** Where the output is built. */
StringBuilder buffer;
- /** Remembers that the value currently converted is negative */
- boolean negative;
- /** Precision from format specification. */
- int precision;
/**
* Index into args of argument currently being worked, or special values indicating -1: a single
* item that has not yet been used, -2: a single item that has already been used, -3: a mapping.
@@ -4017,7 +4033,7 @@
/** Arguments supplied to {@link #format(PyObject)} method. */
PyObject args;
/** Indicate a <code>PyUnicode</code> result is expected. */
- boolean unicodeCoercion;
+ boolean needUnicode;
final char pop() {
try {
@@ -4053,7 +4069,7 @@
public StringFormatter(String format, boolean unicodeCoercion) {
index = 0;
this.format = format;
- this.unicodeCoercion = unicodeCoercion;
+ this.needUnicode = unicodeCoercion;
buffer = new StringBuilder(format.length() + 100);
}
@@ -4106,211 +4122,129 @@
}
}
- private void checkPrecision(String type) {
- if (precision > 250) {
- // A magic number. Larger than in CPython.
- throw Py.OverflowError("formatted " + type + " is too long (precision too long?)");
- }
-
- }
-
/**
- * Format the argument interpreted as a long, using the argument's <code>__str__</code>,
- * <code>__oct__</code>, or <code>__hex__</code> method according to <code>type</code>. If v is
- * being treated as signed, the sign of v is transferred to {@link #negative} and the absolute
- * value is converted. The <code>altFlag</code> argument controls the appearance of a "0x" or
- * "0X" prefix in the hex case, or a "0" prefix in the octal case. The hexadecimal case, the
- * case of characters and digits will match the type ('x' meaning lowercase, 'X' meaning
- * uppercase).
+ * Return the argument as either a {@link PyInteger} or a {@link PyLong} according to its
+ * <code>__int__</code> method, or its <code>__long__</code> method. If the argument has neither
+ * method, or both raise an exception, we return the argument itself. The caller must check the
+ * return type.
*
* @param arg to convert
- * @param type one of 'o' for octal, 'x' or 'X' for hex, anything else calls
- * <code>arg.__str__</code>.
- * @param altFlag if true there will be a prefix
- * @return converted value as <code>String</code>
+ * @return PyInteger or PyLong if possible
*/
- private String formatLong(PyObject arg, char type, boolean altFlag) {
- // Convert using the appropriate type
- // XXX Results in behaviour divergent from CPython when any of the methods is overridden.
- PyString argAsString;
- switch (type) {
- case 'o':
- argAsString = arg.__oct__();
- break;
- case 'x':
- case 'X':
- argAsString = arg.__hex__();
- break;
- default:
- argAsString = arg.__str__();
- break;
- }
-
- checkPrecision("long");
- String s = argAsString.toString();
- int end = s.length();
- int ptr = 0;
-
- // In the hex case, the __hex__ return starts 0x
- // XXX (we assume, perhaps falsely)
- int numnondigits = 0;
- if (type == 'x' || type == 'X') {
- numnondigits = 2;
- }
-
- // Strip a "long" indicator
- if (s.endsWith("L")) {
- end--;
- }
-
- // Strip a possible sign to member negative
- negative = s.charAt(0) == '-';
- if (negative) {
- ptr++;
- }
-
- // The formatted number is s[ptr:end] and starts with numnondigits non-digits.
- int numdigits = end - numnondigits - ptr;
- if (!altFlag) {
- // We should have no "base tag" '0' or "0x" on the front.
- switch (type) {
- case 'o':
- // Strip the '0'
- if (numdigits > 1) {
- ++ptr;
- --numdigits;
- }
- break;
- case 'x':
- case 'X':
- // Strip the "0x"
- ptr += 2;
- numnondigits -= 2;
- break;
- }
- }
-
- // If necessary, add leading zeros to the numerical digits part.
- if (precision > numdigits) {
- // Recompose the formatted number in this buffer
- StringBuilder buf = new StringBuilder();
- // The base indicator prefix
- for (int i = 0; i < numnondigits; ++i) {
- buf.append(s.charAt(ptr++));
- }
- // The extra zeros
- for (int i = 0; i < precision - numdigits; i++) {
- buf.append('0');
- }
- // The previously known digits
- for (int i = 0; i < numdigits; i++) {
- buf.append(s.charAt(ptr++));
- }
- s = buf.toString();
- } else if (end < s.length() || ptr > 0) {
- // It's only necessary to extract the formatted number from s
- s = s.substring(ptr, end);
- }
-
- // And finally, deal with the case, so it matches x or X.
- switch (type) {
- case 'X':
- s = s.toUpperCase();
- break;
- }
- return s;
- }
-
- /**
- * Formats arg as an integer, with the specified radix. The integer value is obtained from the
- * result of <code>arg.__int__()</code>. <code>type</code> and <code>altFlag</code> are passed
- * to {@link #formatLong(PyObject, char, boolean)} in case the result is a PyLong.
- *
- * @param arg to convert
- * @param radix in which to express <code>arg</code>
- * @param unsigned true if required to interpret a 32-bit integer as unsigned ('u' legacy?).
- * @param type of conversion ('d', 'o', 'x', or 'X')
- * @param altFlag '#' present in format (causes "0x" prefix in hex, and '0' prefix in octal)
- * @return string form of the value
- */
- private String formatInteger(PyObject arg, int radix, boolean unsigned, char type,
- boolean altFlag) {
- PyObject argAsInt;
+ private PyObject asNumber(PyObject arg) {
if (arg instanceof PyInteger || arg instanceof PyLong) {
- argAsInt = arg;
+ // arg is already acceptable
+ return arg;
+
} else {
- // use __int__ to get an int (or long)
- if (arg instanceof PyFloat) {
- // safe to call __int__:
- argAsInt = arg.__int__();
+ // use __int__ or __long__ to get an int (or long)
+ if (arg.getClass() == PyFloat.class) {
+ // A common case where it is safe to return arg.__int__()
+ return arg.__int__();
+
} else {
- // We can't simply call arg.__int__() because PyString implements
- // it without exposing it to python (i.e, str instances has no
- // __int__ attribute). So, we would support strings as arguments
- // for %d format, which is forbidden by CPython tests (on
- // test_format.py).
+ /*
+ * In general, we can't simply call arg.__int__() because PyString implements it
+ * without exposing it to python (str has no __int__). This would make str
+ * acceptable to integer format specifiers, which is forbidden by CPython tests
+ * (test_format.py). PyString implements __int__ perhaps only to help the int
+ * constructor. Maybe that was a bad idea?
+ */
try {
- argAsInt = arg.__getattr__("__int__").__call__();
+ // Result is the result of arg.__int__() if that works
+ return arg.__getattr__("__int__").__call__();
} catch (PyException e) {
- // XXX: Swallow custom AttributeError throws from __int__ methods
- // No better alternative for the moment
- if (e.match(Py.AttributeError)) {
- throw Py.TypeError("int argument required");
- }
- throw e;
+ // Swallow the exception
+ }
+
+ // Try again with arg.__long__()
+ try {
+ // Result is the result of arg.__long__() if that works
+ return arg.__getattr__("__long__").__call__();
+ } catch (PyException e) {
+ // No __long__ defined (at Python level)
+ return arg;
}
}
}
- if (argAsInt instanceof PyInteger) {
- // This call does not provide the prefix and will be lowercase.
- return formatInteger(((PyInteger)argAsInt).getValue(), radix, unsigned);
- } else { // must be a PyLong (as per __int__ contract)
- // This call provides the base prefix and case-matches with 'x' or 'X'.
- return formatLong(argAsInt, type, altFlag);
- }
}
/**
- * Convert a 32-bit integer (as from a {@link PyInteger}) to characters, signed or unsigned. The
- * values is presented in a <code>long</code>. The string result is left-padded with zeros to
- * the stated {@link #precision}. If v is being treated as signed, the sign of v is transferred
- * to {@link #negative} and the absolute value is converted. Otherwise (unsigned case)
- * <code>0x100000000L + v</code> is converted. This method does not provide the '0' or "0x"
- * prefix, just the padded digit string.
+ * Return the argument as a {@link PyFloat} according to its <code>__float__</code> method. If
+ * the argument has no such method, or it raises an exception, we return the argument itself.
+ * The caller must check the return type.
*
- * @param v value to convert
- * @param radix of conversion
- * @param unsigned if should be treated as unsigned
- * @return string form
+ * @param arg to convert
+ * @return PyFloat if possible
*/
- private String formatInteger(long v, int radix, boolean unsigned) {
- checkPrecision("integer");
- if (unsigned) {
- // If the high bit was set, this will have been sign-extended: correct that.
- if (v < 0) {
- v = 0x100000000l + v;
- }
+ private PyObject asFloat(PyObject arg) {
+
+ if (arg instanceof PyFloat) {
+ // arg is already acceptable
+ return arg;
+
} else {
- // If the high bit was set, the sign extension was correct, but we need sign + abs(v).
- if (v < 0) {
- negative = true;
- v = -v;
+ // use __float__ to get a float.
+ if (arg.getClass() == PyFloat.class) {
+ // A common case where it is safe to return arg.__float__()
+ return arg.__float__();
+
+ } else {
+ /*
+ * In general, we can't simply call arg.__float__() because PyString implements it
+ * without exposing it to python (str has no __float__). This would make str
+ * acceptable to float format specifiers, which is forbidden by CPython tests
+ * (test_format.py). PyString implements __float__ perhaps only to help the float
+ * constructor. Maybe that was a bad idea?
+ */
+ try {
+ // Result is the result of arg.__float__() if that works
+ return arg.__getattr__("__float__").__call__();
+ } catch (PyException e) {
+ // No __float__ defined (at Python level)
+ return arg;
+ }
}
}
- // Use the method in java.lang.Long (lowercase, no prefix)
- String s = Long.toString(v, radix);
- // But zero pad to the requested precision
- while (s.length() < precision) {
- s = "0" + s;
- }
- return s;
- }
-
- private double asDouble(PyObject obj) {
- try {
- return obj.asDouble();
- } catch (PyException pye) {
- throw !pye.match(Py.TypeError) ? pye : Py.TypeError("float argument required");
+ }
+
+ /**
+ * Return the argument as either a {@link PyString} or a {@link PyUnicode}, and set the
+ * {@link #needUnicode} member accordingly. If we already know we are building a Unicode string
+ * (<code>needUnicode==true</code>), then any argument that is not already a
+ * <code>PyUnicode</code> will be converted by calling its <code>__unicode__</code> method.
+ * Conversely, if we are not yet building a Unicode string (<code>needUnicode==false</code> ),
+ * then a PyString will pass unchanged, a <code>PyUnicode</code> will switch us to Unicode mode
+ * (<code>needUnicode=true</code>), and any other type will be converted by calling its
+ * <code>__str__</code> method, which will return a <code>PyString</code>, or possibly a
+ * <code>PyUnicode</code>, which will switch us to Unicode mode.
+ *
+ * @param arg to convert
+ * @return PyString or PyUnicode equivalent
+ */
+ private PyString asText(PyObject arg) {
+
+ if (arg instanceof PyUnicode) {
+ // arg is already acceptable.
+ needUnicode = true;
+ return (PyUnicode)arg;
+
+ } else if (needUnicode) {
+ // The string being built is unicode, so we need that version of the arg.
+ return arg.__unicode__();
+
+ } else if (arg instanceof PyString) {
+ // The string being built is not unicode, so arg is already acceptable.
+ return (PyString)arg;
+
+ } else {
+ // The string being built is not unicode, so use __str__ to get a PyString.
+ PyString s = arg.__str__();
+ // But __str__ might return PyUnicode, and we have to notice that.
+ if (s instanceof PyUnicode) {
+ needUnicode = true;
+ }
+ return s;
}
}
@@ -4325,7 +4259,7 @@
public PyString format(PyObject args) {
PyObject dict = null;
this.args = args;
- boolean needUnicode = unicodeCoercion;
+
if (args instanceof PyTuple) {
// We will simply work through the tuple elements
argIndex = 0;
@@ -4341,16 +4275,6 @@
while (index < format.length()) {
- // Attributes to be parsed from the next format specifier
- boolean ljustFlag = false;
- boolean signFlag = false;
- boolean blankFlag = false;
- boolean altFlag = false;
- boolean zeroFlag = false;
-
- int width = -1;
- precision = -1;
-
// Read one character from the format string
char c = pop();
if (c != '%') {
@@ -4360,6 +4284,14 @@
// It's a %, so the beginning of a conversion specifier. Parse it.
+ // Attributes to be parsed from the next format specifier
+ boolean altFlag = false;
+ char sign = Spec.NONE;
+ char fill = ' ';
+ char align = '>';
+ int width = Spec.UNSPECIFIED;
+ int precision = Spec.UNSPECIFIED;
+
// A conversion specifier contains the following components, in this order:
// + The '%' character, which marks the start of the specifier.
// + Mapping key (optional), consisting of a parenthesised sequence of characters.
@@ -4399,19 +4331,22 @@
while (true) {
switch (c = pop()) {
case '-':
- ljustFlag = true;
+ align = '<';
continue;
case '+':
- signFlag = true;
+ sign = '+';
continue;
case ' ':
- blankFlag = true;
+ if (!Spec.specified(sign)) {
+ // Blank sign only wins if '+' not specified.
+ sign = ' ';
+ }
continue;
case '#':
altFlag = true;
continue;
case '0':
- zeroFlag = true;
+ fill = '0';
continue;
}
break;
@@ -4428,7 +4363,7 @@
width = getNumber();
if (width < 0) {
width = -width;
- ljustFlag = true;
+ align = '<';
}
/*
@@ -4451,284 +4386,149 @@
c = pop();
}
- // c is now the conversion type.
- if (c == '%') {
- // It was just a percent sign after all
- buffer.append(c);
- continue;
+ /*
+ * As a function of the conversion type (currently in c) override some of the formatting
+ * flags we read from the format specification.
+ */
+ switch (c) {
+ case 's':
+ case 'r':
+ case 'c':
+ case '%':
+ // These have string-like results: fill, if needed, is always blank.
+ fill = ' ';
+ break;
+
+ default:
+ if (fill == '0' && align == '>') {
+ // Zero-fill comes after the sign in right-justification.
+ align = '=';
+ } else {
+ // If left-justifying, the fill is always blank.
+ fill = ' ';
+ }
}
/*
+ * Encode as an InternalFormat.Spec. The values in the constructor always have specified
+ * values, except for sign, width and precision.
+ */
+ Spec spec = new Spec(fill, align, sign, altFlag, width, false, precision, c);
+
+ /*
* Process argument according to format specification decoded from the string. It is
- * important we don't read the argumnent from the list until this point because of the
+ * important we don't read the argument from the list until this point because of the
* possibility that width and precision were specified via the argument list.
*/
- PyObject arg = getarg();
- String string = null;
- negative = false;
-
- // Independent of type, decide the padding character based on decoded flags.
- char fill = ' ';
- if (zeroFlag) {
- fill = '0';
- } else {
- fill = ' ';
- }
-
- // Perform the type-specific formatting
- switch (c) {
-
- case 's':
- // String (converts any Python object using str()).
- if (arg instanceof PyUnicode) {
- needUnicode = true;
+
+ // Depending on the type of conversion, we use one of these formatters:
+ FloatFormatter ff;
+ IntegerFormatter fi;
+ TextFormatter ft;
+ Formatter f; // = ff, fi or ft, whichever we actually use.
+
+ switch (spec.type) {
+
+ case 's': // String: converts any object using __str__(), __unicode__() ...
+ case 'r': // ... or repr().
+ PyObject arg = getarg();
+
+ // Get hold of the actual object to display (may set needUnicode)
+ PyString argAsString = asText(spec.type == 's' ? arg : arg.__repr__());
+ // Format the str/unicode form of the argument using this Spec.
+ f = ft = new TextFormatter(buffer, spec);
+ ft.setBytes(!needUnicode);
+ ft.format(argAsString.getString());
+ break;
+
+ case 'd': // All integer formats (+case for X).
+ case 'o':
+ case 'x':
+ case 'X':
+ case 'c': // Single character (accepts integer or single character string).
+ case 'u': // Obsolete type identical to 'd'.
+ case 'i': // Compatibility with scanf().
+
+ // Format the argument using this Spec.
+ f = fi = new IntegerFormatter.Traditional(buffer, spec);
+ // If not producing PyUnicode, disallow codes >255.
+ fi.setBytes(!needUnicode);
+
+ arg = getarg();
+
+ if (arg instanceof PyString && spec.type == 'c') {
+ if (arg.__len__() != 1) {
+ throw Py.TypeError("%c requires int or char");
+ } else {
+ if (!needUnicode && arg instanceof PyUnicode) {
+ // Change of mind forced by encountering unicode object.
+ needUnicode = true;
+ fi.setBytes(false);
+ }
+ fi.format(((PyString)arg).getString().codePointAt(0));
+ }
+
+ } else {
+ // Note various types accepted here as long as they have an __int__ method.
+ PyObject argAsNumber = asNumber(arg);
+
+ // We have to check what we got back.
+ if (argAsNumber instanceof PyInteger) {
+ fi.format(((PyInteger)argAsNumber).getValue());
+ } else if (argAsNumber instanceof PyLong) {
+ fi.format(((PyLong)argAsNumber).getValue());
+ } else {
+ // It couldn't be converted, raise the error here
+ throw Py.TypeError("%" + spec.type
+ + " format: a number is required, not "
+ + arg.getType().fastGetName());
+ }
}
- // fall through ...
-
- case 'r':
- // String (converts any Python object using repr()).
- fill = ' ';
- if (c == 's') {
- if (needUnicode) {
- string = arg.__unicode__().toString();
- } else {
- string = arg.__str__().toString();
- }
- } else {
- string = arg.__repr__().toString();
- }
- if (precision >= 0 && string.length() > precision) {
- string = string.substring(0, precision);
- }
break;
- case 'i':
- case 'd':
- // Signed integer decimal. Note floats accepted.
- if (arg instanceof PyLong) {
- string = formatLong(arg, c, altFlag);
- } else {
- string = formatInteger(arg, 10, false, c, altFlag);
- }
- break;
-
- case 'u':
- // Obsolete type – it is identical to 'd'. (Why not identical here?)
- if (arg instanceof PyLong) {
- string = formatLong(arg, c, altFlag);
- } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
- string = formatInteger(arg, 10, false, c, altFlag);
- } else {
- throw Py.TypeError("int argument required");
- }
- break;
-
- case 'o':
- // Signed octal value. Note floats accepted.
- if (arg instanceof PyLong) {
- // This call provides the base prefix '0' if altFlag.
- string = formatLong(arg, c, altFlag);
- } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
- // This call does not provide the '0' prefix and will be lowercase ...
- // ... except where arg.__int__ returns PyLong, then it's like formatLong.
- string = formatInteger(arg, 8, false, c, altFlag);
- if (altFlag && string.charAt(0) != '0') {
- string = "0" + string;
- }
- } else {
- throw Py.TypeError("int argument required");
- }
- break;
-
- case 'x':
- // Signed hexadecimal (lowercase). Note floats accepted.
- if (arg instanceof PyLong) {
- // This call provides the base prefix "0x" if altFlag and case-matches c.
- string = formatLong(arg, c, altFlag);
- } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
- // This call does not provide the "0x" prefix and will be lowercase.
- // ... except where arg.__int__ returns PyLong, then it's like formatLong.
- string = formatInteger(arg, 16, false, c, altFlag);
- string = string.toLowerCase();
- if (altFlag) {
- string = "0x" + string;
- }
- } else {
- throw Py.TypeError("int argument required");
- }
- break;
-
- case 'X':
- // Signed hexadecimal (uppercase). Note floats accepted.
- if (arg instanceof PyLong) {
- // This call provides the base prefix "0x" if altFlag and case-matches c.
- string = formatLong(arg, c, altFlag);
- } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
- // This call does not provide the "0x" prefix and will be lowercase.
- // ... except where arg.__int__ returns PyLong, then it's like formatLong.
- string = formatInteger(arg, 16, false, c, altFlag);
- string = string.toUpperCase();
- if (altFlag) {
- string = "0X" + string;
- }
- } else {
- throw Py.TypeError("int argument required");
- }
- break;
-
- case 'e':
+ case 'e': // All floating point formats (+case).
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
- // All floating point formats (+case).
-
- // Convert the flags (local variables) to the form needed in the Spec object.
- char align = ljustFlag ? '<' : '>';
- char sign = signFlag ? '+' : (blankFlag ? ' ' : Spec.NONE);
- int w = Spec.UNSPECIFIED;
- Spec spec = new Spec(fill, align, sign, altFlag, w, false, precision, c);
// Format using this Spec the double form of the argument.
- FloatFormatter f = new FloatFormatter(spec);
- double v = asDouble(arg);
- f.format(v);
- string = f.getResult();
-
- // Suppress subsequent attempts to insert a correct sign, done already.
- signFlag = blankFlag = negative = false;
+ f = ff = new FloatFormatter(buffer, spec);
+ ff.setBytes(!needUnicode);
+
+ // Note various types accepted here as long as they have a __float__ method.
+ arg = getarg();
+ PyObject argAsFloat = asFloat(arg);
+
+ // We have to check what we got back..
+ if (argAsFloat instanceof PyFloat) {
+ ff.format(((PyFloat)argAsFloat).getValue());
+ } else {
+ // It couldn't be converted, raise the error here
+ throw Py.TypeError("float argument required, not "
+ + arg.getType().fastGetName());
+ }
+
break;
- case 'c':
- // Single character (accepts integer or single character string).
- fill = ' ';
- if (arg instanceof PyString) {
- string = ((PyString)arg).toString();
- if (string.length() != 1) {
- throw Py.TypeError("%c requires int or char");
- }
- if (arg instanceof PyUnicode) {
- needUnicode = true;
- }
- break;
- }
-
- // arg is not a str (or unicode)
- int val;
- try {
- // Explicitly __int__ so we can look for an AttributeError (which is
- // less invasive to mask than a TypeError)
- val = arg.__int__().asInt();
- } catch (PyException e) {
- if (e.match(Py.AttributeError)) {
- throw Py.TypeError("%c requires int or char");
- }
- throw e;
- }
- // Range check, according to ultimate type of result as presentl;y known.
- if (!needUnicode) {
- if (val < 0) {
- throw Py.OverflowError("unsigned byte integer is less than minimum");
- } else if (val > 255) {
- throw Py.OverflowError("unsigned byte integer is greater than maximum");
- }
- } else if (val < 0 || val > PySystemState.maxunicode) {
- throw Py.OverflowError("%c arg not in range(0x110000) (wide Python build)");
- }
- string = new String(new int[] {val}, 0, 1);
+ case '%': // Percent symbol, but surprisingly, padded.
+
+ // We use an integer formatter.
+ f = fi = new IntegerFormatter.Traditional(buffer, spec);
+ fi.setBytes(!needUnicode);
+ fi.format('%');
break;
default:
throw Py.ValueError("unsupported format character '"
- + codecs.encode(Py.newString(c), null, "replace") + "' (0x"
- + Integer.toHexString(c) + ") at index " + (index - 1));
+ + codecs.encode(Py.newString(spec.type), null, "replace") + "' (0x"
+ + Integer.toHexString(spec.type) + ") at index " + (index - 1));
}
- /*
- * We have now dealt with the translation of the (absolute value of the) argument, in
- * variable string[]. In the next sections we deal with sign, padding and base prefix.
- */
- int length = string.length();
- int skip = 0;
-
- // Decide how to represent the sign according to format and actual sign of argument.
- String signString = null;
- if (negative) {
- signString = "-";
- } else {
- if (signFlag) {
- signString = "+";
- } else if (blankFlag) {
- signString = " ";
- }
- }
-
- // The width (from here on) will be the remaining width on the line.
- if (width < length) {
- width = length;
- }
-
- // Insert the sign in the buffer and adjust the width.
- if (signString != null) {
- if (fill != ' ') {
- // When the fill is not space, the sign comes before the fill.
- buffer.append(signString);
- }
- // Adjust width for sign.
- if (width > length) {
- width--;
- }
- }
-
- // Insert base prefix used with alternate mode for hexadecimal.
- if (altFlag && (c == 'x' || c == 'X')) {
- if (fill != ' ') {
- // When the fill is not space, this base prefix comes before the fill.
- buffer.append('0');
- buffer.append(c);
- skip += 2;
- }
- // Adjust width for base prefix.
- width -= 2;
- if (width < 0) {
- width = 0;
- }
- length -= 2;
- }
-
- // Fill on the left of the item.
- if (width > length && !ljustFlag) {
- do {
- buffer.append(fill);
- } while (--width > length);
- }
-
- // If the fill is spaces, we will have deferred the sign and hex base prefix
- if (fill == ' ') {
- if (signString != null) {
- buffer.append(signString);
- }
- if (altFlag && (c == 'x' || c == 'X')) {
- buffer.append('0');
- buffer.append(c);
- skip += 2;
- }
- }
-
- // Now append the converted argument.
- if (skip > 0) {
- // The string contains a hex-prefix, but we have already inserted one.
- buffer.append(string.substring(skip));
- } else {
- buffer.append(string);
- }
-
- // If this hasn't filled the space required, add right-padding.
- while (--width >= length) {
- buffer.append(' ');
- }
+ // Pad the result as specified (in-place, in the buffer).
+ f.pad();
}
/*
@@ -4743,10 +4543,7 @@
}
// Return the final buffer contents as a str or unicode as appropriate.
- if (needUnicode) {
- return new PyUnicode(buffer);
- }
- return new PyString(buffer);
+ return needUnicode ? new PyUnicode(buffer) : new PyString(buffer);
}
}
diff --git a/src/org/python/core/__builtin__.java b/src/org/python/core/__builtin__.java
--- a/src/org/python/core/__builtin__.java
+++ b/src/org/python/core/__builtin__.java
@@ -4,20 +4,20 @@
*/
package org.python.core;
-import java.io.EOFException;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map;
import org.python.antlr.base.mod;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Spec;
import org.python.core.util.ExtraMath;
import org.python.core.util.RelativeFile;
-import org.python.core.util.StringUtil;
import org.python.modules._functools._functools;
class BuiltinFunctions extends PyBuiltinFunctionSet {
@@ -768,7 +768,7 @@
/**
* Built-in Python function ord() applicable to the string-like types <code>str</code>,
* <code>bytearray</code>, <code>unicode</code>.
- *
+ *
* @param c string-like object of length 1
* @return ordinal value of character or byte value in
* @throws PyException (TypeError) if not a string-like type
@@ -1245,10 +1245,10 @@
PyObject[] args;
if (level < 0) {
// for backward compatibility provide only 4 arguments
- args = new PyObject[] {Py.newString(name), globals, locals,
+ args = new PyObject[] {Py.newString(name), globals, locals,
fromlist};
} else {
- args = new PyObject[] {Py.newString(name), globals, locals,
+ args = new PyObject[] {Py.newString(name), globals, locals,
fromlist, Py.newInteger(level)};
}
PyObject module = __import__.__call__(args);
@@ -1469,7 +1469,7 @@
endObject = useUnicode ? Py.newUnicode(end) : Py.newString(end);
}
- out.print(values, sepObject, endObject);
+ out.print(values, sepObject, endObject);
}
return Py.None;
}
@@ -1774,10 +1774,6 @@
public PyObject __call__(PyObject args[], String kwds[]) {
ArgParser ap = new ArgParser("bin", args, kwds, new String[] {"number"}, 1);
ap.noKeywords();
- PyObject number = ap.getPyObject(0);
-
- //XXX: this could be made more efficient by using a binary only formatter
- // instead of using generic formatting.
- return number.__format__(new PyString("#b"));
+ return IntegerFormatter.bin(ap.getPyObject(0));
}
}
diff --git a/src/org/python/core/stringlib/FloatFormatter.java b/src/org/python/core/stringlib/FloatFormatter.java
--- a/src/org/python/core/stringlib/FloatFormatter.java
+++ b/src/org/python/core/stringlib/FloatFormatter.java
@@ -18,6 +18,10 @@
/** The rounding mode dominant in the formatter. */
static final RoundingMode ROUND_PY = RoundingMode.HALF_EVEN;
+ /** Limit the size of results. */
+ // No-one needs more than log(Double.MAX_VALUE) - log2(Double.MIN_VALUE) = 1383 digits.
+ static final int MAX_PRECISION = 1400;
+
/** If it contains no decimal point, this length is zero, and 1 otherwise. */
private int lenPoint;
/** The length of the fractional part, right of the decimal point. */
@@ -30,26 +34,14 @@
private int minFracDigits;
/**
- * Construct the formatter from a specification. A reference is held to this specification, but
- * it will not be modified by the actions of this class.
+ * Construct the formatter from a client-supplied buffer, to which the result will be appended,
+ * and a specification. Sets {@link #mark} to the end of the buffer.
*
+ * @param result destination buffer
* @param spec parsed conversion specification
*/
- public FloatFormatter(Spec spec) {
- // Space for result is based on padded width, or precision, whole part & furniture.
- this(spec, 1, 0);
- }
-
- /**
- * Construct the formatter from a specification and an explicit initial buffer capacity. A
- * reference is held to this specification, but it will not be modified by the actions of this
- * class.
- *
- * @param spec parsed conversion specification
- * @param width expected for the formatted result
- */
- public FloatFormatter(Spec spec, int width) {
- super(spec, width);
+ public FloatFormatter(StringBuilder result, Spec spec) {
+ super(result, spec);
if (spec.alternate) {
// Alternate form means do not trim the zero fractional digits.
minFracDigits = -1;
@@ -66,20 +58,26 @@
}
/**
- * Construct the formatter from a specification and two extra hints about the initial buffer
- * capacity. A reference is held to this specification, but it will not be modified by the
- * actions of this class.
+ * Construct the formatter from a specification, allocating a buffer internally for the result.
*
* @param spec parsed conversion specification
- * @param count of elements likely to be formatted
- * @param margin for elements formatted only once
*/
- public FloatFormatter(Spec spec, int count, int margin) {
- /*
- * Rule of thumb used here: in e format w = (p-1) + len("+1.e+300") = p+7; in f format w = p
- * + len("1,000,000.") = p+10. If we're wrong, the result will have to grow. No big deal.
- */
- this(spec, Math.max(spec.width + 1, count * (spec.precision + 10) + margin));
+ public FloatFormatter(Spec spec) {
+ this(new StringBuilder(size(spec)), spec);
+ }
+
+ /**
+ * Recommend a buffer size for a given specification, assuming one float is converted. This will
+ * be a "right" answer for e and g-format, and for f-format with values up to 9,999,999.
+ *
+ * @param spec parsed conversion specification
+ * @return the larger of <code>spec.width + 1</code> and the precision (6 if unspecified) plus 11
+ */
+ public static int size(Spec spec) {
+ // Rule of thumb used here (no right answer):
+ // in e format each float occupies: (p-1) + len("+1.e+300") = p+7;
+ // in f format each float occupies: p + len("1,000,000.%") = p+11;
+ // or an explicit (minimum) width may be given, with one overshoot possible.
+ return Math.max(spec.width + 1, spec.getPrecision(6) + 11);
}
/**
@@ -160,12 +158,19 @@
// Precision defaults to 6 (or 12 for none-format)
int precision = spec.getPrecision(Spec.specified(spec.type) ? 6 : 12);
+ // Guard against excessive result precision
+ // XXX Possibly better raised before result is allocated/sized.
+ if (precision > MAX_PRECISION) {
+ throw precisionTooLarge("float");
+ }
+
/*
* By default, the prefix of a positive number is "", but the format specifier may override
* it, and the built-in type complex needs to override the format.
*/
- if (positivePrefix == null && Spec.specified(spec.sign) && spec.sign != '-') {
- positivePrefix = Character.toString(spec.sign);
+ char sign = spec.sign;
+ if (positivePrefix == null && Spec.specified(sign) && sign != '-') {
+ positivePrefix = Character.toString(sign);
}
// Different process for each format type, ignoring case for now.
@@ -905,8 +910,8 @@
}
/**
- * Return the index in {@link #result} of the first letter. helper for {@link #uppercase()} and
- * {@link #getExponent()}
+ * Return the index in {@link #result} of the first letter. This is a helper for
+ * {@link #uppercase()} and {@link #getExponent()}
*/
private int indexOfMarker() {
return start + lenSign + lenWhole + lenPoint + lenFraction;
diff --git a/src/org/python/core/stringlib/IntegerFormatter.java b/src/org/python/core/stringlib/IntegerFormatter.java
new file mode 100644
--- /dev/null
+++ b/src/org/python/core/stringlib/IntegerFormatter.java
@@ -0,0 +1,779 @@
+// Copyright (c) Jython Developers
+package org.python.core.stringlib;
+
+import java.math.BigInteger;
+
+import org.python.core.Py;
+import org.python.core.PyInteger;
+import org.python.core.PyLong;
+import org.python.core.PyObject;
+import org.python.core.PyString;
+import org.python.core.PySystemState;
+import org.python.core.stringlib.InternalFormat.Spec;
+
+/**
+ * A class that provides the implementation of integer formatting. In a limited way, it acts like a
+ * StringBuilder to which text and one or more numbers may be appended, formatted according to the
+ * format specifier supplied at construction. These are ephemeral objects that are not, on their
+ * own, thread safe.
+ */
+public class IntegerFormatter extends InternalFormat.Formatter {
+
+ /**
+ * Construct the formatter from a client-supplied buffer, to which the result will be appended,
+ * and a specification. Sets {@link #mark} to the end of the buffer. All the work is delegated
+ * to the superclass constructor.
+ *
+ * @param result destination buffer
+ * @param spec parsed conversion specification
+ */
+ public IntegerFormatter(StringBuilder result, Spec spec) {
+ super(result, spec);
+ }
+
+ /**
+ * Construct the formatter from a specification, allocating a buffer internally for the result.
+ * The buffer is then handed to {@link #IntegerFormatter(StringBuilder, Spec)}.
+ *
+ * @param spec parsed conversion specification
+ */
+ public IntegerFormatter(Spec spec) {
+ // Rule of thumb: big enough for 32-bit binary with base indicator 0b (32 + 2 = 34 characters)
+ this(new StringBuilder(34), spec);
+ }
+
+ /*
+ * Re-implement the text appends so they return the right type.
+ */
+ /**
+ * Append a single character by delegating to the superclass, returning this object typed as
+ * <code>IntegerFormatter</code> so that calls may be chained.
+ *
+ * @param c character to append
+ * @return this object
+ */
+ @Override
+ public IntegerFormatter append(char c) {
+ super.append(c);
+ return this;
+ }
+
+ /**
+ * Append a character sequence by delegating to the superclass, returning this object typed as
+ * <code>IntegerFormatter</code> so that calls may be chained.
+ *
+ * @param csq sequence to append
+ * @return this object
+ */
+ @Override
+ public IntegerFormatter append(CharSequence csq) {
+ super.append(csq);
+ return this;
+ }
+
+ /**
+ * Append part of a character sequence by delegating to the superclass, returning this object
+ * typed as <code>IntegerFormatter</code> so that calls may be chained.
+ *
+ * @param csq sequence from which a sub-sequence is appended
+ * @param start index passed through to the superclass method
+ * @param end index passed through to the superclass method
+ * @return this object
+ * @throws IndexOutOfBoundsException as declared by the superclass method
+ */
+ @Override
+ public IntegerFormatter append(CharSequence csq, int start, int end) //
+ throws IndexOutOfBoundsException {
+ super.append(csq, start, end);
+ return this;
+ }
+
+    /**
+     * Format a {@link BigInteger}, which is the implementation type of Jython <code>long</code>,
+     * according to the specification represented by this <code>IntegerFormatter</code>. The
+     * conversion type, and flags for grouping or base prefix are dealt with here. At the point this
+     * is used, we know the {@link #spec} is one of the integer types.
+     *
+     * @param value to convert
+     * @return this object
+     */
+    @SuppressWarnings("fallthrough")
+    public IntegerFormatter format(BigInteger value) {
+        try {
+            /*
+             * Scratch all instance variables and start = result.length(), exactly as format(int)
+             * does. Without this, a formatter used for more than one number carries lenSign over
+             * from the previous number, and positiveSign() adds the base prefix length to it.
+             */
+            setStart();
+
+            // Different process for each format type.
+            switch (spec.type) {
+                case 'd':
+                case Spec.NONE:
+                case 'u':
+                case 'i':
+                    // None format or d-format: decimal
+                    format_d(value);
+                    break;
+
+                case 'x':
+                    // hexadecimal.
+                    format_x(value, false);
+                    break;
+
+                case 'X':
+                    // HEXADECIMAL!
+                    format_x(value, true);
+                    break;
+
+                case 'o':
+                    // Octal.
+                    format_o(value);
+                    break;
+
+                case 'b':
+                    // Binary.
+                    format_b(value);
+                    break;
+
+                case 'c':
+                    // Character with this value as its code point (range-checked there).
+                    format_c(value);
+                    break;
+
+                case 'n':
+                    // Locale-sensitive version of d-format should be here.
+                    format_d(value);
+                    break;
+
+                default:
+                    // Should never get here, since this was checked in caller.
+                    throw unknownFormat(spec.type, "long");
+            }
+
+            // If required to, group the whole-part digits.
+            if (spec.grouping) {
+                groupDigits(3, ',');
+            }
+
+            return this;
+
+        } catch (OutOfMemoryError eme) {
+            // Most probably due to excessive precision.
+            throw precisionTooLarge("long");
+        }
+    }
+
+    /**
+     * Append the decimal representation of the value to {@link #result}. Whether a positive value
+     * gets an explicit sign is decided from the format specification.
+     *
+     * @param value to convert
+     */
+    void format_d(BigInteger value) {
+        boolean negative = value.signum() < 0;
+        // Emit the sign first (decimal has no base prefix, hence null).
+        if (negative) {
+            negativeSign(null);
+        } else {
+            positiveSign(null);
+        }
+        // Only the magnitude is converted, since the sign is already in the buffer.
+        BigInteger magnitude = negative ? value.negate() : value;
+        appendNumber(magnitude.toString());
+    }
+
+    /**
+     * Append the hexadecimal representation of the value to {@link #result}, in upper or lower
+     * case as requested. Whether a sign is shown for positive values, and whether the base prefix
+     * "0x" or "0X" appears, is decided from the format specification.
+     *
+     * @param value to convert
+     * @param upper if the hexadecimal should be upper case
+     */
+    void format_x(BigInteger value, boolean upper) {
+        String prefix = upper ? "0X" : "0x";
+        BigInteger magnitude;
+        if (value.signum() < 0) {
+            // Minus sign, optional prefix, then the digits of the negated value.
+            negativeSign(prefix);
+            magnitude = value.negate();
+        } else {
+            // Optional sign, optional prefix, then the digits of the value itself.
+            positiveSign(prefix);
+            magnitude = value;
+        }
+        String digits = toHexString(magnitude);
+        // The conversion yields lower case: shift it if upper case was asked for.
+        appendNumber(upper ? digits.toUpperCase() : digits);
+    }
+
+    /**
+     * Append the octal representation of the value to {@link #result}. Whether a sign is shown for
+     * positive values, and whether the base prefix "0o" appears, is decided from the format
+     * specification.
+     *
+     * @param value to convert
+     */
+    void format_o(BigInteger value) {
+        final String prefix = "0o";
+        BigInteger magnitude;
+        if (value.signum() < 0) {
+            // Minus sign, optional prefix, then the digits of the negated value.
+            negativeSign(prefix);
+            magnitude = value.negate();
+        } else {
+            // Optional sign, optional prefix, then the digits of the value itself.
+            positiveSign(prefix);
+            magnitude = value;
+        }
+        appendNumber(toOctalString(magnitude));
+    }
+
+    /**
+     * Append the binary representation of the value to {@link #result}. Whether a sign is shown
+     * for positive values, and whether the base prefix "0b" appears, is decided from the format
+     * specification.
+     *
+     * @param value to convert
+     */
+    void format_b(BigInteger value) {
+        final String prefix = "0b";
+        BigInteger magnitude;
+        if (value.signum() < 0) {
+            // Minus sign, optional prefix, then the digits of the negated value.
+            negativeSign(prefix);
+            magnitude = value.negate();
+        } else {
+            // Optional sign, optional prefix, then the digits of the value itself.
+            positiveSign(prefix);
+            magnitude = value;
+        }
+        appendNumber(toBinaryString(magnitude));
+    }
+
+    /**
+     * Append to {@link #result} the character whose code point is the given value.
+     *
+     * @param value to convert
+     */
+    void format_c(BigInteger value) {
+        // The exclusive upper limit is 256 for byte output, and the unicode range otherwise.
+        BigInteger limit = bytes ? LIMIT_BYTE : LIMIT_UNICODE;
+        boolean inRange = value.signum() >= 0 && value.compareTo(limit) < 0;
+        if (!inRange) {
+            throw Py.OverflowError("%c arg not in range(0x" + toHexString(limit) + ")");
+        }
+        result.appendCodePoint(value.intValue());
+    }
+
+ // Limits used in format_c(BigInteger)
+ private static final BigInteger LIMIT_UNICODE = BigInteger
+ .valueOf(PySystemState.maxunicode + 1);
+ private static final BigInteger LIMIT_BYTE = BigInteger.valueOf(256);
+
+    /**
+     * Format an <code>int</code> according to the specification held by this
+     * <code>IntegerFormatter</code>, dispatching on the conversion type and then applying digit
+     * grouping if the specification asks for it. By the time this is called, {@link #spec} is
+     * known to be one of the integer types.
+     *
+     * @param value to convert
+     * @return this object
+     */
+    @SuppressWarnings("fallthrough")
+    public IntegerFormatter format(int value) {
+        try {
+            // Scratch all instance variables and start = result.length().
+            setStart();
+
+            switch (spec.type) {
+                case Spec.NONE:
+                case 'd':
+                case 'i':
+                case 'u':
+                case 'n':
+                    // Decimal. ('n' should be locale-sensitive, but is treated as 'd' here.)
+                    format_d(value);
+                    break;
+
+                case 'x':
+                    // Hexadecimal, lower case.
+                    format_x(value, false);
+                    break;
+
+                case 'X':
+                    // Hexadecimal, upper case.
+                    format_x(value, true);
+                    break;
+
+                case 'o':
+                    // Octal.
+                    format_o(value);
+                    break;
+
+                case 'b':
+                    // Binary.
+                    format_b(value);
+                    break;
+
+                case 'c':
+                case '%':
+                    // Single character having this value as its code point.
+                    format_c(value);
+                    break;
+
+                default:
+                    // Should never get here, since this was checked in caller.
+                    throw unknownFormat(spec.type, "integer");
+            }
+
+            // Group the whole-part digits in threes, if required to.
+            if (spec.grouping) {
+                groupDigits(3, ',');
+            }
+            return this;
+
+        } catch (OutOfMemoryError eme) {
+            // Most probably due to excessive precision.
+            throw precisionTooLarge("integer");
+        }
+    }
+
+    /**
+     * Format the value as decimal (into {@link #result}). The option for mandatory sign is dealt
+     * with by reference to the format specification.
+     *
+     * @param value to convert
+     */
+    void format_d(int value) {
+        String number;
+        if (value < 0) {
+            // Negative value: deal with sign and base, and convert magnitude.
+            negativeSign(null);
+            /*
+             * Negate as a long. In int arithmetic -Integer.MIN_VALUE overflows back to
+             * Integer.MIN_VALUE, so Integer.toString(-value) would return "-2147483648" and the
+             * result would carry two minus signs.
+             */
+            number = Long.toString(-(long)value);
+        } else {
+            // Positive value: deal with sign, base and magnitude.
+            positiveSign(null);
+            number = Integer.toString(value);
+        }
+        appendNumber(number);
+    }
+
+    /**
+     * Append the hexadecimal representation of the value to {@link #result}, in upper or lower
+     * case as requested. Whether a sign is shown for positive values, and whether the base prefix
+     * "0x" or "0X" appears, is decided from the format specification.
+     *
+     * @param value to convert
+     * @param upper if the hexadecimal should be upper case
+     */
+    void format_x(int value, boolean upper) {
+        String prefix = upper ? "0X" : "0x";
+        int magnitude;
+        if (value < 0) {
+            // Minus sign, optional prefix, then the digits of the negated value.
+            negativeSign(prefix);
+            magnitude = -value;
+        } else {
+            // Optional sign, optional prefix, then the digits of the value itself.
+            positiveSign(prefix);
+            magnitude = value;
+        }
+        // Integer.toHexString treats its argument as unsigned and gives lower case.
+        String digits = Integer.toHexString(magnitude);
+        appendNumber(upper ? digits.toUpperCase() : digits);
+    }
+
+    /**
+     * Append the octal representation of the value to {@link #result}. Whether a sign is shown for
+     * positive values, and whether the base prefix "0o" appears, is decided from the format
+     * specification.
+     *
+     * @param value to convert
+     */
+    void format_o(int value) {
+        final String prefix = "0o";
+        int magnitude;
+        if (value < 0) {
+            // Minus sign, optional prefix, then the digits of the negated value.
+            negativeSign(prefix);
+            magnitude = -value;
+        } else {
+            // Optional sign, optional prefix, then the digits of the value itself.
+            positiveSign(prefix);
+            magnitude = value;
+        }
+        // Integer.toOctalString treats its argument as unsigned.
+        appendNumber(Integer.toOctalString(magnitude));
+    }
+
+    /**
+     * Append the binary representation of the value to {@link #result}. Whether a sign is shown
+     * for positive values, and whether the base prefix "0b" appears, is decided from the format
+     * specification.
+     *
+     * @param value to convert
+     */
+    void format_b(int value) {
+        final String prefix = "0b";
+        int magnitude;
+        if (value < 0) {
+            // Minus sign, optional prefix, then the digits of the negated value.
+            negativeSign(prefix);
+            magnitude = -value;
+        } else {
+            // Optional sign, optional prefix, then the digits of the value itself.
+            positiveSign(prefix);
+            magnitude = value;
+        }
+        // Integer.toBinaryString treats its argument as unsigned.
+        appendNumber(Integer.toBinaryString(magnitude));
+    }
+
+    /**
+     * Append to {@link #result} the character whose code point is the given value.
+     *
+     * @param value to convert
+     */
+    void format_c(int value) {
+        // The exclusive upper limit is 256 for byte output, and the unicode range otherwise.
+        int limit = bytes ? 256 : PySystemState.maxunicode + 1;
+        boolean inRange = value >= 0 && value < limit;
+        if (!inRange) {
+            throw Py.OverflowError("%c arg not in range(0x" + Integer.toHexString(limit) + ")");
+        }
+        result.appendCodePoint(value);
+    }
+
+    /**
+     * Begin a non-negative number in the {@link #result} buffer: append the sign character, if
+     * the specification gives one for positive numbers, and then, in alternate mode, the base
+     * marker. Sign and base marker together count as the "sign" of the converted number, whose
+     * length is kept in {@link #lenSign}. This matters when padding is inserted.
+     *
+     * @param base marker "0x" or "0X" for hex, "0o" for octal, "0b" for binary, or
+     *            <code>null</code> for none (decimal).
+     */
+    final void positiveSign(String base) {
+        // A sign is shown only if one is specified, and it is not the "negatives only" option.
+        char sign = spec.sign;
+        boolean showSign = Spec.specified(sign) && sign != '-';
+        if (showSign) {
+            append(sign);
+            lenSign = 1;
+        }
+        // The base prefix appears only in alternate mode, and only if there is one.
+        boolean showBase = base != null && spec.alternate;
+        if (showBase) {
+            append(base);
+            lenSign += base.length();
+        }
+    }
+
+    /**
+     * Begin a negative number in the {@link #result} buffer: append a minus sign and then, in
+     * alternate mode, the base marker. Sign and base marker together count as the "sign" of the
+     * converted number, whose length is kept in {@link #lenSign}. This matters when padding is
+     * inserted.
+     *
+     * @param base marker "0x" or "0X" for hex, "0o" for octal, "0b" for binary, or
+     *            <code>null</code> for none (decimal).
+     */
+    final void negativeSign(String base) {
+        // The minus sign is not optional.
+        append('-');
+        lenSign = 1;
+        // The base prefix appears only in alternate mode, and only if there is one.
+        boolean showBase = base != null && spec.alternate;
+        if (showBase) {
+            append(base);
+            lenSign += base.length();
+        }
+    }
+
+ /**
+ * Append a string (number) to {@link #result} and set {@link #lenWhole} to its length. The
+ * string is expected to hold only the digits: callers in this class emit any sign and base
+ * prefix beforehand.
+ *
+ * @param number to append
+ */
+ void appendNumber(String number) {
+ lenWhole = number.length();
+ append(number);
+ }
+
+ // For hex-conversion by lookup
+ private static final String LOOKUP = "0123456789abcdef";
+
+    /**
+     * Produce the hexadecimal representation of a {@link BigInteger} directly from its bytes, two
+     * digits per byte by table look-up. {@link BigInteger#toString(int)} is too slow because it
+     * generalizes to any radix and, consequently, is implemented using expensive mathematical
+     * operations.
+     *
+     * @param value the value to generate a hexadecimal string from
+     * @return the hexadecimal representation of value, with "-" sign prepended if necessary
+     */
+    private static final String toHexString(BigInteger value) {
+        int signum = value.signum();
+        if (signum == 0) {
+            // Nothing to convert.
+            return "0";
+        }
+
+        // Convert the magnitude: a minus sign, if needed, is put back at the end.
+        byte[] magnitude = value.abs().toByteArray();
+        StringBuilder hex = new StringBuilder(magnitude.length * 2);
+        for (byte octet : magnitude) {
+            int b = octet & 0xFF;
+            hex.append(LOOKUP.charAt(b >> 4)).append(LOOKUP.charAt(b & 0x0F));
+        }
+
+        // Strip leading zeroes, but never the final digit.
+        String digits = hex.toString().replaceFirst("^0+(?!$)", "");
+        if (signum < 0) {
+            return "-" + digits;
+        }
+        return digits;
+    }
+
+    /**
+     * A more efficient algorithm for generating an octal representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate an octal string from
+     * @return the octal representation of value, with "-" sign prepended if necessary
+     */
+    private static final String toOctalString(BigInteger value) {
+        int signum = value.signum();
+
+        // obvious shortcut
+        if (signum == 0) {
+            return "0";
+        }
+
+        byte[] input = value.abs().toByteArray();
+        if (input.length < 3) {
+            // Less than one whole triplet: leave it to BigInteger (which supplies any sign).
+            return value.toString(8);
+        }
+
+        StringBuilder sb = new StringBuilder(input.length * 3);
+
+        // working backwards, three bytes (24 bits, so exactly eight octal digits) at a time
+        int threebytes;
+        int trip1, trip2, trip3; // most, middle, and least significant bytes in the triplet
+        for (int i = input.length - 1; i >= 0; i -= 3) {
+            trip3 = input[i] & 0xFF;
+            trip2 = ((i - 1) >= 0) ? (input[i - 1] & 0xFF) : 0x00;
+            trip1 = ((i - 2) >= 0) ? (input[i - 2] & 0xFF) : 0x00;
+            threebytes = trip3 | (trip2 << 8) | (trip1 << 16);
+
+            // convert the three-byte value into an eight-character octal string
+            for (int j = 0; j < 8; j++) {
+                sb.append(LOOKUP.charAt((threebytes >> (j * 3)) & 0x000007));
+            }
+        }
+
+        /*
+         * The digits were generated least significant first, so reverse them. Then remove leading
+         * zeroes, but not the last one: the look-ahead must be "(?!$)" (not at end of input), as
+         * it is in the hexadecimal and binary conversions.
+         */
+        String result = sb.reverse().toString().replaceFirst("^0+(?!$)", "");
+        return signum < 0 ? "-" + result : result;
+    }
+
+    /**
+     * A more efficient algorithm for generating a binary representation of a byte array.
+     * {@link BigInteger#toString(int)} is too slow because it generalizes to any radix and,
+     * consequently, is implemented using expensive mathematical operations.
+     *
+     * @param value the value to generate a binary string from
+     * @return the binary representation of value, with "-" sign prepended if necessary
+     */
+    private static final String toBinaryString(BigInteger value) {
+        int signum = value.signum();
+
+        // obvious shortcut
+        if (signum == 0) {
+            return "0";
+        }
+
+        // we want to work in absolute numeric value (negative sign is added afterward)
+        byte[] input = value.abs().toByteArray();
+        // Eight digits are generated per byte. (bitCount() is the number of one-bits, which
+        // underestimates the space needed.)
+        StringBuilder sb = new StringBuilder(input.length * 8);
+
+        int b;
+        for (int i = 0; i < input.length; i++) {
+            b = input[i] & 0xFF;
+            for (int bit = 7; bit >= 0; bit--) {
+                sb.append(((b >> bit) & 0x1) > 0 ? '1' : '0');
+            }
+        }
+
+        // before returning the char array as string, remove leading zeroes, but not the last one
+        String result = sb.toString().replaceFirst("^0+(?!$)", "");
+        return signum < 0 ? "-" + result : result;
+    }
+
+ /** Format specification used by bin(). */
+ public static final Spec BIN = InternalFormat.fromText("#b");
+
+ /** Format specification used by oct(). */
+ public static final Spec OCT = InternalFormat.fromText("#o");
+
+ /** Format specification used by hex(). */
+ public static final Spec HEX = InternalFormat.fromText("#x");
+
+ /**
+ * Convert the object to binary according to the conventions of Python built-in
+ * <code>bin()</code>. The object's __index__ method is called, and is responsible for raising
+ * the appropriate error (which the base {@link PyObject#__index__()} does). The conversion is
+ * that of {@link #formatNumber(PyObject, Spec)} with the specification {@link #BIN}.
+ *
+ * @param number to convert
+ * @return PyString converted result
+ */
+ // Follow this pattern in Python 3, where objects no longer have __hex__, __oct__ members.
+ public static PyString bin(PyObject number) {
+ return formatNumber(number, BIN);
+ }
+
+    /**
+     * Convert the object according to the conventions of Python built-in <code>hex()</code>, or
+     * <code>oct()</code>, or any other integer format given by the specification. The object's
+     * <code>__index__</code> method is called, and is responsible for raising the appropriate
+     * error (which the base {@link PyObject#__index__()} does).
+     *
+     * @param number to convert
+     * @param spec conversion specification to apply
+     * @return PyString converted result
+     */
+    public static PyString formatNumber(PyObject number, Spec spec) {
+        PyObject index = number.__index__();
+        IntegerFormatter formatter = new IntegerFormatter(spec);
+        // Anything __index__ returns that is not a PyInteger is treated as a PyLong.
+        if (index instanceof PyInteger) {
+            int small = ((PyInteger)index).getValue();
+            formatter.format(small);
+        } else {
+            BigInteger big = ((PyLong)index).getValue();
+            formatter.format(big);
+        }
+        return new PyString(formatter.getResult());
+    }
+
+    /**
+     * A minor variation on {@link IntegerFormatter} to handle "traditional" %-formatting. It
+     * differs in three respects: <code>spec.precision</code> is supported (as a minimum number of
+     * digits), octal in "alternate" mode is marked by a leading zero (0 and 0123, not 0o0 and
+     * 0o123), and c-format reports range errors differently.
+     */
+    public static class Traditional extends IntegerFormatter {
+
+        /**
+         * Create a formatter that appends to a buffer supplied by the client, working to the
+         * given specification. Sets {@link #mark} to the end of the buffer.
+         *
+         * @param result destination buffer
+         * @param spec parsed conversion specification
+         */
+        public Traditional(StringBuilder result, Spec spec) {
+            super(result, spec);
+        }
+
+        /**
+         * Create a formatter working to the given specification, with a result buffer allocated
+         * internally.
+         *
+         * @param spec parsed conversion specification
+         */
+        public Traditional(Spec spec) {
+            this(new StringBuilder(), spec);
+        }
+
+        /**
+         * Append the octal representation of the value to {@link #result}. The sign is chosen by
+         * reference to the format specification. No "0o" prefix is ever written here: the leading
+         * zero of alternate mode is supplied when the digits are appended.
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_o(BigInteger value) {
+            BigInteger magnitude;
+            if (value.signum() < 0) {
+                // Minus sign, then the digits of the negated value.
+                negativeSign(null);
+                magnitude = value.negate();
+            } else {
+                // Optional sign, then the digits of the value itself.
+                positiveSign(null);
+                magnitude = value;
+            }
+            appendOctalNumber(toOctalString(magnitude));
+        }
+
+        /**
+         * Append to {@link #result} the character whose code point is the given value.
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_c(BigInteger value) {
+            if (value.signum() < 0) {
+                throw Py.OverflowError("unsigned byte integer is less than minimum");
+            }
+            // The exclusive upper limit is 256 for byte output, and the unicode range otherwise.
+            BigInteger limit = bytes ? LIMIT_BYTE : LIMIT_UNICODE;
+            if (value.compareTo(limit) >= 0) {
+                throw Py.OverflowError("unsigned byte integer is greater than maximum");
+            }
+            result.appendCodePoint(value.intValue());
+        }
+
+        /**
+         * Append the octal representation of the value to {@link #result}. The sign is chosen by
+         * reference to the format specification. No "0o" prefix is ever written here: the leading
+         * zero of alternate mode is supplied when the digits are appended.
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_o(int value) {
+            int magnitude;
+            if (value < 0) {
+                // Minus sign, then the digits of the negated value.
+                negativeSign(null);
+                magnitude = -value;
+            } else {
+                // Optional sign, then the digits of the value itself.
+                positiveSign(null);
+                magnitude = value;
+            }
+            appendOctalNumber(Integer.toOctalString(magnitude));
+        }
+
+        /**
+         * Append to {@link #result} the character whose code point is the given value.
+         *
+         * @param value to convert
+         */
+        @Override
+        void format_c(int value) {
+            if (value < 0) {
+                throw Py.OverflowError("unsigned byte integer is less than minimum");
+            }
+            // The exclusive upper limit is 256 for byte output, and the unicode range otherwise.
+            int limit = bytes ? 256 : PySystemState.maxunicode + 1;
+            if (value >= limit) {
+                throw Py.OverflowError("unsigned byte integer is greater than maximum");
+            }
+            result.appendCodePoint(value);
+        }
+
+        /**
+         * Append a string (number) to {@link #result}, preceded by as many zeros as are needed to
+         * make the whole-part length {@link #lenWhole} no less than the precision.
+         *
+         * @param number to append
+         */
+        @Override
+        void appendNumber(String number) {
+            int length = number.length();
+            int precision = spec.getPrecision(0);
+            // Zero-fill on the left up to the precision (a loop that may not execute at all).
+            while (length < precision) {
+                result.append('0');
+                length++;
+            }
+            lenWhole = length;
+            append(number);
+        }
+
+        /**
+         * Append a string (number) to {@link #result}, preceded by as many zeros as are needed to
+         * make the whole-part length {@link #lenWhole} no less than the precision. In addition,
+         * when <code>spec.alternate==true</code> an octal number must begin with a zero, so if
+         * the number passed in does not, at least one zero is inserted.
+         *
+         * @param number to append
+         */
+        void appendOctalNumber(String number) {
+            int length = number.length();
+            int target = spec.getPrecision(0);
+            // Alternate mode, and precision alone would not produce a leading zero: force one.
+            if (spec.alternate && number.charAt(0) != '0' && length >= target) {
+                target = length + 1;
+            }
+            while (length < target) {
+                result.append('0');
+                length++;
+            }
+            lenWhole = length;
+            append(number);
+        }
+
+    }
+}
diff --git a/src/org/python/core/stringlib/InternalFormat.java b/src/org/python/core/stringlib/InternalFormat.java
--- a/src/org/python/core/stringlib/InternalFormat.java
+++ b/src/org/python/core/stringlib/InternalFormat.java
@@ -3,6 +3,9 @@
import org.python.core.Py;
import org.python.core.PyException;
+import org.python.core.PyObject;
+import org.python.core.PyString;
+import org.python.core.PyUnicode;
public class InternalFormat {
@@ -14,7 +17,25 @@
*/
public static Spec fromText(String text) {
Parser parser = new Parser(text);
- return parser.parse();
+ try {
+ return parser.parse();
+ } catch (IllegalArgumentException e) {
+ throw Py.ValueError(e.getMessage());
+ }
+ }
+
+ /**
+ * Create a {@link Spec} object by parsing a format specification, supplied as an object.
+ *
+ * @param text to parse
+ * @return parsed equivalent to text
+ */
+ public static Spec fromText(PyObject text, String method) {
+ if (text instanceof PyString) {
+ return fromText(((PyString)text).getString());
+ } else {
+ throw Py.TypeError(method + " requires str or unicode");
+ }
}
/**
@@ -30,23 +51,72 @@
/** The (partial) result. */
protected StringBuilder result;
- /** The number we are working on floats at the end of the result, and starts here. */
+ /**
+ * Signals the client's intention to make a PyString (or other byte-like) interpretation of
+ * {@link #result}, rather than a PyUnicode one.
+ */
+ protected boolean bytes;
+
+ /** The start of the formatted data for padding purposes, <={@link #start} */
+ protected int mark;
+ /** The latest number we are working on floats at the end of the result, and starts here. */
protected int start;
- /** If it contains no sign, this length is zero, and 1 otherwise. */
+ /** If it contains no sign, this length is zero, and >0 otherwise. */
protected int lenSign;
/** The length of the whole part (to left of the decimal point or exponent) */
protected int lenWhole;
/**
- * Construct the formatter from a specification and initial buffer capacity. A reference is
- * held to this specification, but it will not be modified by the actions of this class.
+ * Construct the formatter from a client-supplied buffer and a specification. Sets
+ * {@link #mark} and {@link #start} to the end of the buffer. The new formatted object will
+ * therefore be appended there and, when the time comes, padding will be applied to (just)
+ * the new text.
+ *
+ * @param result destination buffer
+ * @param spec parsed conversion specification
+ */
+ public Formatter(StringBuilder result, Spec spec) {
+ this.spec = spec;
+ this.result = result;
+ this.start = this.mark = result.length();
+ }
+
+ /**
+ * Construct the formatter from a specification and initial buffer capacity. Sets
+ * {@link #mark} to the end of the buffer.
*
* @param spec parsed conversion specification
* @param width of buffer initially
*/
public Formatter(Spec spec, int width) {
- this.spec = spec;
- result = new StringBuilder(width);
+ this(new StringBuilder(width), spec);
+ }
+
+ /**
+ * Signals the client's intention to make a PyString (or other byte-like) interpretation of
+ * {@link #result}, rather than a PyUnicode one. Only formatters that could produce
+ * characters >255 are affected by this (e.g. c-format). Idiom:
+ *
+ * <pre>
+ * MyFormatter f = new MyFormatter( InternalFormat.fromText(formatSpec) );
+ * f.setBytes(!(formatSpec instanceof PyUnicode));
+ * // ... formatting work
+ * return f.getPyResult();
+ * </pre>
+ *
+ * @param bytes true to signal the intention to make a byte-like interpretation
+ */
+ public void setBytes(boolean bytes) {
+ this.bytes = bytes;
+ }
+
+ /**
+ * Whether initialised for a byte-like interpretation.
+ *
+ * @return bytes attribute
+ */
+ public boolean isBytes() {
+ return bytes;
}
/**
@@ -58,6 +128,22 @@
return result.toString();
}
+ /**
+ * Convenience method to return the current result of the formatting, as a
+ * <code>PyObject</code>, either {@link PyString} or {@link PyUnicode} according to
+ * {@link #bytes}.
+ *
+ * @return formatted result
+ */
+ public PyString getPyResult() {
+ String r = getResult();
+ if (bytes) {
+ return new PyString(r);
+ } else {
+ return new PyUnicode(r);
+ }
+ }
+
/*
* Implement Appendable interface by delegation to the result buffer.
*
@@ -84,21 +170,28 @@
/**
* Clear the instance variables describing the latest object in {@link #result}, ready to
- * receive a new number
+ * receive a new one: sets {@link #start} and calls {@link #reset()}. This is necessary when
+ * a <code>Formatter</code> is to be re-used. Note that this leaves {@link #mark} where it
+ * is. In the core, we need this to support <code>complex</code>: two floats in the same
+ * format, but padded as a unit.
*/
public void setStart() {
- // Mark the end of the buffer as the start of the current object and reset all.
+ // The new value will float at the current end of the result buffer.
start = result.length();
- // Clear the variable describing the latest number in result.
- reset();
+ // If anything has been added since construction, reset all state.
+ if (start > mark) {
+ // Clear the variable describing the latest number in result.
+ reset();
+ }
}
/**
* Clear the instance variables describing the latest object in {@link #result}, ready to
- * receive a new one.
+ * receive a new one. This is called from {@link #setStart()}. Subclasses override this
+ * method and call {@link #setStart()} at the start of their format method.
*/
protected void reset() {
- // Clear the variable describing the latest object in result.
+ // Clear the variables describing the latest object in result.
lenSign = lenWhole = 0;
}
@@ -215,19 +308,19 @@
}
/**
- * Pad the result so far (defined as the entire contents of {@link #result}) using the
- * alignment, target width and fill character defined in {@link #spec}. The action of
- * padding will increase the overall length of the result to the target width, if that is
- * greater than the current length.
+ * Pad the result so far (defined as the contents of {@link #result} from {@link #mark} to
+ * the end) using the alignment, target width and fill character defined in {@link #spec}.
+ * The action of padding will increase the length of this segment to the target width, if
+ * that is greater than the current length.
* <p>
* When the padding method has decided that that it needs to add n padding characters, it
- * will affect {@link #start} or {@link #lenSign} as follows.
+ * will affect {@link #start} or {@link #lenWhole} as follows.
* <table border style>
* <tr>
* <th>align</th>
* <th>meaning</th>
* <th>start</th>
- * <th>lenSign</th>
+ * <th>lenWhole</th>
* <th>result.length()</th>
* </tr>
* <tr>
@@ -259,69 +352,79 @@
* <td>+n</td>
* </tr>
* </table>
- * Note that we may have converted more than one value into the result buffer (for example
- * when formatting a complex number). The pointer <code>start</code> is at the start of the
- * last number converted. Padding with zeros, and the "pad after sign" mode, will produce a
- * result you probably don't want. It is up to the client to disallow this (which
- * <code>complex</code> does).
+ * Note that in the "pad after sign" mode, only the last number into the buffer receives the
+ * padding. This padding gets incorporated into the whole part of the number. (In other
+ * modes, the padding is around <code>result[mark:]</code>.) When this would not be
+ * appropriate, it is up to the client to disallow this (which <code>complex</code> does).
*
- * @return this object
+ * @return this Formatter object
*/
public Formatter pad() {
+ // We'll need this many pad characters (if>0). Note Spec.UNSPECIFIED<0.
+ int n = spec.width - (result.length() - mark);
+ if (n > 0) {
+ pad(mark, n);
+ }
+ return this;
+ }
- // We'll need this many pad characters (if>0). Note Spec.UNDEFINED<0.
- int n = spec.width - result.length();
- if (n > 0) {
+ /**
+ * Pad the last result (defined as the contents of {@link #result} from argument
+ * <code>leftIndex</code> to the end) using the alignment, by <code>n</code> repetitions of
+ * the fill character defined in {@link #spec}, and distributed according to
+ * <code>spec.align</code>. The value of <code>leftIndex</code> is only used if the
+ * alignment is '>' (padding on the left) or '^' (both sides). The value of the critical lengths (lenWhole,
+ * lenSign, etc.) are not affected, because we assume that <code>leftIndex <= </code>
+ * {@link #start}.
+ *
+ * @param leftIndex the index in result at which to insert left-fill characters.
+ * @param n number of fill characters to insert.
+ */
+ protected void pad(int leftIndex, int n) {
+ char align = spec.getAlign('>'); // Right for numbers (strings will supply '<' align)
+ char fill = spec.getFill(' ');
- char align = spec.getAlign('>'); // Right for numbers (wrong for strings)
- char fill = spec.getFill(' ');
+ // Start by assuming padding is all leading ('>' case or '=')
+ int leading = n;
- // Start by assuming padding is all leading ('>' case or '=')
- int leading = n;
+ // Split the total padding according to the alignment
+ if (align == '^') {
+ // Half the padding before
+ leading = n / 2;
+ } else if (align == '<') {
+ // All the padding after
+ leading = 0;
+ }
- // Split the total padding according to the alignment
- if (align == '^') {
- // Half the padding before
- leading = n / 2;
- } else if (align == '<') {
- // All the padding after
- leading = 0;
+ // All padding that is not leading is trailing
+ int trailing = n - leading;
+
+ // Insert the leading space
+ if (leading > 0) {
+ if (align == '=') {
+ // Incorporate into the (latest) whole part
+ leftIndex = start + lenSign;
+ lenWhole += leading;
+ } else {
+ // Default is to insert at the stated leftIndex <= start.
+ start += leading;
}
-
- // All padding that is not leading is trailing
- int trailing = n - leading;
-
- // Insert the leading space
- if (leading > 0) {
- int pos;
- if (align == '=') {
- // Incorporate into the (latest) whole part
- pos = start + lenSign;
- lenWhole += leading;
- } else {
- // Insert at the very beginning (not start) by default.
- pos = 0;
- start += leading;
- }
- makeSpaceAt(pos, leading);
- for (int i = 0; i < leading; i++) {
- result.setCharAt(pos + i, fill);
- }
- }
-
- // Append the trailing space
- for (int i = 0; i < trailing; i++) {
- result.append(fill);
- }
-
- // Check for special case
- if (align == '=' && fill == '0' && spec.grouping) {
- // We must extend the grouping separator into the padding
- zeroPadAfterSignWithGroupingFixup(3, ',');
+ makeSpaceAt(leftIndex, leading);
+ for (int i = 0; i < leading; i++) {
+ result.setCharAt(leftIndex + i, fill);
}
}
- return this;
+ // Append the trailing space
+ for (int i = 0; i < trailing; i++) {
+ result.append(fill);
+ }
+
+ // Check for special case
+ if (align == '=' && fill == '0' && spec.grouping) {
+ // We must extend the grouping separator into the padding
+ zeroPadAfterSignWithGroupingFixup(3, ',');
+ }
}
/**
@@ -345,7 +448,7 @@
* </pre>
*
* The padding has increased the overall length of the result to the target width. About one
- * in three call to this method adds one to the width, because the whole part cannot start
+ * in three calls to this method adds one to the width, because the whole part cannot start
* with a comma.
*
* <pre>
@@ -355,9 +458,6 @@
* '-<b>0</b>,000,000,001,200,000,000.0000'
* </pre>
*
- * Insert grouping characters (conventionally commas) into the whole part of the number.
- * {@link #lenWhole} will increase correspondingly.
- *
* @param groupSize normally 3.
* @param comma or some other character to use as a separator.
*/
@@ -386,10 +486,9 @@
* Suppose the format call was format(-12e8, "0=30,.4f"). At the beginning, we had
* something like this in result: . [-|000000000001,200,000,000|.|0000||]
*
- * And now, result looks like this: [-|0000,000,001,200,000,000|.|0000||] in which
- * the first zero is wrong as it stands, nor can it just be over-written with a
- * comma. We have to insert another zero, even though this makes the result longer
- * than we were given.
+ * And now, result looks like this: [-|,000,000,001,200,000,000|.|0000||] in which
+ * the first comma is wrong, but so would be a zero. We have to insert another zero,
+ * even though this makes the result longer than we were asked for.
*/
result.insert(firstZero, '0');
lenWhole += 1;
@@ -418,7 +517,19 @@
* @return exception to throw
*/
public static PyException alternateFormNotAllowed(String forType) {
- return notAllowed("Alternate form (#)", forType);
+ return alternateFormNotAllowed(forType, '\0');
+ }
+
+ /**
+ * Convenience method returning a {@link Py#ValueError} reporting that alternate form is not
+ * allowed in a format specifier for the named type and specified type code.
+ *
+ * @param forType the type it was found applied to
+ * @param code the formatting code (or '\0' not to mention one)
+ * @return exception to throw
+ */
+ public static PyException alternateFormNotAllowed(String forType, char code) {
+ return notAllowed("Alternate form (#)", forType, code);
}
/**
@@ -430,7 +541,30 @@
* @return exception to throw
*/
public static PyException alignmentNotAllowed(char align, String forType) {
- return notAllowed("'" + align + "' alignment flag", forType);
+ return notAllowed("'" + align + "' alignment flag", forType, '\0');
+ }
+
+ /**
+ * Convenience method returning a {@link Py#ValueError} reporting that specifying a sign is
+ * not allowed in a format specifier for the named type.
+ *
+ * @param forType the type it was found applied to
+ * @param code the formatting code (or '\0' not to mention one)
+ * @return exception to throw
+ */
+ public static PyException signNotAllowed(String forType, char code) {
+ return notAllowed("Sign", forType, code);
+ }
+
+ /**
+ * Convenience method returning a {@link Py#ValueError} reporting that specifying a
+ * precision is not allowed in a format specifier for the named type.
+ *
+ * @param forType the type it was found applied to
+ * @return exception to throw
+ */
+ public static PyException precisionNotAllowed(String forType) {
+ return notAllowed("Precision", forType, '\0');
}
/**
@@ -441,22 +575,63 @@
* @return exception to throw
*/
public static PyException zeroPaddingNotAllowed(String forType) {
- return notAllowed("Zero padding", forType);
+ return notAllowed("Zero padding", forType, '\0');
}
/**
* Convenience method returning a {@link Py#ValueError} reporting that some format specifier
- * feature is not allowed for the named type.
+ * feature is not allowed for the named data type.
*
- * @param particularOutrage committed in the present case
- * @param forType the type it where it is an outrage
+ * @param outrage committed in the present case
+ * @param forType the data type (e.g. "integer") for which it is an outrage
* @return exception to throw
*/
- protected static PyException notAllowed(String particularOutrage, String forType) {
- String msg = particularOutrage + " is not allowed in " + forType + " format specifier";
+ public static PyException notAllowed(String outrage, String forType) {
+ return notAllowed(outrage, forType, '\0');
+ }
+
+ /**
+ * Convenience method returning a {@link Py#ValueError} reporting that some format specifier
+ * feature is not allowed for the named format code and data type. Produces a message like:
+ * <p>
+ * <code>outrage+" not allowed with "+forType+" format specifier '"+code+"'"</code>
+ * <p>
+ * <code>outrage+" not allowed in "+forType+" format specifier"</code>
+ *
+ * @param outrage committed in the present case
+ * @param forType the data type (e.g. "integer") for which it is an outrage
+ * @param code the formatting code for which it is an outrage (or '\0' not to mention one)
+ * @return exception to throw
+ */
+ public static PyException notAllowed(String outrage, String forType, char code) {
+ // Try really hard to be like CPython
+ String codeAsString, withOrIn;
+ if (code == 0) {
+ withOrIn = "in ";
+ codeAsString = "";
+ } else {
+ withOrIn = "with ";
+ codeAsString = " '" + code + "'";
+ }
+ String msg =
+ outrage + " not allowed " + withOrIn + forType + " format specifier"
+ + codeAsString;
return Py.ValueError(msg);
}
+ /**
+ * Convenience method returning a {@link Py#OverflowError} reporting:
+ * <p>
+ * <code>"formatted "+type+" is too long (precision too large?)"</code>
+ *
+ * @param type of formatting ("integer", "float")
+ * @return exception to throw
+ */
+ public static PyException precisionTooLarge(String type) {
+ String msg = "formatted " + type + " is too long (precision too large?)";
+ return Py.OverflowError(msg);
+ }
+
}
/**
@@ -636,6 +811,12 @@
false, Spec.UNSPECIFIED, Spec.NONE);
/**
+ * Defaults applicable to string types. Equivalent to " <"
+ */
+ public static final Spec STRING = new Spec(' ', '<', Spec.NONE, false, Spec.UNSPECIFIED,
+ false, Spec.UNSPECIFIED, Spec.NONE);
+
+ /**
* Constructor offering just precision and type.
*
* <pre>
@@ -775,11 +956,6 @@
throw new IllegalArgumentException("Invalid conversion specification");
}
- // Restrict grouping to known formats. (Mirrors CPython, but misplaced?)
- if (grouping && "defgEG%F\0".indexOf(type) == -1) {
- throw new IllegalArgumentException("Cannot specify ',' with '" + type + "'.");
- }
-
// Create a specification
return new Spec(fill, align, sign, alternate, width, grouping, precision, type);
}
diff --git a/src/org/python/core/stringlib/InternalFormatSpec.java b/src/org/python/core/stringlib/InternalFormatSpec.java
deleted file mode 100644
--- a/src/org/python/core/stringlib/InternalFormatSpec.java
+++ /dev/null
@@ -1,88 +0,0 @@
-package org.python.core.stringlib;
-
-/**
- * Parsed PEP-3101 format specification of a single field. This class holds the several attributes
- * that might be decoded from a format specifier. It provides a method
- * {@link #pad(String, char, int)} for adjusting a string using those attributes related to padding
- * to a string assumed to be the result of formatting to the given precision.
- * <p>
- * This structure is returned by {@link InternalFormatSpecParser#parse()} and having public members
- * is freely used by {@link InternalFormatSpecParser}, and the __format__ methods of client object
- * types.
- * <p>
- * The fields correspond to the elements of a format specification. The grammar of a format
- * specification is:
- *
- * <pre>
- * [[fill]align][sign][#][0][width][,][.precision][type]
- * </pre>
- */
-public final class InternalFormatSpec {
-
- /** The fill specified in the grammar. */
- public char fill_char;
- /** Alignment indicator is 0, or one of {<code>'<', '^', '>', '='</code> . */
- public char align;
- /** The alternative format flag '#' was given. */
- public boolean alternate;
- /** Sign-handling flag, one of <code>'+'</code>, <code>'-'</code>, or <code>' '</code>. */
- public char sign;
- /** Width to which to pad the resault in {@link #pad(String, char, int)}. */
- public int width = -1;
- /** Insert the grouping separator (which in Python always indicates a group-size of 3). */
- public boolean thousands_separators;
- /** Precision decoded from the format. */
- public int precision = -1;
- /** Type key from the format. */
- public char type;
-
- /**
- * Pad value, using {@link #fill_char} (or <code>' '</code>) before and after, to {@link #width}
- * <code>-leaveWidth</code>, aligned according to {@link #align} (or according to
- * <code>defaultAlign</code>).
- *
- * @param value to pad
- * @param defaultAlign to use if <code>this.align</code>=0 (one of <code>'<'</code>,
- * <code>'^'</code>, <code>'>'</code>, or <code>'='</code>).
- * @param leaveWidth to reduce effective <code>this.width</code> by
- * @return padded value
- */
- public String pad(String value, char defaultAlign, int leaveWidth) {
-
- // We'll need this many pad characters (if>0)
- int remaining = width - value.length() - leaveWidth;
- if (remaining <= 0) {
- return value;
- }
-
- // Use this.align or defaultAlign
- int useAlign = align;
- if (useAlign == 0) {
- useAlign = defaultAlign;
- }
-
- // By default all padding is leading padding ('<' case or '=')
- int leading = remaining;
- if (useAlign == '^') {
- // Half the padding before
- leading = remaining / 2;
- } else if (useAlign == '<') {
- // All the padding after
- leading = 0;
- }
-
- // Now build the result
- StringBuilder result = new StringBuilder();
- char fill = fill_char != 0 ? fill_char : ' ';
-
- for (int i = 0; i < leading; i++) { // before
- result.append(fill);
- }
- result.append(value);
- for (int i = 0; i < remaining - leading; i++) { // after
- result.append(fill);
- }
-
- return result.toString();
- }
-}
diff --git a/src/org/python/core/stringlib/InternalFormatSpecParser.java b/src/org/python/core/stringlib/InternalFormatSpecParser.java
deleted file mode 100644
--- a/src/org/python/core/stringlib/InternalFormatSpecParser.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package org.python.core.stringlib;
-
-/**
- * Parser for PEP-3101 field format specifications. This class provides a {@link #parse()} method
- * that translates the format specification into an <code>InternalFormatSpec</code> object.
- */
-public class InternalFormatSpecParser {
-
- private String spec;
- private int index;
-
- /**
- * Constructor simply holds the specification streang ahead of the {@link #parse()} operation.
- *
- * @param spec format specifier to parse (e.g. "<+12.3f")
- */
- public InternalFormatSpecParser(String spec) {
- this.spec = spec;
- this.index = 0;
- }
-
- private static boolean isAlign(char c) {
- switch (c) {
- case '<':
- case '>':
- case '=':
- case '^':
- return true;
- default:
- return false;
- }
- }
-
- /**
- * Parse the specification with which this object was initialised into an
- * {@link InternalFormatSpec}, which is an object encapsulating the format for use by formatting
- * methods. This parser deals only with the format specifiers themselves, as accepted by the
- * <code>__format__</code> method of a type, or the <code>format()</code> built-in, not format
- * strings in general as accepted by <code>str.format()</code>. A typical idiom is:
- *
- * <pre>
- * InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
- * </pre>
- *
- * @return the <code>InternalFormatSpec</code> equivalent to the constructor argument
- */
- /*
- * This method is the equivalent of CPython's parse_internal_render_format_spec() in
- * ~/Objects/stringlib/formatter.h.
- */
- // XXX Better encapsulated as a constructor of InternalFormatSpec?
- public InternalFormatSpec parse() {
- InternalFormatSpec result = new InternalFormatSpec();
- if (spec.length() >= 1 && isAlign(spec.charAt(0))) {
- result.align = spec.charAt(index);
- index++;
- } else if (spec.length() >= 2 && isAlign(spec.charAt(1))) {
- result.fill_char = spec.charAt(0);
- result.align = spec.charAt(1);
- index += 2;
- }
- if (isAt("+- ")) {
- result.sign = spec.charAt(index);
- index++;
- }
- if (isAt("#")) {
- result.alternate = true;
- index++;
- }
- if (result.fill_char == '\0' && isAt("0")) {
- result.fill_char = '0';
- if (result.align == '\0') {
- result.align = '=';
- }
- index++;
- }
- result.width = getInteger();
- if (isAt(",")) {
- result.thousands_separators = true;
- index++;
- }
- if (isAt(".")) {
- index++;
- result.precision = getInteger();
- if (result.precision == -1) {
- throw new IllegalArgumentException("Format specifier missing precision");
- }
- }
- if (index < spec.length()) {
- result.type = spec.charAt(index);
- if (index + 1 != spec.length()) {
- throw new IllegalArgumentException("Invalid conversion specification");
- }
- }
- if (result.thousands_separators && "defgEG%F\0".indexOf(result.type) == -1) {
- throw new IllegalArgumentException("Cannot specify ',' with '" + result.type + "'.");
- }
- return result;
- }
-
- private int getInteger() {
- int value = 0;
- boolean empty = true;
- while (index < spec.length() && spec.charAt(index) >= '0' && spec.charAt(index) <= '9') {
- value = value * 10 + spec.charAt(index) - '0';
- index++;
- empty = false;
- }
- if (empty) {
- return -1;
- }
- return value;
- }
-
- private boolean isAt(String chars) {
- return index < spec.length() && chars.indexOf(spec.charAt(index)) >= 0;
- }
-}
diff --git a/src/org/python/core/stringlib/TextFormatter.java b/src/org/python/core/stringlib/TextFormatter.java
new file mode 100644
--- /dev/null
+++ b/src/org/python/core/stringlib/TextFormatter.java
@@ -0,0 +1,119 @@
+// Copyright (c) Jython Developers
+package org.python.core.stringlib;
+
+import org.python.core.stringlib.InternalFormat.Formatter;
+import org.python.core.stringlib.InternalFormat.Spec;
+
+/**
+ * A class that provides the implementation of <code>str</code> and <code>unicode</code> formatting.
+ * In a limited way, it acts like a StringBuilder to which text is appended, formatted according
+ * to the format specifier supplied at construction. These are ephemeral objects that are not, on
+ * their own, thread safe.
+ */
+public class TextFormatter extends InternalFormat.Formatter {
+
+ /**
+ * Construct the formatter from a client-supplied buffer, to which the result will be appended,
+ * and a specification. Sets {@link #mark} to the end of the buffer.
+ *
+ * @param result destination buffer
+ * @param spec parsed conversion specification
+ */
+ public TextFormatter(StringBuilder result, Spec spec) {
+ super(result, spec);
+ }
+
+ /**
+ * Construct the formatter from a specification, allocating a buffer internally for the result.
+ *
+ * @param spec parsed conversion specification
+ */
+ public TextFormatter(Spec spec) {
+ this(new StringBuilder(), spec);
+ }
+
+ /*
+ * Re-implement the text appends so they return the right type.
+ */
+ @Override
+ public TextFormatter append(char c) {
+ super.append(c);
+ return this;
+ }
+
+ @Override
+ public TextFormatter append(CharSequence csq) {
+ super.append(csq);
+ return this;
+ }
+
+ @Override
+ public TextFormatter append(CharSequence csq, int start, int end) //
+ throws IndexOutOfBoundsException {
+ super.append(csq, start, end);
+ return this;
+ }
+
+ /**
+ * Format the given <code>String</code> into the <code>result</code> buffer. Largely, this is a
+ * matter of copying the value of the argument, but a subtlety arises when the string contains
+ * supplementary (non-BMP) Unicode characters, which are represented as surrogate pairs. The
+ * precision specified in the format relates to a count of Unicode characters (code points), not
+ * Java <code>char</code>s. The method deals with this correctly, essentially by not counting
+ * the high-surrogates in the allowance. The final value of {@link #lenWhole} counts the UTF-16
+ * units added.
+ *
+ * @param value to format
+ * @return this <code>TextFormatter</code> object
+ */
+ public TextFormatter format(String value) {
+
+ // Scratch all instance variables and start = result.length().
+ setStart();
+
+ int p = spec.precision, n = value.length();
+
+ if (Spec.specified(p) && p < n) {
+ // Estimate the space for the converted result (preempt multiple re-allocation)
+ int space = Math.max(spec.width, p);
+ result.ensureCapacity(result.length() + space + (bytes ? 0 : space / 4));
+ /*
+ * A precision p was specified less than the length: we may have to truncate. Note we
+ * compared p with the UTF-16 length, even though it is the code point length that
+ * matters. But the code point length cannot be greater than n.
+ */
+ int count = 0;
+ while (count < p) {
+ // count is the number of UTF-16 chars.
+ char c = value.charAt(count++);
+ result.append(c);
+ // A high-surrogate will always be followed by a low, so doesn't count.
+ if (Character.isHighSurrogate(c) && p < n) {
+ // Accomplish "not counting" by bumping the limit p, within the array bounds.
+ p += 1;
+ }
+ }
+ // Record the UTF-16 count as the length in buffer
+ lenWhole = count;
+
+ } else {
+ // We definitely don't need to truncate. Append the whole string.
+ lenWhole = n;
+ result.append(value);
+ }
+
+ return this;
+ }
+
+ // Variant to deal with supplementary characters: other formatters don't produce them.
+ @Override
+ public TextFormatter pad() {
+ // We'll need this many pad characters (if>0). Note Spec.UNSPECIFIED<0.
+ int n = spec.width - result.codePointCount(mark, result.length());
+ if (n > 0) {
+ pad(mark, n);
+ }
+ return this;
+ }
+
+}
diff --git a/tests/java/org/python/core/StringFormatTest.java b/tests/java/org/python/core/StringFormatTest.java
--- a/tests/java/org/python/core/StringFormatTest.java
+++ b/tests/java/org/python/core/StringFormatTest.java
@@ -1,43 +1,58 @@
package org.python.core;
+import java.math.BigInteger;
+
import junit.framework.TestCase;
+
import org.python.core.stringlib.FieldNameIterator;
-import org.python.core.stringlib.InternalFormatSpec;
-import org.python.core.stringlib.InternalFormatSpecParser;
+import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
import org.python.core.stringlib.MarkupIterator;
+import org.python.core.stringlib.TextFormatter;
+import org.python.core.stringlib.InternalFormat.Spec;
+import org.python.util.PythonInterpreter;
/**
* Tests for internal bits and pieces of string.format implementation.
*/
public class StringFormatTest extends TestCase {
+
+ /** Exception-raising seems to need the interpreter to be initialised **/
+ PythonInterpreter interp = new PythonInterpreter();
+
public void testInternalFormatSpec() {
- InternalFormatSpec spec = new InternalFormatSpecParser("x").parse();
+ InternalFormat.Spec spec;
+ spec = InternalFormat.fromText("x");
+ assertFalse(Spec.specified(spec.align));
+ assertFalse(Spec.specified(spec.fill));
+ assertFalse(Spec.specified(spec.width));
+ assertFalse(Spec.specified(spec.precision));
assertEquals('x', spec.type);
- spec = new InternalFormatSpecParser("<x").parse();
+ spec = InternalFormat.fromText("<x");
assertEquals('<', spec.align);
assertEquals('x', spec.type);
- spec = new InternalFormatSpecParser("~<x").parse();
- assertEquals('~', spec.fill_char);
+ spec = InternalFormat.fromText("~<x");
+ assertEquals('~', spec.fill);
assertEquals('<', spec.align);
assertEquals('x', spec.type);
- spec = new InternalFormatSpecParser("+x").parse();
+ spec = InternalFormat.fromText("+x");
assertEquals('+', spec.sign);
assertEquals('x', spec.type);
- spec = new InternalFormatSpecParser("#x").parse();
+ spec = InternalFormat.fromText("#x");
assertEquals(true, spec.alternate);
- spec = new InternalFormatSpecParser("0x").parse();
+ spec = InternalFormat.fromText("0x");
assertEquals('=', spec.align);
- assertEquals('0', spec.fill_char);
+ assertEquals('0', spec.fill);
- spec = new InternalFormatSpecParser("123x").parse();
+ spec = InternalFormat.fromText("123x");
assertEquals(123, spec.width);
- spec = new InternalFormatSpecParser("123.456x").parse();
+ spec = InternalFormat.fromText("123.456x");
assertEquals(123, spec.width);
assertEquals(456, spec.precision);
@@ -45,105 +60,182 @@
assertParseError("123xx", "Invalid conversion specification");
- spec = new InternalFormatSpecParser("").parse();
- assertEquals(0, spec.type);
+ spec = InternalFormat.fromText("");
+ assertEquals(Spec.NONE, spec.type);
}
private void assertParseError(String spec, String expected) {
String error = null;
try {
- new InternalFormatSpecParser(spec).parse();
- } catch (IllegalArgumentException e) {
- error = e.getMessage();
+ InternalFormat.fromText(spec);
+ } catch (PyException e) {
+ assertEquals(Py.ValueError, e.type);
+ error = e.value.toString();
}
assertEquals(expected, error);
}
- public void testFormatIntOrLong() {
- InternalFormatSpec spec = new InternalFormatSpec();
- spec.type = 'd';
- assertEquals("123", PyInteger.formatIntOrLong(123, spec));
- spec.type = 'o';
- assertEquals("173", PyInteger.formatIntOrLong(123, spec));
- spec.type = 'x';
- assertEquals("7b", PyInteger.formatIntOrLong(123, spec));
- spec.type = 'X';
- assertEquals("7B", PyInteger.formatIntOrLong(123, spec));
- spec.type = 'b';
- assertEquals("1111011", PyInteger.formatIntOrLong(123, spec));
+ /**
+ * Test the IntegerFormatter returned by {@link PyInteger#prepareFormatter}. This is based on the original
+ * <code>testFormatIntOrLong</code> which tested <code>PyInteger.formatIntOrLong</code>.
+ */
+ public void testPrepareFormatter() {
+ int v = 123;
+ IntegerFormatter f;
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("d"));
+ assertEquals("123", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("o"));
+ assertEquals("173", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("x"));
+ assertEquals("7b", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("X"));
+ assertEquals("7B", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("b"));
+ assertEquals("1111011", f.format(v).pad().getResult());
- spec.thousands_separators = true;
- spec.type = 'd';
- assertEquals("1,234", PyInteger.formatIntOrLong(1234, spec));
- spec.thousands_separators = false;
+ int v2 = 1234567890;
+ f = PyInteger.prepareFormatter(InternalFormat.fromText(",d"));
+ assertEquals("1,234,567,890", f.format(v2).pad().getResult());
- spec.alternate = true;
- spec.type = 'o';
- assertEquals("0o173", PyInteger.formatIntOrLong(123, spec));
- spec.type = 'X';
- assertEquals("0X7B", PyInteger.formatIntOrLong(123, spec));
- spec.alternate = false;
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("#o"));
+ assertEquals("0o173", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("#X"));
+ assertEquals("0X7B", f.format(v).pad().getResult());
- spec.type = 'c';
- assertEquals("{", PyInteger.formatIntOrLong(123, spec));
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+ assertEquals("{", f.format(v).pad().getResult());
- spec.type = 'd';
- spec.sign = '+';
- assertEquals("+123", PyInteger.formatIntOrLong(123, spec));
- spec.sign = ' ';
- assertEquals(" 123", PyInteger.formatIntOrLong(123, spec));
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("+d"));
+ assertEquals("+123", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText(" d"));
+ assertEquals(" 123", f.format(v).pad().getResult());
- spec.sign = 0;
- spec.width = 5;
- assertEquals(" 123", PyInteger.formatIntOrLong(123, spec));
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("5"));
+ assertEquals(" 123", f.format(v).pad().getResult());
- spec.align = '^';
- spec.width = 6;
- assertEquals(" 123 ", PyInteger.formatIntOrLong(123, spec));
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("^6"));
+ assertEquals(" 123 ", f.format(v).pad().getResult());
- spec.align = '<';
- spec.width = 5;
- spec.fill_char = '~';
- assertEquals("123~~", PyInteger.formatIntOrLong(123, spec));
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("~<5"));
+ assertEquals("123~~", f.format(v).pad().getResult());
- spec.align = '=';
- spec.width = 6;
- spec.fill_char = '0';
- spec.sign = '+';
- assertEquals("+00123", PyInteger.formatIntOrLong(123, spec));
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("0=+6"));
+ assertEquals("+00123", f.format(v).pad().getResult());
- spec.precision = 1;
- assertFormatError(123, spec, "Precision not allowed in integer format specifier");
+ assertValueError("0=+6.1", "Precision not allowed in integer format specifier");
+ assertValueError("+c", "Sign not allowed with integer format specifier 'c'");
- spec.precision = -1;
- spec.sign = '+';
- spec.type = 'c';
- assertFormatError(123, spec, "Sign not allowed with integer format specifier 'c'");
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+ f.setBytes(true);
+ assertOverflowError(256, f, "%c arg not in range(0x100)");
+ assertOverflowError(-1, f, "%c arg not in range(0x100)");
+ assertOverflowError(0x110000, f, "%c arg not in range(0x100)");
- spec.sign = 0;
- assertFormatError(0x11111, spec, "%c arg not in range(0x10000)");
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+ assertOverflowError(0x110000, f, "%c arg not in range(0x110000)");
+ assertOverflowError(-1, f, "%c arg not in range(0x110000)");
+ }
+
+ /**
+ * Test the IntegerFormatter returned by {@link PyInteger#prepareFormatter} with <code>BigInteger</code> values. This is
+ * based on the original <code>testFormatIntOrLong</code> which tested <code>PyInteger.formatIntOrLong</code>.
+ */
+ public void testPrepareFormatterLong() {
+ BigInteger v = BigInteger.valueOf(123);
+ IntegerFormatter f;
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("d"));
+ assertEquals("123", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("o"));
+ assertEquals("173", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("x"));
+ assertEquals("7b", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("X"));
+ assertEquals("7B", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("b"));
+ assertEquals("1111011", f.format(v).pad().getResult());
+
+ BigInteger v2 = BigInteger.valueOf(1234567890);
+ f = PyInteger.prepareFormatter(InternalFormat.fromText(",d"));
+ assertEquals("1,234,567,890", f.format(v2).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("#o"));
+ assertEquals("0o173", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("#X"));
+ assertEquals("0X7B", f.format(v).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+ assertEquals("{", f.format(v).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("+d"));
+ assertEquals("+123", f.format(v).pad().getResult());
+ f = PyInteger.prepareFormatter(InternalFormat.fromText(" d"));
+ assertEquals(" 123", f.format(v).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("5"));
+ assertEquals(" 123", f.format(v).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("^6"));
+ assertEquals(" 123 ", f.format(v).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("~<5"));
+ assertEquals("123~~", f.format(v).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("0=+6"));
+ assertEquals("+00123", f.format(v).pad().getResult());
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+ f.setBytes(true);
+ assertOverflowError(BigInteger.valueOf(256), f, "%c arg not in range(0x100)");
+ assertOverflowError(BigInteger.valueOf(-1), f, "%c arg not in range(0x100)");
+ assertOverflowError(BigInteger.valueOf(0x110000), f, "%c arg not in range(0x100)");
+
+ f = PyInteger.prepareFormatter(InternalFormat.fromText("c"));
+ assertOverflowError(BigInteger.valueOf(0x110000), f, "%c arg not in range(0x110000)");
+ assertOverflowError(BigInteger.valueOf(-1), f, "%c arg not in range(0x110000)");
+ }
+
+ private void assertValueError(String formatSpec, String expected) {
+ try {
+ IntegerFormatter f = PyInteger.prepareFormatter(InternalFormat.fromText(formatSpec));
+ // f.format(123).pad().getResult();
+ fail("ValueError not thrown, expected: " + expected);
+ } catch (PyException pye) {
+ assertEquals(expected, pye.value.toString());
+ }
+ }
+
+ private void assertOverflowError(int v, IntegerFormatter f, String expected) {
+ // Test with Java int for PyInteger
+ try {
+ f.format(v).pad().getResult();
+ fail("OverflowError not thrown, expected: " + expected);
+ } catch (PyException pye) {
+ assertEquals(expected, pye.value.toString());
+ }
+ }
+
+ private void assertOverflowError(BigInteger v, IntegerFormatter f, String expected) {
+ // Test with BigInteger for PyLong
+ try {
+ f.format(v).pad().getResult();
+ fail("OverflowError not thrown, expected: " + expected);
+ } catch (PyException pye) {
+ assertEquals(expected, pye.value.toString());
+ }
}
public void testFormatString() {
- InternalFormatSpec spec = new InternalFormatSpec();
- assertEquals("abc", PyString.formatString("abc", spec));
+ String v = "abc";
+ TextFormatter f;
+ f = PyString.prepareFormatter(InternalFormat.fromText(""));
+ assertEquals("abc", f.format(v).pad().getResult());
- spec.precision = 3;
- assertEquals("abc", PyString.formatString("abcdef", spec));
+ String v2 = "abcdef";
+ f = PyString.prepareFormatter(InternalFormat.fromText(".3"));
+ assertEquals("abc", f.format(v2).pad().getResult());
- spec.precision = -1;
- spec.width = 6;
- assertEquals("abc ", PyString.formatString("abc", spec));
- }
-
- private void assertFormatError(int value, InternalFormatSpec spec, String expected) {
- String error = null;
- try {
- PyInteger.formatIntOrLong(value, spec);
- } catch (IllegalArgumentException e) {
- error = e.getMessage();
- }
- assertEquals(expected, error);
+ f = PyString.prepareFormatter(InternalFormat.fromText("6"));
+ assertEquals("abc ", f.format(v).pad().getResult());
}
public void testMarkupIterator() {
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list