[Jython-checkins] jython (merge default -> default): Merge further work on str and unicode types to accept buffer arguments
jeff.allen
jython-checkins at python.org
Sun Oct 27 23:49:43 CET 2013
http://hg.python.org/jython/rev/07ca5124f859
changeset: 7147:07ca5124f859
parent: 7137:a9283b590960
parent: 7146:4af2f4241912
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Oct 27 22:11:21 2013 +0000
summary:
Merge further work on str and unicode types to accept buffer arguments
files:
Lib/test/string_tests.py | 241 +-
Lib/test/test_java_integration.py | 8 +-
Lib/test/test_jython_initializer.py | 9 +
src/org/python/core/Py.java | 10 +-
src/org/python/core/PyString.java | 2757 +++++++++-----
src/org/python/core/PyUnicode.java | 303 +-
6 files changed, 2079 insertions(+), 1249 deletions(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -158,6 +158,32 @@
self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))
+ # Repeat some tests with buffer argument (Jython addition)
+ ba = buffer('a')
+ self.checkequal(3, 'aaa', 'count', ba)
+ self.checkequal(0, 'aaa', 'count', buffer('b'))
+ self.checkequal(2, 'aaa', 'count', ba, 1)
+ self.checkequal(0, 'aaa', 'count', ba, 10)
+ self.checkequal(1, 'aaa', 'count', ba, 0, 1)
+ self.checkequal(3, 'aaa', 'count', ba, 0, 10)
+ b = buffer('')
+ self.checkequal(3, 'aaa', 'count', b, 1)
+ self.checkequal(4, 'aaa', 'count', b, -10)
+
+ # Repeat some tests with memoryview argument (Jython addition)
+ if test_support.is_jython:
+ # CPython does not support until v3.2
+ with memoryview('a') as ma:
+ self.checkequal(3, 'aaa', 'count', ma)
+ self.checkequal(0, 'aaa', 'count', memoryview('b'))
+ self.checkequal(2, 'aaa', 'count', ma, 1)
+ self.checkequal(0, 'aaa', 'count', ma, 10)
+ self.checkequal(1, 'aaa', 'count', ma, 0, 1)
+ self.checkequal(3, 'aaa', 'count', ma, 0, 10)
+ with memoryview('') as m:
+ self.checkequal(3, 'aaa', 'count', m, 1)
+ self.checkequal(4, 'aaa', 'count', m, -10)
+
def test_find(self):
self.checkequal(0, 'abcdefghiabc', 'find', 'abc')
self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1)
@@ -171,11 +197,8 @@
self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a')
self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4)
self.checkequal(-1, 'rrarrrrrrrrra', 'find', 'a', 4, 6)
-
- #FIXME:
- if not test_support.is_jython:
- self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a', None, 6)
+ self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'find')
self.checkraises(TypeError, 'hello', 'find', 42)
@@ -188,11 +211,6 @@
self.checkequal(-1, '', 'find', 'xx', 1, 1)
self.checkequal(-1, '', 'find', 'xx', sys.maxint, 0)
- # issue 7458
- #FIXME:
- if not test_support.is_jython:
- self.checkequal(-1, 'ab', 'find', 'xxx', sys.maxsize + 1, 0)
-
# For a variety of combinations,
# verify that str.find() matches __contains__
# and that the found substring is really at that location
@@ -217,6 +235,28 @@
if loc != -1:
self.assertEqual(i[loc:loc+len(j)], j)
+ # issue 7458
+ self.checkequal(-1, 'ab', 'find', 'xxx', sys.maxsize + 1, 0)
+
+ # Repeat some tests with buffer argument (Jython addition)
+ self.checkequal(0, 'abcdefghiabc', 'find', buffer('abc'))
+ self.checkequal(9, 'abcdefghiabc', 'find', buffer('abc'), 1)
+ self.checkequal(-1, 'abcdefghiabc', 'find', buffer('def'), 4)
+ self.checkequal(12, 'rrarrrrrrrrra', 'find', buffer('a'), 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'find', buffer('a'), None, 6)
+
+ # Repeat some tests with memoryview argument (Jython addition)
+ if test_support.is_jython:
+ # CPython does not support until v3.2
+ with memoryview('abc') as m:
+ self.checkequal(0, 'abcdefghiabc', 'find', m)
+ self.checkequal(9, 'abcdefghiabc', 'find', m, 1)
+ with memoryview('def') as m:
+ self.checkequal(-1, 'abcdefghiabc', 'find', m, 4)
+ with memoryview('a') as m:
+ self.checkequal(12, 'rrarrrrrrrrra', 'find', m, 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'find', m, None, 6)
+
def test_rfind(self):
self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc')
self.checkequal(12, 'abcdefghiabc', 'rfind', '')
@@ -231,10 +271,8 @@
self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a')
self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4)
self.checkequal(-1, 'rrarrrrrrrrra', 'rfind', 'a', 4, 6)
- #FIXME:
- if not test_support.is_jython:
- self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', 'a', None, 6)
+ self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'rfind')
self.checkraises(TypeError, 'hello', 'rfind', 42)
@@ -264,9 +302,26 @@
self.assertEqual(i[loc:loc+len(j)], self.fixtype(j))
# issue 7458
- #FIXME:
- if not test_support.is_jython:
- self.checkequal(-1, 'ab', 'rfind', 'xxx', sys.maxsize + 1, 0)
+ self.checkequal(-1, 'ab', 'rfind', 'xxx', sys.maxsize + 1, 0)
+
+ # Repeat some tests with buffer argument (Jython addition)
+ self.checkequal(9, 'abcdefghiabc', 'rfind', buffer('abc'))
+ self.checkequal(12, 'abcdefghiabc', 'rfind', buffer(''))
+ self.checkequal(0, 'abcdefghiabc', 'rfind', buffer('abcd'))
+ self.checkequal(-1, 'abcdefghiabc', 'rfind', buffer('abcz'))
+ self.checkequal(12, 'rrarrrrrrrrra', 'rfind', buffer('a'), 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', buffer('a'), None, 6)
+
+ # Repeat some tests with memoryview argument (Jython addition)
+ if test_support.is_jython:
+ # CPython does not support until v3.2
+ self.checkequal(9, 'abcdefghiabc', 'rfind', memoryview('abc'))
+ self.checkequal(12, 'abcdefghiabc', 'rfind', memoryview(''))
+ self.checkequal(0, 'abcdefghiabc', 'rfind', memoryview('abcd'))
+ self.checkequal(-1, 'abcdefghiabc', 'rfind', memoryview('abcz'))
+ with memoryview('a') as m:
+ self.checkequal(12, 'rrarrrrrrrrra', 'rfind', m, 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', m, None, 6)
def test_index(self):
self.checkequal(0, 'abcdefghiabc', 'index', '')
@@ -283,11 +338,8 @@
self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a')
self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4)
self.checkraises(ValueError, 'rrarrrrrrrrra', 'index', 'a', 4, 6)
-
- #FIXME
- if not test_support.is_jython:
- self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a', None, 6)
+ self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'index')
self.checkraises(TypeError, 'hello', 'index', 42)
@@ -308,11 +360,8 @@
self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a')
self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4)
self.checkraises(ValueError, 'rrarrrrrrrrra', 'rindex', 'a', 4, 6)
-
- #FIXME:
- if not test_support.is_jython:
- self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4, None)
- self.checkequal( 2, 'rrarrrrrrrrra', 'rindex', 'a', None, 6)
+ self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4, None)
+ self.checkequal( 2, 'rrarrrrrrrrra', 'rindex', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'rindex')
self.checkraises(TypeError, 'hello', 'rindex', 42)
@@ -573,6 +622,21 @@
#self.checkequal(unicode('hello', 'ascii'), 'hello',
# 'strip', unicode('xyz', 'ascii'))
+ # strip/lstrip/rstrip with buffer or memoryview arg (Jython addition)
+ if test_support.is_jython and self.__class__.type2test in (str, bytearray):
+ b = buffer('xyz')
+ self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', b)
+ self.checkequal('helloxyzzy', 'xyzzyhelloxyzzy', 'lstrip', b)
+ self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', b)
+ self.checkequal('hello', 'hello', 'strip', b)
+
+ # CPython does not support until v3.2
+ with memoryview('xyz') as m:
+ self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', m)
+ self.checkequal('helloxyzzy', 'xyzzyhelloxyzzy', 'lstrip', m)
+ self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', m)
+ self.checkequal('hello', 'hello', 'strip', m)
+
self.checkraises(TypeError, 'hello', 'strip', 42, 42)
self.checkraises(TypeError, 'hello', 'lstrip', 42, 42)
self.checkraises(TypeError, 'hello', 'rstrip', 42, 42)
@@ -753,13 +817,11 @@
EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob")
EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby")
- # buffer not supported in Jython.
- if not test_support.is_jython:
- with test_support.check_py3k_warnings():
- ba = buffer('a')
- bb = buffer('b')
- EQ("bbc", "abc", "replace", ba, bb)
- EQ("aac", "abc", "replace", bb, ba)
+ with test_support.check_py3k_warnings():
+ ba = buffer('a')
+ bb = buffer('b')
+ EQ("bbc", "abc", "replace", ba, bb)
+ EQ("aac", "abc", "replace", bb, ba)
#
self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
@@ -789,6 +851,40 @@
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
+ # Repeat some tests including buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ # Buffer type as sought argument
+ EQ("", "", "replace", buftype(""), "")
+ EQ("", "", "replace", buftype("A"), "A")
+ EQ("*-A*-A*-", "AA", "replace", buftype(""), "*-")
+ EQ("", "AAA", "replace", buftype("A"), "")
+ EQ("BCD", "ABCADAA", "replace", buftype("A"), "")
+ EQ("ater", "theater", "replace", buftype("the"), "")
+ EQ("", "thethethethe", "replace", buftype("the"), "")
+ EQ("aaaa", "theatheatheathea", "replace", buftype("the"), "")
+ EQ("WhO gOes there?", "Who goes there?", "replace", buftype("o"), "O")
+ EQ("Th** ** a t**sue", "This is a tissue", "replace", buftype("is"), "**")
+ EQ("cobobXcobocob", "bobobXbobobob", "replace", buftype("bob"), "cob")
+ EQ("ReyKKjaviKK", "Reykjavik", "replace", buftype("k"), "KK")
+ EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
+ "replace", buftype("spam"), "ham")
+ # Buffer type as replacement argument
+ EQ("", "", "replace", "", buftype(""))
+ EQ("", "", "replace", "A", buftype("A"))
+ EQ("*-A*-A*-", "AA", "replace", "", buftype("*-"))
+ EQ("", "AAA", "replace", "A", buftype(""))
+ EQ("BCD", "ABCADAA", "replace", "A", buftype(""))
+ EQ("ater", "theater", "replace", "the", buftype(""))
+ EQ("", "thethethethe", "replace", "the", buftype(""))
+ EQ("aaaa", "theatheatheathea", "replace", "the", buftype(""))
+ EQ("WhO gOes there?", "Who goes there?", "replace", "o", buftype("O"))
+ EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", buftype("**"))
+ EQ("cobobXcobocob", "bobobXbobobob", "replace", "bob", buftype("cob"))
+ EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", buftype("KK"))
+ EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
+ "replace", "spam", buftype("ham"))
+
def test_replace_overflow(self):
# Check for overflow checking on 32 bit machines
if sys.maxint != 2147483647 or struct.calcsize("P") > 4:
@@ -962,6 +1058,23 @@
self.checkraises(TypeError, 'hello', 'startswith', (42,))
+ # Repeat some tests including buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ self.checkequal(True, 'hello', 'startswith', buftype('he'))
+ self.checkequal(True, 'hello', 'startswith', buftype(''))
+ self.checkequal(False, 'hello', 'startswith', buftype('ello'))
+ self.checkequal(True, 'hello', 'startswith', buftype('ello'), 1)
+ self.checkequal(True, 'helloworld', 'startswith', buftype('lowo'), 3, 7)
+ self.checkequal(True, 'hello', 'startswith', buftype('he'), 0, -1)
+ self.checkequal(True, 'hello', 'startswith', buftype('ello'), -4)
+ self.checkequal(True, 'hello', 'startswith', buftype('o'), -1)
+ self.checkequal(True, 'hello', 'startswith', (buftype('he'), 'ha'))
+ self.checkequal(True, 'helloworld', 'startswith', (buftype('hellowo'),
+ 'rld', buftype('lowo')), 3)
+ self.checkequal(True, 'hello', 'startswith', ('lo', buftype('he')), 0, -1)
+ self.checkequal(True, 'hello', 'startswith', (buftype('he'), 'hel'), 0, 2)
+
def test_endswith(self):
self.checkequal(True, 'hello', 'endswith', 'lo')
self.checkequal(False, 'hello', 'endswith', 'he')
@@ -1011,6 +1124,20 @@
self.checkraises(TypeError, 'hello', 'endswith', (42,))
+ # Repeat some tests including buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ self.checkequal(True, 'hello', 'endswith', buftype('lo'))
+ self.checkequal(False, 'hello', 'endswith', buftype('he'))
+ self.checkequal(True, 'hello', 'endswith', buftype(''))
+ self.checkequal(True, 'helloworld', 'endswith', buftype('worl'), 3, 9)
+ self.checkequal(True, 'helloworld', 'endswith', buftype('worl'), -5, -1)
+ self.checkequal(True, 'hello', 'endswith', (buftype('lo'), buftype('llo')))
+ self.checkequal(True, 'helloworld', 'endswith', ('hellowo',
+ buftype('rld'), buftype('lowo')), 3)
+ self.checkequal(True, 'hello', 'endswith', ('hell', buftype('ell')), 0, -1)
+ self.checkequal(True, 'hello', 'endswith', ('he', buftype('hell')), 0, 4)
+
def test___contains__(self):
self.checkequal(True, '', '__contains__', '')
self.checkequal(True, 'abc', '__contains__', '')
@@ -1203,6 +1330,26 @@
# mixed use of str and unicode
self.assertEqual('a/b/c'.partition(u'/'), ('a', '/', 'b/c'))
+ # with buffer arg (Jython addition)
+ b = buffer('ti')
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the par', u'ti', 'tion method'),
+ 'this is the partition method', 'partition', b)
+ else:
+ self.checkequal(('this is the par', b, 'tion method'),
+ 'this is the partition method', 'partition', b)
+
+ # with memoryview arg (Jython addition)
+ if test_support.is_jython:
+ # CPython does not support until v3.2
+ with memoryview('ti') as m:
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the par', u'ti', 'tion method'),
+ 'this is the partition method', 'partition', m)
+ else:
+ self.checkequal(('this is the par', m, 'tion method'),
+ 'this is the partition method', 'partition', m)
+
def test_rpartition(self):
self.checkequal(('this is the rparti', 'ti', 'on method'),
@@ -1221,6 +1368,26 @@
# mixed use of str and unicode
self.assertEqual('a/b/c'.rpartition(u'/'), ('a/b', '/', 'c'))
+ # with buffer arg (Jython addition)
+ b = buffer('ti')
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the parti', u'ti', 'on method'),
+ 'this is the partition method', 'rpartition', b)
+ else:
+ self.checkequal(('this is the parti', b, 'on method'),
+ 'this is the partition method', 'rpartition', b)
+
+ # with memoryview arg (Jython addition)
+ if test_support.is_jython:
+ # CPython does not support until v3.2
+ with memoryview('ti') as m:
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the parti', u'ti', 'on method'),
+ 'this is the partition method', 'rpartition', m)
+ else:
+ self.checkequal(('this is the parti', m, 'on method'),
+ 'this is the partition method', 'rpartition', m)
+
def test_none_arguments(self):
# issue 11828
s = 'hello'
@@ -1293,6 +1460,12 @@
table = string.maketrans('abc', 'xyz')
self.checkequal('xyzxyz', 'xyzabcdef', 'translate', table, 'def')
+ # Repeat using buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ self.checkequal('xyzxyz', 'xyzabcdef', 'translate', buftype(table), 'def')
+ self.checkequal('xyzxyz', 'xyzabcdef', 'translate', table, buftype('def'))
+
table = string.maketrans('a', 'A')
self.checkequal('Abc', 'abc', 'translate', table)
self.checkequal('xyz', 'xyz', 'translate', table)
diff --git a/Lib/test/test_java_integration.py b/Lib/test/test_java_integration.py
--- a/Lib/test/test_java_integration.py
+++ b/Lib/test/test_java_integration.py
@@ -717,9 +717,11 @@
self.assertRegexpMatches(
subprocess.check_output(cmd, env=env, universal_newlines=True,
stderr=subprocess.STDOUT),
- r"^\*sys-package-mgr\*: processing new jar, '.+?/proxies.jar'\n"
- "Class defined on CLASSPATH <type 'org.python.test.bark.Dog'>\n"
- "Rover barks 42 times\n$".format(tempdir))
+ os.path.join(
+ r"^\*sys-package-mgr\*: processing new jar, '.+?",
+ r"proxies.jar'\n"
+ "Class defined on CLASSPATH <type 'org.python.test.bark.Dog'>\n"
+ "Rover barks 42 times\n$".format(tempdir)))
finally:
pass
# print "Will not remove", tempdir
diff --git a/Lib/test/test_jython_initializer.py b/Lib/test/test_jython_initializer.py
--- a/Lib/test/test_jython_initializer.py
+++ b/Lib/test/test_jython_initializer.py
@@ -4,12 +4,21 @@
import unittest
from test import test_support
+WINDOWS = (os._name if test_support.is_jython else os.name) == 'nt'
+
class TestUsingInitializer(unittest.TestCase):
def test_syspath_initializer(self):
fn = test_support.findfile('check_for_initializer_in_syspath.py')
env = dict(CLASSPATH='tests/data/initializer',
PATH=os.environ.get('PATH', ''))
+
+ if WINDOWS:
+ # TMP is needed to give property java.io.tmpdir a sensible value
+ env['TMP'] = os.environ.get('TMP', '.')
+ # SystemRoot is needed to remote debug the subprocess JVM
+ env['SystemRoot'] = os.environ.get('SystemRoot', '')
+
self.assertEquals(0, subprocess.call([sys.executable, fn], env=env))
def test_main():
diff --git a/src/org/python/core/Py.java b/src/org/python/core/Py.java
--- a/src/org/python/core/Py.java
+++ b/src/org/python/core/Py.java
@@ -1450,11 +1450,13 @@
}
/**
- * Check (using the {@link POSIX} library) whether we are in an interactive environment. Amongst
- * other things, this affects the type of console that may be legitimately installed during
- * system initialisation.
+ * Check (using the {@link POSIX} library and <code>jnr-posix</code> library) whether we are in
+ * an interactive environment. Amongst other things, this affects the type of console that may
+ * be legitimately installed during system initialisation. Note that the result may vary
+ * according to whether a <code>jnr-posix</code> native library is found along
+ * <code>java.library.path</code>, or the pure Java fall-back is used.
*
- * @return
+ * @return true if (we think) we are in an interactive environment
*/
public static boolean isInteractive() {
// Decide if System.in is interactive
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -25,11 +25,11 @@
* A builtin python string.
*/
@ExposedType(name = "str", doc = BuiltinDocs.str_doc)
-public class PyString extends PyBaseString implements BufferProtocol
-{
+public class PyString extends PyBaseString implements BufferProtocol {
+
public static final PyType TYPE = PyType.fromClass(PyString.class);
protected String string; // cannot make final because of Python intern support
- protected transient boolean interned=false;
+ protected transient boolean interned = false;
/** Supports the buffer API, see {@link #getBuffer(int)}. */
private Reference<BaseBuffer> export;
@@ -45,8 +45,7 @@
public PyString(PyType subType, String string) {
super(subType);
if (string == null) {
- throw new IllegalArgumentException(
- "Cannot create PyString from null!");
+ throw new IllegalArgumentException("Cannot create PyString from null!");
}
this.string = string;
}
@@ -56,16 +55,16 @@
}
public PyString(char c) {
- this(TYPE,String.valueOf(c));
+ this(TYPE, String.valueOf(c));
}
PyString(StringBuilder buffer) {
this(TYPE, new String(buffer));
}
-
+
/**
- * Creates a PyString from an already interned String. Just means it won't
- * be reinterned if used in a place that requires interned Strings.
+ * Creates a PyString from an already interned String. Just means it won't be reinterned if used
+ * in a place that requires interned Strings.
*/
public static PyString fromInterned(String interned) {
PyString str = new PyString(TYPE, interned);
@@ -74,12 +73,12 @@
}
@ExposedNew
- static PyObject str_new(PyNewWrapper new_, boolean init, PyType subtype,
- PyObject[] args, String[] keywords) {
- ArgParser ap = new ArgParser("str", args, keywords, new String[] { "object" }, 0);
+ static PyObject str_new(PyNewWrapper new_, boolean init, PyType subtype, PyObject[] args,
+ String[] keywords) {
+ ArgParser ap = new ArgParser("str", args, keywords, new String[] {"object"}, 0);
PyObject S = ap.getPyObject(0, null);
- if(new_.for_type == subtype) {
- if(S == null) {
+ if (new_.for_type == subtype) {
+ if (S == null) {
return new PyString("");
}
return new PyString(S.__str__().toString());
@@ -109,6 +108,7 @@
* @param flags consumer requirements
* @return the requested buffer
*/
+ @Override
public synchronized PyBuffer getBuffer(int flags) {
// If we have already exported a buffer it may still be available for re-use
BaseBuffer pybuf = getExistingBuffer(flags);
@@ -147,15 +147,13 @@
public String substring(int start, int end) {
return getString().substring(start, end);
}
-
+
@Override
public PyString __str__() {
return str___str__();
}
- public
-
- @ExposedMethod(doc = BuiltinDocs.str___str___doc)
+ public @ExposedMethod(doc = BuiltinDocs.str___str___doc)
final PyString str___str__() {
if (getClass() == PyString.class) {
return this;
@@ -184,9 +182,9 @@
}
public String internedString() {
- if (interned)
+ if (interned) {
return getString();
- else {
+ } else {
string = getString().intern();
interned = true;
return getString();
@@ -205,33 +203,30 @@
private static char[] hexdigit = "0123456789abcdef".toCharArray();
- public static String encode_UnicodeEscape(String str,
- boolean use_quotes)
- {
+ public static String encode_UnicodeEscape(String str, boolean use_quotes) {
int size = str.length();
StringBuilder v = new StringBuilder(str.length());
char quote = 0;
if (use_quotes) {
- quote = str.indexOf('\'') >= 0 &&
- str.indexOf('"') == -1 ? '"' : '\'';
+ quote = str.indexOf('\'') >= 0 && str.indexOf('"') == -1 ? '"' : '\'';
v.append(quote);
}
- for (int i = 0; size-- > 0; ) {
+ for (int i = 0; size-- > 0;) {
int ch = str.charAt(i++);
/* Escape quotes */
if ((use_quotes && ch == quote) || ch == '\\') {
v.append('\\');
- v.append((char) ch);
+ v.append((char)ch);
continue;
}
- /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
- else if (ch >= 0xD800 && ch < 0xDC00) {
- char ch2 = str.charAt(i++);
- size--;
- if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+ /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
+ else if (ch >= 0xD800 && ch < 0xDC00) {
+ char ch2 = str.charAt(i++);
+ size--;
+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
int ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
v.append('\\');
v.append('U');
@@ -244,13 +239,13 @@
v.append(hexdigit[(ucs >> 4) & 0xf]);
v.append(hexdigit[ucs & 0xf]);
continue;
- }
- /* Fall through: isolated surrogates are copied as-is */
- i--;
- size++;
}
+ /* Fall through: isolated surrogates are copied as-is */
+ i--;
+ size++;
+ }
/* Map 16-bit characters to '\\uxxxx' */
- if (ch >= 256) {
+ if (ch >= 256) {
v.append('\\');
v.append('u');
v.append(hexdigit[(ch >> 12) & 0xf]);
@@ -258,39 +253,38 @@
v.append(hexdigit[(ch >> 4) & 0xf]);
v.append(hexdigit[ch & 15]);
}
- /* Map special whitespace to '\t', \n', '\r' */
- else if (ch == '\t') v.append("\\t");
- else if (ch == '\n') v.append("\\n");
- else if (ch == '\r') v.append("\\r");
- /* Map non-printable US ASCII to '\ooo' */
- else if (ch < ' ' || ch >= 127) {
+ /* Map special whitespace to '\t', \n', '\r' */
+ else if (ch == '\t') {
+ v.append("\\t");
+ } else if (ch == '\n') {
+ v.append("\\n");
+ } else if (ch == '\r') {
+ v.append("\\r");
+ } else if (ch < ' ' || ch >= 127) {
+ /* Map non-printable US ASCII to '\xNN' */
v.append('\\');
v.append('x');
v.append(hexdigit[(ch >> 4) & 0xf]);
v.append(hexdigit[ch & 0xf]);
+ } else {/* Copy everything else as-is */
+ v.append((char)ch);
}
- /* Copy everything else as-is */
- else
- v.append((char) ch);
}
- if (use_quotes)
+ if (use_quotes) {
v.append(quote);
+ }
return v.toString();
}
private static ucnhashAPI pucnHash = null;
-
- public static String decode_UnicodeEscape(String str,
- int start,
- int end,
- String errors,
- boolean unicode) {
+ public static String decode_UnicodeEscape(String str, int start, int end, String errors,
+ boolean unicode) {
StringBuilder v = new StringBuilder(end - start);
- for(int s = start; s < end;) {
+ for (int s = start; s < end;) {
char ch = str.charAt(s);
/* Non-escape characters are interpreted as Unicode ordinals */
- if(ch != '\\') {
+ if (ch != '\\') {
v.append(ch);
s++;
continue;
@@ -298,19 +292,14 @@
int loopStart = s;
/* \ - Escapes */
s++;
- if(s == end) {
- s = codecs.insertReplacementAndGetResume(v,
- errors,
- "unicodeescape",
- str,
- loopStart,
- s + 1,
- "\\ at end of string");
+ if (s == end) {
+ s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", //
+ str, loopStart, s + 1, "\\ at end of string");
continue;
}
ch = str.charAt(s++);
- switch(ch){
- /* \x escapes */
+ switch (ch) {
+ /* \x escapes */
case '\n':
break;
case '\\':
@@ -353,10 +342,11 @@
case '6':
case '7':
int x = Character.digit(ch, 8);
- for(int j = 0; j < 2 && s < end; j++, s++) {
+ for (int j = 0; j < 2 && s < end; j++, s++) {
ch = str.charAt(s);
- if(ch < '0' || ch > '7')
+ if (ch < '0' || ch > '7') {
break;
+ }
x = (x << 3) + Character.digit(ch, 8);
}
v.append((char)x);
@@ -365,95 +355,68 @@
s = hexescape(v, errors, 2, s, str, end, "truncated \\xXX");
break;
case 'u':
- if(!unicode) {
+ if (!unicode) {
v.append('\\');
v.append('u');
break;
}
- s = hexescape(v,
- errors,
- 4,
- s,
- str,
- end,
- "truncated \\uXXXX");
+ s = hexescape(v, errors, 4, s, str, end, "truncated \\uXXXX");
break;
case 'U':
- if(!unicode) {
+ if (!unicode) {
v.append('\\');
v.append('U');
break;
}
- s = hexescape(v,
- errors,
- 8,
- s,
- str,
- end,
- "truncated \\UXXXXXXXX");
+ s = hexescape(v, errors, 8, s, str, end, "truncated \\UXXXXXXXX");
break;
case 'N':
- if(!unicode) {
+ if (!unicode) {
v.append('\\');
v.append('N');
break;
}
/*
- * Ok, we need to deal with Unicode Character Names now,
- * make sure we've imported the hash table data...
+ * Ok, we need to deal with Unicode Character Names now, make sure we've
+ * imported the hash table data...
*/
- if(pucnHash == null) {
+ if (pucnHash == null) {
PyObject mod = imp.importName("ucnhash", true);
mod = mod.__call__();
pucnHash = (ucnhashAPI)mod.__tojava__(Object.class);
- if(pucnHash.getCchMax() < 0)
+ if (pucnHash.getCchMax() < 0) {
throw Py.UnicodeError("Unicode names not loaded");
+ }
}
- if(str.charAt(s) == '{') {
+ if (str.charAt(s) == '{') {
int startName = s + 1;
int endBrace = startName;
/*
- * look for either the closing brace, or we exceed the
- * maximum length of the unicode character names
+ * look for either the closing brace, or we exceed the maximum length of the
+ * unicode character names
*/
int maxLen = pucnHash.getCchMax();
- while(endBrace < end && str.charAt(endBrace) != '}'
+ while (endBrace < end && str.charAt(endBrace) != '}'
&& (endBrace - startName) <= maxLen) {
endBrace++;
}
- if(endBrace != end && str.charAt(endBrace) == '}') {
- int value = pucnHash.getValue(str,
- startName,
- endBrace);
- if(storeUnicodeCharacter(value, v)) {
+ if (endBrace != end && str.charAt(endBrace) == '}') {
+ int value = pucnHash.getValue(str, startName, endBrace);
+ if (storeUnicodeCharacter(value, v)) {
s = endBrace + 1;
} else {
- s = codecs.insertReplacementAndGetResume(v,
- errors,
- "unicodeescape",
- str,
- loopStart,
- endBrace + 1,
- "illegal Unicode character");
+ s = codecs.insertReplacementAndGetResume( //
+ v, errors, "unicodeescape", //
+ str, loopStart, endBrace + 1, "illegal Unicode character");
}
} else {
- s = codecs.insertReplacementAndGetResume(v,
- errors,
- "unicodeescape",
- str,
- loopStart,
- endBrace,
- "malformed \\N character escape");
+ s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", //
+ str, loopStart, endBrace, "malformed \\N character escape");
}
break;
} else {
- s = codecs.insertReplacementAndGetResume(v,
- errors,
- "unicodeescape",
- str,
- loopStart,
- s + 1,
- "malformed \\N character escape");
+ s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", //
+ str, loopStart, s + 1, "malformed \\N character escape");
}
break;
default:
@@ -465,60 +428,40 @@
return v.toString();
}
- private static int hexescape(StringBuilder partialDecode,
- String errors,
- int digits,
- int hexDigitStart,
- String str,
- int size,
- String errorMessage) {
- if(hexDigitStart + digits > size) {
- return codecs.insertReplacementAndGetResume(partialDecode,
- errors,
- "unicodeescape",
- str,
- hexDigitStart - 2,
- size,
- errorMessage);
+ private static int hexescape(StringBuilder partialDecode, String errors, int digits,
+ int hexDigitStart, String str, int size, String errorMessage) {
+ if (hexDigitStart + digits > size) {
+ return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape",
+ str, hexDigitStart - 2, size, errorMessage);
}
int i = 0;
int x = 0;
- for(; i < digits; ++i) {
+ for (; i < digits; ++i) {
char c = str.charAt(hexDigitStart + i);
int d = Character.digit(c, 16);
- if(d == -1) {
- return codecs.insertReplacementAndGetResume(partialDecode,
- errors,
- "unicodeescape",
- str,
- hexDigitStart - 2,
- hexDigitStart + i + 1,
- errorMessage);
+ if (d == -1) {
+ return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape",
+ str, hexDigitStart - 2, hexDigitStart + i + 1, errorMessage);
}
x = (x << 4) & ~0xF;
- if(c >= '0' && c <= '9')
+ if (c >= '0' && c <= '9') {
x += c - '0';
- else if(c >= 'a' && c <= 'f')
+ } else if (c >= 'a' && c <= 'f') {
x += 10 + c - 'a';
- else
+ } else {
x += 10 + c - 'A';
+ }
}
- if(storeUnicodeCharacter(x, partialDecode)) {
+ if (storeUnicodeCharacter(x, partialDecode)) {
return hexDigitStart + i;
} else {
- return codecs.insertReplacementAndGetResume(partialDecode,
- errors,
- "unicodeescape",
- str,
- hexDigitStart - 2,
- hexDigitStart + i + 1,
- "illegal Unicode character");
+ return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape",
+ str, hexDigitStart - 2, hexDigitStart + i + 1, "illegal Unicode character");
}
}
- /*pass in an int since this can be a UCS-4 character */
- private static boolean storeUnicodeCharacter(int value,
- StringBuilder partialDecode) {
+ /* pass in an int since this can be a UCS-4 character */
+ private static boolean storeUnicodeCharacter(int value, StringBuilder partialDecode) {
if (value < 0 || (value >= 0xD800 && value <= 0xDFFF)) {
return false;
} else if (value <= PySystemState.maxunicode) {
@@ -536,8 +479,8 @@
}
return ret;
}
-
- //XXX: need doc
+
+ // XXX: need doc
@ExposedMethod(defaults = "null")
final PyObject str___getslice__(PyObject start, PyObject stop, PyObject step) {
return seq___getslice__(start, stop, step);
@@ -550,10 +493,11 @@
@ExposedMethod(type = MethodType.CMP)
final int str___cmp__(PyObject other) {
- if (!(other instanceof PyString))
+ if (!(other instanceof PyString)) {
return -2;
-
- int c = getString().compareTo(((PyString) other).getString());
+ }
+
+ int c = getString().compareTo(((PyString)other).getString());
return c < 0 ? -1 : c > 0 ? 1 : 0;
}
@@ -565,8 +509,9 @@
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___eq___doc)
final PyObject str___eq__(PyObject other) {
String s = coerce(other);
- if (s == null)
+ if (s == null) {
return null;
+ }
return getString().equals(s) ? Py.True : Py.False;
}
@@ -578,21 +523,23 @@
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___ne___doc)
final PyObject str___ne__(PyObject other) {
String s = coerce(other);
- if (s == null)
+ if (s == null) {
return null;
+ }
return getString().equals(s) ? Py.False : Py.True;
}
-
+
@Override
public PyObject __lt__(PyObject other) {
return str___lt__(other);
}
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___lt___doc)
- final PyObject str___lt__(PyObject other){
+ final PyObject str___lt__(PyObject other) {
String s = coerce(other);
- if (s == null)
+ if (s == null) {
return null;
+ }
return getString().compareTo(s) < 0 ? Py.True : Py.False;
}
@@ -602,10 +549,11 @@
}
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___le___doc)
- final PyObject str___le__(PyObject other){
+ final PyObject str___le__(PyObject other) {
String s = coerce(other);
- if (s == null)
+ if (s == null) {
return null;
+ }
return getString().compareTo(s) <= 0 ? Py.True : Py.False;
}
@@ -615,10 +563,11 @@
}
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___gt___doc)
- final PyObject str___gt__(PyObject other){
+ final PyObject str___gt__(PyObject other) {
String s = coerce(other);
- if (s == null)
+ if (s == null) {
return null;
+ }
return getString().compareTo(s) > 0 ? Py.True : Py.False;
}
@@ -628,16 +577,18 @@
}
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___ge___doc)
- final PyObject str___ge__(PyObject other){
+ final PyObject str___ge__(PyObject other) {
String s = coerce(other);
- if (s == null)
+ if (s == null) {
return null;
+ }
return getString().compareTo(s) >= 0 ? Py.True : Py.False;
}
private static String coerce(PyObject o) {
- if (o instanceof PyString)
+ if (o instanceof PyString) {
return o.toString();
+ }
return null;
}
@@ -652,9 +603,8 @@
}
/**
- * @return a byte array with one byte for each char in this object's
- * underlying String. Each byte contains the low-order bits of its
- * corresponding char.
+ * @return a byte array with one byte for each char in this object's underlying String. Each
+ * byte contains the low-order bits of its corresponding char.
*/
public byte[] toBytes() {
return StringUtil.toBytes(getString());
@@ -666,38 +616,47 @@
return getString();
}
- if (c == Character.TYPE || c == Character.class)
- if (getString().length() == 1)
+ if (c == Character.TYPE || c == Character.class) {
+ if (getString().length() == 1) {
return new Character(getString().charAt(0));
+ }
+ }
if (c.isArray()) {
- if (c.getComponentType() == Byte.TYPE)
+ if (c.getComponentType() == Byte.TYPE) {
return toBytes();
- if (c.getComponentType() == Character.TYPE)
+ }
+ if (c.getComponentType() == Character.TYPE) {
return getString().toCharArray();
+ }
}
- if (c.isInstance(this))
+ if (c.isInstance(this)) {
return this;
+ }
return Py.NoConversion;
}
+ @Override
protected PyObject pyget(int i) {
return Py.newString(getString().charAt(i));
}
+ @Override
protected PyObject getslice(int start, int stop, int step) {
- if (step > 0 && stop < start)
+ if (step > 0 && stop < start) {
stop = start;
- if (step == 1)
+ }
+ if (step == 1) {
return fromSubstring(start, stop);
- else {
+ } else {
int n = sliceLength(start, stop, step);
char new_chars[] = new char[n];
int j = 0;
- for (int i=start; j<n; i+=step)
+ for (int i = start; j < n; i += step) {
new_chars[j++] = getString().charAt(i);
+ }
return createInstance(new String(new_chars), true);
}
@@ -710,7 +669,7 @@
protected PyString createInstance(String str, boolean isBasic) {
// ignore isBasic, doesn't apply to PyString, just PyUnicode
return new PyString(str);
- }
+ }
/**
* Return a String equivalent to the argument. This is a helper function to those methods that
@@ -723,8 +682,7 @@
if (obj instanceof PyString) {
// str or unicode object: go directly to the String
return ((PyString)obj).getString();
- } else
- if (obj instanceof BufferProtocol) {
+ } else if (obj instanceof BufferProtocol) {
// Other object with buffer API: briefly access the buffer
PyBuffer buf = ((BufferProtocol)obj).getBuffer(PyBUF.SIMPLE);
try {
@@ -750,23 +708,23 @@
if (ret != null) {
return ret;
} else {
- throw Py.TypeError("expected str, bytearray or buffer compatible object");
+ throw Py.TypeError("expected str, bytearray or other buffer compatible object");
}
}
/**
- * Return a String equivalent to the argument according to the calling conventions of the
- * <code>strip</code> and <code>split</code> methods of <code>str</code>. Those methods accept
- * anything bearing the buffer interface as a byte string, but also PyNone (or the argument may
- * be omitted, showing up here as null) to indicate that the criterion is whitespace. They also
- * accept a unicode argument, not dealt with here.
+ * Return a String equivalent to the argument according to the calling conventions of methods
+ * that accept anything bearing the buffer interface as a byte string, but also
+ * <code>PyNone</code>. (Or the argument may be omitted, showing up here as null.) These include
+ * the <code>strip</code> and <code>split</code> methods of <code>str</code>, where a null
+ * indicates that the criterion is whitespace, and <code>str.translate</code>.
*
- * @param obj to coerce to a String or nullk
+ * @param obj to coerce to a String or null
* @param name of method
* @return coerced value or null
* @throws PyException if the coercion fails
*/
- private static String asStripSepOrError(PyObject obj, String name) throws PyException {
+ private static String asStringNullOrError(PyObject obj, String name) throws PyException {
if (obj == null || obj == Py.None) {
return null;
@@ -774,13 +732,37 @@
String ret = asStringOrNull(obj);
if (ret != null) {
return ret;
+ } else if (name == null) {
+ // A nameless method is the client
+ throw Py.TypeError("expected None, str or buffer compatible object");
} else {
- throw Py.TypeError(name
- + " arg must be None, str, unicode, buffer compatible object");
+ // Tuned for .strip and its relations, which supply their name
+ throw Py.TypeError(name + " arg must be None, str or buffer compatible object");
}
}
}
+ /**
+ * Return a String equivalent to the argument according to the calling conventions of
+ * certain methods of <code>str</code>. Those methods accept anything bearing the buffer
+ * interface as a byte string, or accept a unicode argument for which they accept responsibility
+ * to interpret from its UTF16 encoded form (the internal representation returned by
+ * {@link PyUnicode#getString()}).
+ *
+ * @param obj to coerce to a String
+ * @return coerced value
+ * @throws PyException if the coercion fails
+ */
+ private static String asBMPStringOrError(PyObject obj) {
+ // PyUnicode accepted here. Care required in the client if obj is not basic plane.
+ String ret = asStringOrNull(obj);
+ if (ret != null) {
+ return ret;
+ } else {
+ throw Py.TypeError("expected str, bytearray, unicode or buffer compatible object");
+ }
+ }
+
@Override
public boolean __contains__(PyObject o) {
return str___contains__(o);
@@ -792,12 +774,13 @@
return getString().indexOf(other) >= 0;
}
+ @Override
protected PyObject repeat(int count) {
- if(count < 0) {
+ if (count < 0) {
count = 0;
}
int s = getString().length();
- if((long)s * count > Integer.MAX_VALUE) {
+ if ((long)s * count > Integer.MAX_VALUE) {
// Since Strings store their data in an array, we can't make one
// longer than Integer.MAX_VALUE. Without this check we get
// NegativeArraySize exceptions when we create the array on the
@@ -805,7 +788,7 @@
throw Py.OverflowError("max str len is " + Integer.MAX_VALUE);
}
char new_chars[] = new char[s * count];
- for(int i = 0; i < count; i++) {
+ for (int i = 0; i < count; i++) {
getString().getChars(0, s, new_chars, i * s);
}
return createInstance(new String(new_chars));
@@ -823,7 +806,7 @@
}
return repeat(o.asIndex(Py.OverflowError));
}
-
+
@Override
public PyObject __rmul__(PyObject o) {
return str___rmul__(o);
@@ -881,17 +864,16 @@
public PyObject __mod__(PyObject other) {
return str___mod__(other);
}
-
+
@ExposedMethod(doc = BuiltinDocs.str___mod___doc)
- public PyObject str___mod__(PyObject other){
+ public PyObject str___mod__(PyObject other) {
StringFormatter fmt = new StringFormatter(getString(), false);
return fmt.format(other);
}
@Override
public PyObject __int__() {
- try
- {
+ try {
return Py.newInteger(atoi(10));
} catch (PyException e) {
if (e.match(Py.OverflowError)) {
@@ -913,17 +895,17 @@
@Override
public PyObject __pos__() {
- throw Py.TypeError("bad operand type for unary +");
+ throw Py.TypeError("bad operand type for unary +");
}
@Override
public PyObject __neg__() {
- throw Py.TypeError("bad operand type for unary -");
+ throw Py.TypeError("bad operand type for unary -");
}
@Override
public PyObject __invert__() {
- throw Py.TypeError("bad operand type for unary ~");
+ throw Py.TypeError("bad operand type for unary ~");
}
@SuppressWarnings("fallthrough")
@@ -936,8 +918,9 @@
int s = 0;
int n = getString().length();
- while (s < n && Character.isSpaceChar(getString().charAt(s)))
+ while (s < n && Character.isSpaceChar(getString().charAt(s))) {
s++;
+ }
if (s == n) {
throw Py.ValueError("empty string for complex()");
@@ -951,107 +934,113 @@
do {
char c = getString().charAt(s);
switch (c) {
- case '-':
- sign = -1;
- /* Fallthrough */
- case '+':
- if (done || s+1 == n) {
- sw_error = true;
- break;
- }
- // a character is guaranteed, but it better be a digit
- // or J or j
- c = getString().charAt(++s); // eat the sign character
- // and check the next
- if (!Character.isDigit(c) && c!='J' && c!='j')
- sw_error = true;
- break;
-
- case 'J':
- case 'j':
- if (got_im || done) {
- sw_error = true;
- break;
- }
- if (z < 0.0) {
- y = sign;
- } else {
- y = sign * z;
- }
- got_im = true;
- done = got_re;
- sign = 1;
- s++; // eat the J or j
- break;
-
- case ' ':
- while (s < n && Character.isSpaceChar(getString().charAt(s)))
- s++;
- if (s != n)
- sw_error = true;
- break;
-
- default:
- boolean digit_or_dot = (c == '.' || Character.isDigit(c));
- if (!digit_or_dot) {
- sw_error = true;
- break;
- }
- int end = endDouble(getString(),s);
- z = Double.valueOf(getString().substring(s, end)).doubleValue();
- if (z == Double.POSITIVE_INFINITY) {
- throw Py.ValueError(String.format("float() out of range: %.150s", getString()));
- }
-
- s=end;
- if (s < n) {
- c = getString().charAt(s);
- if (c == 'J' || c == 'j') {
+ case '-':
+ sign = -1;
+ /* Fallthrough */
+ case '+':
+ if (done || s + 1 == n) {
+ sw_error = true;
break;
}
- }
- if (got_re) {
- sw_error = true;
- break;
- }
-
- /* accept a real part */
- x = sign * z;
- got_re = true;
- done = got_im;
- z = -1.0;
- sign = 1;
- break;
-
- } /* end of switch */
+ // a character is guaranteed, but it better be a digit
+ // or J or j
+ c = getString().charAt(++s); // eat the sign character
+ // and check the next
+ if (!Character.isDigit(c) && c != 'J' && c != 'j') {
+ sw_error = true;
+ }
+ break;
+
+ case 'J':
+ case 'j':
+ if (got_im || done) {
+ sw_error = true;
+ break;
+ }
+ if (z < 0.0) {
+ y = sign;
+ } else {
+ y = sign * z;
+ }
+ got_im = true;
+ done = got_re;
+ sign = 1;
+ s++; // eat the J or j
+ break;
+
+ case ' ':
+ while (s < n && Character.isSpaceChar(getString().charAt(s))) {
+ s++;
+ }
+ if (s != n) {
+ sw_error = true;
+ }
+ break;
+
+ default:
+ boolean digit_or_dot = (c == '.' || Character.isDigit(c));
+ if (!digit_or_dot) {
+ sw_error = true;
+ break;
+ }
+ int end = endDouble(getString(), s);
+ z = Double.valueOf(getString().substring(s, end)).doubleValue();
+ if (z == Double.POSITIVE_INFINITY) {
+ throw Py.ValueError(String.format("float() out of range: %.150s",
+ getString()));
+ }
+
+ s = end;
+ if (s < n) {
+ c = getString().charAt(s);
+ if (c == 'J' || c == 'j') {
+ break;
+ }
+ }
+ if (got_re) {
+ sw_error = true;
+ break;
+ }
+
+ /* accept a real part */
+ x = sign * z;
+ got_re = true;
+ done = got_im;
+ z = -1.0;
+ sign = 1;
+ break;
+
+ } /* end of switch */
} while (s < n && !sw_error);
if (sw_error) {
- throw Py.ValueError("malformed string for complex() " +
- getString().substring(s));
+ throw Py.ValueError("malformed string for complex() " + getString().substring(s));
}
- return new PyComplex(x,y);
+ return new PyComplex(x, y);
}
private int endDouble(String string, int s) {
int n = string.length();
while (s < n) {
char c = string.charAt(s++);
- if (Character.isDigit(c))
+ if (Character.isDigit(c)) {
continue;
- if (c == '.')
+ }
+ if (c == '.') {
continue;
+ }
if (c == 'e' || c == 'E') {
if (s < n) {
c = string.charAt(s);
- if (c == '+' || c == '-')
+ if (c == '+' || c == '-') {
s++;
+ }
continue;
}
}
- return s-1;
+ return s - 1;
}
return s;
}
@@ -1060,7 +1049,7 @@
public String lower() {
return str_lower();
}
-
+
@ExposedMethod(doc = BuiltinDocs.str_lower_doc)
final String str_lower() {
return getString().toLowerCase();
@@ -1087,17 +1076,17 @@
boolean previous_is_cased = false;
for (int i = 0; i < n; i++) {
char ch = chars[i];
- if (previous_is_cased)
+ if (previous_is_cased) {
chars[i] = Character.toLowerCase(ch);
- else
+ } else {
chars[i] = Character.toTitleCase(ch);
-
- if (Character.isLowerCase(ch) ||
- Character.isUpperCase(ch) ||
- Character.isTitleCase(ch))
+ }
+
+ if (Character.isLowerCase(ch) || Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
previous_is_cased = true;
- else
+ } else {
previous_is_cased = false;
+ }
}
return new String(chars);
}
@@ -1109,13 +1098,12 @@
@ExposedMethod(doc = BuiltinDocs.str_swapcase_doc)
final String str_swapcase() {
char[] chars = getString().toCharArray();
- int n=chars.length;
- for (int i=0; i<n; i++) {
+ int n = chars.length;
+ for (int i = 0; i < n; i++) {
char c = chars[i];
if (Character.isUpperCase(c)) {
chars[i] = Character.toLowerCase(c);
- }
- else if (Character.isLowerCase(c)) {
+ } else if (Character.isLowerCase(c)) {
chars[i] = Character.toUpperCase(c);
}
}
@@ -1123,8 +1111,8 @@
}
/**
- * Equivalent of Python str.strip() with no argument, meaning strip whitespace. Any whitespace
- * byte/character will be discarded from either end of this <code>str</code>.
+ * Equivalent of Python <code>str.strip()</code> with no argument, meaning strip whitespace. Any
+ * whitespace byte/character will be discarded from either end of this <code>str</code>.
*
* @return a new String, stripped of the whitespace characters/bytes
*/
@@ -1133,9 +1121,7 @@
}
/**
- * Equivalent of Python str.strip(). Any byte/character matching one of those in
- * <code>stripChars</code> will be discarded from either end of this <code>str</code>. If
- * <code>stripChars == null</code>, whitespace will be stripped.
+ * Equivalent of Python <code>str.strip()</code>.
*
* @param stripChars characters to strip from either end of this str/bytes, or null
* @return a new String, stripped of the specified characters/bytes
@@ -1144,26 +1130,40 @@
return _strip(stripChars);
}
+ /**
+ * Equivalent of Python <code>str.strip()</code>. Any byte/character matching one of those in
+ * <code>stripChars</code> will be discarded from either end of this <code>str</code>. If
+ * <code>stripChars == null</code>, whitespace will be stripped. If <code>stripChars</code> is a
+ * <code>PyUnicode</code>, the result will also be a <code>PyUnicode</code>.
+ *
+ * @param stripChars characters to strip from either end of this str/bytes, or null
+ * @return a new <code>PyString</code> (or {@link PyUnicode}), stripped of the specified
+ * characters/bytes
+ */
+ public PyObject strip(PyObject stripChars) {
+ return str_strip(stripChars);
+ }
+
@ExposedMethod(defaults = "null", doc = BuiltinDocs.str_strip_doc)
final PyObject str_strip(PyObject chars) {
if (chars instanceof PyUnicode) {
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_strip(chars);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
- String stripChars = asStripSepOrError(chars, "strip");
+ // It ought to be None, null, some kind of bytes with the buffer API.
+ String stripChars = asStringNullOrError(chars, "strip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_strip(stripChars));
}
}
/**
- * Implementation of Python str.strip() common to exposed and Java API, when stripping
- * whitespace. Any whitespace byte/character will be discarded from either end of this
+ * Implementation of Python <code>str.strip()</code> common to exposed and Java API, when
+ * stripping whitespace. Any whitespace byte/character will be discarded from either end of this
* <code>str</code>.
* <p>
- * Implementation note: although a str contains only bytes, this method is also called by
- * {@link PyUnicode#unicode_strip(PyObject)} when this is a basic-plane string.
+ * Implementation note: although a <code>str</code> contains only bytes, this method is also
+ * called by {@link PyUnicode#unicode_strip(PyObject)} when this is a basic-plane string.
*
* @return a new String, stripped of the whitespace characters/bytes
*/
@@ -1182,12 +1182,14 @@
}
/**
- * Implementation of Python str.strip() common to exposed and Java API. Any byte/character
- * matching one of those in <code>stripChars</code> will be discarded from either end of this
- * <code>str</code>. If <code>stripChars == null</code>, whitespace will be stripped.
+ * Implementation of Python <code>str.strip()</code> common to exposed and Java API. Any
+ * byte/character matching one of those in <code>stripChars</code> will be discarded from either
+ * end of this <code>str</code>. If <code>stripChars == null</code>, whitespace will be
+ * stripped.
* <p>
- * Implementation note: although a str contains only bytes, this method is also called by
- * {@link PyUnicode#unicode_strip(PyObject)} when both arguments are basic-plane strings.
+ * Implementation note: although a <code>str</code> contains only bytes, this method is also
+ * called by {@link PyUnicode#unicode_strip(PyObject)} when both arguments are basic-plane
+ * strings.
*
* @param stripChars characters to strip or null
* @return a new String, stripped of the specified characters/bytes
@@ -1212,7 +1214,7 @@
}
/**
- * Helper for strip, lstrip implementation, when stripping whitespace.
+ * Helper for <code>strip</code>, <code>lstrip</code> implementation, when stripping whitespace.
*
* @param s string to search (only <code>s[0:right]</code> is searched).
* @param right rightmost extent of string search
@@ -1228,7 +1230,8 @@
}
/**
- * Helper for strip, lstrip implementation, when stripping specified characters.
+ * Helper for <code>strip</code>, <code>lstrip</code> implementation, when stripping specified
+ * characters.
*
* @param s string to search (only <code>s[0:right]</code> is searched).
* @param stripChars specifies set of characters to strip
@@ -1246,7 +1249,7 @@
}
/**
- * Helper for strip, rstrip implementation, when stripping whitespace.
+ * Helper for <code>strip</code>, <code>rstrip</code> implementation, when stripping whitespace.
*
* @param s string to search.
* @return index of rightmost non-whitespace character or -1 if they all are.
@@ -1261,7 +1264,8 @@
}
/**
- * Helper for strip, rstrip implementation, when stripping specified characters.
+ * Helper for <code>strip</code>, <code>rstrip</code> implementation, when stripping specified
+ * characters.
*
* @param s string to search.
* @param stripChars specifies set of characters to strip
@@ -1277,8 +1281,8 @@
}
/**
- * Equivalent of Python str.lstrip() with no argument, meaning strip whitespace. Any whitespace
- * byte/character will be discarded from the left of this <code>str</code>.
+ * Equivalent of Python <code>str.lstrip()</code> with no argument, meaning strip whitespace.
+ * Any whitespace byte/character will be discarded from the left of this <code>str</code>.
*
* @return a new String, stripped of the whitespace characters/bytes
*/
@@ -1287,34 +1291,46 @@
}
/**
- * Equivalent of Python str.lstrip(). Any byte/character matching one of those in
- * <code>stripChars</code> will be discarded from the left end of this <code>str</code>. If
- * <code>stripChars == null</code>, whitespace will be stripped.
+ * Equivalent of Python <code>str.lstrip()</code>.
*
- * @param stripChars characters to strip from either end of this str/bytes, or null
+ * @param stripChars characters to strip from the left end of this str/bytes, or null
* @return a new String, stripped of the specified characters/bytes
*/
public String lstrip(String sep) {
return _lstrip(sep);
}
+ /**
+ * Equivalent of Python <code>str.lstrip()</code>. Any byte/character matching one of those in
+ * <code>stripChars</code> will be discarded from the left end of this <code>str</code>. If
+ * <code>stripChars == null</code>, whitespace will be stripped. If <code>stripChars</code> is a
+ * <code>PyUnicode</code>, the result will also be a <code>PyUnicode</code>.
+ *
+ * @param stripChars characters to strip from the left end of this str/bytes, or null
+ * @return a new <code>PyString</code> (or {@link PyUnicode}), stripped of the specified
+ * characters/bytes
+ */
+ public PyObject lstrip(PyObject sep) {
+ return str_lstrip(sep);
+ }
+
@ExposedMethod(defaults = "null", doc = BuiltinDocs.str_lstrip_doc)
final PyObject str_lstrip(PyObject chars) {
if (chars instanceof PyUnicode) {
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_lstrip(chars);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
- String stripChars = asStripSepOrError(chars, "lstrip");
+ // It ought to be None, null, some kind of bytes with the buffer API.
+ String stripChars = asStringNullOrError(chars, "lstrip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_lstrip(stripChars));
}
}
/**
- * Implementation of Python str.lstrip() common to exposed and Java API, when stripping
- * whitespace. Any whitespace byte/character will be discarded from the left end of this
- * <code>str</code>.
+ * Implementation of Python <code>str.lstrip()</code> common to exposed and Java API, when
+ * stripping whitespace. Any whitespace byte/character will be discarded from the left end of
+ * this <code>str</code>.
* <p>
* Implementation note: although a str contains only bytes, this method is also called by
* {@link PyUnicode#unicode_lstrip(PyObject)} when this is a basic-plane string.
@@ -1329,12 +1345,14 @@
}
/**
- * Implementation of Python str.lstrip() common to exposed and Java API. Any byte/character
- * matching one of those in <code>stripChars</code> will be discarded from the left end of this
- * <code>str</code>. If <code>stripChars == null</code>, whitespace will be stripped.
+ * Implementation of Python <code>str.lstrip()</code> common to exposed and Java API. Any
+ * byte/character matching one of those in <code>stripChars</code> will be discarded from the
+ * left end of this <code>str</code>. If <code>stripChars == null</code>, whitespace will be
+ * stripped.
* <p>
- * Implementation note: although a str contains only bytes, this method is also called by
- * {@link PyUnicode#unicode_lstrip(PyObject)} when both arguments are basic-plane strings.
+ * Implementation note: although a <code>str</code> contains only bytes, this method is also
+ * called by {@link PyUnicode#unicode_lstrip(PyObject)} when both arguments are basic-plane
+ * strings.
*
* @param stripChars characters to strip or null
* @return a new String, stripped of the specified characters/bytes
@@ -1352,8 +1370,8 @@
}
/**
- * Equivalent of Python str.rstrip() with no argument, meaning strip whitespace. Any whitespace
- * byte/character will be discarded from the right end of this <code>str</code>.
+ * Equivalent of Python <code>str.rstrip()</code> with no argument, meaning strip whitespace.
+ * Any whitespace byte/character will be discarded from the right end of this <code>str</code>.
*
* @return a new String, stripped of the whitespace characters/bytes
*/
@@ -1362,9 +1380,7 @@
}
/**
- * Equivalent of Python str.rstrip(). Any byte/character matching one of those in
- * <code>stripChars</code> will be discarded from thr right end of this <code>str</code>. If
- * <code>stripChars == null</code>, whitespace will be stripped.
+ * Equivalent of Python <code>str.rstrip()</code>.
*
* @param stripChars characters to strip from either end of this str/bytes, or null
* @return a new String, stripped of the specified characters/bytes
@@ -1373,26 +1389,40 @@
return _rstrip(sep);
}
+ /**
+ * Equivalent of Python <code>str.rstrip()</code>. Any byte/character matching one of those in
+ * <code>stripChars</code> will be discarded from the right end of this <code>str</code>. If
+ * <code>stripChars == null</code>, whitespace will be stripped. If <code>stripChars</code> is a
+ * <code>PyUnicode</code>, the result will also be a <code>PyUnicode</code>.
+ *
+ * @param stripChars characters to strip from the right end of this str/bytes, or null
+ * @return a new <code>PyString</code> (or {@link PyUnicode}), stripped of the specified
+ * characters/bytes
+ */
+ public PyObject rstrip(PyObject sep) {
+ return str_rstrip(sep);
+ }
+
@ExposedMethod(defaults = "null", doc = BuiltinDocs.str_rstrip_doc)
final PyObject str_rstrip(PyObject chars) {
if (chars instanceof PyUnicode) {
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_rstrip(chars);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
- String stripChars = asStripSepOrError(chars, "rstrip");
+ // It ought to be None, null, some kind of bytes with the buffer API.
+ String stripChars = asStringNullOrError(chars, "rstrip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_rstrip(stripChars));
}
}
/**
- * Implementation of Python str.rstrip() common to exposed and Java API, when stripping
- * whitespace. Any whitespace byte/character will be discarded from the right end of this
- * <code>str</code>.
+ * Implementation of Python <code>str.rstrip()</code> common to exposed and Java API, when
+ * stripping whitespace. Any whitespace byte/character will be discarded from the right end of
+ * this <code>str</code>.
* <p>
- * Implementation note: although a str contains only bytes, this method is also called by
- * {@link PyUnicode#unicode_rstrip(PyObject)} when this is a basic-plane string.
+ * Implementation note: although a <code>str</code> contains only bytes, this method is also
+ * called by {@link PyUnicode#unicode_rstrip(PyObject)} when this is a basic-plane string.
*
* @return a new String, stripped of the whitespace characters/bytes
*/
@@ -1410,12 +1440,14 @@
}
/**
- * Implementation of Python str.rstrip() common to exposed and Java API. Any byte/character
- * matching one of those in <code>stripChars</code> will be discarded from the right end of this
- * <code>str</code>. If <code>stripChars == null</code>, whitespace will be stripped.
+ * Implementation of Python <code>str.rstrip()</code> common to exposed and Java API. Any
+ * byte/character matching one of those in <code>stripChars</code> will be discarded from the
+ * right end of this <code>str</code>. If <code>stripChars == null</code>, whitespace will be
+ * stripped.
* <p>
- * Implementation note: although a str contains only bytes, this method is also called by
- * {@link PyUnicode#unicode_strip(PyObject)} when both arguments are basic-plane strings.
+ * Implementation note: although a <code>str</code> contains only bytes, this method is also
+ * called by {@link PyUnicode#unicode_rstrip(PyObject)} when both arguments are basic-plane
+ * strings.
*
* @param stripChars characters to strip or null
* @return a new String, stripped of the specified characters/bytes
@@ -1498,8 +1530,8 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_split(sepObj, maxsplit);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
- String sep = asStripSepOrError(sepObj, "split");
+ // It ought to be None, null, some kind of bytes with the buffer API.
+ String sep = asStringNullOrError(sepObj, "split");
// Split on specified string or whitespace if sep == null
return _split(sep, maxsplit);
}
@@ -1563,9 +1595,10 @@
// Find the next occurrence of non-whitespace
while (start < length) {
- if (!Character.isWhitespace(s.charAt(start)))
+ if (!Character.isWhitespace(s.charAt(start))) {
// Break leaving start pointing at non-whitespace
break;
+ }
start++;
}
@@ -1580,9 +1613,10 @@
} else {
// The next segment runs up to the next next whitespace or end
for (index = start; index < length; index++) {
- if (Character.isWhitespace(s.charAt(index)))
+ if (Character.isWhitespace(s.charAt(index))) {
// Break leaving index pointing at whitespace
break;
+ }
}
}
@@ -1747,15 +1781,15 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_rsplit(sepObj, maxsplit);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
- String sep = asStripSepOrError(sepObj, "rsplit");
+ // It ought to be None, null, some kind of bytes with the buffer API.
+ String sep = asStringNullOrError(sepObj, "rsplit");
// Split on specified string or whitespace if sep == null
return _rsplit(sep, maxsplit);
}
}
/**
- * Implementation of Python str.rsplit() common to exposed and Java API returning a
+ * Implementation of Python <code>str.rsplit()</code> common to exposed and Java API returning a
* {@link PyList} of <code>PyString</code>s. The <code>str</code> will be split at each
* occurrence of <code>sep</code>, working from the right. If <code>sep == null</code>,
* whitespace will be used as the criterion. If <code>sep</code> has zero length, a Python
@@ -1813,9 +1847,10 @@
// Find the next occurrence of non-whitespace (working leftwards)
while (end >= 0) {
- if (!Character.isWhitespace(s.charAt(end)))
+ if (!Character.isWhitespace(s.charAt(end))) {
// Break leaving end pointing at non-whitespace
break;
+ }
--end;
}
@@ -1830,9 +1865,10 @@
} else {
// The next segment runs back to the next next whitespace or beginning
for (index = end; index >= 0; --index) {
- if (Character.isWhitespace(s.charAt(index)))
+ if (Character.isWhitespace(s.charAt(index))) {
// Break leaving index pointing at whitespace
break;
+ }
}
}
@@ -1913,108 +1949,118 @@
return list;
}
+ /**
+ * Equivalent to Python <code>str.partition()</code>, splits the <code>PyString</code> at the
+ * first occurrence of <code>sepObj</code> returning a {@link PyTuple} containing the part
+ * before the separator, the separator itself, and the part after the separator.
+ *
+ * @param sepObj str, unicode or object implementing {@link BufferProtocol}
+ * @return tuple of parts
+ */
public PyTuple partition(PyObject sepObj) {
return str_partition(sepObj);
}
@ExposedMethod(doc = BuiltinDocs.str_partition_doc)
final PyTuple str_partition(PyObject sepObj) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- String sep;
if (sepObj instanceof PyUnicode) {
+ // Deal with Unicode separately
return unicodePartition(sepObj);
- } else if (sepObj instanceof PyString) {
- sep = ((PyString) sepObj).getString();
+
} else {
- throw Py.TypeError("expected a character buffer object");
+ // It ought to be some kind of bytes with the buffer API.
+ String sep = asStringOrError(sepObj);
+
+ if (sep.length() == 0) {
+ throw Py.ValueError("empty separator");
+ }
+
+ int index = getString().indexOf(sep);
+ if (index != -1) {
+ return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
+ index + sep.length(), getString().length()));
+ } else {
+ return new PyTuple(this, Py.EmptyString, Py.EmptyString);
+ }
}
+ }
+
+ final PyTuple unicodePartition(PyObject sepObj) {
+ PyUnicode strObj = __unicode__();
+ String str = strObj.getString();
+
+ // Will throw a TypeError if not a basestring
+ String sep = sepObj.asString();
+ sepObj = sepObj.__unicode__();
if (sep.length() == 0) {
throw Py.ValueError("empty separator");
}
- int index = getString().indexOf(sep);
- if (index != -1) {
- return new PyTuple(fromSubstring(0, index), sepObj,
- fromSubstring(index + sep.length(), getString().length()));
- } else {
- return new PyTuple(this, Py.EmptyString, Py.EmptyString);
- }
- }
-
- final PyTuple unicodePartition(PyObject sepObj) {
- PyUnicode strObj = __unicode__();
- String str = strObj.getString();
-
- // Will throw a TypeError if not a basestring
- String sep = sepObj.asString();
- sepObj = sepObj.__unicode__();
-
- if (sep.length() == 0) {
- throw Py.ValueError("empty separator");
- }
-
int index = str.indexOf(sep);
if (index != -1) {
- return new PyTuple(strObj.fromSubstring(0, index), sepObj,
- strObj.fromSubstring(index + sep.length(), str.length()));
+ return new PyTuple(strObj.fromSubstring(0, index), sepObj, strObj.fromSubstring(index
+ + sep.length(), str.length()));
} else {
PyUnicode emptyUnicode = Py.newUnicode("");
return new PyTuple(this, emptyUnicode, emptyUnicode);
}
}
+ /**
+ * Equivalent to Python <code>str.rpartition()</code>, splits the <code>PyString</code> at the
+ * last occurrence of <code>sepObj</code> returning a {@link PyTuple} containing the part before
+ * the separator, the separator itself, and the part after the separator.
+ *
+ * @param sepObj str, unicode or object implementing {@link BufferProtocol}
+ * @return tuple of parts
+ */
public PyTuple rpartition(PyObject sepObj) {
return str_rpartition(sepObj);
}
@ExposedMethod(doc = BuiltinDocs.str_rpartition_doc)
final PyTuple str_rpartition(PyObject sepObj) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- String sep;
if (sepObj instanceof PyUnicode) {
+ // Deal with Unicode separately
return unicodeRpartition(sepObj);
- } else if (sepObj instanceof PyString) {
- sep = ((PyString) sepObj).getString();
+
} else {
- throw Py.TypeError("expected a character buffer object");
+ // It ought to be some kind of bytes with the buffer API.
+ String sep = asStringOrError(sepObj);
+
+ if (sep.length() == 0) {
+ throw Py.ValueError("empty separator");
+ }
+
+ int index = getString().lastIndexOf(sep);
+ if (index != -1) {
+ return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
+ index + sep.length(), getString().length()));
+ } else {
+ return new PyTuple(Py.EmptyString, Py.EmptyString, this);
+ }
}
+ }
+
+ final PyTuple unicodeRpartition(PyObject sepObj) {
+ PyUnicode strObj = __unicode__();
+ String str = strObj.getString();
+
+ // Will throw a TypeError if not a basestring
+ String sep = sepObj.asString();
+ sepObj = sepObj.__unicode__();
if (sep.length() == 0) {
throw Py.ValueError("empty separator");
}
- int index = getString().lastIndexOf(sep);
- if (index != -1) {
- return new PyTuple(fromSubstring(0, index), sepObj,
- fromSubstring(index + sep.length(), getString().length()));
- } else {
- return new PyTuple(Py.EmptyString, Py.EmptyString, this);
- }
- }
-
- final PyTuple unicodeRpartition(PyObject sepObj) {
- PyUnicode strObj = __unicode__();
- String str = strObj.getString();
-
- // Will throw a TypeError if not a basestring
- String sep = sepObj.asString();
- sepObj = sepObj.__unicode__();
-
- if (sep.length() == 0) {
- throw Py.ValueError("empty separator");
- }
-
int index = str.lastIndexOf(sep);
if (index != -1) {
- return new PyTuple(strObj.fromSubstring(0, index), sepObj,
- strObj.fromSubstring(index + sep.length(), str.length()));
+ return new PyTuple(strObj.fromSubstring(0, index), sepObj, strObj.fromSubstring(index
+ + sep.length(), str.length()));
} else {
PyUnicode emptyUnicode = Py.newUnicode("");
return new PyTuple(emptyUnicode, emptyUnicode, this);
@@ -2034,24 +2080,27 @@
PyList list = new PyList();
char[] chars = getString().toCharArray();
- int n=chars.length;
+ int n = chars.length;
int j = 0;
- for (int i = 0; i < n; ) {
+ for (int i = 0; i < n;) {
/* Find a line and append it */
- while (i < n && chars[i] != '\n' && chars[i] != '\r' &&
- Character.getType(chars[i]) != Character.LINE_SEPARATOR)
+ while (i < n && chars[i] != '\n' && chars[i] != '\r'
+ && Character.getType(chars[i]) != Character.LINE_SEPARATOR) {
i++;
+ }
/* Skip the line break reading CRLF as one line break */
int eol = i;
if (i < n) {
- if (chars[i] == '\r' && i + 1 < n && chars[i+1] == '\n')
+ if (chars[i] == '\r' && i + 1 < n && chars[i + 1] == '\n') {
i += 2;
- else
+ } else {
i++;
- if (keepends)
+ }
+ if (keepends) {
eol = i;
+ }
}
list.append(fromSubstring(j, eol));
j = i;
@@ -2062,113 +2111,392 @@
return list;
}
+ /**
+ * Return a new object <em>of the same type as this one</em> equal to the slice
+ * <code>[begin:end]</code>. (Python end-relative indexes etc. are not supported.) Subclasses
+ * (see {@link PyUnicode#fromSubstring(int, int)}) override this to return their own type.
+ *
+ * @param begin first included character.
+ * @param end first excluded character.
+ * @return new object.
+ */
protected PyString fromSubstring(int begin, int end) {
return createInstance(getString().substring(begin, end), true);
}
+ /**
+ * Return the lowest index in the string where substring <code>sub</code> is found. Raises
+ * <code>ValueError</code> if the substring is not found.
+ *
+ * @param sub substring to find.
+ * @return index of <code>sub</code> in this object.
+ * @throws PyException(ValueError) if not found.
+ */
+ public int index(PyObject sub) {
+ return str_index(sub, null, null);
+ }
+
+ /**
+ * Return the lowest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:]</code>. Raises
+ * <code>ValueError</code> if the substring is not found.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @return index of <code>sub</code> in this object.
+ * @throws PyException(ValueError) if not found.
+ */
+ public int index(PyObject sub, PyObject start) throws PyException {
+ return str_index(sub, start, null);
+ }
+
+ /**
+ * Return the lowest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:end]</code>. Arguments
+ * <code>start</code> and <code>end</code> are interpreted as in slice notation, with null or
+ * {@link Py#None} representing "missing". Raises <code>ValueError</code> if the substring is
+ * not found.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return index of <code>sub</code> in this object.
+ * @throws PyException(ValueError) if not found.
+ */
+ public int index(PyObject sub, PyObject start, PyObject end) throws PyException {
+ return checkIndex(str_index(sub, start, end));
+ }
+
+ /** Equivalent to {@link #index(PyObject)} specialized to <code>String</code>. */
public int index(String sub) {
- return str_index(sub, null, null);
- }
-
+ return index(sub, null, null);
+ }
+
+ /** Equivalent to {@link #index(PyObject, PyObject)} specialized to <code>String</code>. */
public int index(String sub, PyObject start) {
- return str_index(sub, start, null);
- }
-
+ return index(sub, start, null);
+ }
+
+ /**
+ * Equivalent to {@link #index(PyObject, PyObject, PyObject)} specialized to
+ * <code>String</code>.
+ */
public int index(String sub, PyObject start, PyObject end) {
- return str_index(sub, start, end);
+ return checkIndex(_find(sub, start, end));
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_index_doc)
- final int str_index(String sub, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- int index = str_find(sub, start, end);
- if (index == -1)
- throw Py.ValueError("substring not found in string.index");
- return index;
- }
-
+ final int str_index(PyObject subObj, PyObject start, PyObject end) {
+ return checkIndex(str_find(subObj, start, end));
+ }
+
+ /**
+ * Return the highest index in the string where substring <code>sub</code> is found. Raises
+ * <code>ValueError</code> if the substring is not found.
+ *
+ * @param sub substring to find.
+ * @return index of <code>sub</code> in this object.
+ * @throws PyException(ValueError) if not found.
+ */
+ public int rindex(PyObject sub) {
+ return str_rindex(sub, null, null);
+ }
+
+ /**
+ * Return the highest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:]</code>. Raises
+ * <code>ValueError</code> if the substring is not found.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @return index of <code>sub</code> in this object.
+ * @throws PyException(ValueError) if not found.
+ */
+ public int rindex(PyObject sub, PyObject start) throws PyException {
+ return str_rindex(sub, start, null);
+ }
+
+ /**
+ * Return the highest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:end]</code>. Arguments
+ * <code>start</code> and <code>end</code> are interpreted as in slice notation, with null or
+ * {@link Py#None} representing "missing". Raises <code>ValueError</code> if the substring is
+ * not found.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return index of <code>sub</code> in this object.
+ * @throws PyException(ValueError) if not found.
+ */
+ public int rindex(PyObject sub, PyObject start, PyObject end) throws PyException {
+ return checkIndex(str_rindex(sub, start, end));
+ }
+
+ /** Equivalent to {@link #rindex(PyObject)} specialized to <code>String</code>. */
public int rindex(String sub) {
- return str_rindex(sub, null, null);
- }
-
+ return rindex(sub, null, null);
+ }
+
+ /** Equivalent to {@link #rindex(PyObject, PyObject)} specialized to <code>String</code>. */
public int rindex(String sub, PyObject start) {
- return str_rindex(sub, start, null);
- }
-
+ return rindex(sub, start, null);
+ }
+
+ /**
+ * Equivalent to {@link #rindex(PyObject, PyObject, PyObject)} specialized to
+ * <code>String</code>.
+ */
public int rindex(String sub, PyObject start, PyObject end) {
- return str_rindex(sub, start, end);
+ return checkIndex(_rfind(sub, start, end));
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_rindex_doc)
- final int str_rindex(String sub, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- int index = str_rfind(sub, start, end);
- if(index == -1)
- throw Py.ValueError("substring not found in string.rindex");
- return index;
- }
-
+ final int str_rindex(PyObject subObj, PyObject start, PyObject end) {
+ return checkIndex(str_rfind(subObj, start, end));
+ }
+
+ /**
+ * A little helper for converting str.find to str.index that will raise
+ * <code>ValueError("substring not found")</code> if the argument is negative, otherwise passes
+ * the argument through.
+ *
+ * @param index to check
+ * @return <code>index</code> if non-negative
+ * @throws PyException(ValueError) if not found
+ */
+ protected final int checkIndex(int index) throws PyException {
+ if (index >= 0) {
+ return index;
+ } else {
+ throw Py.ValueError("substring not found");
+ }
+ }
+
+ /**
+ * Return the number of non-overlapping occurrences of substring <code>sub</code>.
+ *
+ * @param sub substring to find.
+ * @return count of occurrences.
+ */
+ public int count(PyObject sub) {
+ return count(sub, null, null);
+ }
+
+ /**
+ * Return the number of non-overlapping occurrences of substring <code>sub</code> in the range
+ * <code>[start:]</code>.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @return count of occurrences.
+ */
+ public int count(PyObject sub, PyObject start) {
+ return count(sub, start, null);
+ }
+
+ /**
+ * Return the number of non-overlapping occurrences of substring <code>sub</code> in the range
+ * <code>[start:end]</code>. Optional arguments <code>start</code> and <code>end</code> are
+ * interpreted as in slice notation.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return count of occurrences.
+ */
+ public int count(PyObject sub, PyObject start, PyObject end) {
+ return str_count(sub, start, end);
+ }
+
+ /** Equivalent to {@link #count(PyObject)} specialized to <code>String</code>. */
public int count(String sub) {
- return str_count(sub, null, null);
- }
-
+ return count(sub, null, null);
+ }
+
+ /** Equivalent to {@link #count(PyObject, PyObject)} specialized to <code>String</code>. */
public int count(String sub, PyObject start) {
- return str_count(sub, start, null);
- }
-
+ return count(sub, start, null);
+ }
+
+ /**
+ * Equivalent to {@link #count(PyObject, PyObject, PyObject)} specialized to
+ * <code>String</code>.
+ */
public int count(String sub, PyObject start, PyObject end) {
- return str_count(sub, start, end);
- }
-
+ return _count(sub, start, end);
+ }
+
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_count_doc)
- final int str_count(String sub, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- if (sub == null) {
- throw Py.TypeError("count() takes at least 1 argument (0 given)");
+ final int str_count(PyObject subObj, PyObject start, PyObject end) {
+ if (subObj instanceof PyUnicode) {
+ // Promote the problem to a Unicode one
+ return ((PyUnicode)decode()).unicode_count(subObj, start, end);
+ } else {
+ // It ought to be some kind of bytes with the buffer API.
+ String sub = asStringOrError(subObj);
+ return _count(sub, start, end);
}
- int[] indices = translateIndices(start, end);
- int n = sub.length();
- if(n == 0) {
+ }
+
+ /**
+ * Helper common to the Python and Java API returning the number of occurrences of a substring.
+ * It accepts slice-like arguments, which may be <code>None</code> or end-relative (negative).
+ * This method also supports {@link PyUnicode#unicode_count(PyObject, PyObject, PyObject)}.
+ *
+ * @param sub substring to find.
+ * @param startObj start of slice.
+ * @param endObj end of slice.
+ * @return count of occurrences
+ */
+ protected final int _count_old(String sub, PyObject startObj, PyObject endObj) {
+// xxx
+ // Interpret the slice indices as concrete values
+ int[] indices = translateIndices(startObj, endObj);
+ int subLen = sub.length();
+
+ if (subLen == 0) {
+ // Special case counting the occurrences of an empty string
if (indices[2] > getString().length()) {
return 0;
+ } else {
+ return indices[1] - indices[0] + 1;
}
- return indices[1] - indices[0] + 1;
+
+ } else {
+ // Skip down this string finding occurrences of sub
+ int start = indices[0], end = indices[1], count = 0;
+ while (true) {
+ int index = getString().indexOf(sub, start);
+ if (index < 0) {
+ break; // not found
+ } else {
+ // Found at index. Next search begins at end of this instance, at:
+ start = index + subLen;
+ if (start <= end) {
+ count += 1; // ... and the instance found fits within this string.
+ } else {
+ break; // ... but the instance found overlaps the end, so is not valid.
+ }
+ }
+ }
+ return count;
}
- int count = 0;
- while(true){
- int index = getString().indexOf(sub, indices[0]);
- indices[0] = index + n;
- if(indices[0] > indices[1] || index == -1) {
- break;
+ }
+
+ protected final int _count(String sub, PyObject startObj, PyObject endObj) {
+
+ // Interpret the slice indices as concrete values
+ int[] indices = translateIndices(startObj, endObj);
+ int subLen = sub.length();
+
+ if (subLen == 0) {
+ // Special case counting the occurrences of an empty string
+ if (indices[2] > getString().length()) {
+ return 0;
+ } else {
+ return indices[1] - indices[0] + 1;
}
- count++;
+
+ } else {
+
+ // Skip down this string finding occurrences of sub
+ int start = indices[0], limit = indices[1] - subLen, count = 0;
+
+ while (start <= limit) {
+ int index = getString().indexOf(sub, start);
+ if (index >= 0 && index <= limit) {
+ // Found at index.
+ count += 1;
+ // Next search begins after this instance, at:
+ start = index + subLen;
+ } else {
+ // not found, or found too far right (index>limit)
+ break;
+ }
+ }
+ return count;
}
- return count;
- }
-
+ }
+
+ /**
+ * Return the lowest index in the string where substring <code>sub</code> is found.
+ *
+ * @param sub substring to find.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ public int find(PyObject sub) {
+ return find(sub, null, null);
+ }
+
+ /**
+ * Return the lowest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:]</code>.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ public int find(PyObject sub, PyObject start) {
+ return find(sub, start, null);
+ }
+
+ /**
+ * Return the lowest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:end]</code>. Arguments
+ * <code>start</code> and <code>end</code> are interpreted as in slice notation, with null or
+ * {@link Py#None} representing "missing".
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ public int find(PyObject sub, PyObject start, PyObject end) {
+ return str_find(sub, start, end);
+ }
+
+ /** Equivalent to {@link #find(PyObject)} specialized to <code>String</code>. */
public int find(String sub) {
- return str_find(sub, null, null);
- }
-
+ return find(sub, null, null);
+ }
+
+ /** Equivalent to {@link #find(PyObject, PyObject)} specialized to <code>String</code>. */
public int find(String sub, PyObject start) {
- return str_find(sub, start, null);
- }
-
+ return find(sub, start, null);
+ }
+
+ /**
+ * Equivalent to {@link #find(PyObject, PyObject, PyObject)} specialized to <code>String</code>.
+ */
public int find(String sub, PyObject start, PyObject end) {
- return str_find(sub, start, end);
+ return _find(sub, start, end);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_find_doc)
- final int str_find(String sub, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
+ final int str_find(PyObject subObj, PyObject start, PyObject end) {
+ if (subObj instanceof PyUnicode) {
+ // Promote the problem to a Unicode one
+ return ((PyUnicode)decode()).unicode_find(subObj, start, end);
+ } else {
+ // It ought to be some kind of bytes with the buffer API.
+ String sub = asStringOrError(subObj);
+ return _find(sub, start, end);
+ }
+ }
+
+ /**
+ * Helper common to the Python and Java API returning the index of the substring or -1 for not
+ * found. It accepts slice-like arguments, which may be <code>None</code> or end-relative
+ * (negative). This method also supports
+ * {@link PyUnicode#unicode_find(PyObject, PyObject, PyObject)}.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ protected final int _find(String sub, PyObject start, PyObject end) {
int[] indices = translateIndices(start, end);
int index = getString().indexOf(sub, indices[0]);
if (index < indices[2] || index > indices[1]) {
@@ -2177,23 +2505,84 @@
return index;
}
+ /**
+ * Return the highest index in the string where substring <code>sub</code> is found.
+ *
+ * @param sub substring to find.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ public int rfind(PyObject sub) {
+ return rfind(sub, null, null);
+ }
+
+ /**
+ * Return the highest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:]</code>.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ public int rfind(PyObject sub, PyObject start) {
+ return rfind(sub, start, null);
+ }
+
+ /**
+ * Return the highest index in the string where substring <code>sub</code> is found, such that
+ * <code>sub</code> is contained in the slice <code>s[start:end]</code>. Arguments
+ * <code>start</code> and <code>end</code> are interpreted as in slice notation, with null or
+ * {@link Py#None} representing "missing".
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ public int rfind(PyObject sub, PyObject start, PyObject end) {
+ return str_rfind(sub, start, end);
+ }
+
+ /** Equivalent to {@link #rfind(PyObject)} specialized to <code>String</code>. */
public int rfind(String sub) {
- return str_rfind(sub, null, null);
- }
-
+ return rfind(sub, null, null);
+ }
+
+ /** Equivalent to {@link #rfind(PyObject, PyObject)} specialized to <code>String</code>. */
public int rfind(String sub, PyObject start) {
- return str_rfind(sub, start, null);
- }
-
+ return rfind(sub, start, null);
+ }
+
+ /**
+ * Equivalent to {@link #rfind(PyObject, PyObject, PyObject)} specialized to <code>String</code>.
+ */
public int rfind(String sub, PyObject start, PyObject end) {
- return str_rfind(sub, start, end);
+ return _rfind(sub, start, end);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_rfind_doc)
- final int str_rfind(String sub, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
+ final int str_rfind(PyObject subObj, PyObject start, PyObject end) {
+ if (subObj instanceof PyUnicode) {
+ // Promote the problem to a Unicode one
+ return ((PyUnicode)decode()).unicode_rfind(subObj, start, end);
+ } else {
+ // It ought to be some kind of bytes with the buffer API.
+ String sub = asStringOrError(subObj);
+ return _rfind(sub, start, end);
+ }
+ }
+
+ /**
+ * Helper common to the Python and Java API returning the last index of the substring or -1 for
+ * not found. It accepts slice-like arguments, which may be <code>None</code> or end-relative
+ * (negative). This method also supports
+ * {@link PyUnicode#unicode_rfind(PyObject, PyObject, PyObject)}.
+ *
+ * @param sub substring to find.
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return index of <code>sub</code> in this object or -1 if not found.
+ */
+ protected final int _rfind(String sub, PyObject start, PyObject end) {
int[] indices = translateIndices(start, end);
int index = getString().lastIndexOf(sub, indices[1] - sub.length());
if (index < indices[2]) {
@@ -2211,74 +2600,88 @@
throw Py.ValueError("null byte in argument for float()");
}
if (Character.isDigit(ch)) {
- if (s == null)
+ if (s == null) {
s = new StringBuilder(getString());
+ }
int val = Character.digit(ch, 10);
s.setCharAt(i, Character.forDigit(val, 10));
}
}
String sval = getString();
- if (s != null)
+ if (s != null) {
sval = s.toString();
+ }
try {
// Double.valueOf allows format specifier ("d" or "f") at the end
String lowSval = sval.toLowerCase();
- if (lowSval.equals("nan")) return Double.NaN;
- else if (lowSval.equals("+nan")) return Double.NaN;
- else if (lowSval.equals("-nan")) return Double.NaN;
- else if (lowSval.equals("inf")) return Double.POSITIVE_INFINITY;
- else if (lowSval.equals("+inf")) return Double.POSITIVE_INFINITY;
- else if (lowSval.equals("-inf")) return Double.NEGATIVE_INFINITY;
- else if (lowSval.equals("infinity")) return Double.POSITIVE_INFINITY;
- else if (lowSval.equals("+infinity")) return Double.POSITIVE_INFINITY;
- else if (lowSval.equals("-infinity")) return Double.NEGATIVE_INFINITY;
-
+ if (lowSval.equals("nan")) {
+ return Double.NaN;
+ } else if (lowSval.equals("+nan")) {
+ return Double.NaN;
+ } else if (lowSval.equals("-nan")) {
+ return Double.NaN;
+ } else if (lowSval.equals("inf")) {
+ return Double.POSITIVE_INFINITY;
+ } else if (lowSval.equals("+inf")) {
+ return Double.POSITIVE_INFINITY;
+ } else if (lowSval.equals("-inf")) {
+ return Double.NEGATIVE_INFINITY;
+ } else if (lowSval.equals("infinity")) {
+ return Double.POSITIVE_INFINITY;
+ } else if (lowSval.equals("+infinity")) {
+ return Double.POSITIVE_INFINITY;
+ } else if (lowSval.equals("-infinity")) {
+ return Double.NEGATIVE_INFINITY;
+ }
+
if (lowSval.endsWith("d") || lowSval.endsWith("f")) {
throw new NumberFormatException("format specifiers not allowed");
}
return Double.valueOf(sval).doubleValue();
- }
- catch (NumberFormatException exc) {
- throw Py.ValueError("invalid literal for __float__: "+getString());
+ } catch (NumberFormatException exc) {
+ throw Py.ValueError("invalid literal for __float__: " + getString());
}
}
private BigInteger asciiToBigInteger(int base, boolean isLong) {
String str = getString();
-
+
int b = 0;
int e = str.length();
- while (b < e && Character.isWhitespace(str.charAt(b)))
+ while (b < e && Character.isWhitespace(str.charAt(b))) {
b++;
-
- while (e > b && Character.isWhitespace(str.charAt(e-1)))
+ }
+
+ while (e > b && Character.isWhitespace(str.charAt(e - 1))) {
e--;
+ }
char sign = 0;
if (b < e) {
sign = str.charAt(b);
if (sign == '-' || sign == '+') {
b++;
- while (b < e && Character.isWhitespace(str.charAt(b))) b++;
+ while (b < e && Character.isWhitespace(str.charAt(b))) {
+ b++;
+ }
}
if (base == 16) {
if (str.charAt(b) == '0') {
- if (b < e-1 &&
- Character.toUpperCase(str.charAt(b+1)) == 'X') {
+ if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'X') {
b += 2;
}
}
} else if (base == 0) {
if (str.charAt(b) == '0') {
- if (b < e-1 && Character.toUpperCase(str.charAt(b+1)) == 'X') {
+ if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'X') {
base = 16;
b += 2;
- } else if (b < e-1 && Character.toUpperCase(str.charAt(b+1)) == 'O') {
+ } else if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'O') {
base = 8;
b += 2;
- } else if (b < e-1 && Character.toUpperCase(str.charAt(b+1)) == 'B') {
+ } else if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'B') {
base = 2;
b += 2;
} else {
@@ -2286,12 +2689,11 @@
}
}
} else if (base == 8) {
- if (b < e-1 && Character.toUpperCase(str.charAt(b+1)) == 'O') {
+ if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'O') {
b += 2;
}
} else if (base == 2) {
- if (b < e-1 &&
- Character.toUpperCase(str.charAt(b+1)) == 'B') {
+ if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'B') {
b += 2;
}
}
@@ -2302,10 +2704,10 @@
}
// if the base >= 22, then an 'l' or 'L' is a digit!
- if (isLong && base < 22 && e > b && (str.charAt(e-1) == 'L' || str.charAt(e-1) == 'l')) {
+ if (isLong && base < 22 && e > b && (str.charAt(e - 1) == 'L' || str.charAt(e - 1) == 'l')) {
e--;
}
-
+
String s = str;
if (b > 0 || e < str.length()) {
s = str.substring(b, e);
@@ -2336,9 +2738,11 @@
}
return bi.intValue();
} catch (NumberFormatException exc) {
- throw Py.ValueError("invalid literal for int() with base " + base + ": '" + getString()+"'");
+ throw Py.ValueError("invalid literal for int() with base " + base + ": '" + getString()
+ + "'");
} catch (StringIndexOutOfBoundsException exc) {
- throw Py.ValueError("invalid literal for int() with base " + base + ": '" + getString()+"'");
+ throw Py.ValueError("invalid literal for int() with base " + base + ": '" + getString()
+ + "'");
}
}
@@ -2351,7 +2755,6 @@
throw Py.ValueError("invalid base for long literal:" + base);
}
-
try {
BigInteger bi = asciiToBigInteger(base, true);
return new PyLong(bi);
@@ -2361,31 +2764,36 @@
// above, or add an equivalent to CPython's PyUnicode_EncodeDecimal;
// we should note that the current error string does not quite match
// CPython regardless of the codec, that's going to require some more work
- throw Py.UnicodeEncodeError("decimal", "codec can't encode character",
- 0,0, "invalid decimal Unicode string");
+ throw Py.UnicodeEncodeError("decimal", "codec can't encode character", 0, 0,
+ "invalid decimal Unicode string");
} else {
- throw Py.ValueError("invalid literal for long() with base " + base + ": '" + getString()+"'");
+ throw Py.ValueError("invalid literal for long() with base " + base + ": '"
+ + getString() + "'");
}
} catch (StringIndexOutOfBoundsException exc) {
- throw Py.ValueError("invalid literal for long() with base " + base + ": '" + getString()+"'");
+ throw Py.ValueError("invalid literal for long() with base " + base + ": '"
+ + getString() + "'");
}
}
private static String padding(int n, char pad) {
char[] chars = new char[n];
- for (int i=0; i<n; i++)
+ for (int i = 0; i < n; i++) {
chars[i] = pad;
+ }
return new String(chars);
}
private static char parse_fillchar(String function, String fillchar) {
- if (fillchar == null) { return ' '; }
+ if (fillchar == null) {
+ return ' ';
+ }
if (fillchar.length() != 1) {
throw Py.TypeError(function + "() argument 2 must be char, not str");
}
return fillchar.charAt(0);
}
-
+
public String ljust(int width) {
return str_ljust(width, null);
}
@@ -2393,44 +2801,48 @@
public String ljust(int width, String padding) {
return str_ljust(width, padding);
}
-
- @ExposedMethod(defaults="null", doc = BuiltinDocs.str_ljust_doc)
+
+ @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_ljust_doc)
final String str_ljust(int width, String fillchar) {
char pad = parse_fillchar("ljust", fillchar);
- int n = width-getString().length();
- if (n <= 0)
+ int n = width - getString().length();
+ if (n <= 0) {
return getString();
- return getString()+padding(n, pad);
+ }
+ return getString() + padding(n, pad);
}
public String rjust(int width) {
return str_rjust(width, null);
}
- @ExposedMethod(defaults="null", doc = BuiltinDocs.str_rjust_doc)
+ @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_rjust_doc)
final String str_rjust(int width, String fillchar) {
char pad = parse_fillchar("rjust", fillchar);
- int n = width-getString().length();
- if (n <= 0)
+ int n = width - getString().length();
+ if (n <= 0) {
return getString();
- return padding(n, pad)+getString();
+ }
+ return padding(n, pad) + getString();
}
public String center(int width) {
return str_center(width, null);
}
- @ExposedMethod(defaults="null", doc = BuiltinDocs.str_center_doc)
+ @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_center_doc)
final String str_center(int width, String fillchar) {
char pad = parse_fillchar("center", fillchar);
- int n = width-getString().length();
- if (n <= 0)
+ int n = width - getString().length();
+ if (n <= 0) {
return getString();
- int half = n/2;
- if (n%2 > 0 && width%2 > 0)
+ }
+ int half = n / 2;
+ if (n % 2 > 0 && width % 2 > 0) {
half += 1;
-
- return padding(half, pad)+getString()+padding(n-half, pad);
+ }
+
+ return padding(half, pad) + getString() + padding(n - half, pad);
}
public String zfill(int width) {
@@ -2441,22 +2853,23 @@
final String str_zfill(int width) {
String s = getString();
int n = s.length();
- if (n >= width)
+ if (n >= width) {
return s;
+ }
char[] chars = new char[width];
- int nzeros = width-n;
- int i=0;
- int sStart=0;
+ int nzeros = width - n;
+ int i = 0;
+ int sStart = 0;
if (n > 0) {
char start = s.charAt(0);
if (start == '+' || start == '-') {
chars[0] = start;
i += 1;
nzeros++;
- sStart=1;
+ sStart = 1;
}
}
- for(;i<nzeros; i++) {
+ for (; i < nzeros; i++) {
chars[i] = '0';
}
s.getChars(sStart, s.length(), chars, i);
@@ -2474,15 +2887,15 @@
@ExposedMethod(defaults = "8", doc = BuiltinDocs.str_expandtabs_doc)
final String str_expandtabs(int tabsize) {
String s = getString();
- StringBuilder buf = new StringBuilder((int)(s.length()*1.5));
+ StringBuilder buf = new StringBuilder((int)(s.length() * 1.5));
char[] chars = s.toCharArray();
int n = chars.length;
int position = 0;
- for(int i=0; i<n; i++) {
+ for (int i = 0; i < n; i++) {
char c = chars[i];
if (c == '\t') {
- int spaces = tabsize-position%tabsize;
+ int spaces = tabsize - position % tabsize;
position += spaces;
while (spaces-- > 0) {
buf.append(' ');
@@ -2504,56 +2917,100 @@
@ExposedMethod(doc = BuiltinDocs.str_capitalize_doc)
final String str_capitalize() {
- if (getString().length() == 0)
+ if (getString().length() == 0) {
return getString();
- String first = getString().substring(0,1).toUpperCase();
+ }
+ String first = getString().substring(0, 1).toUpperCase();
return first.concat(getString().substring(1).toLowerCase());
}
- @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_replace_doc)
- final PyString str_replace(PyObject oldPiece, PyObject newPiece, PyObject maxsplit) {
- if(!(oldPiece instanceof PyString) || !(newPiece instanceof PyString)) {
- throw Py.TypeError("str or unicode required for replace");
+ /**
+ * Equivalent to Python str.replace(old, new), returning a copy of the string with all
+ * occurrences of substring old replaced by new. If either argument is a {@link PyUnicode} (or
+ * this object is), the result will be a <code>PyUnicode</code>.
+ *
+ * @param oldPieceObj to replace where found.
+ * @param newPieceObj replacement text.
+ * @param count maximum number of replacements to make, or -1 meaning all of them.
+ * @return PyString (or PyUnicode if any string is one), this string after replacements.
+ */
+ public PyString replace(PyObject oldPieceObj, PyObject newPieceObj) {
+ return str_replace(oldPieceObj, newPieceObj, -1);
+ }
+
+ /**
+ * Equivalent to Python str.replace(old, new[, count]), returning a copy of the string with all
+ * occurrences of substring old replaced by new. If argument <code>count</code> is nonnegative,
+ * only the first <code>count</code> occurrences are replaced. If either argument is a
+ * {@link PyUnicode} (or this object is), the result will be a <code>PyUnicode</code>.
+ *
+ * @param oldPieceObj to replace where found.
+ * @param newPieceObj replacement text.
+ * @param count maximum number of replacements to make, or -1 meaning all of them.
+ * @return PyString (or PyUnicode if any string is one), this string after replacements.
+ */
+ public PyString replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
+ return str_replace(oldPieceObj, newPieceObj, count);
+ }
+
+ @ExposedMethod(defaults = "-1", doc = BuiltinDocs.str_replace_doc)
+ final PyString str_replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
+ if (oldPieceObj instanceof PyUnicode || newPieceObj instanceof PyUnicode) {
+ // Promote the problem to a Unicode one
+ return ((PyUnicode)decode()).unicode_replace(oldPieceObj, newPieceObj, count);
+ } else {
+ // Neither is a PyUnicode: both ought to be some kind of bytes with the buffer API.
+ String oldPiece = asStringOrError(oldPieceObj);
+ String newPiece = asStringOrError(newPieceObj);
+ return _replace(oldPiece, newPiece, count);
}
-
- return replace((PyString)oldPiece, (PyString)newPiece, maxsplit == null ? -1 : maxsplit.asInt());
- }
-
- protected PyString replace(PyString oldPiece, PyString newPiece, int maxsplit) {
-
- // XXX Accept PyObjects that may be BufferProtocol or PyUnicode
-
- int len = getString().length();
- int old_len = oldPiece.getString().length();
+ }
+
+ /**
+ * Helper common to the Python and Java API for <code>str.replace</code>, returning a new string
+ * equal to this string with occurrences of <code>oldPiece</code> replaced by
+ * <code>newPiece</code>, up to a maximum of <code>count</code> occurrences, or all of them.
+ * This method also supports {@link PyUnicode#unicode_replace(PyObject, PyObject, int)}, in
+ * which context it returns a <code>PyUnicode</code>.
+ *
+ * @param oldPiece to replace where found.
+ * @param newPiece replacement text.
+ * @param count maximum number of replacements to make, or -1 meaning all of them.
+ * @return PyString (or PyUnicode if this string is one), this string after replacements.
+ */
+ protected final PyString _replace(String oldPiece, String newPiece, int count) {
+
+ String s = getString();
+ int len = s.length();
+ int oldLen = oldPiece.length();
+ int newLen = newPiece.length();
+
if (len == 0) {
- if (maxsplit == -1 && old_len == 0) {
- return createInstance(newPiece.getString(), true);
+ if (count < 0 && oldLen == 0) {
+ return createInstance(newPiece, true);
}
- return createInstance(getString(), true);
- }
-
- if (old_len == 0 && newPiece.getString().length() != 0 && maxsplit !=0) {
- // old="" and new != "", interleave new piece with each char in original, taking in effect maxsplit
+ return createInstance(s, true);
+
+ } else if (oldLen == 0 && newLen != 0 && count != 0) {
+ /*
+ * old="" and new != "", interleave new piece with each char in original, taking into
+ * account count
+ */
StringBuilder buffer = new StringBuilder();
int i = 0;
- buffer.append(newPiece.getString());
- for (; i < len && (i < maxsplit-1 || maxsplit == -1); i++) {
- buffer.append(getString().charAt(i));
- buffer.append(newPiece.getString());
+ buffer.append(newPiece);
+ for (; i < len && (count < 0 || i < count - 1); i++) {
+ buffer.append(s.charAt(i)).append(newPiece);
}
- buffer.append(getString().substring(i));
+ buffer.append(s.substring(i));
return createInstance(buffer.toString(), true);
+
+ } else {
+ if (count < 0) {
+ count = (oldLen == 0) ? len + 1 : len;
+ }
+ return createInstance(newPiece).join(splitfields(oldPiece, count));
}
-
- if(maxsplit == -1) {
- if(old_len == 0) {
- maxsplit = len + 1;
- } else {
- maxsplit = len;
- }
- }
-
- return newPiece.join(splitfields(oldPiece.getString(), maxsplit));
}
public PyString join(PyObject seq) {
@@ -2587,7 +3044,7 @@
item = seq.pyget(i);
if (!(item instanceof PyString)) {
throw Py.TypeError(String.format("sequence item %d: expected string, %.80s found",
- i, item.getType().fastGetName()));
+ i, item.getType().fastGetName()));
}
if (item instanceof PyUnicode) {
// Defer to Unicode join. CAUTION: There's no gurantee that the original
@@ -2598,7 +3055,7 @@
if (i != 0) {
size += sepLen;
}
- size += ((PyString) item).getString().length();
+ size += ((PyString)item).getString().length();
if (size > Integer.MAX_VALUE) {
throw Py.OverflowError("join() result is too long for a Python string");
}
@@ -2611,7 +3068,7 @@
if (i != 0) {
buf.append(getString());
}
- buf.append(((PyString) item).getString());
+ buf.append(((PyString)item).getString());
}
return new PyString(buf.toString());
}
@@ -2643,7 +3100,7 @@
if (this instanceof PyUnicode) {
sep = getString();
} else {
- sep = ((PyUnicode) decode()).getString();
+ sep = ((PyUnicode)decode()).getString();
// In case decode()'s codec mutated seq
seqLen = seq.__len__();
}
@@ -2659,15 +3116,14 @@
// Convert item to Unicode
if (!(item instanceof PyString)) {
throw Py.TypeError(String.format("sequence item %d: expected string or Unicode,"
- + " %.80s found",
- i, item.getType().fastGetName()));
+ + " %.80s found", i, item.getType().fastGetName()));
}
if (!(item instanceof PyUnicode)) {
item = ((PyString)item).decode();
// In case decode()'s codec mutated seq
seqLen = seq.__len__();
}
- itemString = ((PyUnicode) item).getString();
+ itemString = ((PyUnicode)item).getString();
if (i != 0) {
size += sepLen;
@@ -2682,215 +3138,282 @@
return new PyUnicode(buf.toString());
}
+ /**
+ * Equivalent to the Python <code>str.startswith</code> method testing whether a string starts
+ * with a specified prefix. <code>prefix</code> can also be a tuple of prefixes to look for.
+ *
+ * @param prefix string to check for (or a <code>PyTuple</code> of them).
+ * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+ * <code>false</code>.
+ */
public boolean startswith(PyObject prefix) {
return str_startswith(prefix, null, null);
}
+ /**
+ * Equivalent to the Python <code>str.startswith</code> method, testing whether a string starts
+ * with a specified prefix, where a sub-range is specified by <code>[start:]</code>.
+ * <code>start</code> is interpreted as in slice notation, with null or {@link Py#None}
+ * representing "missing". <code>prefix</code> can also be a tuple of prefixes to look for.
+ *
+ * @param prefix string to check for (or a <code>PyTuple</code> of them).
+ * @param start start of slice.
+ * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+ * <code>false</code>.
+ */
public boolean startswith(PyObject prefix, PyObject offset) {
return str_startswith(prefix, offset, null);
}
+ /**
+ * Equivalent to the Python <code>str.startswith</code> method, testing whether a string starts
+ * with a specified prefix, where a sub-range is specified by <code>[start:end]</code>.
+ * Arguments <code>start</code> and <code>end</code> are interpreted as in slice notation, with
+ * null or {@link Py#None} representing "missing". <code>prefix</code> can also be a tuple of
+ * prefixes to look for.
+ *
+ * @param prefix string to check for (or a <code>PyTuple</code> of them).
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return <code>true</code> if this string slice starts with a specified prefix, otherwise
+ * <code>false</code>.
+ */
public boolean startswith(PyObject prefix, PyObject start, PyObject end) {
return str_startswith(prefix, start, end);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_startswith_doc)
- final boolean str_startswith(PyObject prefix, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- int[] indices = translateIndices(start, end);
-
- if (prefix instanceof PyString) {
- String strPrefix = ((PyString) prefix).getString();
- if (indices[1] - indices[0] < strPrefix.length())
- return false;
-
- return getString().startsWith(strPrefix, indices[0]);
- } else if (prefix instanceof PyTuple) {
- PyObject[] prefixes = ((PyTuple)prefix).getArray();
-
- for (int i = 0 ; i < prefixes.length ; i++) {
- if (!(prefixes[i] instanceof PyString))
- throw Py.TypeError("expected a character buffer object");
-
- String strPrefix = ((PyString) prefixes[i]).getString();
- if (indices[1] - indices[0] < strPrefix.length())
- continue;
-
- if (getString().startsWith(strPrefix, indices[0]))
+ final boolean str_startswith(PyObject prefix, PyObject startObj, PyObject endObj) {
+ int[] indices = translateIndices(startObj, endObj);
+ int start = indices[0];
+ int sliceLen = indices[1] - start;
+
+ if (!(prefix instanceof PyTuple)) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(prefix);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ return sliceLen >= s.length() && getString().startsWith(s, start);
+
+ } else {
+ // Loop will return true if this slice starts with any prefix in the tuple
+ for (PyObject prefixObj : ((PyTuple)prefix).getArray()) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(prefixObj);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ if (sliceLen >= s.length() && getString().startsWith(s, start)) {
return true;
+ }
}
+ // None matched
return false;
- } else {
- throw Py.TypeError("expected a character buffer object or tuple");
}
}
+ /**
+ * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+ * a specified suffix. <code>suffix</code> can also be a tuple of suffixes to look for.
+ *
+ * @param suffix string to check for (or a <code>PyTuple</code> of them).
+ * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+ * <code>false</code>.
+ */
public boolean endswith(PyObject suffix) {
return str_endswith(suffix, null, null);
}
+ /**
+ * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+ * a specified suffix, where a sub-range is specified by <code>[start:]</code>.
+ * <code>start</code> is interpreted as in slice notation, with null or {@link Py#None}
+ * representing "missing". <code>suffix</code> can also be a tuple of suffixes to look for.
+ *
+ * @param suffix string to check for (or a <code>PyTuple</code> of them).
+ * @param start start of slice.
+ * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+ * <code>false</code>.
+ */
public boolean endswith(PyObject suffix, PyObject start) {
return str_endswith(suffix, start, null);
}
+ /**
+ * Equivalent to the Python <code>str.endswith</code> method, testing whether a string ends with
+ * a specified suffix, where a sub-range is specified by <code>[start:end]</code>. Arguments
+ * <code>start</code> and <code>end</code> are interpreted as in slice notation, with null or
+ * {@link Py#None} representing "missing". <code>suffix</code> can also be a tuple of suffixes
+ * to look for.
+ *
+ * @param suffix string to check for (or a <code>PyTuple</code> of them).
+ * @param start start of slice.
+ * @param end end of slice.
+ * @return <code>true</code> if this string slice ends with a specified suffix, otherwise
+ * <code>false</code>.
+ */
public boolean endswith(PyObject suffix, PyObject start, PyObject end) {
return str_endswith(suffix, start, end);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_endswith_doc)
- final boolean str_endswith(PyObject suffix, PyObject start, PyObject end) {
-
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- int[] indices = translateIndices(start, end);
-
+ final boolean str_endswith(PyObject suffix, PyObject startObj, PyObject endObj) {
+
+ int[] indices = translateIndices(startObj, endObj);
String substr = getString().substring(indices[0], indices[1]);
- if (suffix instanceof PyString) {
- return substr.endsWith(((PyString) suffix).getString());
- } else if (suffix instanceof PyTuple) {
- PyObject[] suffixes = ((PyTuple)suffix).getArray();
-
- for (int i = 0 ; i < suffixes.length ; i++) {
- if (!(suffixes[i] instanceof PyString))
- throw Py.TypeError("expected a character buffer object");
-
- if (substr.endsWith(((PyString) suffixes[i]).getString()))
+
+ if (!(suffix instanceof PyTuple)) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(suffix);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ return substr.endsWith(s);
+
+ } else {
+ // Loop will return true if this slice ends with any suffix in the tuple
+ for (PyObject suffixObj : ((PyTuple)suffix).getArray()) {
+ // It ought to be PyUnicode or some kind of bytes with the buffer API.
+ String s = asBMPStringOrError(suffixObj);
+ // If s is non-BMP, and this is a PyString (bytes), result will correctly be false.
+ if (substr.endsWith(s)) {
return true;
+ }
}
+ // None matched
return false;
- } else {
- throw Py.TypeError("expected a character buffer object or tuple");
}
- }
+ }
/**
- * Turns the possibly negative Python slice start and end into valid indices
- * into this string.
+ * Turns the possibly negative Python slice start and end into valid indices into this string.
*
- * @return a 3 element array of indices into this string describing a
- * substring from [0] to [1]. [0] <= [1], [0] >= 0 and [1] <=
- * string.length(). The third element contains the unadjusted
- * start value.
+ * @return a 3 element array of indices into this string describing a substring from [0] to [1].
+ * [0] <= [1], [0] >= 0 and [1] <= string.length(). The third element contains the
+ * unadjusted start value (or nearest int).
*/
protected int[] translateIndices(PyObject start, PyObject end) {
- int iStart;
- int iStartAdjusted;
- int iEnd;
-
- if(end == null || end == Py.None) {
- iEnd = getString().length();
+ int iStart, iStartUnadjusted, iEnd;
+ int n = getString().length();
+
+ // Make sure the slice end decodes to something in range
+ if (end == null || end == Py.None) {
+ iEnd = n;
} else {
- iEnd = end.asInt();
- }
- int n = getString().length();
- if(iEnd < 0) {
- iEnd = n + iEnd;
- if(iEnd < 0) {
- iEnd = 0;
- }
- } else if(iEnd > n) {
- iEnd = n;
- }
- if(start == null || start == Py.None) {
- iStart = 0;
- } else {
- iStart = start.asInt();
- }
-
- iStartAdjusted = iStart;
- if(iStartAdjusted < 0) {
- iStartAdjusted = n + iStartAdjusted;
- if(iStartAdjusted < 0) {
- iStartAdjusted = 0;
+ // Convert to int but limit to Integer.MIN_VALUE <= iEnd <= Integer.MAX_VALUE
+ iEnd = end.asIndex(null);
+ if (iEnd > n) {
+ iEnd = n;
+ } else if (iEnd < 0) {
+ iEnd = n + iEnd;
+ if (iEnd < 0) {
+ iEnd = 0;
+ }
}
}
- if(iStartAdjusted > iEnd) {
- iStartAdjusted = iEnd;
+
+ // Make sure the slice start decodes to something in range
+ if (start == null || start == Py.None) {
+ iStartUnadjusted = iStart = 0;
+ } else {
+ // Convert to int but limit to Integer.MIN_VALUE <= iStart <= Integer.MAX_VALUE
+ iStartUnadjusted = iStart = start.asIndex(null);
+ if (iStart > iEnd) {
+ iStart = iEnd;
+ } else if (iStart < 0) {
+ iStart = n + iStart;
+ if (iStart > iEnd) {
+ iStart = iEnd;
+ } else if (iStart < 0) {
+ iStart = 0;
+ }
+ }
}
- return new int[] {iStartAdjusted, iEnd, iStart};
- }
-
- public String translate() {
- return str_translate(null, null);
- }
-
+
+ return new int[] {iStart, iEnd, iStartUnadjusted};
+ }
+
+ /**
+ * Equivalent to Python <code>str.translate</code> returning a copy of this string where the
+ * characters have been mapped through the translation <code>table</code>. <code>table</code>
+ * must be equivalent to a string of length 256 (if it is not <code>null</code>).
+ *
+ * @param table of character (byte) translations (or <code>null</code>)
+ * @return transformed byte string
+ */
+ public String translate(PyObject table) {
+ return translate(table, null);
+ }
+
+ /**
+ * Equivalent to Python <code>str.translate</code> returning a copy of this string where all
+ * characters (bytes) occurring in the argument <code>deletechars</code> are removed (if it is
+ * not <code>null</code>), and the remaining characters have been mapped through the translation
+ * <code>table</code>. <code>table</code> must be equivalent to a string of length 256 (if it is
+ * not <code>null</code>).
+ *
+ * @param table of character (byte) translations (or <code>null</code>)
+ * @param deletechars set of characters to remove (or <code>null</code>)
+ * @return transformed byte string
+ */
+ public String translate(PyObject table, PyObject deletechars) {
+ return str_translate(table, deletechars);
+ }
+
+ /**
+ * Equivalent to {@link #translate(PyObject)} specialized to <code>String</code>.
+ */
public String translate(String table) {
- return str_translate(table, null);
- }
-
+ return _translate(table, null);
+ }
+
+ /**
+ * Equivalent to {@link #translate(PyObject, PyObject)} specialized to <code>String</code>.
+ */
public String translate(String table, String deletechars) {
- return str_translate(table, deletechars);
+ return _translate(table, deletechars);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_translate_doc)
- final String str_translate(String table, String deletechars) {
-
- // XXX Accept PyObjects that may be BufferProtocol
-
- if (table != null && table.length() != 256)
- throw Py.ValueError(
- "translation table must be 256 characters long");
+ final String str_translate(PyObject tableObj, PyObject deletecharsObj) {
+ // Accept anything with the buffer API or null
+ String table = asStringNullOrError(tableObj, null);
+ String deletechars = asStringNullOrError(deletecharsObj, null);
+ return _translate(table, deletechars);
+ }
+
+ /**
+ * Helper common to the Python and Java API implementing <code>str.translate</code> returning a
+ * copy of this string where all characters (bytes) occurring in the argument
+ * <code>deletechars</code> are removed (if it is not <code>null</code>), and the remaining
+ * characters have been mapped through the translation <code>table</code>, which must be
+ * equivalent to a string of length 256 (if it is not <code>null</code>).
+ *
+ * @param table of character (byte) translations (or <code>null</code>)
+ * @param deletechars set of characters to remove (or <code>null</code>)
+ * @return transformed byte string
+ */
+ private final String _translate(String table, String deletechars) {
+
+ if (table != null && table.length() != 256) {
+ throw Py.ValueError("translation table must be 256 characters long");
+ }
StringBuilder buf = new StringBuilder(getString().length());
- for (int i=0; i < getString().length(); i++) {
+
+ for (int i = 0; i < getString().length(); i++) {
char c = getString().charAt(i);
- if (deletechars != null && deletechars.indexOf(c) >= 0)
+ if (deletechars != null && deletechars.indexOf(c) >= 0) {
continue;
- if(table == null) {
+ }
+ if (table == null) {
buf.append(c);
} else {
try {
buf.append(table.charAt(c));
- }
- catch (IndexOutOfBoundsException e) {
- throw Py.TypeError(
- "translate() only works for 8-bit character strings");
+ } catch (IndexOutOfBoundsException e) {
+ throw Py.TypeError("translate() only works for 8-bit character strings");
}
}
}
return buf.toString();
}
- //XXX: is this needed?
- public String translate(PyObject table) {
- StringBuilder v = new StringBuilder(getString().length());
- for (int i=0; i < getString().length(); i++) {
- char ch = getString().charAt(i);
-
- PyObject w = Py.newInteger(ch);
- PyObject x = table.__finditem__(w);
- if (x == null) {
- /* No mapping found: default to 1-1 mapping */
- v.append(ch);
- continue;
- }
-
- /* Apply mapping */
- if (x instanceof PyInteger) {
- int value = ((PyInteger) x).getValue();
- v.append((char) value);
- } else if (x == Py.None) {
- ;
- } else if (x instanceof PyString) {
- if (x.__len__() != 1) {
- /* 1-n mapping */
- throw new PyException(Py.NotImplementedError,
- "1-n mappings are currently not implemented");
- }
- v.append(x.toString());
- }
- else {
- /* wrong return value */
- throw Py.TypeError(
- "character mapping must return integer, " +
- "None or unicode");
- }
- }
- return v.toString();
- }
-
public boolean islower() {
return str_islower();
}
@@ -2900,17 +3423,19 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
+ if (n == 1) {
return Character.isLowerCase(getString().charAt(0));
+ }
boolean cased = false;
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (Character.isUpperCase(ch) || Character.isTitleCase(ch))
+ if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
return false;
- else if (!cased && Character.isLowerCase(ch))
+ } else if (!cased && Character.isLowerCase(ch)) {
cased = true;
+ }
}
return cased;
}
@@ -2924,17 +3449,19 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
+ if (n == 1) {
return Character.isUpperCase(getString().charAt(0));
+ }
boolean cased = false;
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (Character.isLowerCase(ch) || Character.isTitleCase(ch))
+ if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) {
return false;
- else if (!cased && Character.isUpperCase(ch))
+ } else if (!cased && Character.isUpperCase(ch)) {
cased = true;
+ }
}
return cased;
}
@@ -2948,17 +3475,20 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
+ if (n == 1) {
return Character.isLetter(getString().charAt(0));
-
- if (n == 0)
+ }
+
+ if (n == 0) {
return false;
+ }
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (!Character.isLetter(ch))
+ if (!Character.isLetter(ch)) {
return false;
+ }
}
return true;
}
@@ -2972,17 +3502,20 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
+ if (n == 1) {
return _isalnum(getString().charAt(0));
-
- if (n == 0)
+ }
+
+ if (n == 0) {
return false;
+ }
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (!_isalnum(ch))
+ if (!_isalnum(ch)) {
return false;
+ }
}
return true;
}
@@ -2992,8 +3525,7 @@
// The type is not used, the numeric property is determined from
// the presense of digit, decimal or numeric fields. These fields
// are not available in exactly the same way in java.
- return Character.isLetterOrDigit(ch) ||
- Character.getType(ch) == Character.LETTER_NUMBER;
+ return Character.isLetterOrDigit(ch) || Character.getType(ch) == Character.LETTER_NUMBER;
}
public boolean isdecimal() {
@@ -3010,14 +3542,16 @@
return _isdecimal(ch);
}
- if (n == 0)
+ if (n == 0) {
return false;
+ }
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (!_isdecimal(ch))
+ if (!_isdecimal(ch)) {
return false;
+ }
}
return true;
}
@@ -3036,17 +3570,20 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
+ if (n == 1) {
return Character.isDigit(getString().charAt(0));
-
- if (n == 0)
+ }
+
+ if (n == 0) {
return false;
+ }
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (!Character.isDigit(ch))
+ if (!Character.isDigit(ch)) {
return false;
+ }
}
return true;
}
@@ -3060,25 +3597,27 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
+ if (n == 1) {
return _isnumeric(getString().charAt(0));
-
- if (n == 0)
+ }
+
+ if (n == 0) {
return false;
+ }
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (!_isnumeric(ch))
+ if (!_isnumeric(ch)) {
return false;
+ }
}
return true;
}
private boolean _isnumeric(char ch) {
int type = Character.getType(ch);
- return type == Character.DECIMAL_DIGIT_NUMBER ||
- type == Character.LETTER_NUMBER ||
- type == Character.OTHER_NUMBER;
+ return type == Character.DECIMAL_DIGIT_NUMBER || type == Character.LETTER_NUMBER
+ || type == Character.OTHER_NUMBER;
}
public boolean istitle() {
@@ -3090,9 +3629,10 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
- return Character.isTitleCase(getString().charAt(0)) ||
- Character.isUpperCase(getString().charAt(0));
+ if (n == 1) {
+ return Character.isTitleCase(getString().charAt(0))
+ || Character.isUpperCase(getString().charAt(0));
+ }
boolean cased = false;
boolean previous_is_cased = false;
@@ -3100,19 +3640,20 @@
char ch = getString().charAt(i);
if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
- if (previous_is_cased)
+ if (previous_is_cased) {
return false;
+ }
previous_is_cased = true;
cased = true;
- }
- else if (Character.isLowerCase(ch)) {
- if (!previous_is_cased)
+ } else if (Character.isLowerCase(ch)) {
+ if (!previous_is_cased) {
return false;
+ }
previous_is_cased = true;
cased = true;
+ } else {
+ previous_is_cased = false;
}
- else
- previous_is_cased = false;
}
return cased;
}
@@ -3126,17 +3667,20 @@
int n = getString().length();
/* Shortcut for single character strings */
- if (n == 1)
+ if (n == 1) {
return Character.isWhitespace(getString().charAt(0));
-
- if (n == 0)
+ }
+
+ if (n == 0) {
return false;
+ }
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (!Character.isWhitespace(ch))
+ if (!Character.isWhitespace(ch)) {
return false;
+ }
}
return true;
}
@@ -3151,8 +3695,9 @@
int n = getString().length();
for (int i = 0; i < n; i++) {
char ch = getString().charAt(i);
- if (ch > 255)
+ if (ch > 255) {
return true;
+ }
}
return false;
}
@@ -3176,7 +3721,7 @@
String errors = ap.getString(1, null);
return encode(encoding, errors);
}
-
+
public PyObject decode() {
return decode(null, null);
}
@@ -3206,9 +3751,9 @@
final PyObject str__formatter_field_name_split() {
FieldNameIterator iterator = new FieldNameIterator(getString());
Object headObj = iterator.head();
- PyObject head = headObj instanceof Integer
- ? new PyInteger((Integer) headObj)
- : new PyString((String) headObj);
+ PyObject head =
+ headObj instanceof Integer ? new PyInteger((Integer)headObj) : new PyString(
+ (String)headObj);
return new PyTuple(head, iterator);
}
@@ -3221,7 +3766,8 @@
}
}
- protected String buildFormattedString(String value, PyObject[] args, String[] keywords, MarkupIterator enclosingIterator) {
+ protected String buildFormattedString(String value, PyObject[] args, String[] keywords,
+ MarkupIterator enclosingIterator) {
StringBuilder result = new StringBuilder();
MarkupIterator it = new MarkupIterator(value, enclosingIterator);
while (true) {
@@ -3244,8 +3790,10 @@
}
String formatSpec = chunk.formatSpec;
if (chunk.formatSpecNeedsExpanding) {
- if (enclosingIterator != null) // PEP 3101 says only 2 levels
+ if (enclosingIterator != null) {
+ // PEP 3101 says only 2 levels
throw Py.ValueError("Max string recursion exceeded");
+ }
formatSpec = buildFormattedString(formatSpec, args, keywords, it);
}
renderField(fieldObj, formatSpec, result);
@@ -3261,7 +3809,7 @@
int positionalCount = args.length - keywords.length;
if (head instanceof Integer) {
- int index = (Integer) head;
+ int index = (Integer)head;
if (index >= positionalCount) {
throw Py.IndexError("tuple index out of range");
}
@@ -3274,7 +3822,7 @@
}
}
if (obj == null) {
- throw Py.KeyError((String) head);
+ throw Py.KeyError((String)head);
}
}
if (obj != null) {
@@ -3284,11 +3832,11 @@
break;
}
if (chunk.is_attr) {
- obj = obj.__getattr__((String) chunk.value);
+ obj = obj.__getattr__((String)chunk.value);
} else {
- PyObject key = chunk.value instanceof String
- ? new PyString((String) chunk.value)
- : new PyInteger((Integer) chunk.value);
+ PyObject key =
+ chunk.value instanceof String ? new PyString((String)chunk.value)
+ : new PyInteger((Integer)chunk.value);
obj = obj.__getitem__(key);
}
if (obj == null) {
@@ -3315,7 +3863,7 @@
throw Py.TypeError("__format__ requires str or unicode");
}
- PyString formatSpecStr = (PyString) formatSpec;
+ PyString formatSpecStr = (PyString)formatSpec;
String result;
try {
String specString = formatSpecStr.getString();
@@ -3335,12 +3883,17 @@
* @return the result of the formatting
*/
public static String formatString(String text, InternalFormatSpec spec) {
- if (spec.sign != '\0')
+ if (spec.sign != '\0') {
throw new IllegalArgumentException("Sign not allowed in string format specifier");
- if (spec.alternate)
- throw new IllegalArgumentException("Alternate form (#) not allowed in string format specifier");
- if (spec.align == '=')
- throw new IllegalArgumentException("'=' alignment not allowed in string format specifier");
+ }
+ if (spec.alternate) {
+ throw new IllegalArgumentException(
+ "Alternate form (#) not allowed in string format specifier");
+ }
+ if (spec.align == '=') {
+ throw new IllegalArgumentException(
+ "'=' alignment not allowed in string format specifier");
+ }
if (spec.precision >= 0 && text.length() > spec.precision) {
text = text.substring(0, spec.precision);
}
@@ -3400,8 +3953,9 @@
}
}
-final class StringFormatter
-{
+
+final class StringFormatter {
+
int index;
String format;
StringBuilder buffer;
@@ -3435,30 +3989,31 @@
index = 0;
this.format = format;
this.unicodeCoercion = unicodeCoercion;
- buffer = new StringBuilder(format.length()+100);
+ buffer = new StringBuilder(format.length() + 100);
}
PyObject getarg() {
PyObject ret = null;
- switch(argIndex) {
- // special index indicating a mapping
- case -3:
- return args;
- // special index indicating a single item that has already been
- // used
- case -2:
- break;
+ switch (argIndex) {
+ // special index indicating a mapping
+ case -3:
+ return args;
+ // special index indicating a single item that has already been
+ // used
+ case -2:
+ break;
// special index indicating a single item that has not yet been
// used
- case -1:
- argIndex=-2;
- return args;
- default:
- ret = args.__finditem__(argIndex++);
- break;
+ case -1:
+ argIndex = -2;
+ return args;
+ default:
+ ret = args.__finditem__(argIndex++);
+ break;
}
- if (ret == null)
+ if (ret == null) {
throw Py.TypeError("not enough arguments for format string");
+ }
return ret;
}
@@ -3466,17 +4021,16 @@
char c = pop();
if (c == '*') {
PyObject o = getarg();
- if (o instanceof PyInteger)
+ if (o instanceof PyInteger) {
return ((PyInteger)o).getValue();
+ }
throw Py.TypeError("* wants int");
} else {
if (Character.isDigit(c)) {
- int numStart = index-1;
- while (Character.isDigit(c = pop()))
- ;
+ int numStart = index - 1;
+ while (Character.isDigit(c = pop())) {}
index -= 1;
- Integer i = Integer.valueOf(
- format.substring(numStart, index));
+ Integer i = Integer.valueOf(format.substring(numStart, index));
return i.intValue();
}
index -= 1;
@@ -3485,11 +4039,11 @@
}
private void checkPrecision(String type) {
- if(precision > 250) {
+ if (precision > 250) {
// A magic number. Larger than in CPython.
throw Py.OverflowError("formatted " + type + " is too long (precision too long?)");
}
-
+
}
private String formatLong(PyObject arg, char type, boolean altFlag) {
@@ -3512,11 +4066,13 @@
int ptr = 0;
int numnondigits = 0;
- if (type == 'x' || type == 'X')
+ if (type == 'x' || type == 'X') {
numnondigits = 2;
-
- if (s.endsWith("L"))
+ }
+
+ if (s.endsWith("L")) {
end--;
+ }
negative = s.charAt(0) == '-';
if (negative) {
@@ -3526,35 +4082,39 @@
int numdigits = end - numnondigits - ptr;
if (!altFlag) {
switch (type) {
- case 'o' :
- if (numdigits > 1) {
- ++ptr;
- --numdigits;
- }
- break;
- case 'x' :
- case 'X' :
- ptr += 2;
- numnondigits -= 2;
- break;
+ case 'o':
+ if (numdigits > 1) {
+ ++ptr;
+ --numdigits;
+ }
+ break;
+ case 'x':
+ case 'X':
+ ptr += 2;
+ numnondigits -= 2;
+ break;
}
}
if (precision > numdigits) {
StringBuilder buf = new StringBuilder();
- for (int i = 0; i < numnondigits; ++i)
+ for (int i = 0; i < numnondigits; ++i) {
buf.append(s.charAt(ptr++));
- for (int i = 0; i < precision - numdigits; i++)
+ }
+ for (int i = 0; i < precision - numdigits; i++) {
buf.append('0');
- for (int i = 0; i < numdigits; i++)
+ }
+ for (int i = 0; i < numdigits; i++) {
buf.append(s.charAt(ptr++));
+ }
s = buf.toString();
- } else if (end < s.length() || ptr > 0)
+ } else if (end < s.length() || ptr > 0) {
s = s.substring(ptr, end);
+ }
switch (type) {
- case 'X' :
- s = s.toUpperCase();
- break;
+ case 'X':
+ s = s.toUpperCase();
+ break;
}
return s;
}
@@ -3562,10 +4122,11 @@
/**
* Formats arg as an integer, with the specified radix
*
- * type and altFlag are needed to be passed to {@link #formatLong(PyObject, char, boolean)}
- * in case the result of <code>arg.__int__()</code> is a PyLong.
+ * type and altFlag are needed to be passed to {@link #formatLong(PyObject, char, boolean)} in
+ * case the result of <code>arg.__int__()</code> is a PyLong.
*/
- private String formatInteger(PyObject arg, int radix, boolean unsigned, char type, boolean altFlag) {
+ private String formatInteger(PyObject arg, int radix, boolean unsigned, char type,
+ boolean altFlag) {
PyObject argAsInt;
if (arg instanceof PyInteger || arg instanceof PyLong) {
argAsInt = arg;
@@ -3581,17 +4142,17 @@
// __int__ attribute). So, we would support strings as arguments
// for %d format, which is forbidden by CPython tests (on
// test_format.py).
- try {
+ try {
argAsInt = arg.__getattr__("__int__").__call__();
} catch (PyException e) {
// XXX: Swallow customs AttributeError throws from __float__ methods
// No better alternative for the moment
if (e.match(Py.AttributeError)) {
- throw Py.TypeError("int argument required");
- }
+ throw Py.TypeError("int argument required");
+ }
throw e;
- }
- }
+ }
+ }
}
if (argAsInt instanceof PyInteger) {
return formatInteger(((PyInteger)argAsInt).getValue(), radix, unsigned);
@@ -3603,8 +4164,9 @@
private String formatInteger(long v, int radix, boolean unsigned) {
checkPrecision("integer");
if (unsigned) {
- if (v < 0)
+ if (v < 0) {
v = 0x100000000l + v;
+ }
} else {
if (v < 0) {
negative = true;
@@ -3613,7 +4175,7 @@
}
String s = Long.toString(v, radix);
while (s.length() < precision) {
- s = "0"+s;
+ s = "0" + s;
}
return s;
}
@@ -3627,9 +4189,11 @@
}
static class DecimalFormatTemplate {
+
static DecimalFormat template;
static {
- template = new DecimalFormat("#,##0.#####", new DecimalFormatSymbols(java.util.Locale.US));
+ template =
+ new DecimalFormat("#,##0.#####", new DecimalFormatSymbols(java.util.Locale.US));
DecimalFormatSymbols symbols = template.getDecimalFormatSymbols();
symbols.setNaN("nan");
symbols.setInfinity("inf");
@@ -3637,6 +4201,7 @@
template.setGroupingUsed(false);
}
}
+
private static final DecimalFormat getDecimalFormat() {
return (DecimalFormat)DecimalFormatTemplate.template.clone();
}
@@ -3644,8 +4209,9 @@
private String formatFloatDecimal(double v, boolean truncate) {
checkPrecision("decimal");
int prec = precision;
- if (prec == -1)
+ if (prec == -1) {
prec = 6;
+ }
if (v < 0) {
v = -v;
negative = true;
@@ -3659,9 +4225,7 @@
return ret;
}
- private String formatFloatExponential(PyObject arg, char e,
- boolean truncate)
- {
+ private String formatFloatExponential(PyObject arg, char e, boolean truncate) {
StringBuilder buf = new StringBuilder();
double v = asDouble(arg);
boolean isNegative = false;
@@ -3670,24 +4234,24 @@
isNegative = true;
}
double power = 0.0;
- if (v > 0)
+ if (v > 0) {
power = ExtraMath.closeFloor(Math.log10(v));
- //System.err.println("formatExp: "+v+", "+power);
+ }
+ // System.err.println("formatExp: "+v+", "+power);
int savePrecision = precision;
precision = 2;
String exp = formatInteger((long)power, 10, false);
if (negative) {
negative = false;
- exp = '-'+exp;
- }
- else {
+ exp = '-' + exp;
+ } else {
exp = '+' + exp;
}
precision = savePrecision;
- double base = v/Math.pow(10, power);
+ double base = v / Math.pow(10, power);
buf.append(formatFloatDecimal(base, truncate));
buf.append(e);
@@ -3707,22 +4271,19 @@
} else {
// special index indicating a single item rather than a tuple
argIndex = -1;
- if (args instanceof PyDictionary ||
- args instanceof PyStringMap ||
- (!(args instanceof PySequence) &&
- args.__findattr__("__getitem__") != null))
- {
+ if (args instanceof PyDictionary || args instanceof PyStringMap
+ || (!(args instanceof PySequence) && args.__findattr__("__getitem__") != null)) {
dict = args;
argIndex = -3;
}
}
while (index < format.length()) {
- boolean ljustFlag=false;
- boolean signFlag=false;
- boolean blankFlag=false;
- boolean altFlag=false;
- boolean zeroFlag=false;
+ boolean ljustFlag = false;
+ boolean signFlag = false;
+ boolean blankFlag = false;
+ boolean altFlag = false;
+ boolean zeroFlag = false;
int width = -1;
precision = -1;
@@ -3734,29 +4295,41 @@
}
c = pop();
if (c == '(') {
- if (dict == null)
+ if (dict == null) {
throw Py.TypeError("format requires a mapping");
+ }
int parens = 1;
int keyStart = index;
while (parens > 0) {
c = pop();
- if (c == ')')
+ if (c == ')') {
parens--;
- else if (c == '(')
+ } else if (c == '(') {
parens++;
+ }
}
- String tmp = format.substring(keyStart, index-1);
+ String tmp = format.substring(keyStart, index - 1);
this.args = dict.__getitem__(needUnicode ? new PyUnicode(tmp) : new PyString(tmp));
} else {
push();
}
while (true) {
switch (c = pop()) {
- case '-': ljustFlag=true; continue;
- case '+': signFlag=true; continue;
- case ' ': blankFlag=true; continue;
- case '#': altFlag=true; continue;
- case '0': zeroFlag=true; continue;
+ case '-':
+ ljustFlag = true;
+ continue;
+ case '+':
+ signFlag = true;
+ continue;
+ case ' ':
+ blankFlag = true;
+ continue;
+ case '#':
+ altFlag = true;
+ continue;
+ case '0':
+ zeroFlag = true;
+ continue;
}
break;
}
@@ -3769,8 +4342,9 @@
c = pop();
if (c == '.') {
precision = getNumber();
- if (precision < -1)
+ if (precision < -1) {
precision = 0;
+ }
c = pop();
}
@@ -3783,174 +4357,181 @@
}
PyObject arg = getarg();
char fill = ' ';
- String string=null;
+ String string = null;
negative = false;
- if (zeroFlag)
+ if (zeroFlag) {
fill = '0';
- else
+ } else {
fill = ' ';
- switch(c) {
- case 's':
- if (arg instanceof PyUnicode) {
- needUnicode = true;
- }
- case 'r':
- fill = ' ';
- if (c == 's')
- if (needUnicode)
- string = arg.__unicode__().toString();
- else
- string = arg.__str__().toString();
- else
- string = arg.__repr__().toString();
- if (precision >= 0 && string.length() > precision) {
- string = string.substring(0, precision);
- }
-
- break;
- case 'i':
- case 'd':
- if (arg instanceof PyLong)
- string = formatLong(arg, c, altFlag);
- else
- string = formatInteger(arg, 10, false, c, altFlag);
- break;
- case 'u':
- if (arg instanceof PyLong)
- string = formatLong(arg, c, altFlag);
- else if (arg instanceof PyInteger || arg instanceof PyFloat)
- string = formatInteger(arg, 10, false, c, altFlag);
- else throw Py.TypeError("int argument required");
- break;
- case 'o':
- if (arg instanceof PyLong)
- string = formatLong(arg, c, altFlag);
- else if (arg instanceof PyInteger || arg instanceof PyFloat) {
- string = formatInteger(arg, 8, false, c, altFlag);
- if (altFlag && string.charAt(0) != '0') {
- string = "0" + string;
- }
- }
- else throw Py.TypeError("int argument required");
- break;
- case 'x':
- if (arg instanceof PyLong)
- string = formatLong(arg, c, altFlag);
- else if (arg instanceof PyInteger || arg instanceof PyFloat) {
- string = formatInteger(arg, 16, false, c, altFlag);
- string = string.toLowerCase();
- if (altFlag) {
- string = "0x" + string;
- }
- }
- else throw Py.TypeError("int argument required");
- break;
- case 'X':
- if (arg instanceof PyLong)
- string = formatLong(arg, c, altFlag);
- else if (arg instanceof PyInteger || arg instanceof PyFloat) {
- string = formatInteger(arg, 16, false, c, altFlag);
- string = string.toUpperCase();
- if (altFlag) {
- string = "0X" + string;
- }
- }
- else throw Py.TypeError("int argument required");
- break;
- case 'e':
- case 'E':
- string = formatFloatExponential(arg, c, false);
- if (c == 'E') {
- string = string.toUpperCase();
- }
- break;
- case 'f':
- case 'F':
- string = formatFloatDecimal(asDouble(arg), false);
- if (c == 'F') {
- string = string.toUpperCase();
- }
- break;
- case 'g':
- case 'G':
- int origPrecision = precision;
- if (precision == -1) {
- precision = 6;
- }
-
- double v = asDouble(arg);
- int exponent = (int)ExtraMath.closeFloor(Math.log10(Math.abs(v == 0 ? 1 : v)));
- if (v == Double.POSITIVE_INFINITY) {
- string = "inf";
- } else if (v == Double.NEGATIVE_INFINITY) {
- string = "-inf";
- } else if (exponent >= -4 && exponent < precision) {
- precision -= exponent + 1;
- string = formatFloatDecimal(v, !altFlag);
-
- // XXX: this block may be unnecessary now
- if (altFlag && string.indexOf('.') == -1) {
- int zpad = origPrecision - string.length();
- string += '.';
- if (zpad > 0) {
- char zeros[] = new char[zpad];
- for (int ci=0; ci<zpad; zeros[ci++] = '0')
- ;
- string += new String(zeros);
- }
- }
- } else {
- // Exponential precision is the number of digits after the decimal
- // point, whereas 'g' precision is the number of significant digits --
- // and exponential always provides one significant digit before the
- // decimal point
- precision--;
- string = formatFloatExponential(arg, (char)(c-2), !altFlag);
- }
- if (c == 'G') {
- string = string.toUpperCase();
- }
- break;
- case 'c':
- fill = ' ';
- if (arg instanceof PyString) {
- string = ((PyString)arg).toString();
- if (string.length() != 1) {
- throw Py.TypeError("%c requires int or char");
- }
+ }
+ switch (c) {
+ case 's':
if (arg instanceof PyUnicode) {
needUnicode = true;
}
+ case 'r':
+ fill = ' ';
+ if (c == 's') {
+ if (needUnicode) {
+ string = arg.__unicode__().toString();
+ } else {
+ string = arg.__str__().toString();
+ }
+ } else {
+ string = arg.__repr__().toString();
+ }
+ if (precision >= 0 && string.length() > precision) {
+ string = string.substring(0, precision);
+ }
+
break;
- }
- int val;
- try {
- // Explicitly __int__ so we can look for an AttributeError (which is
- // less invasive to mask than a TypeError)
- val = arg.__int__().asInt();
- } catch (PyException e){
- if (e.match(Py.AttributeError)) {
- throw Py.TypeError("%c requires int or char");
+ case 'i':
+ case 'd':
+ if (arg instanceof PyLong) {
+ string = formatLong(arg, c, altFlag);
+ } else {
+ string = formatInteger(arg, 10, false, c, altFlag);
}
- throw e;
- }
- if (!needUnicode) {
- if (val < 0) {
- throw Py.OverflowError("unsigned byte integer is less than minimum");
- } else if (val > 255) {
- throw Py.OverflowError("unsigned byte integer is greater than maximum");
+ break;
+ case 'u':
+ if (arg instanceof PyLong) {
+ string = formatLong(arg, c, altFlag);
+ } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
+ string = formatInteger(arg, 10, false, c, altFlag);
+ } else {
+ throw Py.TypeError("int argument required");
}
- } else if (val < 0 || val > PySystemState.maxunicode) {
- throw Py.OverflowError("%c arg not in range(0x110000) (wide Python build)");
- }
- string = new String(new int[] {val}, 0, 1);
- break;
-
- default:
- throw Py.ValueError("unsupported format character '" +
- codecs.encode(Py.newString(c), null, "replace") +
- "' (0x" + Integer.toHexString(c) + ") at index " +
- (index-1));
+ break;
+ case 'o':
+ if (arg instanceof PyLong) {
+ string = formatLong(arg, c, altFlag);
+ } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
+ string = formatInteger(arg, 8, false, c, altFlag);
+ if (altFlag && string.charAt(0) != '0') {
+ string = "0" + string;
+ }
+ } else {
+ throw Py.TypeError("int argument required");
+ }
+ break;
+ case 'x':
+ if (arg instanceof PyLong) {
+ string = formatLong(arg, c, altFlag);
+ } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
+ string = formatInteger(arg, 16, false, c, altFlag);
+ string = string.toLowerCase();
+ if (altFlag) {
+ string = "0x" + string;
+ }
+ } else {
+ throw Py.TypeError("int argument required");
+ }
+ break;
+ case 'X':
+ if (arg instanceof PyLong) {
+ string = formatLong(arg, c, altFlag);
+ } else if (arg instanceof PyInteger || arg instanceof PyFloat) {
+ string = formatInteger(arg, 16, false, c, altFlag);
+ string = string.toUpperCase();
+ if (altFlag) {
+ string = "0X" + string;
+ }
+ } else {
+ throw Py.TypeError("int argument required");
+ }
+ break;
+ case 'e':
+ case 'E':
+ string = formatFloatExponential(arg, c, false);
+ if (c == 'E') {
+ string = string.toUpperCase();
+ }
+ break;
+ case 'f':
+ case 'F':
+ string = formatFloatDecimal(asDouble(arg), false);
+ if (c == 'F') {
+ string = string.toUpperCase();
+ }
+ break;
+ case 'g':
+ case 'G':
+ int origPrecision = precision;
+ if (precision == -1) {
+ precision = 6;
+ }
+
+ double v = asDouble(arg);
+ int exponent = (int)ExtraMath.closeFloor(Math.log10(Math.abs(v == 0 ? 1 : v)));
+ if (v == Double.POSITIVE_INFINITY) {
+ string = "inf";
+ } else if (v == Double.NEGATIVE_INFINITY) {
+ string = "-inf";
+ } else if (exponent >= -4 && exponent < precision) {
+ precision -= exponent + 1;
+ string = formatFloatDecimal(v, !altFlag);
+
+ // XXX: this block may be unnecessary now
+ if (altFlag && string.indexOf('.') == -1) {
+ int zpad = origPrecision - string.length();
+ string += '.';
+ if (zpad > 0) {
+ char zeros[] = new char[zpad];
+ for (int ci = 0; ci < zpad; zeros[ci++] = '0') {}
+ string += new String(zeros);
+ }
+ }
+ } else {
+ // Exponential precision is the number of digits after the decimal
+ // point, whereas 'g' precision is the number of significant digits --
+ // and exponential always provides one significant digit before the
+ // decimal point
+ precision--;
+ string = formatFloatExponential(arg, (char)(c - 2), !altFlag);
+ }
+ if (c == 'G') {
+ string = string.toUpperCase();
+ }
+ break;
+ case 'c':
+ fill = ' ';
+ if (arg instanceof PyString) {
+ string = ((PyString)arg).toString();
+ if (string.length() != 1) {
+ throw Py.TypeError("%c requires int or char");
+ }
+ if (arg instanceof PyUnicode) {
+ needUnicode = true;
+ }
+ break;
+ }
+ int val;
+ try {
+ // Explicitly __int__ so we can look for an AttributeError (which is
+ // less invasive to mask than a TypeError)
+ val = arg.__int__().asInt();
+ } catch (PyException e) {
+ if (e.match(Py.AttributeError)) {
+ throw Py.TypeError("%c requires int or char");
+ }
+ throw e;
+ }
+ if (!needUnicode) {
+ if (val < 0) {
+ throw Py.OverflowError("unsigned byte integer is less than minimum");
+ } else if (val > 255) {
+ throw Py.OverflowError("unsigned byte integer is greater than maximum");
+ }
+ } else if (val < 0 || val > PySystemState.maxunicode) {
+ throw Py.OverflowError("%c arg not in range(0x110000) (wide Python build)");
+ }
+ string = new String(new int[] {val}, 0, 1);
+ break;
+
+ default:
+ throw Py.ValueError("unsupported format character '"
+ + codecs.encode(Py.newString(c), null, "replace") + "' (0x"
+ + Integer.toHexString(c) + ") at index " + (index - 1));
}
int length = string.length();
int skip = 0;
@@ -3965,13 +4546,16 @@
}
}
- if (width < length)
+ if (width < length) {
width = length;
+ }
if (signString != null) {
- if (fill != ' ')
+ if (fill != ' ') {
buffer.append(signString);
- if (width > length)
+ }
+ if (width > length) {
width--;
+ }
}
if (altFlag && (c == 'x' || c == 'X')) {
if (fill != ' ') {
@@ -3980,8 +4564,9 @@
skip += 2;
}
width -= 2;
- if (width < 0)
+ if (width < 0) {
width = 0;
+ }
length -= 2;
}
if (width > length && !ljustFlag) {
@@ -3990,26 +4575,26 @@
} while (--width > length);
}
if (fill == ' ') {
- if (signString != null)
+ if (signString != null) {
buffer.append(signString);
+ }
if (altFlag && (c == 'x' || c == 'X')) {
buffer.append('0');
buffer.append(c);
skip += 2;
}
}
- if (skip > 0)
+ if (skip > 0) {
buffer.append(string.substring(skip));
- else
+ } else {
buffer.append(string);
+ }
while (--width >= length) {
buffer.append(' ');
}
}
- if (argIndex == -1 ||
- (argIndex >= 0 && args.__finditem__(argIndex) != null))
- {
+ if (argIndex == -1 || (argIndex >= 0 && args.__finditem__(argIndex) != null)) {
throw Py.TypeError("not all arguments converted during string formatting");
}
if (needUnicode) {
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -24,6 +24,7 @@
UNKNOWN, BASIC, ASTRAL
}
+
private volatile Plane plane = Plane.UNKNOWN;
private volatile int codePointCount = -1;
public static final PyType TYPE = PyType.fromClass(PyUnicode.class);
@@ -51,7 +52,8 @@
}
public PyUnicode(PyType subtype, PyString pystring) {
- this(subtype, pystring instanceof PyUnicode ? pystring.string : pystring.decode().toString());
+ this(subtype, pystring instanceof PyUnicode ? pystring.string : pystring.decode()
+ .toString());
}
public PyUnicode(char c) {
@@ -59,7 +61,7 @@
}
public PyUnicode(int codepoint) {
- this(TYPE, new String(new int[]{codepoint}, 0, 1));
+ this(TYPE, new String(new int[] {codepoint}, 0, 1));
}
public PyUnicode(int[] codepoints) {
@@ -109,8 +111,8 @@
}
/**
- * Creates a PyUnicode from an already interned String. Just means it won't
- * be reinterned if used in a place that requires interned Strings.
+ * Creates a PyUnicode from an already interned String. Just means it won't be reinterned if
+ * used in a place that requires interned Strings.
*/
public static PyUnicode fromInterned(String interned) {
PyUnicode uni = new PyUnicode(TYPE, interned);
@@ -129,9 +131,9 @@
// RETAIN THE BELOW CODE, it facilitates testing astral support more completely
-// public boolean isBasicPlane() {
-// return false;
-// }
+// public boolean isBasicPlane() {
+// return false;
+// }
// END RETAIN
@@ -146,14 +148,9 @@
@ExposedNew
final static PyObject unicode_new(PyNewWrapper new_, boolean init, PyType subtype,
PyObject[] args, String[] keywords) {
- ArgParser ap = new ArgParser("unicode",
- args,
- keywords,
- new String[]{"string",
- "encoding",
- "errors"
- },
- 0);
+ ArgParser ap =
+ new ArgParser("unicode", args, keywords, new String[] {"string", "encoding",
+ "errors"}, 0);
PyObject S = ap.getPyObject(0, null);
String encoding = ap.getString(1, null);
String errors = ap.getString(2, null);
@@ -162,18 +159,18 @@
return new PyUnicode("");
}
if (S instanceof PyUnicode) {
- return new PyUnicode(((PyUnicode) S).getString());
+ return new PyUnicode(((PyUnicode)S).getString());
}
if (S instanceof PyString) {
if (S.getType() != PyString.TYPE && encoding == null && errors == null) {
return S.__unicode__();
}
- PyObject decoded = codecs.decode((PyString) S, encoding, errors);
+ PyObject decoded = codecs.decode((PyString)S, encoding, errors);
if (decoded instanceof PyUnicode) {
- return new PyUnicode((PyUnicode) decoded);
+ return new PyUnicode((PyUnicode)decoded);
} else {
- throw Py.TypeError("decoder did not return an unicode object (type=" +
- decoded.getType().fastGetName() + ")");
+ throw Py.TypeError("decoder did not return an unicode object (type="
+ + decoded.getType().fastGetName() + ")");
}
}
return S.__unicode__();
@@ -182,7 +179,7 @@
return new PyUnicodeDerived(subtype, Py.EmptyString);
}
if (S instanceof PyUnicode) {
- return new PyUnicodeDerived(subtype, (PyUnicode) S);
+ return new PyUnicodeDerived(subtype, (PyUnicode)S);
} else {
return new PyUnicodeDerived(subtype, S.__str__());
}
@@ -316,7 +313,7 @@
private class SubsequenceIteratorImpl implements Iterator {
- private int current, k, start, stop, step;
+ private int current, k, start, stop, step;
SubsequenceIteratorImpl(int start, int stop, int step) {
k = 0;
@@ -333,10 +330,12 @@
this(0, getCodePointCount(), 1);
}
+ @Override
public boolean hasNext() {
return current < stop;
}
+ @Override
public Object next() {
int codePoint = nextCodePoint();
current += 1;
@@ -361,8 +360,10 @@
return U;
}
+ @Override
public void remove() {
- throw new UnsupportedOperationException("Not supported on PyUnicode objects (immutable)");
+ throw new UnsupportedOperationException(
+ "Not supported on PyUnicode objects (immutable)");
}
}
@@ -390,10 +391,12 @@
}
}
+ @Override
public boolean hasNext() {
return lookahead != null;
}
+ @Override
public T next() {
T old = lookahead;
if (iter.hasNext()) {
@@ -407,6 +410,7 @@
return old;
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -419,10 +423,8 @@
public Iterator<Integer> newSubsequenceIterator(int start, int stop, int step) {
if (step < 0) {
- return new SteppedIterator(step * -1,
- new ReversedIterator(new SubsequenceIteratorImpl(stop + 1,
- start + 1,
- 1)));
+ return new SteppedIterator(step * -1, new ReversedIterator(new SubsequenceIteratorImpl(
+ stop + 1, start + 1, 1)));
} else {
return new SubsequenceIteratorImpl(start, stop, step);
}
@@ -430,30 +432,47 @@
/**
* Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
+ * may already be). A <code>null</code> or incoercible argument will raise a
+ * <code>TypeError</code>.
+ *
+ * @param o the object to coerce
+ * @return an equivalent <code>PyUnicode</code> (or o itself)
+ */
+ private PyUnicode coerceToUnicode(PyObject o) {
+ if (o instanceof PyUnicode) {
+ return (PyUnicode)o;
+ } else if (o instanceof BufferProtocol) {
+ // Buffer exporter (PyString, PyByteArray, PyMemoryView, Py2kBuffer ...): always released
+ PyBuffer buf = ((BufferProtocol)o).getBuffer(PyBUF.FULL_RO);
+ try {
+ return new PyUnicode(buf.toString());
+ } finally {
+ buf.release();
+ }
+ } else {
+ // o is neither unicode nor a buffer exporter: name its type in a TypeError
+ if (o == null) {
+ // null has no type to name: substitute None so getType() below is safe
+ o = Py.None;
+ }
+ throw Py.TypeError("coercing to Unicode: need string or buffer, "
+ + o.getType().fastGetName() + " found");
+ }
+ }
+
+ /**
+ * Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
* may already be). A <code>null</code> argument or a <code>PyNone</code> causes
* <code>null</code> to be returned.
*
* @param o the object to coerce
* @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
*/
- private PyUnicode coerceToUnicode(PyObject o) {
- if (o == null) {
+ private PyUnicode coerceToUnicodeOrNull(PyObject o) {
+ if (o == null || o == Py.None) {
return null;
- } else if (o instanceof PyUnicode) {
- return (PyUnicode)o;
- } else if (o == Py.None) {
- return null;
- } else if (o instanceof BufferProtocol) {
- // PyString or PyByteArray, PyMemoryView, Py2kBuffer ...
- PyBuffer buf = ((BufferProtocol)o).getBuffer(PyBUF.FULL_RO);
- try {
- return new PyUnicode(buf.toString());
- } finally {
- buf.release();
- }
} else {
- throw Py.TypeError("coercing to Unicode: need string or buffer, "
- + o.getType().fastGetName() + " found");
+ return coerceToUnicode(o);
}
}
@@ -476,7 +495,7 @@
public PyObject __add__(PyObject other) {
return unicode___add__(other);
}
-
+
@ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc)
final PyObject unicode___add__(PyObject other) {
PyUnicode otherUnicode;
@@ -515,9 +534,8 @@
buffer.appendCodePoint(Character.toTitleCase(codePoint));
}
- if (Character.isLowerCase(codePoint) ||
- Character.isUpperCase(codePoint) ||
- Character.isTitleCase(codePoint)) {
+ if (Character.isLowerCase(codePoint) || Character.isUpperCase(codePoint)
+ || Character.isTitleCase(codePoint)) {
previous_is_cased = true;
} else {
previous_is_cased = false;
@@ -575,10 +593,12 @@
}
}
+ @Override
public boolean hasNext() {
return lookahead != -1;
}
+ @Override
public Object next() {
int old = lookahead;
if (iter.hasNext()) {
@@ -589,6 +609,7 @@
return old;
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -596,13 +617,23 @@
// compliance requires that we need to support a bit of inconsistency
// compared to other coercion used
+ /**
+ * Helper used in <code>.strip()</code> to "coerce" a method argument into a
+ * <code>PyUnicode</code> (which it may already be). A <code>null</code> argument or a
+ * <code>PyNone</code> causes <code>null</code> to be returned. A buffer type is not acceptable
+ * to (Unicode) <code>.strip()</code>. This is the difference from
+ * {@link #coerceToUnicodeOrNull(PyObject)}.
+ *
+ * @param o the object to coerce
+ * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
+ */
private PyUnicode coerceStripSepToUnicode(PyObject o) {
if (o == null) {
return null;
} else if (o instanceof PyUnicode) {
- return (PyUnicode) o;
+ return (PyUnicode)o;
} else if (o instanceof PyString) {
- return new PyUnicode(((PyString) o).decode().toString());
+ return new PyUnicode(((PyString)o).decode().toString());
} else if (o == Py.None) {
return null;
} else {
@@ -679,10 +710,11 @@
@ExposedMethod(doc = BuiltinDocs.unicode_partition_doc)
final PyTuple unicode_partition(PyObject sep) {
- return unicodePartition(sep);
+ return unicodePartition(coerceToUnicode(sep));
}
private abstract class SplitIterator implements Iterator {
+
protected final int maxsplit;
protected final Iterator<Integer> iter = newSubsequenceIterator();
protected final LinkedList<Integer> lookahead = new LinkedList<Integer>();
@@ -693,9 +725,10 @@
this.maxsplit = maxsplit;
}
+ @Override
public boolean hasNext() {
- return lookahead.peek() != null ||
- (iter.hasNext() && (maxsplit == -1 || numSplits <= maxsplit));
+ return lookahead.peek() != null
+ || (iter.hasNext() && (maxsplit == -1 || numSplits <= maxsplit));
}
protected void addLookahead(StringBuilder buffer) {
@@ -705,6 +738,7 @@
lookahead.clear();
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -720,6 +754,7 @@
super(maxsplit);
}
+ @Override
public PyUnicode next() {
StringBuilder buffer = new StringBuilder();
@@ -770,16 +805,19 @@
return lookahead;
}
+ @Override
public boolean hasNext() {
return lookahead != null;
}
+ @Override
public T next() {
T peeked = lookahead;
lookahead = iter.hasNext() ? iter.next() : null;
return peeked;
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -798,14 +836,17 @@
this.iter = reversed.iterator();
}
+ @Override
public boolean hasNext() {
return iter.hasNext();
}
+ @Override
public T next() {
return iter.next();
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -820,10 +861,12 @@
this.keepends = keepends;
}
+ @Override
public boolean hasNext() {
return iter.hasNext();
}
+ @Override
public Object next() {
StringBuilder buffer = new StringBuilder();
while (iter.hasNext()) {
@@ -836,8 +879,8 @@
iter.next();
}
break;
- } else if (codepoint == '\n' || codepoint == '\r' ||
- Character.getType(codepoint) == Character.LINE_SEPARATOR) {
+ } else if (codepoint == '\n' || codepoint == '\r'
+ || Character.getType(codepoint) == Character.LINE_SEPARATOR) {
if (keepends) {
buffer.appendCodePoint(codepoint);
}
@@ -849,6 +892,7 @@
return new PyUnicode(buffer);
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -863,6 +907,7 @@
this.sep = sep;
}
+ @Override
public PyUnicode next() {
StringBuilder buffer = new StringBuilder();
@@ -878,8 +923,7 @@
while (iter.hasNext()) {
// TODO: should cache the first codepoint
inSeparator = true;
- for (Iterator<Integer> sepIter = sep.newSubsequenceIterator();
- sepIter.hasNext();) {
+ for (Iterator<Integer> sepIter = sep.newSubsequenceIterator(); sepIter.hasNext();) {
int codepoint = iter.next();
if (codepoint != sepIter.next()) {
addLookahead(buffer);
@@ -920,12 +964,12 @@
@ExposedMethod(doc = BuiltinDocs.unicode_rpartition_doc)
final PyTuple unicode_rpartition(PyObject sep) {
- return unicodeRpartition(sep);
+ return unicodeRpartition(coerceToUnicode(sep));
}
@ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_split_doc)
final PyList unicode_split(PyObject sepObj, int maxsplit) {
- PyUnicode sep = coerceToUnicode(sepObj);
+ PyUnicode sep = coerceToUnicodeOrNull(sepObj);
if (sep != null) {
return _split(sep.getString(), maxsplit);
} else {
@@ -935,7 +979,7 @@
@ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_rsplit_doc)
final PyList unicode_rsplit(PyObject sepObj, int maxsplit) {
- PyUnicode sep = coerceToUnicode(sepObj);
+ PyUnicode sep = coerceToUnicodeOrNull(sepObj);
if (sep != null) {
return _rsplit(sep.getString(), maxsplit);
} else {
@@ -954,33 +998,37 @@
@Override
protected PyString fromSubstring(int begin, int end) {
- assert(isBasicPlane()); // can only be used on a codepath from str_ equivalents
+ assert (isBasicPlane()); // can only be used on a codepath from str_ equivalents
return new PyUnicode(getString().substring(begin, end));
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
- final int unicode_index(String sub, PyObject start, PyObject end) {
- return str_index(sub, start, end);
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_index_doc)
+ final int unicode_index(PyObject subObj, PyObject start, PyObject end) {
+ final PyUnicode sub = coerceToUnicode(subObj);
+ // Now use the mechanics of the PyString on the UTF-16 of the PyUnicode.
+ return checkIndex(_find(sub.getString(), start, end));
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
- final int unicode_rindex(String sub, PyObject start, PyObject end) {
- return str_rindex(sub, start, end);
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_rindex_doc)
+ final int unicode_rindex(PyObject subObj, PyObject start, PyObject end) {
+ final PyUnicode sub = coerceToUnicode(subObj);
+ // As unicode_index but via _rfind: PyString mechanics on the UTF-16 of the PyUnicode.
+ return checkIndex(_rfind(sub.getString(), start, end));
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_count_doc)
final int unicode_count(PyObject subObj, PyObject start, PyObject end) {
final PyUnicode sub = coerceToUnicode(subObj);
if (isBasicPlane()) {
- return str_count(sub.getString(), start, end);
+ return _count(sub.getString(), start, end);
}
int[] indices = translateIndices(start, end);
int count = 0;
- for (Iterator<Integer> mainIter = newSubsequenceIterator(indices[0], indices[1], 1);
- mainIter.hasNext();) {
+ for (Iterator<Integer> mainIter = newSubsequenceIterator(indices[0], indices[1], 1); mainIter
+ .hasNext();) {
int matched = sub.getCodePointCount();
- for (Iterator<Integer> subIter = sub.newSubsequenceIterator();
- mainIter.hasNext() && subIter.hasNext();) {
+ for (Iterator<Integer> subIter = sub.newSubsequenceIterator(); mainIter.hasNext()
+ && subIter.hasNext();) {
if (mainIter.next() != subIter.next()) {
break;
}
@@ -994,32 +1042,35 @@
return count;
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
- final int unicode_find(String sub, PyObject start, PyObject end) {
- return str_find(sub, start, end);
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_find_doc)
+ final int unicode_find(PyObject subObj, PyObject start, PyObject end) {
+ return _find(coerceToUnicode(subObj).getString(), start, end);
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
- final int unicode_rfind(String sub, PyObject start, PyObject end) {
- return str_rfind(sub, start, end);
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_rfind_doc)
+ final int unicode_rfind(PyObject subObj, PyObject start, PyObject end) {
+ return _rfind(coerceToUnicode(subObj).getString(), start, end);
}
private static String padding(int n, int pad) {
StringBuilder buffer = new StringBuilder(n);
- for (int i=0; i<n; i++)
+ for (int i = 0; i < n; i++) {
buffer.appendCodePoint(pad);
+ }
return buffer.toString();
}
private static int parse_fillchar(String function, String fillchar) {
- if (fillchar == null) { return ' '; }
+ if (fillchar == null) {
+ return ' ';
+ }
if (fillchar.codePointCount(0, fillchar.length()) != 1) {
throw Py.TypeError(function + "() argument 2 must be char, not str");
}
return fillchar.codePointAt(0);
}
- @ExposedMethod(defaults="null", doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc)
final PyObject unicode_ljust(int width, String padding) {
int n = width - getCodePointCount();
if (n <= 0) {
@@ -1029,7 +1080,7 @@
}
}
- @ExposedMethod(defaults="null", doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc)
final PyObject unicode_rjust(int width, String padding) {
int n = width - getCodePointCount();
if (n <= 0) {
@@ -1039,7 +1090,7 @@
}
}
- @ExposedMethod(defaults="null", doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc)
final PyObject unicode_center(int width, String padding) {
int n = width - getCodePointCount();
if (n <= 0) {
@@ -1049,7 +1100,7 @@
if (n % 2 > 0 && width % 2 > 0) {
half += 1;
}
- int pad = parse_fillchar("center", padding);
+ int pad = parse_fillchar("center", padding);
return new PyUnicode(padding(half, pad) + getString() + padding(n - half, pad));
}
@@ -1118,43 +1169,50 @@
return new PyUnicode(buffer);
}
- @ExposedMethod(defaults = "-1", doc = BuiltinDocs.unicode___getslice___doc)
- final PyObject unicode_replace(PyObject oldPieceObj, PyObject newPieceObj, int maxsplit) {
+ @ExposedMethod(defaults = "-1", doc = BuiltinDocs.unicode_replace_doc)
+ final PyString unicode_replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
+
+ // Convert other argument types to PyUnicode (or error)
PyUnicode newPiece = coerceToUnicode(newPieceObj);
PyUnicode oldPiece = coerceToUnicode(oldPieceObj);
+
if (isBasicPlane() && newPiece.isBasicPlane() && oldPiece.isBasicPlane()) {
- return replace(oldPiece, newPiece, maxsplit);
- }
+ // Use the mechanics of PyString, since all is basic plane
+ return _replace(oldPiece.getString(), newPiece.getString(), count);
- StringBuilder buffer = new StringBuilder();
+ } else {
+ // A Unicode-specific implementation is needed working in code points
+ StringBuilder buffer = new StringBuilder();
- if (oldPiece.getCodePointCount() == 0) {
- Iterator<Integer> iter = newSubsequenceIterator();
- for (int i = 1; (maxsplit == -1 || i < maxsplit) && iter.hasNext(); i++) {
- if (i == 1) {
+ if (oldPiece.getCodePointCount() == 0) {
+ Iterator<Integer> iter = newSubsequenceIterator();
+ for (int i = 1; (count == -1 || i < count) && iter.hasNext(); i++) {
+ if (i == 1) {
+ buffer.append(newPiece.getString());
+ }
+ buffer.appendCodePoint(iter.next());
buffer.append(newPiece.getString());
}
- buffer.appendCodePoint(iter.next());
- buffer.append(newPiece.getString());
- }
- while (iter.hasNext()) {
- buffer.appendCodePoint(iter.next());
- }
- return new PyUnicode(buffer);
- } else {
- SplitIterator iter = newSplitIterator(oldPiece, maxsplit);
- int numSplits = 0;
- while (iter.hasNext()) {
- buffer.append(((PyUnicode) iter.next()).getString());
- if (iter.hasNext()) {
+ while (iter.hasNext()) {
+ buffer.appendCodePoint(iter.next());
+ }
+ return new PyUnicode(buffer);
+
+ } else {
+ SplitIterator iter = newSplitIterator(oldPiece, count);
+ int numSplits = 0;
+ while (iter.hasNext()) {
+ buffer.append(((PyUnicode)iter.next()).getString());
+ if (iter.hasNext()) {
+ buffer.append(newPiece.getString());
+ }
+ numSplits++;
+ }
+ if (iter.getEndsWithSeparator() && (count == -1 || numSplits <= count)) {
buffer.append(newPiece.getString());
}
- numSplits++;
+ return new PyUnicode(buffer);
}
- if (iter.getEndsWithSeparator() && (maxsplit == -1 || numSplits <= maxsplit)) {
- buffer.append(newPiece.getString());
- }
- return new PyUnicode(buffer);
}
}
@@ -1169,13 +1227,15 @@
return unicodeJoin(seq);
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_startswith_doc)
final boolean unicode_startswith(PyObject prefix, PyObject start, PyObject end) {
+ // FIXME: slice indexing logic incorrect when this is ASTRAL
return str_startswith(prefix, start, end);
}
- @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode___getslice___doc)
+ @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_endswith_doc)
final boolean unicode_endswith(PyObject suffix, PyObject start, PyObject end) {
+ // FIXME: slice indexing logic incorrect when this is ASTRAL
return str_endswith(suffix, start, end);
}
@@ -1246,8 +1306,7 @@
}
for (Iterator<Integer> iter = newSubsequenceIterator(); iter.hasNext();) {
int codePoint = iter.next();
- if (!(Character.isLetterOrDigit(codePoint) ||
- Character.getType(codePoint) == Character.LETTER_NUMBER)) {
+ if (!(Character.isLetterOrDigit(codePoint) || Character.getType(codePoint) == Character.LETTER_NUMBER)) {
return false;
}
}
@@ -1296,9 +1355,8 @@
}
for (Iterator<Integer> iter = newSubsequenceIterator(); iter.hasNext();) {
int type = Character.getType(iter.next());
- if (type != Character.DECIMAL_DIGIT_NUMBER &&
- type != Character.LETTER_NUMBER &&
- type != Character.OTHER_NUMBER) {
+ if (type != Character.DECIMAL_DIGIT_NUMBER && type != Character.LETTER_NUMBER
+ && type != Character.OTHER_NUMBER) {
return false;
}
}
@@ -1388,6 +1446,7 @@
}
}
+ @Override
public Iterator<Integer> iterator() {
return newSubsequenceIterator();
}
@@ -1443,14 +1502,14 @@
}
// All other characters are considered unencodable
codecs.encoding_error("strict", "decimal", getString(), i, i + 1,
- "invalid decimal Unicode string");
+ "invalid decimal Unicode string");
}
return sb.toString();
}
/**
- * Encode unicode in the basic plane into a valid decimal String. Throws a
- * UnicodeEncodeError on invalid characters.
+ * Encode unicode in the basic plane into a valid decimal String. Throws a UnicodeEncodeError on
+ * invalid characters.
*
* @return a valid decimal as an encoded String
*/
@@ -1474,7 +1533,7 @@
}
// All other characters are considered unencodable
codecs.encoding_error("strict", "decimal", getString(), i, i + 1,
- "invalid decimal Unicode string");
+ "invalid decimal Unicode string");
}
return sb.toString();
}
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins mailing list