[Jython-checkins] jython: Convert PyUnicode.substring to use UTF-16 translated indices.
jeff.allen
jython-checkins at python.org
Wed Sep 17 00:55:25 CEST 2014
http://hg.python.org/jython/rev/bb69cef6b56c
changeset: 7383:bb69cef6b56c
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Sep 07 23:49:49 2014 +0100
summary:
Convert PyUnicode.substring to use UTF-16 translated indices.
files:
src/org/python/core/PyString.java | 7 ++++++
src/org/python/core/PyUnicode.java | 19 +++++++++++------
2 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -155,6 +155,13 @@
return pybuf;
}
+ /**
+ * Return a substring of this object as a Java String.
+ *
+ * @param start the beginning index, inclusive.
+ * @param end the ending index, exclusive.
+ * @return the specified substring.
+ */
public String substring(int start, int end) {
return getString().substring(start, end);
}
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -238,15 +238,20 @@
// ------------------------------------------------------------------------------------------
- // modified to know something about codepoints; we just need to return the
- // corresponding substring; darn UTF16!
- // TODO: we could avoid doing this unnecessary copy
+ /**
+ * {@inheritDoc}
+ * The indices are code point indices, not UTF-16 (<code>char</code>) indices. For example:
+ *
+ * <pre>
+ * PyUnicode u = new PyUnicode("..\ud800\udc02\ud800\udc03...");
+ * // (Python) u = u'..\U00010002\U00010003...'
+ *
+ * String s = u.substring(2, 4); // = "\ud800\udc02\ud800\udc03" (Java)
+ * </pre>
+ */
@Override
public String substring(int start, int end) {
- if (isBasicPlane()) {
- return super.substring(start, end);
- }
- return new PyUnicode(newSubsequenceIterator(start, end, 1)).getString();
+ return super.substring(translator.utf16Index(start), translator.utf16Index(end));
}
/**
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list