[Jython-checkins] jython: Convert PyUnicode.substring to use UTF-16 translated indices.

jeff.allen jython-checkins at python.org
Wed Sep 17 00:55:25 CEST 2014


http://hg.python.org/jython/rev/bb69cef6b56c
changeset:   7383:bb69cef6b56c
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Sun Sep 07 23:49:49 2014 +0100
summary:
  Convert PyUnicode.substring to use UTF-16 translated indices.

files:
  src/org/python/core/PyString.java  |   7 ++++++
  src/org/python/core/PyUnicode.java |  19 +++++++++++------
  2 files changed, 19 insertions(+), 7 deletions(-)


diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -155,6 +155,13 @@
         return pybuf;
     }
 
+    /**
+     * Return a substring of this object as a Java String.
+     *
+     * @param start the beginning index, inclusive.
+     * @param end the ending index, exclusive.
+     * @return the specified substring.
+     */
     public String substring(int start, int end) {
         return getString().substring(start, end);
     }
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -238,15 +238,20 @@
 
     // ------------------------------------------------------------------------------------------
 
-    // modified to know something about codepoints; we just need to return the
-    // corresponding substring; darn UTF16!
-    // TODO: we could avoid doing this unnecessary copy
+    /**
+     * {@inheritDoc}
+     * The indices  are code point indices, not UTF-16 (<code>char</code>) indices. For example:
+     *
+     * <pre>
+     * PyUnicode u = new PyUnicode("..\ud800\udc02\ud800\udc03...");
+     * // (Python) u = u'..\U00010002\U00010003...'
+     *
+     * String s = u.substring(2, 4);  // = "\ud800\udc02\ud800\udc03" (Java)
+     * </pre>
+     */
     @Override
     public String substring(int start, int end) {
-        if (isBasicPlane()) {
-            return super.substring(start, end);
-        }
-        return new PyUnicode(newSubsequenceIterator(start, end, 1)).getString();
+        return super.substring(translator.utf16Index(start), translator.utf16Index(end));
     }
 
     /**

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list