[issue16757] Faster _PyUnicode_FindMaxChar()

Serhiy Storchaka report at bugs.python.org
Mon Dec 24 21:53:38 CET 2012


Serhiy Storchaka added the comment:

I think it is redundant and useless, but do as you want.

----------
Added file: http://bugs.python.org/file28428/unicode_findmaxchar_2.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue16757>
_______________________________________
-------------- next part --------------
diff -r a7c9869a5114 Include/unicodeobject.h
--- a/Include/unicodeobject.h	Mon Dec 24 13:17:11 2012 +0200
+++ b/Include/unicodeobject.h	Mon Dec 24 22:52:17 2012 +0200
@@ -739,10 +739,15 @@
 #ifndef Py_LIMITED_API
 /* Compute the maximum character of the substring unicode[start:end].
    Return 127 for an empty string. */
-PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
+#define _PyUnicode_FindMaxChar(unicode, start, end) \
+    _PyUnicode_FindMaxChar2((unicode), (start), (end), 127)
+/* Return the larger of maxchar and the maximum character of the substring
+   unicode[start:end]. */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar2(
     PyObject *unicode,
     Py_ssize_t start,
-    Py_ssize_t end);
+    Py_ssize_t end,
+    Py_UCS4 maxchar);
 #endif
 
 /* Copy the string into a UCS4 buffer including the null character if copy_null
diff -r a7c9869a5114 Objects/stringlib/unicode_format.h
--- a/Objects/stringlib/unicode_format.h	Mon Dec 24 13:17:11 2012 +0200
+++ b/Objects/stringlib/unicode_format.h	Mon Dec 24 22:52:17 2012 +0200
@@ -886,8 +886,9 @@
                                          &format_spec_needs_expanding)) == 2) {
         sublen = literal.end - literal.start;
         if (sublen) {
-            maxchar = _PyUnicode_FindMaxChar(literal.str,
-                                             literal.start, literal.end);
+            maxchar = _PyUnicode_FindMaxChar2(literal.str,
+                                              literal.start, literal.end,
+                                              writer->maxchar);
             err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
             if (err == -1)
                 return 0;
diff -r a7c9869a5114 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Mon Dec 24 13:17:11 2012 +0200
+++ b/Objects/unicodeobject.c	Mon Dec 24 22:52:17 2012 +0200
@@ -2002,24 +2002,31 @@
 }
 
 Py_UCS4
-_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
+_PyUnicode_FindMaxChar2(PyObject *unicode, Py_ssize_t start, Py_ssize_t end,
+                        Py_UCS4 maxchar)
 {
     enum PyUnicode_Kind kind;
     void *startptr, *endptr;
+    Py_UCS4 maxchar2;
 
     assert(PyUnicode_IS_READY(unicode));
     assert(0 <= start);
     assert(end <= PyUnicode_GET_LENGTH(unicode));
     assert(start <= end);
 
-    if (start == 0 && end == PyUnicode_GET_LENGTH(unicode))
-        return PyUnicode_MAX_CHAR_VALUE(unicode);
+    if (start == 0 && end == PyUnicode_GET_LENGTH(unicode)) {
+        maxchar2 = PyUnicode_MAX_CHAR_VALUE(unicode);
+        return MAX_MAXCHAR(maxchar, maxchar2);
+    }
 
     if (start == end)
-        return 127;
+        return maxchar;
 
     if (PyUnicode_IS_ASCII(unicode))
-        return 127;
+        return maxchar;
+
+    if (maxchar >= PyUnicode_MAX_CHAR_VALUE(unicode))
+        return maxchar;
 
     kind = PyUnicode_KIND(unicode);
     startptr = PyUnicode_DATA(unicode);
@@ -2027,15 +2034,19 @@
     startptr = (char *)startptr + start * kind;
     switch(kind) {
     case PyUnicode_1BYTE_KIND:
-        return ucs1lib_find_max_char(startptr, endptr);
+        maxchar2 = ucs1lib_find_max_char(startptr, endptr);
+        break;
     case PyUnicode_2BYTE_KIND:
-        return ucs2lib_find_max_char(startptr, endptr);
+        maxchar2 = ucs2lib_find_max_char(startptr, endptr);
+        break;
     case PyUnicode_4BYTE_KIND:
-        return ucs4lib_find_max_char(startptr, endptr);
+        maxchar2 = ucs4lib_find_max_char(startptr, endptr);
+        break;
     default:
         assert(0);
         return 0;
     }
+    return MAX_MAXCHAR(maxchar, maxchar2);
 }
 
 /* Ensure that a string uses the most efficient storage, if it is not the
@@ -13740,7 +13751,7 @@
     Py_ssize_t pindex;
     Py_UCS4 signchar;
     Py_ssize_t buflen;
-    Py_UCS4 maxchar, bufmaxchar;
+    Py_UCS4 bufmaxchar;
     Py_ssize_t sublen;
     _PyUnicodeWriter *writer = &ctx->writer;
     Py_UCS4 fill;
@@ -13793,7 +13804,7 @@
         arg->width = len;
 
     /* Prepare the writer */
-    bufmaxchar = 127;
+    bufmaxchar = writer->maxchar;
     if (!(arg->flags & F_LJUST)) {
         if (arg->sign) {
             if ((arg->width-1) > len)
@@ -13804,8 +13815,7 @@
                 bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
         }
     }
-    maxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
-    bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar);
+    bufmaxchar = _PyUnicode_FindMaxChar2(str, 0, pindex+len, bufmaxchar);
     buflen = arg->width;
     if (arg->sign && len == arg->width)
         buflen++;
@@ -13975,8 +13985,9 @@
                 ctx.writer.overallocate = 0;
             }
             sublen = ctx.fmtpos - nonfmtpos;
-            maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr,
-                                             nonfmtpos, nonfmtpos + sublen);
+            maxchar = _PyUnicode_FindMaxChar2(ctx.fmtstr,
+                                              nonfmtpos, nonfmtpos + sublen,
+                                              ctx.writer.maxchar);
             if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1)
                 goto onError;
 
diff -r a7c9869a5114 Python/formatter_unicode.c
--- a/Python/formatter_unicode.c	Mon Dec 24 13:17:11 2012 +0200
+++ b/Python/formatter_unicode.c	Mon Dec 24 22:52:17 2012 +0200
@@ -771,7 +771,7 @@
 
     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
 
-    maxchar = _PyUnicode_FindMaxChar(value, 0, len);
+    maxchar = _PyUnicode_FindMaxChar2(value, 0, len, writer->maxchar);
     if (lpad != 0 || rpad != 0)
         maxchar = Py_MAX(maxchar, format->fill_char);
 


More information about the Python-bugs-list mailing list