[pypy-commit] pypy unicode-utf8: capitalize and {starts,ends}with.
jerith
pypy.commits at gmail.com
Sat Oct 7 10:29:24 EDT 2017
Author: Jeremy Thurgood <firxen at gmail.com>
Branch: unicode-utf8
Changeset: r92634:2d6fe4fc14a3
Date: 2017-10-07 16:28 +0200
http://bitbucket.org/pypy/pypy/changeset/2d6fe4fc14a3/
Log: capitalize and {starts,ends}with.
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -632,16 +632,13 @@
return space.w_True
return space.w_False
+ # This is overridden in unicodeobject, but the two above are not.
def _startswith(self, space, value, w_prefix, start, end):
prefix = self._op_val(space, w_prefix)
if start > len(value):
- return self._starts_ends_overflow(prefix)
+ return False
return startswith(value, prefix, start, end)
- def _starts_ends_overflow(self, prefix):
- return False # bug-to-bug compat: this is for strings and
- # bytearrays, but overridden for unicodes
-
def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
value, start, end, _ = self._convert_idx_params(space, w_start, w_end)
if space.isinstance_w(w_suffix, space.w_tuple):
@@ -655,10 +652,11 @@
return space.w_True
return space.w_False
+ # This is overridden in unicodeobject, but the two above are not.
def _endswith(self, space, value, w_prefix, start, end):
prefix = self._op_val(space, w_prefix)
if start > len(value):
- return self._starts_ends_overflow(prefix)
+ return False
return endswith(value, prefix, start, end)
def _strip(self, space, w_chars, left, right, name='strip'):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -6,8 +6,9 @@
from rpython.rlib.buffer import StringBuffer
from rpython.rlib.mutbuffer import MutableStringBuffer
from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder,\
- replace_count
+from rpython.rlib.rstring import (
+ StringBuilder, split, rsplit, UnicodeBuilder, replace_count, startswith,
+ endswith)
from rpython.rlib.runicode import make_unicode_escape_function
from rpython.rlib import rutf8, jit
@@ -139,6 +140,10 @@
return True
@staticmethod
+ def _op_utf8(space, w_other, strict=None):
+ return W_UnicodeObject.convert_arg_to_w_unicode(space, w_other, strict)._utf8
+
+ @staticmethod
def _op_val(space, w_other, strict=None):
return W_UnicodeObject.convert_arg_to_w_unicode(space, w_other, strict)._utf8.decode('utf8')
@@ -520,8 +525,17 @@
i = rutf8.next_codepoint_pos(val, i)
return space.newbool(cased)
- def _starts_ends_overflow(self, prefix):
- return len(prefix) == 0
+ def _startswith(self, space, value, w_prefix, start, end):
+ prefix = self._op_utf8(space, w_prefix)
+ if start > len(value):
+ return len(prefix) == 0
+ return startswith(value, prefix, start, end)
+
+ def _endswith(self, space, value, w_prefix, start, end):
+ prefix = self._op_utf8(space, w_prefix)
+ if start > len(value):
+ return len(prefix) == 0
+ return endswith(value, prefix, start, end)
def descr_add(self, space, w_other):
try:
@@ -644,6 +658,21 @@
return space.newlist_utf8(res)
+ def descr_capitalize(self, space):
+ value = self._utf8
+ if len(value) == 0:
+ return self._empty()
+
+ builder = StringBuilder(len(value))
+ uchar = rutf8.codepoint_at_pos(value, 0)
+ i = rutf8.next_codepoint_pos(value, 0)
+ rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(uchar))
+ while i < len(value):
+ uchar = rutf8.codepoint_at_pos(value, i)
+ i = rutf8.next_codepoint_pos(value, i)
+ rutf8.unichr_as_utf8_append(builder, unicodedb.tolower(uchar))
+ return W_UnicodeObject(builder.build(), self._len())
+
@unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
def descr_center(self, space, width, w_fillchar):
value = self._utf8
More information about the pypy-commit mailing list