[pypy-commit] pypy unicode-utf8: implement splitlines, strange unreachable code?
fijal
pypy.commits at gmail.com
Thu Feb 23 13:39:27 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90331:4f3f66d1551a
Date: 2017-02-23 19:38 +0100
http://bitbucket.org/pypy/pypy/changeset/4f3f66d1551a/
Log: implement splitlines, strange unreachable code?
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -608,6 +608,7 @@
eol = pos
strs.append(value[sol:eol])
if pos < length:
+ # XXX is this code reachable ever?
strs.append(value[pos:length])
return self._newlist_unwrapped(space, strs)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -185,8 +185,8 @@
def _iscased(self, ch):
return unicodedb.iscased(ord(ch))
- def _islinebreak(self, ch):
- return unicodedb.islinebreak(ord(ch))
+ def _islinebreak(self, s, pos):
+ return rutf8.check_newline_utf8(s, pos)
def _upper(self, ch):
return unichr(unicodedb.toupper(ord(ch)))
@@ -475,23 +475,27 @@
def descr_splitlines(self, space, keepends=False):
value = self._val(space)
length = len(value)
- strs = []
+ strs_w = []
pos = 0
while pos < length:
sol = pos
+ lgt = 0
while pos < length and not self._islinebreak(value, pos):
pos = rutf8.next_codepoint_pos(value, pos)
+ lgt += 1
eol = pos
- pos += 1
+ if pos < length:
+ pos = rutf8.next_codepoint_pos(value, pos)
# read CRLF as one line break
if pos < length and value[eol] == '\r' and value[pos] == '\n':
pos += 1
+ if keepends:
+ lgt += 1
if keepends:
eol = pos
- strs.append(value[sol:eol])
- if pos < length:
- strs.append(value[pos:length])
- return self._newlist_unwrapped(space, strs)
+ lgt += 2
+ strs_w.append(W_UnicodeObject(value[sol:eol], lgt))
+ return space.newlist(strs_w)
def wrapunicode(space, uni):
More information about the pypy-commit mailing list