[pypy-commit] pypy refactor-str-types: Make descr_splitlines() unicode-aware.
Manuel Jacob
noreply at buildbot.pypy.org
Tue Jul 30 14:15:42 CEST 2013
Author: Manuel Jacob
Branch: refactor-str-types
Changeset: r65818:93f93f772e11
Date: 2013-07-29 19:40 +0200
http://bitbucket.org/pypy/pypy/changeset/93f93f772e11/
Log: Make descr_splitlines() unicode-aware.
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -74,6 +74,9 @@
_iscased = _isalpha
+ def _islinebreak(self, ch):
+ return (ch == '\n') or (ch == '\r')
+
def _upper(self, ch):
if ch.islower():
o = ord(ch) - 32
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -117,6 +117,9 @@
_iscased = _isalpha
+ def _islinebreak(self, ch):
+ return (ch == '\n') or (ch == '\r')
+
def _upper(self, ch):
if ch.islower():
o = ord(ch) - 32
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -628,26 +628,24 @@
@unwrap_spec(keepends=bool)
@specialize.argtype(0)
def descr_splitlines(self, space, keepends=False):
- data = self._val(space)
- selflen = len(data)
+ value = self._val(space)
+ length = len(value)
strs = []
- i = j = 0
- while i < selflen:
- # Find a line and append it
- while i < selflen and data[i] != '\n' and data[i] != '\r':
- i += 1
- # Skip the line break reading CRLF as one line break
- eol = i
- i += 1
- if i < selflen and data[i-1] == '\r' and data[i] == '\n':
- i += 1
+ pos = 0
+ while pos < length:
+ sol = pos
+ while pos < length and not self._islinebreak(value[pos]):
+ pos += 1
+ eol = pos
+ pos += 1
+ # read CRLF as one line break
+ if pos < length and value[eol] == '\r' and value[pos] == '\n':
+ pos += 1
if keepends:
- eol = i
- strs.append(data[j:eol])
- j = i
-
- if j < selflen:
- strs.append(data[j:len(data)])
+ eol = pos
+ strs.append(value[sol:eol])
+ if pos < length:
+ strs.append(value[pos:length])
return self._newlist_unwrapped(space, strs)
@specialize.argtype(0)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -125,6 +125,9 @@
def _iscased(self, ch):
return unicodedb.iscased(ord(ch))
+ def _islinebreak(self, ch):
+ return unicodedb.islinebreak(ord(ch))
+
def _upper(self, ch):
return unichr(unicodedb.toupper(ord(ch)))
More information about the pypy-commit mailing list