[pypy-commit] pypy unicode-utf8: hg merge default
rlamy
pypy.commits at gmail.com
Fri Nov 24 15:26:03 EST 2017
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: unicode-utf8
Changeset: r93170:f9a1926628b2
Date: 2017-11-24 20:22 +0000
http://bitbucket.org/pypy/pypy/changeset/f9a1926628b2/
Log: hg merge default
diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py
new file mode 100644
--- /dev/null
+++ b/extra_tests/test_textio.py
@@ -0,0 +1,27 @@
+from hypothesis import given, strategies as st
+
+from io import BytesIO, TextIOWrapper
+
+LINESEP = ['', '\r', '\n', '\r\n']
+
+@st.composite
+def text_with_newlines(draw):
+ sep = draw(st.sampled_from(LINESEP))
+ lines = draw(st.lists(st.text(max_size=10), max_size=10))
+ return sep.join(lines)
+
+@given(txt=text_with_newlines(),
+ mode=st.sampled_from(['\r', '\n', '\r\n', '']),
+ limit=st.integers(min_value=-1))
+def test_readline(txt, mode, limit):
+ textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode)
+ lines = []
+ while True:
+ line = textio.readline(limit)
+ if limit > 0:
+ assert len(line) < limit
+ if line:
+ lines.append(line)
+ else:
+ break
+ assert u''.join(lines) == txt
diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_continuation/test/conftest.py
@@ -0,0 +1,7 @@
+import pytest
+import sys
+
+def pytest_configure(config):
+ if sys.platform.startswith('linux'):
+ from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux
+ configure_libbacktrace_linux()
diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -174,18 +174,16 @@
start = self.pos
if limit < 0 or limit > len(self.buf) - self.pos:
limit = len(self.buf) - self.pos
+ assert limit >= 0
- assert limit >= 0
- end = start + limit
-
- endpos, consumed = self._find_line_ending(
+ endpos, found = self._find_line_ending(
# XXX: super inefficient, makes a copy of the entire contents.
u"".join(self.buf),
start,
- end
+ limit
)
- if endpos < 0:
- endpos = end
+ if not found:
+ endpos = start + limit
assert endpos >= 0
self.pos = endpos
return space.newunicode(u"".join(self.buf[start:endpos]))
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -221,44 +221,49 @@
def newlines_get_w(self, space):
return space.w_None
- def _find_line_ending(self, line, start, end):
- size = end - start
+ def _find_newline_universal(self, line, start, limit):
+ # Universal newline search. Find any of \r, \r\n, \n
+ # The decoder ensures that \r\n are not split in two pieces
+ limit = min(limit, len(line) - start)
+ end = start + limit
+ i = start
+ while i < end:
+ ch = line[i]
+ i += 1
+ if ch == '\n':
+ return i, True
+ if ch == '\r':
+ if i >= end:
+ break
+ if line[i] == '\n':
+ return i + 1, True
+ else:
+ return i, True
+ return end, False
+
+ def _find_marker(self, marker, line, start, limit):
+ limit = min(limit, len(line) - start)
+ end = start + limit
+ for i in range(start, end - len(marker) + 1):
+ ch = line[i]
+ if ch == marker[0]:
+ for j in range(1, len(marker)):
+ if line[i + j] != marker[j]:
+ break # from inner loop
+ else:
+ return i + len(marker), True
+ return end - len(marker) + 1, False
+
+ def _find_line_ending(self, line, start, limit):
if self.readuniversal:
- # Universal newline search. Find any of \r, \r\n, \n
- # The decoder ensures that \r\n are not split in two pieces
- i = start
- while True:
- # Fast path for non-control chars.
- while i < end and line[i] > '\r':
- i += 1
- if i >= end:
- return -1, size
- ch = line[i]
- i += 1
- if ch == '\n':
- return i, 0
- if ch == '\r':
- if line[i] == '\n':
- return i + 1, 0
- else:
- return i, 0
+ return self._find_newline_universal(line, start, limit)
if self.readtranslate:
# Newlines are already translated, only search for \n
newline = '\n'
else:
# Non-universal mode.
newline = self.readnl
- end_scan = end - len(newline) + 1
- for i in range(start, end_scan):
- ch = line[i]
- if ch == newline[0]:
- for j in range(1, len(newline)):
- if line[i + j] != newline[j]:
- break
- else:
- return i + len(newline), 0
- return -1, end_scan
-
+ return self._find_marker(newline, line, start, limit)
W_TextIOBase.typedef = TypeDef(
'_io._TextIOBase', W_IOBase.typedef,
@@ -661,7 +666,7 @@
limit = convert_size(space, w_limit)
line = None
- remaining = None
+ remnant = None
builder = StringBuilder()
while True:
@@ -669,61 +674,60 @@
has_data = self._ensure_data(space)
if not has_data:
# end of file
- start = endpos = offset_to_buffer = 0
+ start = end_scan = 0
break
- if not remaining:
- line = self.decoded_chars
- start = self.decoded_chars_used
- offset_to_buffer = 0
+ if remnant:
+ assert not self.readtranslate and self.readnl == '\r\n'
+ assert self.decoded_chars_used == 0
+ if remnant == '\r' and self.decoded_chars[0] == '\n':
+ builder.append('\r\n')
+ self.decoded_chars_used = 1
+ line = remnant = None
+ start = end_scan = 0
+ break
+ else:
+ builder.append(remnant)
+ remnant = None
+ continue
+
+ line = self.decoded_chars
+ start = self.decoded_chars_used
+ if limit > 0:
+ remaining = limit - builder.getlength()
+ assert remaining >= 0
else:
- assert self.decoded_chars_used == 0
- line = remaining + self.decoded_chars
- start = 0
- offset_to_buffer = len(remaining)
- remaining = None
+ remaining = sys.maxint
+ end_scan, found = self._find_line_ending(line, start, remaining)
+ assert end_scan >= 0
+ if found:
+ break
- line_len = len(line)
- endpos, consumed = self._find_line_ending(line, start, line_len)
- chunked = builder.getlength()
- if endpos >= 0:
- if limit >= 0 and endpos >= start + limit - chunked:
- endpos = start + limit - chunked
- assert endpos >= 0
- break
- assert consumed >= 0
-
- # We can put aside up to `endpos`
- endpos = consumed + start
- if limit >= 0 and endpos >= start + limit - chunked:
+ if limit >= 0 and end_scan - start >= remaining:
# Didn't find line ending, but reached length limit
- endpos = start + limit - chunked
- assert endpos >= 0
break
# No line ending seen yet - put aside current data
- if endpos > start:
- s = line[start:endpos]
+ if end_scan > start:
+ s = line[start:end_scan]
builder.append(s)
- # There may be some remaining bytes we'll have to prepend to the
+ # There may be some remaining chars we'll have to prepend to the
# next chunk of data
- if endpos < line_len:
- remaining = line[endpos:]
+ if end_scan < len(line):
+ remnant = line[end_scan:]
line = None
# We have consumed the buffer
self._unset_decoded()
if line:
# Our line ends in the current buffer
- decoded_chars_used = endpos - offset_to_buffer
- assert decoded_chars_used >= 0
- self.decoded_chars_used = decoded_chars_used
- if start > 0 or endpos < len(line):
- line = line[start:endpos]
+ self.decoded_chars_used = end_scan
+ if start > 0 or end_scan < len(line):
+ line = line[start:end_scan]
builder.append(line)
- elif remaining:
- builder.append(remaining)
+ elif remnant:
+ builder.append(remnant)
result = builder.build()
return space.new_from_utf8(result)
diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_io/test/test_interp_textio.py
@@ -0,0 +1,33 @@
+from hypothesis import given, strategies as st, assume
+from pypy.module._io.interp_bytesio import W_BytesIO
+from pypy.module._io.interp_textio import W_TextIOWrapper
+
+LINESEP = ['', '\r', '\n', '\r\n']
+
+@st.composite
+def text_with_newlines(draw):
+ sep = draw(st.sampled_from(LINESEP))
+ lines = draw(st.lists(st.text(max_size=10), max_size=10))
+ return sep.join(lines)
+
+@given(txt=text_with_newlines(),
+ mode=st.sampled_from(['\r', '\n', '\r\n', '']),
+ limit=st.integers(min_value=-1))
+def test_readline(space, txt, mode, limit):
+ assume(limit != 0)
+ w_stream = W_BytesIO(space)
+ w_stream.descr_init(space, space.newbytes(txt.encode('utf-8')))
+ w_textio = W_TextIOWrapper(space)
+ w_textio.descr_init(
+ space, w_stream, encoding='utf-8',
+ w_newline=space.newtext(mode))
+ lines = []
+ while True:
+ line = space.unicode_w(w_textio.readline_w(space, space.newint(limit)))
+ if limit > 0:
+ assert len(line) <= limit
+ if line:
+ lines.append(line)
+ else:
+ break
+ assert u''.join(lines) == txt
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
cffi>=1.4.0
-vmprof>=0.4.10 # required to parse log files in rvmprof tests
+
+# parse log files in rvmprof tests
+vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x
# hypothesis is used for test generation on untranslated tests
hypothesis
diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py
--- a/rpython/rlib/rvmprof/cintf.py
+++ b/rpython/rlib/rvmprof/cintf.py
@@ -9,6 +9,7 @@
from rpython.rtyper.tool import rffi_platform as platform
from rpython.rlib import rthread, jit
from rpython.rlib.objectmodel import we_are_translated
+from rpython.config.translationoption import get_translation_config
class VMProfPlatformUnsupported(Exception):
pass
@@ -133,11 +134,17 @@
#endif
"""])
+if get_translation_config() is None:
+ # tests need the full eci here
+ _eci = global_eci
+else:
+ _eci = auto_eci
+
vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [],
- rffi.INT, compilation_info=auto_eci,
+ rffi.INT, compilation_info=_eci,
_nowrapper=True)
vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [],
- lltype.Void, compilation_info=auto_eci,
+ lltype.Void, compilation_info=_eci,
_nowrapper=True)
More information about the pypy-commit
mailing list