[pypy-commit] pypy default: Extract DecodeBuffer object from W_TextIOWrapper
rlamy
pypy.commits at gmail.com
Fri Nov 24 21:31:03 EST 2017
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch:
Changeset: r93172:e1dbf4f46c45
Date: 2017-11-25 01:15 +0000
http://bitbucket.org/pypy/pypy/changeset/e1dbf4f46c45/
Log: Extract DecodeBuffer object from W_TextIOWrapper
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -333,6 +333,45 @@
self.input = input
+class DecodeBuffer(object):
+ def __init__(self):
+ self.text = None
+ self.pos = 0
+
+ def set(self, space, w_decoded):
+ check_decoded(space, w_decoded)
+ self.text = space.unicode_w(w_decoded)
+ self.pos = 0
+
+ def reset(self):
+ self.text = None
+ self.pos = 0
+
+ def get_chars(self, size):
+ if self.text is None:
+ return u""
+
+ available = len(self.text) - self.pos
+ if size < 0 or size > available:
+ size = available
+ assert size >= 0
+
+ if self.pos > 0 or size < available:
+ start = self.pos
+ end = self.pos + size
+ assert start >= 0
+ assert end >= 0
+ chars = self.text[start:end]
+ else:
+ chars = self.text
+
+ self.pos += size
+ return chars
+
+ def has_data(self):
+ return (self.text is not None and self.pos < len(self.text))
+
+
def check_decoded(space, w_decoded):
if not space.isinstance_w(w_decoded, space.w_unicode):
msg = "decoder should return a string result, not '%T'"
@@ -346,8 +385,7 @@
self.w_encoder = None
self.w_decoder = None
- self.decoded_chars = None # buffer for text returned from decoder
- self.decoded_chars_used = 0 # offset into _decoded_chars for read()
+ self.decoded = DecodeBuffer()
self.pending_bytes = None # list of bytes objects waiting to be
# written, or NULL
self.chunk_size = 8192
@@ -515,44 +553,10 @@
# _____________________________________________________________
# read methods
- def _unset_decoded(self):
- self.decoded_chars = None
- self.decoded_chars_used = 0
-
- def _set_decoded(self, space, w_decoded):
- check_decoded(space, w_decoded)
- self.decoded_chars = space.unicode_w(w_decoded)
- self.decoded_chars_used = 0
-
- def _get_decoded_chars(self, size):
- if self.decoded_chars is None:
- return u""
-
- available = len(self.decoded_chars) - self.decoded_chars_used
- if size < 0 or size > available:
- size = available
- assert size >= 0
-
- if self.decoded_chars_used > 0 or size < available:
- start = self.decoded_chars_used
- end = self.decoded_chars_used + size
- assert start >= 0
- assert end >= 0
- chars = self.decoded_chars[start:end]
- else:
- chars = self.decoded_chars
-
- self.decoded_chars_used += size
- return chars
-
- def _has_data(self):
- return (self.decoded_chars is not None and
- self.decoded_chars_used < len(self.decoded_chars))
-
def _read_chunk(self, space):
"""Read and decode the next chunk of data from the BufferedReader.
The return value is True unless EOF was reached. The decoded string
- is placed in self._decoded_chars (replacing its previous value).
+ is placed in self.decoded (replacing its previous value).
The entire input chunk is sent to the decoder, though some of it may
remain buffered in the decoder, yet to be converted."""
@@ -572,7 +576,7 @@
dec_buffer = None
dec_flags = 0
- # Read a chunk, decode it, and put the result in self._decoded_chars
+ # Read a chunk, decode it, and put the result in self.decoded
w_input = space.call_method(self.w_buffer, "read1",
space.newint(self.chunk_size))
@@ -584,7 +588,7 @@
eof = space.len_w(w_input) == 0
w_decoded = space.call_method(self.w_decoder, "decode",
w_input, space.newbool(eof))
- self._set_decoded(space, w_decoded)
+ self.decoded.set(space, w_decoded)
if space.len_w(w_decoded) > 0:
eof = False
@@ -597,10 +601,10 @@
return not eof
def _ensure_data(self, space):
- while not self._has_data():
+ while not self.decoded.has_data():
try:
if not self._read_chunk(space):
- self._unset_decoded()
+ self.decoded.reset()
self.snapshot = None
return False
except OperationError as e:
@@ -633,7 +637,7 @@
w_bytes = space.call_method(self.w_buffer, "read")
w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True)
check_decoded(space, w_decoded)
- w_result = space.newunicode(self._get_decoded_chars(-1))
+ w_result = space.newunicode(self.decoded.get_chars(-1))
w_final = space.add(w_result, w_decoded)
self.snapshot = None
return w_final
@@ -645,7 +649,7 @@
while remaining > 0:
if not self._ensure_data(space):
break
- data = self._get_decoded_chars(remaining)
+ data = self.decoded.get_chars(remaining)
builder.append(data)
remaining -= len(data)
@@ -672,10 +676,10 @@
if remnant:
assert not self.readtranslate and self.readnl == u'\r\n'
- assert self.decoded_chars_used == 0
- if remnant == u'\r' and self.decoded_chars[0] == u'\n':
+ assert self.decoded.pos == 0
+ if remnant == u'\r' and self.decoded.text[0] == u'\n':
builder.append(u'\r\n')
- self.decoded_chars_used = 1
+ self.decoded.pos = 1
line = remnant = None
start = end_scan = 0
break
@@ -684,8 +688,8 @@
remnant = None
continue
- line = self.decoded_chars
- start = self.decoded_chars_used
+ line = self.decoded.text
+ start = self.decoded.pos
if limit > 0:
remaining = limit - builder.getlength()
assert remaining >= 0
@@ -711,11 +715,11 @@
remnant = line[end_scan:]
line = None
# We have consumed the buffer
- self._unset_decoded()
+ self.decoded.reset()
if line:
# Our line ends in the current buffer
- self.decoded_chars_used = end_scan
+ self.decoded.pos = end_scan
if start > 0 or end_scan < len(line):
line = line[start:end_scan]
builder.append(line)
@@ -855,7 +859,7 @@
raise oefmt(space.w_IOError,
"can't do nonzero end-relative seeks")
space.call_method(self, "flush")
- self._unset_decoded()
+ self.decoded.reset()
self.snapshot = None
if self.w_decoder:
space.call_method(self.w_decoder, "reset")
@@ -880,7 +884,7 @@
# Seek back to the safe start point
space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos))
- self._unset_decoded()
+ self.decoded.reset()
self.snapshot = None
# Restore the decoder to its state from the safe start point.
@@ -901,13 +905,13 @@
w_decoded = space.call_method(self.w_decoder, "decode",
w_chunk, space.newbool(bool(cookie.need_eof)))
- self._set_decoded(space, w_decoded)
+ self.decoded.set(space, w_decoded)
# Skip chars_to_skip of the decoded characters
- if len(self.decoded_chars) < cookie.chars_to_skip:
+ if len(self.decoded.text) < cookie.chars_to_skip:
raise oefmt(space.w_IOError,
"can't restore logical file position")
- self.decoded_chars_used = cookie.chars_to_skip
+ self.decoded.pos = cookie.chars_to_skip
else:
self.snapshot = PositionSnapshot(cookie.dec_flags, "")
@@ -933,7 +937,7 @@
w_pos = space.call_method(self.w_buffer, "tell")
if self.w_decoder is None or self.snapshot is None:
- assert not self.decoded_chars
+ assert not self.decoded.text
return w_pos
cookie = PositionCookie(space.bigint_w(w_pos))
@@ -944,11 +948,11 @@
cookie.start_pos -= len(input)
# How many decoded characters have been used up since the snapshot?
- if not self.decoded_chars_used:
+ if not self.decoded.pos:
# We haven't moved from the snapshot point.
return space.newlong_from_rbigint(cookie.pack())
- chars_to_skip = self.decoded_chars_used
+ chars_to_skip = self.decoded.pos
# Starting from the snapshot position, we will walk the decoder
# forward until it gives us enough decoded characters.
More information about the pypy-commit
mailing list