[pypy-commit] pypy default: Extract DecodeBuffer object from W_TextIOWrapper

rlamy pypy.commits at gmail.com
Fri Nov 24 21:31:03 EST 2017


Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: 
Changeset: r93172:e1dbf4f46c45
Date: 2017-11-25 01:15 +0000
http://bitbucket.org/pypy/pypy/changeset/e1dbf4f46c45/

Log:	Extract DecodeBuffer object from W_TextIOWrapper

diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -333,6 +333,45 @@
         self.input = input
 
 
+class DecodeBuffer(object):
+    def __init__(self):
+        self.text = None
+        self.pos = 0
+
+    def set(self, space, w_decoded):
+        check_decoded(space, w_decoded)
+        self.text = space.unicode_w(w_decoded)
+        self.pos = 0
+
+    def reset(self):
+        self.text = None
+        self.pos = 0
+
+    def get_chars(self, size):
+        if self.text is None:
+            return u""
+
+        available = len(self.text) - self.pos
+        if size < 0 or size > available:
+            size = available
+        assert size >= 0
+
+        if self.pos > 0 or size < available:
+            start = self.pos
+            end = self.pos + size
+            assert start >= 0
+            assert end >= 0
+            chars = self.text[start:end]
+        else:
+            chars = self.text
+
+        self.pos += size
+        return chars
+
+    def has_data(self):
+        return (self.text is not None and self.pos < len(self.text))
+
+
 def check_decoded(space, w_decoded):
     if not space.isinstance_w(w_decoded, space.w_unicode):
         msg = "decoder should return a string result, not '%T'"
@@ -346,8 +385,7 @@
         self.w_encoder = None
         self.w_decoder = None
 
-        self.decoded_chars = None   # buffer for text returned from decoder
-        self.decoded_chars_used = 0 # offset into _decoded_chars for read()
+        self.decoded = DecodeBuffer()
         self.pending_bytes = None   # list of bytes objects waiting to be
                                     # written, or NULL
         self.chunk_size = 8192
@@ -515,44 +553,10 @@
     # _____________________________________________________________
     # read methods
 
-    def _unset_decoded(self):
-        self.decoded_chars = None
-        self.decoded_chars_used = 0
-
-    def _set_decoded(self, space, w_decoded):
-        check_decoded(space, w_decoded)
-        self.decoded_chars = space.unicode_w(w_decoded)
-        self.decoded_chars_used = 0
-
-    def _get_decoded_chars(self, size):
-        if self.decoded_chars is None:
-            return u""
-
-        available = len(self.decoded_chars) - self.decoded_chars_used
-        if size < 0 or size > available:
-            size = available
-        assert size >= 0
-
-        if self.decoded_chars_used > 0 or size < available:
-            start = self.decoded_chars_used
-            end = self.decoded_chars_used + size
-            assert start >= 0
-            assert end >= 0
-            chars = self.decoded_chars[start:end]
-        else:
-            chars = self.decoded_chars
-
-        self.decoded_chars_used += size
-        return chars
-
-    def _has_data(self):
-        return (self.decoded_chars is not None and
-            self.decoded_chars_used < len(self.decoded_chars))
-
     def _read_chunk(self, space):
         """Read and decode the next chunk of data from the BufferedReader.
         The return value is True unless EOF was reached.  The decoded string
-        is placed in self._decoded_chars (replacing its previous value).
+        is placed in self.decoded (replacing its previous value).
         The entire input chunk is sent to the decoder, though some of it may
         remain buffered in the decoder, yet to be converted."""
 
@@ -572,7 +576,7 @@
             dec_buffer = None
             dec_flags = 0
 
-        # Read a chunk, decode it, and put the result in self._decoded_chars
+        # Read a chunk, decode it, and put the result in self.decoded
         w_input = space.call_method(self.w_buffer, "read1",
                                     space.newint(self.chunk_size))
 
@@ -584,7 +588,7 @@
         eof = space.len_w(w_input) == 0
         w_decoded = space.call_method(self.w_decoder, "decode",
                                       w_input, space.newbool(eof))
-        self._set_decoded(space, w_decoded)
+        self.decoded.set(space, w_decoded)
         if space.len_w(w_decoded) > 0:
             eof = False
 
@@ -597,10 +601,10 @@
         return not eof
 
     def _ensure_data(self, space):
-        while not self._has_data():
+        while not self.decoded.has_data():
             try:
                 if not self._read_chunk(space):
-                    self._unset_decoded()
+                    self.decoded.reset()
                     self.snapshot = None
                     return False
             except OperationError as e:
@@ -633,7 +637,7 @@
             w_bytes = space.call_method(self.w_buffer, "read")
             w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True)
             check_decoded(space, w_decoded)
-            w_result = space.newunicode(self._get_decoded_chars(-1))
+            w_result = space.newunicode(self.decoded.get_chars(-1))
             w_final = space.add(w_result, w_decoded)
             self.snapshot = None
             return w_final
@@ -645,7 +649,7 @@
         while remaining > 0:
             if not self._ensure_data(space):
                 break
-            data = self._get_decoded_chars(remaining)
+            data = self.decoded.get_chars(remaining)
             builder.append(data)
             remaining -= len(data)
 
@@ -672,10 +676,10 @@
 
             if remnant:
                 assert not self.readtranslate and self.readnl == u'\r\n'
-                assert self.decoded_chars_used == 0
-                if remnant == u'\r' and self.decoded_chars[0] == u'\n':
+                assert self.decoded.pos == 0
+                if remnant == u'\r' and self.decoded.text[0] == u'\n':
                     builder.append(u'\r\n')
-                    self.decoded_chars_used = 1
+                    self.decoded.pos = 1
                     line = remnant = None
                     start = end_scan = 0
                     break
@@ -684,8 +688,8 @@
                     remnant = None
                     continue
 
-            line = self.decoded_chars
-            start = self.decoded_chars_used
+            line = self.decoded.text
+            start = self.decoded.pos
             if limit > 0:
                 remaining = limit - builder.getlength()
                 assert remaining >= 0
@@ -711,11 +715,11 @@
                 remnant = line[end_scan:]
             line = None
             # We have consumed the buffer
-            self._unset_decoded()
+            self.decoded.reset()
 
         if line:
             # Our line ends in the current buffer
-            self.decoded_chars_used = end_scan
+            self.decoded.pos = end_scan
             if start > 0 or end_scan < len(line):
                 line = line[start:end_scan]
             builder.append(line)
@@ -855,7 +859,7 @@
                 raise oefmt(space.w_IOError,
                             "can't do nonzero end-relative seeks")
             space.call_method(self, "flush")
-            self._unset_decoded()
+            self.decoded.reset()
             self.snapshot = None
             if self.w_decoder:
                 space.call_method(self.w_decoder, "reset")
@@ -880,7 +884,7 @@
         # Seek back to the safe start point
         space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos))
 
-        self._unset_decoded()
+        self.decoded.reset()
         self.snapshot = None
 
         # Restore the decoder to its state from the safe start point.
@@ -901,13 +905,13 @@
 
             w_decoded = space.call_method(self.w_decoder, "decode",
                                           w_chunk, space.newbool(bool(cookie.need_eof)))
-            self._set_decoded(space, w_decoded)
+            self.decoded.set(space, w_decoded)
 
             # Skip chars_to_skip of the decoded characters
-            if len(self.decoded_chars) < cookie.chars_to_skip:
+            if len(self.decoded.text) < cookie.chars_to_skip:
                 raise oefmt(space.w_IOError,
                             "can't restore logical file position")
-            self.decoded_chars_used = cookie.chars_to_skip
+            self.decoded.pos = cookie.chars_to_skip
         else:
             self.snapshot = PositionSnapshot(cookie.dec_flags, "")
 
@@ -933,7 +937,7 @@
         w_pos = space.call_method(self.w_buffer, "tell")
 
         if self.w_decoder is None or self.snapshot is None:
-            assert not self.decoded_chars
+            assert not self.decoded.text
             return w_pos
 
         cookie = PositionCookie(space.bigint_w(w_pos))
@@ -944,11 +948,11 @@
         cookie.start_pos -= len(input)
 
         # How many decoded characters have been used up since the snapshot?
-        if not self.decoded_chars_used:
+        if not self.decoded.pos:
             # We haven't moved from the snapshot point.
             return space.newlong_from_rbigint(cookie.pack())
 
-        chars_to_skip = self.decoded_chars_used
+        chars_to_skip = self.decoded.pos
 
         # Starting from the snapshot position, we will walk the decoder
         # forward until it gives us enough decoded characters.


More information about the pypy-commit mailing list