[pypy-commit] pypy unicode-utf8: whack at _io module

fijal pypy.commits at gmail.com
Fri Dec 8 06:11:02 EST 2017


Author: fijal
Branch: unicode-utf8
Changeset: r93308:7ffcfc6493e6
Date: 2017-12-08 10:38 +0200
http://bitbucket.org/pypy/pypy/changeset/7ffcfc6493e6/

Log:	whack at _io module

diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -1,3 +1,5 @@
+from rpython.rlib.rutf8 import get_utf8_length
+
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.typedef import (
     TypeDef, generic_new_descr, GetSetProperty)
@@ -152,7 +154,7 @@
         if self.readnl is None:
             w_readnl = space.w_None
         else:
-            w_readnl = space.str(space.new_from_utf8(self.readnl))  # YYY
+            w_readnl = space.str(space.newutf8(self.readnl, get_utf8_length(self.readnl)))  # YYY
         return space.newtuple([
             w_initialval, w_readnl, space.newint(self.buf.pos), w_dict
         ])
@@ -215,7 +217,8 @@
         if self.writenl:
             w_decoded = space.call_method(
                 w_decoded, "replace",
-                space.newtext("\n"), space.new_from_utf8(self.writenl))
+                space.newtext("\n"), space.newutf8(self.writenl,
+                    get_utf8_length(self.writenl)))
         string = space.utf8_w(w_decoded)
         if string:
             self.buf.write(string)
@@ -225,7 +228,9 @@
     def read_w(self, space, w_size=None):
         self._check_closed(space)
         size = convert_size(space, w_size)
-        return space.new_from_utf8(self.buf.read(size))
+        v = self.buf.read(size)
+        lgt = get_utf8_length(v)
+        return space.newutf8(v, lgt)
 
     def readline_w(self, space, w_limit=None):
         self._check_closed(space)
@@ -239,7 +244,8 @@
             else:
                 newline = self.readnl
             result = self.buf.readline(newline, limit)
-        return space.new_from_utf8(result)
+        resultlen = get_utf8_length(result)
+        return space.newutf8(result, resultlen)
 
 
     @unwrap_spec(pos=int, mode=int)
@@ -276,7 +282,9 @@
 
     def getvalue_w(self, space):
         self._check_closed(space)
-        return space.new_from_utf8(self.buf.getvalue())
+        v = self.buf.getvalue()
+        lgt = get_utf8_length(v)
+        return space.newutf8(v, lgt)
 
     def readable_w(self, space):
         self._check_closed(space)
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -12,7 +12,8 @@
 from rpython.rlib.rbigint import rbigint
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos,
-                                codepoints_in_utf8)
+                                codepoints_in_utf8, get_utf8_length,
+                                Utf8StringBuilder)
 
 
 STATE_ZERO, STATE_OK, STATE_DETACHED = range(3)
@@ -684,13 +685,15 @@
             w_bytes = space.call_method(self.w_buffer, "read")
             w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True)
             check_decoded(space, w_decoded)
-            w_result = space.new_from_utf8(self.decoded.get_chars(-1))
+            chars = self.decoded.get_chars(-1)
+            lgt = get_utf8_length(chars)
+            w_result = space.newutf8(chars, lgt)
             w_final = space.add(w_result, w_decoded)
             self.snapshot = None
             return w_final
 
         remaining = size
-        builder = StringBuilder(size)
+        builder = Utf8StringBuilder(size)
 
         # Keep reading chunks until we have n characters to return
         while remaining > 0:
@@ -700,7 +703,7 @@
             builder.append(data)
             remaining -= len(data)
 
-        return space.new_from_utf8(builder.build())
+        return space.newutf8(builder.build(), builder.get_length())
 
     def _scan_line_ending(self, limit):
         if self.readuniversal:
@@ -725,6 +728,7 @@
         limit = convert_size(space, w_limit)
         remnant = None
         builder = StringBuilder()
+        # XXX maybe use Utf8StringBuilder instead?
         while True:
             # First, get some data if necessary
             has_data = self._ensure_data(space)
@@ -771,7 +775,8 @@
             self.decoded.reset()
 
         result = builder.build()
-        return space.new_from_utf8(result)
+        lgt = get_utf8_length(result)
+        return space.newutf8(result, lgt)
 
     # _____________________________________________________________
     # write methods
@@ -794,8 +799,8 @@
             if text.find('\n') >= 0:
                 haslf = True
         if haslf and self.writetranslate and self.writenl:
-            w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'),
-                                       space.new_from_utf8(self.writenl))
+            w_text = space.call_method(w_text, "replace", space.newutf8('\n', 1),
+                                       space.newutf8(self.writenl, get_utf8_length(self.writenl)))
             text = space.utf8_w(w_text)
 
         needflush = False
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -212,9 +212,6 @@
     def newutf8(self, x, l):
         return w_some_obj()
 
-    def new_from_utf8(self, a):
-        return w_some_obj()
-
     def newunicode(self, a):
         return w_some_obj()
 


More information about the pypy-commit mailing list