[pypy-commit] pypy unicode-utf8: whack at _io module
fijal
pypy.commits at gmail.com
Fri Dec 8 06:11:02 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r93308:7ffcfc6493e6
Date: 2017-12-08 10:38 +0200
http://bitbucket.org/pypy/pypy/changeset/7ffcfc6493e6/
Log: whack at _io module
diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -1,3 +1,5 @@
+from rpython.rlib.rutf8 import get_utf8_length
+
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.typedef import (
TypeDef, generic_new_descr, GetSetProperty)
@@ -152,7 +154,7 @@
if self.readnl is None:
w_readnl = space.w_None
else:
- w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY
+ w_readnl = space.str(space.newutf8(self.readnl, get_utf8_length(self.readnl))) # YYY
return space.newtuple([
w_initialval, w_readnl, space.newint(self.buf.pos), w_dict
])
@@ -215,7 +217,8 @@
if self.writenl:
w_decoded = space.call_method(
w_decoded, "replace",
- space.newtext("\n"), space.new_from_utf8(self.writenl))
+ space.newtext("\n"), space.newutf8(self.writenl,
+ get_utf8_length(self.writenl)))
string = space.utf8_w(w_decoded)
if string:
self.buf.write(string)
@@ -225,7 +228,9 @@
def read_w(self, space, w_size=None):
self._check_closed(space)
size = convert_size(space, w_size)
- return space.new_from_utf8(self.buf.read(size))
+ v = self.buf.read(size)
+ lgt = get_utf8_length(v)
+ return space.newutf8(v, lgt)
def readline_w(self, space, w_limit=None):
self._check_closed(space)
@@ -239,7 +244,8 @@
else:
newline = self.readnl
result = self.buf.readline(newline, limit)
- return space.new_from_utf8(result)
+ resultlen = get_utf8_length(result)
+ return space.newutf8(result, resultlen)
@unwrap_spec(pos=int, mode=int)
@@ -276,7 +282,9 @@
def getvalue_w(self, space):
self._check_closed(space)
- return space.new_from_utf8(self.buf.getvalue())
+ v = self.buf.getvalue()
+ lgt = get_utf8_length(v)
+ return space.newutf8(v, lgt)
def readable_w(self, space):
self._check_closed(space)
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -12,7 +12,8 @@
from rpython.rlib.rbigint import rbigint
from rpython.rlib.rstring import StringBuilder
from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos,
- codepoints_in_utf8)
+ codepoints_in_utf8, get_utf8_length,
+ Utf8StringBuilder)
STATE_ZERO, STATE_OK, STATE_DETACHED = range(3)
@@ -684,13 +685,15 @@
w_bytes = space.call_method(self.w_buffer, "read")
w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True)
check_decoded(space, w_decoded)
- w_result = space.new_from_utf8(self.decoded.get_chars(-1))
+ chars = self.decoded.get_chars(-1)
+ lgt = get_utf8_length(chars)
+ w_result = space.newutf8(chars, lgt)
w_final = space.add(w_result, w_decoded)
self.snapshot = None
return w_final
remaining = size
- builder = StringBuilder(size)
+ builder = Utf8StringBuilder(size)
# Keep reading chunks until we have n characters to return
while remaining > 0:
@@ -700,7 +703,7 @@
builder.append(data)
remaining -= len(data)
- return space.new_from_utf8(builder.build())
+ return space.newutf8(builder.build(), builder.get_length())
def _scan_line_ending(self, limit):
if self.readuniversal:
@@ -725,6 +728,7 @@
limit = convert_size(space, w_limit)
remnant = None
builder = StringBuilder()
+ # XXX maybe use Utf8StringBuilder instead?
while True:
# First, get some data if necessary
has_data = self._ensure_data(space)
@@ -771,7 +775,8 @@
self.decoded.reset()
result = builder.build()
- return space.new_from_utf8(result)
+ lgt = get_utf8_length(result)
+ return space.newutf8(result, lgt)
# _____________________________________________________________
# write methods
@@ -794,8 +799,8 @@
if text.find('\n') >= 0:
haslf = True
if haslf and self.writetranslate and self.writenl:
- w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'),
- space.new_from_utf8(self.writenl))
+ w_text = space.call_method(w_text, "replace", space.newutf8('\n', 1),
+ space.newutf8(self.writenl, get_utf8_length(self.writenl)))
text = space.utf8_w(w_text)
needflush = False
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -212,9 +212,6 @@
def newutf8(self, x, l):
return w_some_obj()
- def new_from_utf8(self, a):
- return w_some_obj()
-
def newunicode(self, a):
return w_some_obj()
More information about the pypy-commit mailing list