[pypy-commit] pypy unicode-utf8: test, fix for StringIO(unicode).read(cnt)
mattip
pypy.commits at gmail.com
Wed Jan 16 17:40:22 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8
Changeset: r95655:6185982e509f
Date: 2019-01-16 23:42 +0200
http://bitbucket.org/pypy/pypy/changeset/6185982e509f/
Log: test, fix for StringIO(unicode).read(cnt)
diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -1,4 +1,4 @@
-from rpython.rlib.rutf8 import get_utf8_length
+from rpython.rlib.rutf8 import get_utf8_length, next_codepoint_pos
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.typedef import (
@@ -11,8 +11,16 @@
class UnicodeIO(object):
def __init__(self, data=None, pos=0):
if data is None:
- data = []
- self.data = data
+ data = ''
+ self.data = []
+ self.pos = 0
+ # break the data into unicode codepoints
+ _pos = 0
+ while _pos < pos:
+ _pos = next_codepoint_pos(data, _pos)
+ if _pos >= len(data):
+ break
+ self.write(data[_pos:])
self.pos = pos
def resize(self, newlength):
@@ -85,12 +93,14 @@
return result
def write(self, string):
- length = len(string)
+ length = get_utf8_length(string)
if self.pos + length > len(self.data):
self.resize(self.pos + length)
-
+ pos = 0
for i in range(length):
- self.data[self.pos + i] = string[i]
+ nextpos = next_codepoint_pos(string, pos)
+ self.data[self.pos + i] = string[pos:nextpos]
+ pos = nextpos
self.pos += length
def seek(self, pos):
@@ -186,7 +196,7 @@
if pos < 0:
raise oefmt(space.w_ValueError,
"position value cannot be negative")
- self.buf = UnicodeIO(list(initval), pos)
+ self.buf = UnicodeIO(initval, pos)
if not space.is_w(w_dict, space.w_None):
if not space.isinstance_w(w_dict, space.w_dict):
raise oefmt(
diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py
--- a/pypy/module/_io/test/test_stringio.py
+++ b/pypy/module/_io/test/test_stringio.py
@@ -42,6 +42,17 @@
assert buf[5:] == sio.read(900)
assert u"" == sio.read()
+ def test_read_binary(self):
+ # data is from a test_imghdr test for a GIF file
+ import io
+ buf_in = (u'\x47\x49\x46\x38\x39\x61\x10\x00\x10\x00\xf6\x64\x00\xeb'
+ u'\xbb\x18\xeb\xbe\x21\xf3\xc1\x1a\xfa\xc7\x19\xfd\xcb\x1b'
+ u'\xff\xcc\x1c\xeb')
+ assert len(buf_in) == 32
+ sio = io.StringIO(buf_in)
+ buf_out = sio.read(32)
+ assert buf_in == buf_out
+
def test_readline(self):
import io
sio = io.StringIO(u'123\n456')
More information about the pypy-commit mailing list