[pypy-svn] r79432 - in pypy/branch/fast-forward/pypy/module/_io: . test
afa at codespeak.net
afa at codespeak.net
Tue Nov 23 19:57:32 CET 2010
Author: afa
Date: Tue Nov 23 19:57:30 2010
New Revision: 79432
Modified:
pypy/branch/fast-forward/pypy/module/_io/interp_textio.py
pypy/branch/fast-forward/pypy/module/_io/test/test_textio.py
Log:
Implement the most complex tell() function in the world.
Modified: pypy/branch/fast-forward/pypy/module/_io/interp_textio.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/_io/interp_textio.py (original)
+++ pypy/branch/fast-forward/pypy/module/_io/interp_textio.py Tue Nov 23 19:57:30 2010
@@ -230,7 +230,30 @@
)
class PositionCookie:
- pass
+ def __init__(self, bigint):
+ self.start_pos = bigint.ulonglongmask()
+ bigint = bigint.rshift(r_ulonglong.BITS)
+ self.dec_flags = 0
+ self.bytes_to_feed = 0
+ self.chars_to_skip = 0
+ self.need_eof = 0
+
+ def pack(self):
+ # The meaning of a tell() cookie is: seek to position, set the
+ # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
+ # into the decoder with need_eof as the EOF flag, then skip
+ # chars_to_skip characters of the decoded result. For most simple
+ # decoders, tell() will often just give a byte offset in the file.
+ return (self.start_pos |
+ (self.dec_flags<<64) |
+ (self.bytes_to_feed<<128) |
+ (self.chars_to_skip<<192) |
+ bool(self.need_eof)<<256)
+
+class PositionSnapshot:
+ def __init__(self, flags, input):
+ self.flags = flags
+ self.input = input
class W_TextIOWrapper(W_TextIOBase):
def __init__(self, space):
@@ -251,6 +274,7 @@
self.encodefunc = None # Specialized encoding func (see below)
self.encoding_start_of_stream = False # Whether or not it's the start
# of the stream
+ self.snapshot = None
@unwrap_spec('self', ObjSpace, W_Root, W_Root, W_Root, W_Root, int)
def descr_init(self, space, w_buffer, w_encoding=None,
@@ -418,8 +442,18 @@
if not self.w_decoder:
raise OperationError(space.w_IOError, space.wrap("not readable"))
- # XXX
- # if self.telling...
+ if self.telling:
+ # To prepare for tell(), we need to snapshot a point in the file
+ # where the decoder's input buffer is empty.
+ w_state = space.call_method(self.w_decoder, "getstate")
+ # Given this, we know there was a valid snapshot point
+ # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
+ w_dec_buffer, w_dec_flags = space.unpackiterable(w_state, 2)
+ dec_buffer = space.str_w(w_dec_buffer)
+ dec_flags = space.int_w(w_dec_flags)
+ else:
+ dec_buffer = None
+ dec_flags = 0
# Read a chunk, decode it, and put the result in self._decoded_chars
w_input = space.call_method(self.w_buffer, "read1",
@@ -431,8 +465,11 @@
if space.int_w(space.len(w_decoded)) > 0:
eof = False
- # XXX
- # if self.telling...
+ if self.telling:
+ # At the snapshot point, len(dec_buffer) bytes before the read,
+ # the next input to be decoded is dec_buffer + input_chunk.
+ next_input = dec_buffer + space.str_w(w_input)
+ self.snapshot = PositionSnapshot(dec_flags, next_input)
return not eof
@@ -675,26 +712,6 @@
# _____________________________________________________________
# seek/tell
- def _pack_cookie(self, start_pos, dec_flags=0,
- bytes_to_feed=0, need_eof=0, chars_to_skip=0):
- # The meaning of a tell() cookie is: seek to position, set the
- # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
- # into the decoder with need_eof as the EOF flag, then skip
- # chars_to_skip characters of the decoded result. For most simple
- # decoders, tell() will often just give a byte offset in the file.
- return (start_pos | (dec_flags<<64) | (bytes_to_feed<<128) |
- (chars_to_skip<<192) | bool(need_eof)<<256)
-
- def _unpack_cookie(self, bigint):
- cookie = PositionCookie()
- cookie.start_pos = bigint.ulonglongmask()
- bigint = bigint.rshift(r_ulonglong.BITS)
- cookie.dec_flags = 0
- cookie.bytes_to_feed = 0
- cookie.chars_to_skip = 0
- cookie.need_eof = 0
- return cookie
-
def _decoder_setstate(self, space, cookie):
# When seeking to the start of the stream, we call decoder.reset()
# rather than decoder.getstate().
@@ -760,7 +777,7 @@
# The strategy of seek() is to go back to the safe start point and
# replay the effect of read(chars_to_skip) from there.
- cookie = self._unpack_cookie(space.bigint_w(w_pos))
+ cookie = PositionCookie(space.bigint_w(w_pos))
# Seek back to the safe start point
space.call_method(self.w_buffer, "seek", space.wrap(cookie.start_pos))
@@ -775,8 +792,9 @@
if cookie.chars_to_skip:
# Just like _read_chunk, feed the decoder and save a snapshot.
w_chunk = space.call_method(self.w_buffer, "read",
- space.wrap(cookie.chars_to_feed))
- # XXX self.snapshot = cookie.dec_flags, w_chunk
+ space.wrap(cookie.bytes_to_feed))
+ self.snapshot = PositionSnapshot(cookie.dec_flags,
+ space.str_w(w_chunk))
w_decoded = space.call_method(self.w_decoder, "decode",
w_chunk, space.wrap(cookie.need_eof))
@@ -788,8 +806,7 @@
"can't restore logical file position"))
self.decoded_chars_used = cookie.chars_to_skip
else:
- # XXX self.snapshot = cookie.dec_flags, space.wrap(u"")
- pass
+ self.snapshot = PositionSnapshot(cookie.dec_flags, "")
# Finally, reset the encoder (merely useful for proper BOM handling)
if self.w_encoder:
@@ -797,6 +814,95 @@
return w_pos
+ @unwrap_spec('self', ObjSpace)
+ def tell_w(self, space):
+ self._check_closed(space)
+
+ if not self.seekable:
+ raise OperationError(space.w_IOError, space.wrap(
+ "underlying stream is not seekable"))
+
+ if not self.telling:
+ raise OperationError(space.w_IOError, space.wrap(
+ "telling position disabled by next() call"))
+
+ self._writeflush(space)
+ space.call_method(self, "flush")
+
+ w_pos = space.call_method(self.w_buffer, "tell")
+
+ if self.w_decoder is None or self.snapshot is None:
+ assert not self.decoded_chars
+ return w_pos
+
+ cookie = PositionCookie(space.bigint_w(w_pos))
+
+ # Skip backward to the snapshot point (see _read_chunk)
+ cookie.dec_flags = self.snapshot.flags
+ input = self.snapshot.input
+ cookie.start_pos -= len(input)
+
+ # How many decoded characters have been used up since the snapshot?
+ if not self.decoded_chars_used:
+ # We haven't moved from the snapshot point.
+ return space.wrap(cookie.pack())
+
+ chars_to_skip = self.decoded_chars_used
+
+ # Starting from the snapshot position, we will walk the decoder
+ # forward until it gives us enough decoded characters.
+ w_saved_state = space.call_method(self.w_decoder, "getstate")
+
+ try:
+ # Note our initial start point
+ self._decoder_setstate(space, cookie)
+
+ # Feed the decoder one byte at a time. As we go, note the nearest
+ # "safe start point" before the current location (a point where
+ # the decoder has nothing buffered, so seek() can safely start
+ # from there and advance to this location).
+
+ chars_decoded = 0
+ i = 0
+ while i < len(input):
+ w_decoded = space.call_method(self.w_decoder, "decode",
+ space.wrap(input[i]))
+ chars_decoded += len(space.unicode_w(w_decoded))
+
+ cookie.bytes_to_feed += 1
+
+ w_state = space.call_method(self.w_decoder, "getstate")
+ w_dec_buffer, w_flags = space.unpackiterable(w_state, 2)
+ dec_buffer_len = len(space.str_w(w_dec_buffer))
+
+ if dec_buffer_len == 0 and chars_decoded <= chars_to_skip:
+ # Decoder buffer is empty, so this is a safe start point.
+ cookie.start_pos += cookie.bytes_to_feed
+ chars_to_skip -= chars_decoded
+ assert chars_to_skip >= 0
+ cookie.dec_flags = space.int_w(w_flags)
+ cookie.bytes_to_feed = 0
+ chars_decoded = 0
+ if chars_decoded >= chars_to_skip:
+ break
+ i += 1
+ else:
+ # We didn't get enough decoded data; signal EOF to get more.
+ w_decoded = space.call_method(self.w_decoder, "decode",
+ space.wrap(""),
+ space.wrap(1)) # final=1
+ chars_decoded += len(space.unicode_w(w_decoded))
+ cookie.need_eof = 1
+
+ if chars_decoded < chars_to_skip:
+ raise OperationError(space.w_IOError, space.wrap(
+ "can't reconstruct logical file position"))
+ finally:
+ space.call_method(self.w_decoder, "setstate", w_saved_state)
+
+ # The returned cookie corresponds to the last safe start point.
+ cookie.chars_to_skip = chars_to_skip
+ return space.wrap(cookie.pack())
W_TextIOWrapper.typedef = TypeDef(
'TextIOWrapper', W_TextIOBase.typedef,
@@ -807,6 +913,7 @@
readline = interp2app(W_TextIOWrapper.readline_w),
write = interp2app(W_TextIOWrapper.write_w),
seek = interp2app(W_TextIOWrapper.seek_w),
+ tell = interp2app(W_TextIOWrapper.tell_w),
detach = interp2app(W_TextIOWrapper.detach_w),
flush = interp2app(W_TextIOWrapper.flush_w),
close = interp2app(W_TextIOWrapper.close_w),
Modified: pypy/branch/fast-forward/pypy/module/_io/test/test_textio.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/_io/test/test_textio.py (original)
+++ pypy/branch/fast-forward/pypy/module/_io/test/test_textio.py Tue Nov 23 19:57:30 2010
@@ -106,6 +106,14 @@
assert f.read() == data * 2
assert buf.getvalue() == (data * 2).encode(encoding)
+ def test_tell(self):
+ import _io
+ r = _io.BytesIO("abc\ndef\n")
+ t = _io.TextIOWrapper(r)
+ assert t.tell() == 0
+ t.read(4)
+ assert t.tell() == 4
+
def test_destructor(self):
import _io
l = []
More information about the Pypy-commit
mailing list