[pypy-commit] pypy default: universal newline support for rfile read
bdkearns
noreply at buildbot.pypy.org
Tue Sep 9 01:37:24 CEST 2014
Author: Brian Kearns <bdkearns at gmail.com>
Branch:
Changeset: r73385:4d7e9cbb05f7
Date: 2014-09-08 18:12 -0400
http://bitbucket.org/pypy/pypy/changeset/4d7e9cbb05f7/
Log: universal newline support for rfile read
diff --git a/rpython/rlib/rfile.py b/rpython/rlib/rfile.py
--- a/rpython/rlib/rfile.py
+++ b/rpython/rlib/rfile.py
@@ -47,6 +47,11 @@
BASE_BUF_SIZE = 4096
BASE_LINE_SIZE = 100
+NEWLINE_UNKNOWN = 0
+NEWLINE_CR = 1
+NEWLINE_LF = 2
+NEWLINE_CRLF = 4
+
def llexternal(*args, **kwargs):
return rffi.llexternal(*args, compilation_info=eci, **kwargs)
@@ -128,10 +133,10 @@
def create_file(filename, mode="r", buffering=-1):
- mode = _sanitize_mode(mode)
+ newmode = _sanitize_mode(mode)
ll_name = rffi.str2charp(filename)
try:
- ll_mode = rffi.str2charp(mode)
+ ll_mode = rffi.str2charp(newmode)
try:
ll_file = c_fopen(ll_name, ll_mode)
if not ll_file:
@@ -150,14 +155,14 @@
c_setvbuf(ll_file, buf, _IOLBF, BUFSIZ)
else:
c_setvbuf(ll_file, buf, _IOFBF, buffering)
- return RFile(ll_file)
+ return RFile(ll_file, mode)
def create_fdopen_rfile(fd, mode="r"):
- mode = _sanitize_mode(mode)
+ newmode = _sanitize_mode(mode)
fd = rffi.cast(rffi.INT, fd)
rposix.validate_fd(fd)
- ll_mode = rffi.str2charp(mode)
+ ll_mode = rffi.str2charp(newmode)
try:
ll_file = c_fdopen(fd, ll_mode)
if not ll_file:
@@ -166,7 +171,7 @@
finally:
lltype.free(ll_mode, flavor='raw')
_dircheck(ll_file)
- return RFile(ll_file)
+ return RFile(ll_file, mode)
def create_temp_rfile():
@@ -194,8 +199,14 @@
class RFile(object):
- def __init__(self, ll_file, close2=_fclose2):
+ _univ_newline = False
+ _newlinetypes = NEWLINE_UNKNOWN
+ _skipnextlf = False
+
+ def __init__(self, ll_file, mode=None, close2=_fclose2):
self._ll_file = ll_file
+ if mode is not None:
+ self._univ_newline = 'U' in mode
self._close2 = close2
def __del__(self):
@@ -232,6 +243,52 @@
if not self._ll_file:
raise ValueError("I/O operation on closed file")
+ def _fread(self, buf, n, stream):
+ if not self._univ_newline:
+ return c_fread(buf, 1, n, stream)
+
+ i = 0
+ dst = buf
+ newlinetypes = self._newlinetypes
+ skipnextlf = self._skipnextlf
+ while n:
+ nread = c_fread(dst, 1, n, stream)
+ if nread == 0:
+ break
+
+ src = dst
+ n -= nread
+ shortread = n != 0
+ while nread:
+ nread -= 1
+ c = src[0]
+ src = rffi.ptradd(src, 1)
+ if c == '\r':
+ dst[0] = '\n'
+ dst = rffi.ptradd(dst, 1)
+ i += 1
+ skipnextlf = True
+ elif skipnextlf and c == '\n':
+ skipnextlf = False
+ newlinetypes |= NEWLINE_CRLF
+ n += 1
+ else:
+ if c == '\n':
+ newlinetypes |= NEWLINE_LF
+ elif skipnextlf:
+ newlinetypes |= NEWLINE_CR
+ dst[0] = c
+ dst = rffi.ptradd(dst, 1)
+ i += 1
+ skipnextlf = False
+ if shortread:
+ if skipnextlf and c_feof(stream):
+ newlinetypes |= NEWLINE_CR
+ break
+ self._newlinetypes = newlinetypes
+ self._skipnextlf = skipnextlf
+ return i
+
def read(self, size=-1):
# XXX CPython uses a more delicate logic here
self._check_closed()
@@ -244,7 +301,7 @@
try:
s = StringBuilder()
while True:
- returned_size = c_fread(buf, 1, BASE_BUF_SIZE, ll_file)
+ returned_size = self._fread(buf, BASE_BUF_SIZE, ll_file)
returned_size = intmask(returned_size) # is between 0 and BASE_BUF_SIZE
if returned_size == 0:
if c_feof(ll_file):
@@ -256,7 +313,7 @@
lltype.free(buf, flavor='raw')
else: # size > 0
with rffi.scoped_alloc_buffer(size) as buf:
- returned_size = c_fread(buf.raw, 1, size, ll_file)
+ returned_size = self._fread(buf.raw, size, ll_file)
returned_size = intmask(returned_size) # is between 0 and size
if returned_size == 0:
if not c_feof(ll_file):
diff --git a/rpython/rlib/test/test_rfile.py b/rpython/rlib/test/test_rfile.py
--- a/rpython/rlib/test/test_rfile.py
+++ b/rpython/rlib/test/test_rfile.py
@@ -189,6 +189,23 @@
f()
self.interpret(f, [])
+ def test_read_universal(self):
+ fname = self.tmpdir.join('read_univ')
+ fname.write("dupa\ndupb\r\ndupc")
+ fname = str(fname)
+
+ def f():
+ f = open(fname, 'U')
+ assert f.read() == "dupa\ndupb\ndupc"
+ f.close()
+ f = open(fname, 'U')
+ assert f.read(9) == "dupa\ndupb"
+ assert f.read(42) == "\ndupc"
+ f.close()
+
+ f()
+ self.interpret(f, [])
+
def test_seek(self):
fname = str(self.tmpdir.join('file_4'))
More information about the pypy-commit mailing list