[pypy-commit] pypy utf8-unicode2: Fix _io

Sat Jul 19 14:42:01 CEST 2014

Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r72470:ed2146bad83c
Date: 2014-07-17 23:18 -0500
http://bitbucket.org/pypy/pypy/changeset/ed2146bad83c/

Log:	Fix _io

diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -1,6 +1,7 @@
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.typedef import (
     TypeDef, generic_new_descr, GetSetProperty)
+from pypy.interpreter.utf8 import Utf8Str, utf8ord
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
 from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder
 from pypy.module._io.interp_iobase import convert_size
@@ -26,8 +27,8 @@
         else:
             newline = space.unicode_w(w_newline)
 
-        if (newline is not None and newline != u"" and newline != u"\n" and
-            newline != u"\r" and newline != u"\r\n"):
+        if (newline is not None and len(newline) != 0 and
+            newline not in (Utf8Str('\n'), Utf8Str('\r\n'), Utf8Str('\r'))):
             # Not using oefmt() because I don't know how to ues it
             # with unicode
             raise OperationError(space.w_ValueError,
@@ -37,9 +38,9 @@
             )
         if newline is not None:
             self.readnl = newline
-        self.readuniversal = newline is None or newline == u""
+        self.readuniversal = newline is None or len(newline) == 0
         self.readtranslate = newline is None
-        if newline and newline[0] == u"\r":
+        if newline and utf8ord(newline) == ord("\r"):
             self.writenl = newline
         if self.readuniversal:
             self.w_decoder = space.call_function(
@@ -112,7 +113,7 @@
         if len(self.buf) > newlength:
             self.buf = self.buf[:newlength]
         if len(self.buf) < newlength:
-            self.buf.extend([u'\0'] * (newlength - len(self.buf)))
+            self.buf.extend([Utf8Str('\0')] * (newlength - len(self.buf)))
 
     def write(self, string):
         length = len(string)
@@ -156,21 +157,21 @@
         start = self.pos
         available = len(self.buf) - start
         if available <= 0:
-            return space.wrap(u"")
+            return space.wrap(Utf8Str(""))
         if size >= 0 and size <= available:
             end = start + size
         else:
             end = len(self.buf)
         assert 0 <= start <= end
         self.pos = end
-        return space.wrap(u''.join(self.buf[start:end]))
+        return space.wrap(Utf8Str('').join(self.buf[start:end]))
 
     def readline_w(self, space, w_limit=None):
         self._check_closed(space)
         limit = convert_size(space, w_limit)
 
         if self.pos >= len(self.buf):
-            return space.wrap(u"")
+            return space.wrap(Utf8Str(""))
 
         start = self.pos
         if limit < 0 or limit > len(self.buf) - self.pos:
@@ -181,7 +182,7 @@
 
         endpos, consumed = self._find_line_ending(
             # XXX: super inefficient, makes a copy of the entire contents.
-            u"".join(self.buf),
+            Utf8Str("").join(self.buf),
             start,
             end
         )
@@ -191,7 +192,7 @@
             endpos = end
         assert endpos >= 0
         self.pos = endpos
-        return space.wrap(u"".join(self.buf[start:endpos]))
+        return space.wrap(Utf8Str("").join(self.buf[start:endpos]))
 
     @unwrap_spec(pos=int, mode=int)
     def seek_w(self, space, pos, mode=0):
@@ -234,7 +235,7 @@
 
     def getvalue_w(self, space):
         self._check_closed(space)
-        return space.wrap(u''.join(self.buf))
+        return space.wrap(Utf8Str('').join(self.buf))
 
     def readable_w(self, space):
         self._check_closed(space)
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -6,11 +6,11 @@
 from pypy.interpreter.typedef import (
     GetSetProperty, TypeDef, generic_new_descr, interp_attrproperty,
     interp_attrproperty_w)
+from pypy.interpreter.utf8 import Utf8Str, Utf8Builder, utf8ord
 from pypy.module._codecs import interp_codecs
 from pypy.module._io.interp_iobase import W_IOBase, convert_size, trap_eintr
 from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong
 from rpython.rlib.rbigint import rbigint
-from rpython.rlib.rstring import UnicodeBuilder
 
 
 STATE_ZERO, STATE_OK, STATE_DETACHED = range(3)
@@ -29,17 +29,17 @@
 
     def __init__(self, space):
         self.w_newlines_dict = {
-            SEEN_CR: space.wrap(u"\r"),
-            SEEN_LF: space.wrap(u"\n"),
-            SEEN_CRLF: space.wrap(u"\r\n"),
+            SEEN_CR: space.wrap(Utf8Str("\r")),
+            SEEN_LF: space.wrap(Utf8Str("\n")),
+            SEEN_CRLF: space.wrap(Utf8Str("\r\n")),
             SEEN_CR | SEEN_LF: space.newtuple(
-                [space.wrap(u"\r"), space.wrap(u"\n")]),
+                [space.wrap(Utf8Str("\r")), space.wrap(Utf8Str("\n"))]),
             SEEN_CR | SEEN_CRLF: space.newtuple(
-                [space.wrap(u"\r"), space.wrap(u"\r\n")]),
+                [space.wrap(Utf8Str("\r")), space.wrap(Utf8Str("\r\n"))]),
             SEEN_LF | SEEN_CRLF: space.newtuple(
-                [space.wrap(u"\n"), space.wrap(u"\r\n")]),
+                [space.wrap(Utf8Str("\n")), space.wrap(Utf8Str("\r\n"))]),
             SEEN_CR | SEEN_LF | SEEN_CRLF: space.newtuple(
-                [space.wrap(u"\r"), space.wrap(u"\n"), space.wrap(u"\r\n")]),
+                [space.wrap(Utf8Str("\r")), space.wrap(Utf8Str("\n")), space.wrap(Utf8Str("\r\n"))]),
             }
 
     @unwrap_spec(translate=int)
@@ -76,7 +76,7 @@
         output = space.unicode_w(w_output)
         output_len = len(output)
         if self.pendingcr and (final or output_len):
-            output = u'\r' + output
+            output = Utf8Str('\r') + output
             self.pendingcr = False
             output_len += 1
 
@@ -85,13 +85,13 @@
         if not final and output_len > 0:
             last = output_len - 1
             assert last >= 0
-            if output[last] == u'\r':
+            if output[last] == Utf8Str('\r'):
                 output = output[:last]
                 self.pendingcr = True
                 output_len -= 1
 
         if output_len == 0:
-            return space.wrap(u"")
+            return space.wrap(Utf8Str(""))
 
         # Record which newlines are read and do newline translation if
         # desired, all in one pass.
@@ -101,12 +101,12 @@
         # for the \r
         only_lf = False
         if seennl == SEEN_LF or seennl == 0:
-            only_lf = (output.find(u'\r') < 0)
+            only_lf = (output.find(Utf8Str('\r')) < 0)
 
         if only_lf:
             # If not already seen, quick scan for a possible "\n" character.
             # (there's nothing else to be done, even when in translation mode)
-            if seennl == 0 and output.find(u'\n') >= 0:
+            if seennl == 0 and output.find('\n') >= 0:
                 seennl |= SEEN_LF
                 # Finished: we have scanned for newlines, and none of them
                 # need translating.
@@ -115,32 +115,32 @@
             while i < output_len:
                 if seennl == SEEN_ALL:
                     break
-                c = output[i]
+                c = utf8ord(output, i)
                 i += 1
-                if c == u'\n':
+                if c == ord('\n'):
                     seennl |= SEEN_LF
-                elif c == u'\r':
-                    if i < output_len and output[i] == u'\n':
+                elif c == ord('\r'):
+                    if i < output_len and utf8ord(output, i) == ord('\n'):
                         seennl |= SEEN_CRLF
                         i += 1
                     else:
                         seennl |= SEEN_CR
-        elif output.find(u'\r') >= 0:
+        elif output.find('\r') >= 0:
             # Translate!
-            builder = UnicodeBuilder(output_len)
+            builder = Utf8Builder(output_len)
             i = 0
             while i < output_len:
-                c = output[i]
+                c = utf8ord(output, i)
                 i += 1
-                if c == u'\n':
+                if c == ord('\n'):
                     seennl |= SEEN_LF
-                elif c == u'\r':
-                    if i < output_len and output[i] == u'\n':
+                elif c == ord('\r'):
+                    if i < output_len and utf8ord(output, i) == ord('\n'):
                         seennl |= SEEN_CRLF
                         i += 1
                     else:
                         seennl |= SEEN_CR
-                    builder.append(u'\n')
+                    builder.append('\n')
                     continue
                 builder.append(c)
             output = builder.build()
@@ -217,7 +217,7 @@
         if self.readtranslate:
 
             # Newlines are already translated, only search for \n
-            pos = line.find(u'\n', start, end)
+            pos = line.find('\n', start, end)
             if pos >= 0:
                 return pos - start + 1, 0
             else:
@@ -229,16 +229,16 @@
             while True:
                 # Fast path for non-control chars. The loop always ends
                 # since the Py_UNICODE storage is NUL-terminated.
-                while i < size and line[start + i] > '\r':
+                while i < size and utf8ord(line, start + i) > ord('\r'):
                     i += 1
                 if i >= size:
                     return -1, size
-                ch = line[start + i]
+                ch = utf8ord(line, start + i)
                 i += 1
-                if ch == '\n':
+                if ch == ord('\n'):
                     return i, 0
-                if ch == '\r':
-                    if line[start + i] == '\n':
+                if ch == ord('\r'):
+                    if utf8ord(line, start + i) == ord('\n'):
                         return i + 1, 0
                     else:
                         return i, 0
@@ -371,7 +371,8 @@
             newline = None
         else:
             newline = space.unicode_w(w_newline)
-        if newline and newline not in (u'\n', u'\r\n', u'\r'):
+        if newline and newline not in (Utf8Str('\n'), Utf8Str('\r\n'),
+                                       Utf8Str('\r')):
             r = space.str_w(space.repr(w_newline))
             raise OperationError(space.w_ValueError, space.wrap(
                 "illegal newline value: %s" % (r,)))
@@ -382,13 +383,13 @@
         self.readtranslate = newline is None
         self.readnl = newline
 
-        self.writetranslate = (newline != u'')
+        self.writetranslate = (newline != Utf8Str(''))
         if not self.readuniversal:
             self.writenl = self.readnl
-            if self.writenl == u'\n':
+            if self.writenl == Utf8Str('\n'):
                 self.writenl = None
         elif _WINDOWS:
-            self.writenl = u"\r\n"
+            self.writenl = Utf8Str("\r\n")
         else:
             self.writenl = None
 
@@ -508,7 +509,7 @@
 
     def _get_decoded_chars(self, size):
         if self.decoded_chars is None:
-            return u""
+            return Utf8Str("")
 
         available = len(self.decoded_chars) - self.decoded_chars_used
         if size < 0 or size > available:
@@ -603,7 +604,7 @@
             return w_final
 
         remaining = size
-        builder = UnicodeBuilder(size)
+        builder = Utf8Builder(size)
 
         # Keep reading chunks until we have n characters to return
         while True:
@@ -710,12 +711,12 @@
         if chunks:
             if line:
                 chunks.append(line)
-            line = u''.join(chunks)
+            line = Utf8Str('').join(chunks)
 
         if line:
             return space.wrap(line)
         else:
-            return space.wrap(u'')
+            return space.wrap(Utf8Str(''))
 
     # _____________________________________________________________
     # write methods
@@ -736,15 +737,16 @@
 
         haslf = False
         if (self.writetranslate and self.writenl) or self.line_buffering:
-            if text.find(u'\n') >= 0:
+            if text.find('\n') >= 0:
                 haslf = True
         if haslf and self.writetranslate and self.writenl:
-            w_text = space.call_method(w_text, "replace", space.wrap(u'\n'),
+            w_text = space.call_method(w_text, "replace",
+                                       space.wrap(Utf8Str('\n')),
                                        space.wrap(self.writenl))
             text = space.unicode_w(w_text)
 
         needflush = False
-        if self.line_buffering and (haslf or text.find(u'\r') >= 0):
+        if self.line_buffering and (haslf or text.find('\r') >= 0):
             needflush = True
 
         # XXX What if we were just reading?