[pypy-commit] pypy default: Finish the reader logic. Mostly untested so far, apart from the one test in test_reader.

arigo noreply at buildbot.pypy.org
Mon Sep 24 18:10:22 CEST 2012


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r57498:47511fcae6a9
Date: 2012-09-24 15:32 +0200
http://bitbucket.org/pypy/pypy/changeset/47511fcae6a9/

Log:	Finish the reader logic. Mostly untested so far, apart from the one
	test in test_reader.

diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -1,7 +1,10 @@
+from pypy.rlib.rstring import StringBuilder
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.gateway import NoneNotWrapped
 from pypy.interpreter.typedef import TypeDef, interp2app
+from pypy.interpreter.typedef import interp_attrproperty_w, interp_attrproperty
+from pypy.module._csv.interp_csv import _build_dialect
 from pypy.module._csv.interp_csv import (QUOTE_MINIMAL, QUOTE_ALL,
                                          QUOTE_NONNUMERIC, QUOTE_NONE)
 
@@ -15,6 +18,9 @@
     w_error = space.getattr(w_module, space.wrap('Error'))
     raise OperationError(w_error, space.wrap(msg))
 
+def new_field_builder():
+    return StringBuilder(64)
+
 
 class W_Reader(Wrappable):
 
@@ -27,35 +33,58 @@
     def iter_w(self):
         return self.space.wrap(self)
 
+    def save_field(self, field_builder):
+        field = field_builder.build()
+        if self.numeric_field:
+            from pypy.objspace.std.strutil import ParseStringError
+            from pypy.objspace.std.strutil import string_to_float
+            self.numeric_field = False
+            try:
+                ff = string_to_float(field)
+            except ParseStringError, e:
+                raise OperationError(space.w_ValueError,
+                                     space.wrap(e.msg))
+            w_obj = self.space.wrap(ff)
+        else:
+            w_obj = self.space.wrap(field)
+        self.fields_w.append(w_obj)
+
     def next_w(self):
         space = self.space
         dialect = self.dialect
         self.fields_w = []
         self.numeric_field = False
-        field = ''
+        field_builder = None  # valid iff state not in [START_RECORD, EAT_CRNL]
         state = START_RECORD
         #
         while True:
             try:
                 w_line = space.next(self.w_iter)
             except OperationError, e:
-                if e.match(space, space.w_StopIteration) and len(field) > 0:
-                    raise error("newline inside string")
+                if e.match(space, space.w_StopIteration):
+                    if field_builder is not None:
+                        raise error("newline inside string")
                 raise
             self.line_num += 1
             line = space.str_w(w_line)
             for c in line:
+                if c == '\0':
+                    raise error("line contains NULL byte")
+
                 if state == START_RECORD:
                     if c == '\n' or c == '\r':
                         state = EAT_CRNL
                         continue
                     # normal character - handle as START_FIELD
                     state = START_FIELD
+                    # fall-through to the next case
+
                 if state == START_FIELD:
+                    field_builder = new_field_builder()
                     # expecting field
                     if c == '\n' or c == '\r':
                         # save empty field
-                        assert len(field) == 0; self.save_field('')
+                        self.save_field(field_builder)
                         state = EAT_CRNL
                     elif (c == dialect.quotechar and
                               dialect.quoting != QUOTE_NONE):
@@ -69,12 +98,101 @@
                         pass
                     elif c == dialect.delimiter:
                         # save empty field
-                        assert len(field) == 0; self.save_field('')
+                        self.save_field(field_builder)
                     else:
                         # begin new unquoted field
                         if dialect.quoting == QUOTE_NONNUMERIC:
                             self.numeric_field = True
-                        field += .....
+                        field_builder.append(c)
+                        state = IN_FIELD
+
+                elif state == ESCAPED_CHAR:
+                    field_builder.append(c)
+                    state = IN_FIELD
+
+                elif state == IN_FIELD:
+                    # in unquoted field
+                    if c == '\n' or c == '\r':
+                        # end of line
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif c == dialect.escapechar:
+                        # possible escaped character
+                        state = ESCAPED_CHAR
+                    elif c == dialect.delimiter:
+                        # save field - wait for new field
+                        self.save_field(field_builder)
+                        state = START_FIELD
+                    else:
+                        # normal character - save in field
+                        field_builder.append(c)
+
+                elif state == IN_QUOTED_FIELD:
+                    # in quoted field
+                    if c == dialect.escapechar:
+                        # Possible escape character
+                        state = ESCAPE_IN_QUOTED_FIELD
+                    elif (c == dialect.quotechar and
+                              dialect.quoting != QUOTE_NONE):
+                        if dialect.doublequote:
+                            # doublequote; " represented by ""
+                            state = QUOTE_IN_QUOTED_FIELD
+                        else:
+                            # end of quote part of field
+                            state = IN_FIELD
+                    else:
+                        # normal character - save in field
+                        field_builder.append(c)
+
+                elif state == ESCAPE_IN_QUOTED_FIELD:
+                    field_builder.append(c)
+                    state = IN_QUOTED_FIELD
+
+                elif state == QUOTE_IN_QUOTED_FIELD:
+                    # doublequote - seen a quote in an quoted field
+                    if (dialect.quoting != QUOTE_NONE and
+                            c == dialect.quotechar):
+                        # save "" as "
+                        field_builder.append(c)
+                        state = IN_QUOTED_FIELD
+                    elif c == dialect.delimiter:
+                        # save field - wait for new field
+                        self.save_field(field_builder)
+                        state = START_FIELD
+                    elif c == '\n' or c == '\r':
+                        # end of line
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif not dialect.strict:
+                        field_builder.append(c)
+                        state = IN_FIELD
+                    else:
+                        # illegal
+                        raise error("'%s' expected after '%s'" %
+                                    dialect.delimiter,
+                                    dialect.quotechar)
+
+                elif state == EAT_CRNL:
+                    if not (c == '\n' or c == '\r'):
+                        raise error("new-line character seen in unquoted "
+                                    "field - do you need to open the file "
+                                    "in universal-newline mode?")
+
+            if (state == START_FIELD or
+                  state == IN_FIELD or
+                  state == QUOTE_IN_QUOTED_FIELD):
+                self.save_field()
+                break
+            elif state == ESCAPED_CHAR:
+                field_builder.append('\n')
+                state = IN_FIELD
+            elif state == IN_QUOTED_FIELD:
+                pass
+            elif state == ESCAPE_IN_QUOTED_FIELD:
+                field_builder.append('\n')
+                state = IN_QUOTED_FIELD
+            else:
+                break
         #
         w_result = space.newlist(self.fields_w)
         self.fields_w = None


More information about the pypy-commit mailing list