[pypy-commit] pypy length-hint: merge default

pjenvey noreply at buildbot.pypy.org
Mon Sep 24 19:49:46 CEST 2012


Author: Philip Jenvey <pjenvey at underboss.org>
Branch: length-hint
Changeset: r57512:c4b0048a639b
Date: 2012-09-24 10:02 -0700
http://bitbucket.org/pypy/pypy/changeset/c4b0048a639b/

Log:	merge default

diff too long, truncating to 2000 out of 4707 lines

diff --git a/lib-python/2.7/test/test_csv.py b/lib-python/2.7/test/test_csv.py
--- a/lib-python/2.7/test/test_csv.py
+++ b/lib-python/2.7/test/test_csv.py
@@ -20,7 +20,8 @@
     """
     def _test_arg_valid(self, ctor, arg):
         self.assertRaises(TypeError, ctor)
-        self.assertRaises(TypeError, ctor, None)
+        # PyPy gets an AttributeError instead of a TypeError
+        self.assertRaises((TypeError, AttributeError), ctor, None)
         self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
         self.assertRaises(TypeError, ctor, arg, delimiter = 0)
         self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
@@ -59,7 +60,8 @@
         self.assertRaises((TypeError, AttributeError), setattr, obj.dialect,
                           'delimiter', ':')
         self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting')
-        self.assertRaises(AttributeError, setattr, obj.dialect,
+        # PyPy gets a TypeError instead of an AttributeError
+        self.assertRaises((AttributeError, TypeError), setattr, obj.dialect,
                           'quoting', None)
 
     def test_reader_attrs(self):
@@ -133,7 +135,8 @@
             os.unlink(name)
 
     def test_write_arg_valid(self):
-        self.assertRaises(csv.Error, self._write_test, None, '')
+        # PyPy gets a TypeError instead of a csv.Error for "not a sequence"
+        self.assertRaises((csv.Error, TypeError), self._write_test, None, '')
         self._write_test((), '')
         self._write_test([None], '""')
         self.assertRaises(csv.Error, self._write_test,
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -183,7 +183,7 @@
     RegrTest('test_cpickle.py', core=True),
     RegrTest('test_cprofile.py'), 
     RegrTest('test_crypt.py', usemodules='crypt', skip=skip_win32),
-    RegrTest('test_csv.py'),
+    RegrTest('test_csv.py', usemodules='_csv'),
 
     RegrTest('test_curses.py', skip="unsupported extension module"),
     RegrTest('test_datetime.py'),
diff --git a/lib_pypy/_csv.py b/lib_pypy/_csv.py
--- a/lib_pypy/_csv.py
+++ b/lib_pypy/_csv.py
@@ -363,9 +363,7 @@
                             (self.dialect.delimiter, self.dialect.quotechar))
 
         elif self.state == self.EAT_CRNL:
-            if c in '\r\n':
-                pass
-            else:
+            if c not in '\r\n':
                 raise Error("new-line character seen in unquoted field - "
                             "do you need to open the file "
                             "in universal-newline mode?")
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -81,7 +81,9 @@
         addr = self._buffer[0]
         if addr == 0:
             raise ValueError("NULL pointer access")
-        return self._type_.from_address(addr)
+        instance = self._type_.from_address(addr)
+        instance.__dict__['_base'] = self
+        return instance
 
     def setcontents(self, value):
         if not isinstance(value, self._type_):
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -34,7 +34,7 @@
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
      "_collections", "_multibytecodec", "micronumpy", "_ffi",
-     "_continuation", "_cffi_backend"]
+     "_continuation", "_cffi_backend", "_csv"]
 ))
 
 translation_modules = default_modules.copy()
diff --git a/pypy/doc/jit/pyjitpl5.rst b/pypy/doc/jit/pyjitpl5.rst
--- a/pypy/doc/jit/pyjitpl5.rst
+++ b/pypy/doc/jit/pyjitpl5.rst
@@ -149,7 +149,7 @@
 
 A *virtual* value is an array, struct, or RPython level instance that is created
 during the loop and does not escape from it via calls or longevity past the
-loop.  Since it is only used by the JIT, it be "optimized out"; the value
+loop.  Since it is only used by the JIT, it can be "optimized out"; the value
 doesn't have to be allocated at all and its fields can be stored as first class
 values instead of deferencing them in memory.  Virtuals allow temporary objects
 in the interpreter to be unwrapped.  For example, a W_IntObject in the PyPy can
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -21,7 +21,7 @@
 -------------------------
 
 PyPy's implementation of the Python ``long`` type is slower than CPython's.
-Find out why and optimize them.
+Find out why and optimize them.  **UPDATE:** this was done (thanks stian).
 
 Make bytearray type fast
 ------------------------
@@ -103,13 +103,32 @@
 
 * A concurrent garbage collector (a lot of work)
 
-STM, a.k.a. "remove the GIL"
-----------------------------
+STM (Software Transactional Memory)
+-----------------------------------
 
-Removing the GIL --- or more precisely, a GIL-less thread-less solution ---
-is `now work in progress.`__  Contributions welcome.
+This is work in progress.  Besides the main development path, whose goal is
+to make a (relatively fast) version of pypy which includes STM, there are
+independent topics that can already be experimented with on the existing,
+JIT-less pypy-stm version:
+  
+* What kind of conflicts do we get in real use cases?  And, sometimes,
+  which data structures would be more appropriate?  For example, a dict
+  implemented as a hash table will suffer "stm collisions" in all threads
+  whenever one thread writes anything to it; but there could be other
+  implementations.
 
-.. __: http://pypy.org/tmdonate.html
+* More generally, there is the idea that we would need some kind of
+  "debugger"-like tool to "debug" things that are not bugs, but stm
+  conflicts.  What would this tool look like to the end Python
+  programmers?  Like a profiler?  Or like a debugger with breakpoints
+  on aborted transactions?
+
+* Find good ways to have libraries that use threads and atomics internally,
+  but not exposing threads to the user.  Right now there is a rough draft
+  in ``lib_pypy/transaction.py``, but much better is possible.  For example
+  we could probably have an iterator-like concept that allows each loop
+  iteration to run in parallel.
+
 
 Introduce new benchmarks
 ------------------------
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -18,6 +18,8 @@
 .. branch: numpypy_count_nonzero
 .. branch: numpy-refactor
 Remove numpy lazy evaluation and simplify everything
+.. branch: numpy-fancy-indexing
+Support for array[array-of-ints] in numpy
 .. branch: even-more-jit-hooks
 Implement better JIT hooks
 .. branch: virtual-arguments
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -65,24 +65,44 @@
         self.marked = False
         self.have_return = False
 
-    def _post_order(self, blocks):
-        if self.marked:
-            return
-        self.marked = True
-        if self.next_block is not None:
-            self.next_block._post_order(blocks)
-        for instr in self.instructions:
-            if instr.has_jump:
-                instr.jump[0]._post_order(blocks)
-        blocks.append(self)
-        self.marked = True
+    def _post_order_see(self, stack, nextblock):
+        if nextblock.marked == 0:
+            nextblock.marked = 1
+            stack.append(nextblock)
 
     def post_order(self):
-        """Return this block and its children in post order."""
-        blocks = []
-        self._post_order(blocks)
-        blocks.reverse()
-        return blocks
+        """Return this block and its children in post order.
+        This means that the graph of blocks is first cleaned up to
+        ignore back-edges, thus turning it into a DAG.  Then the DAG
+        is linearized.  For example:
+
+                   A --> B -\           =>     [A, D, B, C]
+                     \-> D ---> C
+        """
+        resultblocks = []
+        stack = [self]
+        self.marked = 1
+        while stack:
+            current = stack[-1]
+            if current.marked == 1:
+                current.marked = 2
+                if current.next_block is not None:
+                    self._post_order_see(stack, current.next_block)
+            else:
+                i = current.marked - 2
+                assert i >= 0
+                while i < len(current.instructions):
+                    instr = current.instructions[i]
+                    i += 1
+                    if instr.has_jump:
+                        current.marked = i + 2
+                        self._post_order_see(stack, instr.jump[0])
+                        break
+                else:
+                    resultblocks.append(current)
+                    stack.pop()
+        resultblocks.reverse()
+        return resultblocks
 
     def code_size(self):
         """Return the encoded size of all the instructions in this block."""
@@ -353,20 +373,26 @@
     def _stacksize(self, blocks):
         """Compute co_stacksize."""
         for block in blocks:
-            block.marked = False
-            block.initial_depth = -1000
-        return self._recursive_stack_depth_walk(blocks[0], 0, 0)
+            block.initial_depth = 0
+        # Assumes that it is sufficient to walk the blocks in 'post-order'.
+        # This means we ignore all back-edges, but apart from that, we only
+        # look into a block when all the previous blocks have been done.
+        self._max_depth = 0
+        for block in blocks:
+            self._do_stack_depth_walk(block)
+        return self._max_depth
 
-    def _recursive_stack_depth_walk(self, block, depth, max_depth):
-        if block.marked or block.initial_depth >= depth:
-            return max_depth
-        block.marked = True
-        block.initial_depth = depth
+    def _next_stack_depth_walk(self, nextblock, depth):
+        if depth > nextblock.initial_depth:
+            nextblock.initial_depth = depth
+
+    def _do_stack_depth_walk(self, block):
+        depth = block.initial_depth
         done = False
         for instr in block.instructions:
             depth += _opcode_stack_effect(instr.opcode, instr.arg)
-            if depth >= max_depth:
-                max_depth = depth
+            if depth >= self._max_depth:
+                self._max_depth = depth
             if instr.has_jump:
                 target_depth = depth
                 jump_op = instr.opcode
@@ -376,20 +402,15 @@
                       jump_op == ops.SETUP_EXCEPT or
                       jump_op == ops.SETUP_WITH):
                     target_depth += 3
-                    if target_depth > max_depth:
-                        max_depth = target_depth
-                max_depth = self._recursive_stack_depth_walk(instr.jump[0],
-                                                             target_depth,
-                                                             max_depth)
+                    if target_depth > self._max_depth:
+                        self._max_depth = target_depth
+                self._next_stack_depth_walk(instr.jump[0], target_depth)
                 if jump_op == ops.JUMP_ABSOLUTE or jump_op == ops.JUMP_FORWARD:
                     # Nothing more can occur.
                     done = True
                     break
         if block.next_block and not done:
-            max_depth = self._recursive_stack_depth_walk(block.next_block,
-                                                         depth, max_depth)
-        block.marked = False
-        return max_depth
+            max_depth = self._next_stack_depth_walk(block.next_block, depth)
 
     def _build_lnotab(self, blocks):
         """Build the line number table for tracebacks and tracing."""
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -778,6 +778,10 @@
             raise AssertionError("attribute not removed")"""
         yield self.st, test, "X.__name__", "X"
 
+    def test_lots_of_loops(self):
+        source = "for x in y: pass\n" * 1000
+        compile_with_astcompiler(source, 'exec', self.space)
+
 
 class AppTestCompiler:
 
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -590,10 +590,6 @@
                 w_exc = self.getitem(w_dic, w_name)
                 exc_types_w[name] = w_exc
                 setattr(self, "w_" + excname, w_exc)
-        # Make a prebuilt recursion error
-        w_msg = self.wrap("maximum recursion depth exceeded")
-        self.prebuilt_recursion_error = OperationError(self.w_RuntimeError,
-                                                       w_msg)
         return exc_types_w
 
     def install_mixedmodule(self, mixedname, installed_builtin_modules):
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -21,9 +21,7 @@
     _application_traceback = None
 
     def __init__(self, w_type, w_value, tb=None):
-        if not we_are_translated() and w_type is None:
-            from pypy.tool.error import FlowingError
-            raise FlowingError(w_value)
+        assert w_type is not None
         self.setup(w_type)
         self._w_value = w_value
         self._application_traceback = tb
@@ -327,9 +325,7 @@
                 self.xstrings = strings
                 for i, attr in entries:
                     setattr(self, attr, args[i])
-                if not we_are_translated() and w_type is None:
-                    from pypy.tool.error import FlowingError
-                    raise FlowingError(self._compute_value())
+                assert w_type is not None
             def _compute_value(self):
                 lst = [None] * (len(formats) + len(formats) + 1)
                 for i, attr in entries:
@@ -393,7 +389,7 @@
         return OperationError(exc, w_error)
 
 def wrap_oserror2(space, e, w_filename=None, exception_name='w_OSError',
-                  w_exception_class=None): 
+                  w_exception_class=None):
     assert isinstance(e, OSError)
 
     if _WINDOWS and isinstance(e, WindowsError):
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -153,17 +153,13 @@
             self._trace(frame, 'exception', None, operationerr)
         #operationerr.print_detailed_traceback(self.space)
 
-    def _convert_exc(self, operr):
-        # Only for the flow object space
-        return operr
-
     def sys_exc_info(self): # attn: the result is not the wrapped sys.exc_info() !!!
         """Implements sys.exc_info().
         Return an OperationError instance or None."""
         frame = self.gettopframe_nohidden()
         while frame:
             if frame.last_exception is not None:
-                return self._convert_exc(frame.last_exception)
+                return frame.last_exception
             frame = self.getnextframe_nohidden(frame)
         return None
 
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -651,7 +651,8 @@
             raise OperationError(space.w_MemoryError, space.w_None)
         except rstackovf.StackOverflow, e:
             rstackovf.check_stack_overflow()
-            raise space.prebuilt_recursion_error
+            raise OperationError(space.w_RuntimeError,
+                                space.wrap("maximum recursion depth exceeded"))
         except RuntimeError:   # not on top of py.py
             raise OperationError(space.w_RuntimeError, space.w_None)
 
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -90,10 +90,6 @@
             next_instr = self.dispatch_bytecode(co_code, next_instr, ec)
         except OperationError, operr:
             next_instr = self.handle_operation_error(ec, operr)
-        except Reraise:
-            operr = self.last_exception
-            next_instr = self.handle_operation_error(ec, operr,
-                                                     attach_tb=False)
         except RaiseWithExplicitTraceback, e:
             next_instr = self.handle_operation_error(ec, e.operr,
                                                      attach_tb=False)
@@ -104,9 +100,11 @@
             next_instr = self.handle_asynchronous_error(ec,
                 self.space.w_MemoryError)
         except rstackovf.StackOverflow, e:
+            # Note that this case catches AttributeError!
             rstackovf.check_stack_overflow()
-            w_err = self.space.prebuilt_recursion_error
-            next_instr = self.handle_operation_error(ec, w_err)
+            next_instr = self.handle_asynchronous_error(ec,
+                self.space.w_RuntimeError,
+                self.space.wrap("maximum recursion depth exceeded"))
         return next_instr
 
     def handle_asynchronous_error(self, ec, w_type, w_value=None):
@@ -540,7 +538,7 @@
             ec = self.space.getexecutioncontext()
             while frame:
                 if frame.last_exception is not None:
-                    operror = ec._convert_exc(frame.last_exception)
+                    operror = frame.last_exception
                     break
                 frame = frame.f_backref()
             else:
@@ -548,7 +546,7 @@
                     space.wrap("raise: no active exception to re-raise"))
             # re-raise, no new traceback obj will be attached
             self.last_exception = operror
-            raise Reraise
+            raise RaiseWithExplicitTraceback(operror)
 
         w_value = w_traceback = space.w_None
         if nbargs >= 3:
@@ -1166,10 +1164,8 @@
 class Yield(ExitFrame):
     """Raised when exiting a frame via a 'yield' statement."""
 
-class Reraise(Exception):
-    """Raised at interp-level by a bare 'raise' statement."""
 class RaiseWithExplicitTraceback(Exception):
-    """Raised at interp-level by a 3-arguments 'raise' statement."""
+    """Raised at interp-level by a 0- or 3-arguments 'raise' statement."""
     def __init__(self, operr):
         self.operr = operr
 
@@ -1226,12 +1222,6 @@
     def nomoreblocks(self):
         raise RaiseWithExplicitTraceback(self.operr)
 
-    def state_unpack_variables(self, space):
-        return [self.operr.w_type, self.operr.get_w_value(space)]
-    def state_pack_variables(space, w_type, w_value):
-        return SApplicationException(OperationError(w_type, w_value))
-    state_pack_variables = staticmethod(state_pack_variables)
-
 class SBreakLoop(SuspendedUnroller):
     """Signals a 'break' statement."""
     _immutable_ = True
diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py
--- a/pypy/interpreter/test/test_syntax.py
+++ b/pypy/interpreter/test/test_syntax.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 import py
 from pypy.conftest import gettestobjspace
 
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -994,6 +994,7 @@
                              ('p', lltype.Ptr(TP)))
         a_box, A = self.alloc_array_of(ITEM, 15)
         s_box, S = self.alloc_instance(TP)
+        vsdescr = self.cpu.interiorfielddescrof(A, 'vs')
         kdescr = self.cpu.interiorfielddescrof(A, 'k')
         pdescr = self.cpu.interiorfielddescrof(A, 'p')
         self.execute_operation(rop.SETINTERIORFIELD_GC, [a_box, BoxInt(3),
@@ -1045,6 +1046,13 @@
         r = self.execute_operation(rop.GETINTERIORFIELD_GC, [a_box, BoxInt(3)],
                                    'ref', descr=pdescr)
         assert r.getref_base() == s_box.getref_base()
+        #
+        # test a corner case that used to fail on x86
+        i4 = BoxInt(4)
+        self.execute_operation(rop.SETINTERIORFIELD_GC, [a_box, i4, i4],
+                               'void', descr=vsdescr)
+        r = self.cpu.bh_getinteriorfield_gc_i(a_box.getref_base(), 4, vsdescr)
+        assert r == 4
 
     def test_string_basic(self):
         s_box = self.alloc_string("hello\xfe")
diff --git a/pypy/jit/backend/test/test_random.py b/pypy/jit/backend/test/test_random.py
--- a/pypy/jit/backend/test/test_random.py
+++ b/pypy/jit/backend/test/test_random.py
@@ -465,6 +465,16 @@
 
 # ____________________________________________________________
 
+def do_assert(condition, error_message):
+    if condition:
+        return
+    seed = pytest.config.option.randomseed
+    message = "%s\nPython: %s\nRandom seed: %r" % (
+        error_message,
+        sys.executable,
+        seed)
+    raise AssertionError(message)
+
 def Random():
     import random
     seed = pytest.config.option.randomseed
@@ -544,6 +554,7 @@
         self.startvars = startvars
         self.prebuilt_ptr_consts = []
         self.r = r
+        self.subloops = []
         self.build_random_loop(cpu, builder_factory, r, startvars, allow_delay)
 
     def build_random_loop(self, cpu, builder_factory, r, startvars, allow_delay):
@@ -668,13 +679,15 @@
 
         arguments = [box.value for box in self.loop.inputargs]
         fail = cpu.execute_token(self.runjitcelltoken(), *arguments)
-        assert fail is self.should_fail_by.getdescr()
+        do_assert(fail is self.should_fail_by.getdescr(),
+                  "Got %r, expected %r" % (fail,
+                                           self.should_fail_by.getdescr()))
         for i, v in enumerate(self.get_fail_args()):
             if isinstance(v, (BoxFloat, ConstFloat)):
                 value = cpu.get_latest_value_float(i)
             else:
                 value = cpu.get_latest_value_int(i)
-            assert value == self.expected[v], (
+            do_assert(value == self.expected[v],
                 "Got %r, expected %r for value #%d" % (value,
                                                        self.expected[v],
                                                        i)
@@ -683,9 +696,11 @@
         if (self.guard_op is not None and
             self.guard_op.is_guard_exception()):
             if self.guard_op.getopnum() == rop.GUARD_NO_EXCEPTION:
-                assert exc
+                do_assert(exc,
+                          "grab_exc_value() should not be %r" % (exc,))
         else:
-            assert not exc
+            do_assert(not exc,
+                      "unexpected grab_exc_value(): %r" % (exc,))
 
     def build_bridge(self):
         def exc_handling(guard_op):
@@ -710,6 +725,7 @@
             return False
         # generate the branch: a sequence of operations that ends in a FINISH
         subloop = DummyLoop([])
+        self.subloops.append(subloop)   # keep around for debugging
         if guard_op.is_guard_exception():
             subloop.operations.append(exc_handling(guard_op))
         bridge_builder = self.builder.fork(self.builder.cpu, subloop,
@@ -746,9 +762,6 @@
             args = [x.clonebox() for x in subset]
             rl = RandomLoop(self.builder.cpu, self.builder.fork,
                                      r, args)
-            dump(rl.loop)
-            self.cpu.compile_loop(rl.loop.inputargs, rl.loop.operations,
-                                  rl.loop._jitcelltoken)
             # done
             self.should_fail_by = rl.should_fail_by
             self.expected = rl.expected
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -1003,14 +1003,18 @@
         # If 'index_loc' is not an immediate, then we need a 'temp_loc' that
         # is a register whose value will be destroyed.  It's fine to destroy
         # the same register as 'index_loc', but not the other ones.
-        self.rm.possibly_free_var(box_index)
         if not isinstance(index_loc, ImmedLoc):
+            # ...that is, except in a corner case where 'index_loc' would be
+            # in the same register as 'value_loc'...
+            if index_loc is not value_loc:
+                self.rm.possibly_free_var(box_index)
             tempvar = TempBox()
             temp_loc = self.rm.force_allocate_reg(tempvar, [box_base,
                                                             box_value])
             self.rm.possibly_free_var(tempvar)
         else:
             temp_loc = None
+        self.rm.possibly_free_var(box_index)
         self.rm.possibly_free_var(box_base)
         self.possibly_free_var(box_value)
         self.PerformDiscard(op, [base_loc, ofs, itemsize, fieldsize,
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -576,7 +576,7 @@
     J_il8 = insn(immediate(1, 'o'), '\x70', immediate(2, 'b'))
     J_il = insn('\x0F', immediate(1,'o'), '\x80', relative(2))
 
-    SET_ir = insn(rex_w, '\x0F', immediate(1,'o'),'\x90', byte_register(2), '\xC0')
+    SET_ir = insn(rex_fw, '\x0F', immediate(1,'o'),'\x90', byte_register(2), '\xC0')
 
     # The 64-bit version of this, CQO, is defined in X86_64_CodeBuilder
     CDQ = insn(rex_nw, '\x99')
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -106,7 +106,8 @@
 
 def compile_loop(metainterp, greenkey, start,
                  inputargs, jumpargs,
-                 resume_at_jump_descr, full_preamble_needed=True):
+                 resume_at_jump_descr, full_preamble_needed=True,
+                 try_disabling_unroll=False):
     """Try to compile a new procedure by closing the current history back
     to the first operation.
     """
@@ -116,6 +117,13 @@
     jitdriver_sd = metainterp.jitdriver_sd
     history = metainterp.history
 
+    enable_opts = jitdriver_sd.warmstate.enable_opts
+    if try_disabling_unroll:
+        if 'unroll' not in enable_opts:
+            return None
+        enable_opts = enable_opts.copy()
+        del enable_opts['unroll']
+
     jitcell_token = make_jitcell_token(jitdriver_sd)
     part = create_empty_loop(metainterp)
     part.inputargs = inputargs[:]
@@ -126,7 +134,7 @@
                       [ResOperation(rop.LABEL, jumpargs, None, descr=jitcell_token)]
 
     try:
-        optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+        optimize_trace(metainterp_sd, part, enable_opts)
     except InvalidLoop:
         return None
     target_token = part.operations[0].getdescr()
@@ -153,7 +161,7 @@
         jumpargs = part.operations[-1].getarglist()
 
         try:
-            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+            optimize_trace(metainterp_sd, part, enable_opts)
         except InvalidLoop:
             return None
             
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -2039,8 +2039,9 @@
                     memmgr = self.staticdata.warmrunnerdesc.memory_manager
                     if memmgr:
                         if self.cancel_count > memmgr.max_unroll_loops:
-                            self.staticdata.log('cancelled too many times!')
-                            raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP)
+                            self.compile_loop_or_abort(original_boxes,
+                                                       live_arg_boxes,
+                                                       start, resumedescr)
                 self.staticdata.log('cancelled, tracing more...')
 
         # Otherwise, no loop found so far, so continue tracing.
@@ -2140,7 +2141,8 @@
                 return None
         return token
 
-    def compile_loop(self, original_boxes, live_arg_boxes, start, resume_at_jump_descr):
+    def compile_loop(self, original_boxes, live_arg_boxes, start,
+                     resume_at_jump_descr, try_disabling_unroll=False):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = original_boxes[:num_green_args]
         if not self.partial_trace:
@@ -2156,7 +2158,8 @@
             target_token = compile.compile_loop(self, greenkey, start,
                                                 original_boxes[num_green_args:],
                                                 live_arg_boxes[num_green_args:],
-                                                resume_at_jump_descr)
+                                                resume_at_jump_descr,
+                                     try_disabling_unroll=try_disabling_unroll)
             if target_token is not None:
                 assert isinstance(target_token, TargetToken)
                 self.jitdriver_sd.warmstate.attach_procedure_to_interp(greenkey, target_token.targeting_jitcell_token)
@@ -2168,6 +2171,18 @@
             jitcell_token = target_token.targeting_jitcell_token
             self.raise_continue_running_normally(live_arg_boxes, jitcell_token)
 
+    def compile_loop_or_abort(self, original_boxes, live_arg_boxes,
+                              start, resume_at_jump_descr):
+        """Called after we aborted more than 'max_unroll_loops' times.
+        As a last attempt, try to compile the loop with unrolling disabled.
+        """
+        if not self.partial_trace:
+            self.compile_loop(original_boxes, live_arg_boxes, start,
+                              resume_at_jump_descr, try_disabling_unroll=True)
+        #
+        self.staticdata.log('cancelled too many times!')
+        raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP)
+
     def compile_trace(self, live_arg_boxes, resume_at_jump_descr):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = live_arg_boxes[:num_green_args]
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -2734,6 +2734,35 @@
         finally:
             optimizeopt.optimize_trace = old_optimize_trace
 
+    def test_max_unroll_loops_retry_without_unroll(self):
+        from pypy.jit.metainterp.optimize import InvalidLoop
+        from pypy.jit.metainterp import optimizeopt
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i'])
+        #
+        def f(n, limit):
+            set_param(myjitdriver, 'threshold', 5)
+            set_param(myjitdriver, 'max_unroll_loops', limit)
+            i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i)
+                print i
+                i += 1
+            return i
+        #
+        seen = []
+        def my_optimize_trace(metainterp_sd, loop, enable_opts, *args, **kwds):
+            seen.append('unroll' in enable_opts)
+            raise InvalidLoop
+        old_optimize_trace = optimizeopt.optimize_trace
+        optimizeopt.optimize_trace = my_optimize_trace
+        try:
+            res = self.meta_interp(f, [23, 4])
+            assert res == 23
+            assert False in seen
+            assert True in seen
+        finally:
+            optimizeopt.optimize_trace = old_optimize_trace
+
     def test_retrace_limit_with_extra_guards(self):
         myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a',
                                                      'node'])
diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py
--- a/pypy/module/_cffi_backend/__init__.py
+++ b/pypy/module/_cffi_backend/__init__.py
@@ -27,7 +27,8 @@
         'alignof': 'func.alignof',
         'sizeof': 'func.sizeof',
         'typeof': 'func.typeof',
-        'offsetof': 'func.offsetof',
+        'typeoffsetof': 'func.typeoffsetof',
+        'rawaddressof': 'func.rawaddressof',
         '_getfields': 'func._getfields',
         'getcname': 'func.getcname',
         '_get_types': 'func._get_types',
diff --git a/pypy/module/_cffi_backend/ctypeobj.py b/pypy/module/_cffi_backend/ctypeobj.py
--- a/pypy/module/_cffi_backend/ctypeobj.py
+++ b/pypy/module/_cffi_backend/ctypeobj.py
@@ -134,14 +134,22 @@
                               "ctype '%s' is of unknown alignment",
                               self.name)
 
-    def offsetof(self, fieldname):
+    def typeoffsetof(self, fieldname):
         space = self.space
-        raise OperationError(space.w_TypeError,
-                             space.wrap("not a struct or union ctype"))
+        if fieldname is None:
+            msg = "expected a struct or union ctype"
+        else:
+            msg = "expected a struct or union ctype, or a pointer to one"
+        raise OperationError(space.w_TypeError, space.wrap(msg))
 
     def _getfields(self):
         return None
 
+    def rawaddressof(self, cdata, offset):
+        space = self.space
+        raise OperationError(space.w_TypeError,
+                             space.wrap("expected a pointer ctype"))
+
     def call(self, funcaddr, args_w):
         space = self.space
         raise operationerrfmt(space.w_TypeError,
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -169,13 +169,9 @@
             self.vrangemax = (r_ulonglong(1) << sh) - 1
 
     def int(self, cdata):
-        if self.value_fits_long:
-            # this case is to handle enums, but also serves as a slight
-            # performance improvement for some other primitive types
-            value = misc.read_raw_long_data(cdata, self.size)
-            return self.space.wrap(value)
-        else:
-            return self.convert_to_object(cdata)
+        # enums: really call convert_to_object() just below,
+        # and not the one overridden in W_CTypeEnum.
+        return W_CTypePrimitiveSigned.convert_to_object(self, cdata)
 
     def convert_to_object(self, cdata):
         if self.value_fits_long:
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -70,7 +70,8 @@
             for i in range(len(lst_w)):
                 ctitem.convert_from_object(cdata, lst_w[i])
                 cdata = rffi.ptradd(cdata, ctitem.size)
-        elif isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveChar):
+        elif (self.ctitem.is_primitive_integer and
+              self.ctitem.size == rffi.sizeof(lltype.Char)):
             try:
                 s = space.str_w(w_ob)
             except OperationError, e:
@@ -274,18 +275,26 @@
             return True
         else:
             set_mustfree_flag(cdata, False)
-            try:
-                self.convert_from_object(cdata, w_ob)
-            except OperationError:
-                if (self.is_struct_ptr and isinstance(ob, cdataobj.W_CData)
-                    and ob.ctype is self.ctitem):
-                    # special case to make the life of verifier.py easier:
-                    # if the formal argument type is 'struct foo *' but
-                    # we pass a 'struct foo', then get a pointer to it
-                    rffi.cast(rffi.CCHARPP, cdata)[0] = ob._cdata
-                else:
-                    raise
+            self.convert_from_object(cdata, w_ob)
             return False
 
     def getcfield(self, attr):
         return self.ctitem.getcfield(attr)
+
+    def typeoffsetof(self, fieldname):
+        if fieldname is None:
+            return W_CTypePtrBase.typeoffsetof(self, fieldname)
+        else:
+            return self.ctitem.typeoffsetof(fieldname)
+
+    def rawaddressof(self, cdata, offset):
+        from pypy.module._cffi_backend.ctypestruct import W_CTypeStructOrUnion
+        space = self.space
+        ctype2 = cdata.ctype
+        if (isinstance(ctype2, W_CTypeStructOrUnion) or
+            (isinstance(ctype2, W_CTypePtrOrArray) and ctype2.is_struct_ptr)):
+            ptrdata = rffi.ptradd(cdata._cdata, offset)
+            return cdataobj.W_CData(space, ptrdata, self)
+        else:
+            raise OperationError(space.w_TypeError,
+                     space.wrap("expected a 'cdata struct-or-union' object"))
diff --git a/pypy/module/_cffi_backend/ctypestruct.py b/pypy/module/_cffi_backend/ctypestruct.py
--- a/pypy/module/_cffi_backend/ctypestruct.py
+++ b/pypy/module/_cffi_backend/ctypestruct.py
@@ -61,14 +61,19 @@
         keepalive_until_here(ob)
         return ob
 
-    def offsetof(self, fieldname):
+    def typeoffsetof(self, fieldname):
+        if fieldname is None:
+            return (self, 0)
         self.check_complete()
+        space = self.space
         try:
             cfield = self.fields_dict[fieldname]
         except KeyError:
-            space = self.space
             raise OperationError(space.w_KeyError, space.wrap(fieldname))
-        return cfield.offset
+        if cfield.bitshift >= 0:
+            raise OperationError(space.w_TypeError,
+                                 space.wrap("not supported for bitfields"))
+        return (cfield.ctype, cfield.offset)
 
     def _copy_from_same(self, cdata, w_ob):
         space = self.space
diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py
--- a/pypy/module/_cffi_backend/func.py
+++ b/pypy/module/_cffi_backend/func.py
@@ -53,15 +53,19 @@
     align = ctype.alignof()
     return space.wrap(align)
 
-@unwrap_spec(ctype=ctypeobj.W_CType, fieldname=str)
-def offsetof(space, ctype, fieldname):
-    ofs = ctype.offsetof(fieldname)
-    return space.wrap(ofs)
+@unwrap_spec(ctype=ctypeobj.W_CType, fieldname="str_or_None")
+def typeoffsetof(space, ctype, fieldname):
+    ctype, offset = ctype.typeoffsetof(fieldname)
+    return space.newtuple([space.wrap(ctype), space.wrap(offset)])
 
 @unwrap_spec(ctype=ctypeobj.W_CType)
 def _getfields(space, ctype):
     return ctype._getfields()
 
+@unwrap_spec(ctype=ctypeobj.W_CType, cdata=cdataobj.W_CData, offset=int)
+def rawaddressof(space, ctype, cdata, offset=0):
+    return ctype.rawaddressof(cdata, offset)
+
 # ____________________________________________________________
 
 @unwrap_spec(ctype=ctypeobj.W_CType, replace_with=str)
diff --git a/pypy/module/_cffi_backend/libraryobj.py b/pypy/module/_cffi_backend/libraryobj.py
--- a/pypy/module/_cffi_backend/libraryobj.py
+++ b/pypy/module/_cffi_backend/libraryobj.py
@@ -28,7 +28,7 @@
                 self.handle = dlopen(ll_libname, mode)
             except DLOpenError, e:
                 raise operationerrfmt(space.w_OSError,
-                                      "cannot load '%s': %s",
+                                      "cannot load library %s: %s",
                                       filename, e.msg)
         self.name = filename
 
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -741,6 +741,8 @@
     assert repr(s.a1).startswith("<cdata 'int[5]' 0x")
 
 def test_offsetof():
+    def offsetof(BType, fieldname):
+        return typeoffsetof(BType, fieldname)[1]
     BInt = new_primitive_type("int")
     BStruct = new_struct_type("foo")
     py.test.raises(TypeError, offsetof, BInt, "abc")
@@ -749,6 +751,7 @@
     assert offsetof(BStruct, 'abc') == 0
     assert offsetof(BStruct, 'def') == size_of_int()
     py.test.raises(KeyError, offsetof, BStruct, "ghi")
+    assert offsetof(new_pointer_type(BStruct), "def") == size_of_int()
 
 def test_function_type():
     BInt = new_primitive_type("int")
@@ -888,11 +891,8 @@
     BFunc20 = new_function_type((BStructPtr,), BShort, False)
     f = cast(BFunc20, _testfunc(20))
     x = newp(BStructPtr, {'a1': b'A', 'a2': -4042})
-    # test the exception that allows us to pass a 'struct foo' where the
-    # function really expects a 'struct foo *'.
-    res = f(x[0])
-    assert res == -4042 + ord(b'A')
-    assert res == f(x)
+    # can't pass a 'struct foo'
+    py.test.raises(TypeError, f, x[0])
 
 def test_call_function_21():
     BInt = new_primitive_type("int")
@@ -2115,3 +2115,77 @@
     BDouble = new_primitive_type("double")
     assert int(cast(BBool, cast(BDouble, 0.1))) == 1
     assert int(cast(BBool, cast(BDouble, 0.0))) == 0
+
+def test_typeoffsetof():
+    BChar = new_primitive_type("char")
+    BStruct = new_struct_type("foo")
+    BStructPtr = new_pointer_type(BStruct)
+    complete_struct_or_union(BStruct, [('a1', BChar, -1),
+                                       ('a2', BChar, -1),
+                                       ('a3', BChar, -1)])
+    py.test.raises(TypeError, typeoffsetof, BStructPtr, None)
+    assert typeoffsetof(BStruct, None) == (BStruct, 0)
+    assert typeoffsetof(BStructPtr, 'a1') == (BChar, 0)
+    assert typeoffsetof(BStruct, 'a1') == (BChar, 0)
+    assert typeoffsetof(BStructPtr, 'a2') == (BChar, 1)
+    assert typeoffsetof(BStruct, 'a3') == (BChar, 2)
+    py.test.raises(KeyError, typeoffsetof, BStructPtr, 'a4')
+    py.test.raises(KeyError, typeoffsetof, BStruct, 'a5')
+
+def test_typeoffsetof_no_bitfield():
+    BInt = new_primitive_type("int")
+    BStruct = new_struct_type("foo")
+    complete_struct_or_union(BStruct, [('a1', BInt, 4)])
+    py.test.raises(TypeError, typeoffsetof, BStruct, 'a1')
+
+def test_rawaddressof():
+    BChar = new_primitive_type("char")
+    BCharP = new_pointer_type(BChar)
+    BStruct = new_struct_type("foo")
+    BStructPtr = new_pointer_type(BStruct)
+    complete_struct_or_union(BStruct, [('a1', BChar, -1),
+                                       ('a2', BChar, -1),
+                                       ('a3', BChar, -1)])
+    p = newp(BStructPtr)
+    assert repr(p) == "<cdata 'struct foo *' owning 3 bytes>"
+    s = p[0]
+    assert repr(s) == "<cdata 'struct foo' owning 3 bytes>"
+    a = rawaddressof(BStructPtr, s)
+    assert repr(a).startswith("<cdata 'struct foo *' 0x")
+    py.test.raises(TypeError, rawaddressof, BStruct, s)
+    b = rawaddressof(BCharP, s)
+    assert b == cast(BCharP, p)
+    c = rawaddressof(BStructPtr, a)
+    assert c == a
+    py.test.raises(TypeError, rawaddressof, BStructPtr, cast(BChar, '?'))
+    #
+    d = rawaddressof(BCharP, s, 1)
+    assert d == cast(BCharP, p) + 1
+
+def test_newp_signed_unsigned_char():
+    BCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("char")), None)
+    p = newp(BCharArray, b"foo")
+    assert len(p) == 4
+    assert list(p) == [b"f", b"o", b"o", b"\x00"]
+    #
+    BUCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("unsigned char")), None)
+    p = newp(BUCharArray, b"fo\xff")
+    assert len(p) == 4
+    assert list(p) == [ord("f"), ord("o"), 0xff, 0]
+    #
+    BSCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("signed char")), None)
+    p = newp(BSCharArray, b"fo\xff")
+    assert len(p) == 4
+    assert list(p) == [ord("f"), ord("o"), -1, 0]
+
+def test_newp_from_bytearray_doesnt_work():
+    BCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("char")), None)
+    py.test.raises(TypeError, newp, BCharArray, bytearray(b"foo"))
+    p = newp(BCharArray, 4)
+    buffer(p)[:] = bytearray(b"foo\x00")
+    assert len(p) == 4
+    assert list(p) == [b"f", b"o", b"o", b"\x00"]
diff --git a/pypy/module/_csv/__init__.py b/pypy/module/_csv/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/__init__.py
@@ -0,0 +1,87 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+
+class Module(MixedModule):
+    """CSV parsing and writing.
+
+This module provides classes that assist in the reading and writing
+of Comma Separated Value (CSV) files, and implements the interface
+described by PEP 305.  Although many CSV files are simple to parse,
+the format is not formally defined by a stable specification and
+is subtle enough that parsing lines of a CSV file with something
+like line.split(\",\") is bound to fail.  The module supports three
+basic APIs: reading, writing, and registration of dialects.
+
+
+DIALECT REGISTRATION:
+
+Readers and writers support a dialect argument, which is a convenient
+handle on a group of settings.  When the dialect argument is a string,
+it identifies one of the dialects previously registered with the module.
+If it is a class or instance, the attributes of the argument are used as
+the settings for the reader or writer:
+
+    class excel:
+        delimiter = ','
+        quotechar = '\"'
+        escapechar = None
+        doublequote = True
+        skipinitialspace = False
+        lineterminator = '\\r\\n'
+        quoting = QUOTE_MINIMAL
+
+SETTINGS:
+
+    * quotechar - specifies a one-character string to use as the 
+        quoting character.  It defaults to '\"'.
+    * delimiter - specifies a one-character string to use as the 
+        field separator.  It defaults to ','.
+    * skipinitialspace - specifies how to interpret whitespace which
+        immediately follows a delimiter.  It defaults to False, which
+        means that whitespace immediately following a delimiter is part
+        of the following field.
+    * lineterminator -  specifies the character sequence which should 
+        terminate rows.
+    * quoting - controls when quotes should be generated by the writer.
+        It can take on any of the following module constants:
+
+        csv.QUOTE_MINIMAL means only when required, for example, when a
+            field contains either the quotechar or the delimiter
+        csv.QUOTE_ALL means that quotes are always placed around fields.
+        csv.QUOTE_NONNUMERIC means that quotes are always placed around
+            fields which do not parse as integers or floating point
+            numbers.
+        csv.QUOTE_NONE means that quotes are never placed around fields.
+    * escapechar - specifies a one-character string used to escape 
+        the delimiter when quoting is set to QUOTE_NONE.
+    * doublequote - controls the handling of quotes inside fields.  When
+        True, two consecutive quotes are interpreted as one during read,
+        and when writing, each quote character embedded in the data is
+        written as two quotes.
+"""
+
+    appleveldefs = {
+        'register_dialect':   'app_csv.register_dialect',
+        'unregister_dialect': 'app_csv.unregister_dialect',
+        'get_dialect':        'app_csv.get_dialect',
+        'list_dialects':      'app_csv.list_dialects',
+        '_dialects':          'app_csv._dialects',
+
+        'Error':              'app_csv.Error',
+        }
+
+    interpleveldefs = {
+        '__version__':      'space.wrap("1.0")',
+
+        'QUOTE_MINIMAL':    'space.wrap(interp_csv.QUOTE_MINIMAL)',
+        'QUOTE_ALL':        'space.wrap(interp_csv.QUOTE_ALL)',
+        'QUOTE_NONNUMERIC': 'space.wrap(interp_csv.QUOTE_NONNUMERIC)',
+        'QUOTE_NONE':       'space.wrap(interp_csv.QUOTE_NONE)',
+
+        'Dialect': 'interp_csv.W_Dialect',
+
+        'reader': 'interp_reader.csv_reader',
+        'field_size_limit': 'interp_reader.csv_field_size_limit',
+
+        'writer': 'interp_writer.csv_writer',
+        }
diff --git a/pypy/module/_csv/app_csv.py b/pypy/module/_csv/app_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/app_csv.py
@@ -0,0 +1,33 @@
+import _csv
+
+class Error(Exception):
+    pass
+
+
+_dialects = {}
+
+def register_dialect(name, dialect=None, **kwargs):
+    """Create a mapping from a string name to a dialect class."""
+    if not isinstance(name, basestring):
+        raise TypeError("dialect name must be a string or unicode")
+
+    dialect = _csv.Dialect(dialect, **kwargs)
+    _dialects[name] = dialect
+
+def unregister_dialect(name):
+    """Delete the name/dialect mapping associated with a string name."""
+    try:
+        del _dialects[name]
+    except KeyError:
+        raise Error("unknown dialect")
+
+def get_dialect(name):
+    """Return the dialect instance associated with name."""
+    try:
+        return _dialects[name]
+    except KeyError:
+        raise Error("unknown dialect")
+
+def list_dialects():
+    """Return a list of all known dialect names."""
+    return list(_dialects)
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/interp_csv.py
@@ -0,0 +1,175 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
+from pypy.interpreter.typedef import GetSetProperty
+from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
+
+
+QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE = range(4)
+
+
+class W_Dialect(Wrappable):
+    _immutable_fields_ = [
+        "dialect",
+        "delimiter",
+        "doublequote",
+        "escapechar",
+        "lineterminator",
+        "quotechar",
+        "quoting",
+        "skipinitialspace",
+        "strict",
+        ]
+
+def _fetch(space, w_dialect, name):
+    return space.findattr(w_dialect, space.wrap(name))
+
+def _get_bool(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.is_true(w_src)
+
+def _get_int(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.int_w(w_src)
+
+def _get_str(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.str_w(w_src)
+
+def _get_char(space, w_src, default, name):
+    if w_src is None:
+        return default
+    if space.is_w(w_src, space.w_None):
+        return '\0'
+    src = space.str_w(w_src)
+    if len(src) == 1:
+        return src[0]
+    if len(src) == 0:
+        return '\0'
+    raise operationerrfmt(space.w_TypeError,
+                          '"%s" must be a 1-character string', name)
+
+def _build_dialect(space, w_dialect, w_delimiter, w_doublequote,
+                   w_escapechar, w_lineterminator, w_quotechar, w_quoting,
+                   w_skipinitialspace, w_strict):
+    if w_dialect is not None:
+        if space.isinstance_w(w_dialect, space.w_basestring):
+            w_module = space.getbuiltinmodule('_csv')
+            w_dialect = space.call_method(w_module, 'get_dialect', w_dialect)
+
+        dialect = space.interpclass_w(w_dialect)
+        if (isinstance(dialect, W_Dialect) and
+            w_delimiter is None and
+            w_doublequote is None and
+            w_escapechar is None and
+            w_lineterminator is None and
+            w_quotechar is None and
+            w_quoting is None and
+            w_skipinitialspace is None and
+            w_strict is None):
+            return dialect
+
+        if w_delimiter is None:
+            w_delimiter = _fetch(space, w_dialect, 'delimiter')
+        if w_doublequote is None:
+            w_doublequote = _fetch(space, w_dialect, 'doublequote')
+        if w_escapechar is None:
+            w_escapechar = _fetch(space, w_dialect, 'escapechar')
+        if w_lineterminator is None:
+            w_lineterminator = _fetch(space, w_dialect, 'lineterminator')
+        if w_quotechar is None:
+            w_quotechar = _fetch(space, w_dialect, 'quotechar')
+        if w_quoting is None:
+            w_quoting = _fetch(space, w_dialect, 'quoting')
+        if w_skipinitialspace is None:
+            w_skipinitialspace = _fetch(space, w_dialect, 'skipinitialspace')
+        if w_strict is None:
+            w_strict = _fetch(space, w_dialect, 'strict')
+
+    dialect = W_Dialect()
+    dialect.delimiter = _get_char(space, w_delimiter, ',', 'delimiter')
+    dialect.doublequote = _get_bool(space, w_doublequote, True)
+    dialect.escapechar = _get_char(space, w_escapechar, '\0', 'escapechar')
+    dialect.lineterminator = _get_str(space, w_lineterminator, '\r\n')
+    dialect.quotechar = _get_char(space, w_quotechar, '"', 'quotechar')
+    tmp_quoting = _get_int(space, w_quoting, QUOTE_MINIMAL)
+    dialect.skipinitialspace = _get_bool(space, w_skipinitialspace, False)
+    dialect.strict = _get_bool(space, w_strict, False)
+
+    # validate options
+    if not (0 <= tmp_quoting < 4):
+        raise OperationError(space.w_TypeError,
+                             space.wrap('bad "quoting" value'))
+
+    if dialect.delimiter == '\0':
+        raise OperationError(space.w_TypeError,
+                             space.wrap('delimiter must be set'))
+
+    if space.is_w(w_quotechar, space.w_None) and w_quoting is None:
+        tmp_quoting = QUOTE_NONE
+    if tmp_quoting != QUOTE_NONE and dialect.quotechar == '\0':
+        raise OperationError(space.w_TypeError,
+                        space.wrap('quotechar must be set if quoting enabled'))
+    dialect.quoting = tmp_quoting
+    return dialect
+
+def W_Dialect___new__(space, w_subtype, w_dialect = NoneNotWrapped,
+                      w_delimiter        = NoneNotWrapped,
+                      w_doublequote      = NoneNotWrapped,
+                      w_escapechar       = NoneNotWrapped,
+                      w_lineterminator   = NoneNotWrapped,
+                      w_quotechar        = NoneNotWrapped,
+                      w_quoting          = NoneNotWrapped,
+                      w_skipinitialspace = NoneNotWrapped,
+                      w_strict           = NoneNotWrapped,
+                      ):
+    dialect = _build_dialect(space, w_dialect, w_delimiter, w_doublequote,
+                             w_escapechar, w_lineterminator, w_quotechar,
+                             w_quoting, w_skipinitialspace, w_strict)
+    if space.is_w(w_subtype, space.gettypeobject(W_Dialect.typedef)):
+        return space.wrap(dialect)
+    else:
+        subdialect = space.allocate_instance(W_Dialect, w_subtype)
+        subdialect.delimiter        = dialect.delimiter
+        subdialect.doublequote      = dialect.doublequote
+        subdialect.escapechar       = dialect.escapechar
+        subdialect.lineterminator   = dialect.lineterminator
+        subdialect.quotechar        = dialect.quotechar
+        subdialect.quoting          = dialect.quoting
+        subdialect.skipinitialspace = dialect.skipinitialspace
+        subdialect.strict           = dialect.strict
+        return space.wrap(subdialect)
+
+
+def _get_escapechar(space, dialect):
+    if dialect.escapechar == '\0':
+        return space.w_None
+    return space.wrap(dialect.escapechar)
+
+def _get_quotechar(space, dialect):
+    if dialect.quotechar == '\0':
+        return space.w_None
+    return space.wrap(dialect.quotechar)
+
+
+W_Dialect.typedef = TypeDef(
+        'Dialect',
+        __module__ = '_csv',
+        __new__ = interp2app(W_Dialect___new__),
+
+        delimiter        = interp_attrproperty('delimiter', W_Dialect),
+        doublequote      = interp_attrproperty('doublequote', W_Dialect),
+        escapechar       = GetSetProperty(_get_escapechar, cls=W_Dialect),
+        lineterminator   = interp_attrproperty('lineterminator', W_Dialect),
+        quotechar        = GetSetProperty(_get_quotechar, cls=W_Dialect),
+        quoting          = interp_attrproperty('quoting', W_Dialect),
+        skipinitialspace = interp_attrproperty('skipinitialspace', W_Dialect),
+        strict           = interp_attrproperty('strict', W_Dialect),
+
+        __doc__ = """CSV dialect
+
+The Dialect type records CSV parsing and generation options.
+""")
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/interp_reader.py
@@ -0,0 +1,263 @@
+from pypy.rlib.rstring import StringBuilder
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import NoneNotWrapped, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, interp2app
+from pypy.interpreter.typedef import interp_attrproperty_w, interp_attrproperty
+from pypy.module._csv.interp_csv import _build_dialect
+from pypy.module._csv.interp_csv import (QUOTE_MINIMAL, QUOTE_ALL,
+                                         QUOTE_NONNUMERIC, QUOTE_NONE)
+
+(START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
+ IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
+ EAT_CRNL) = range(8)
+
+
+class W_Reader(Wrappable):
+
+    def __init__(self, space, dialect, w_iter):
+        self.space = space
+        self.dialect = dialect
+        self.w_iter = w_iter
+        self.line_num = 0
+
+    def iter_w(self):
+        return self.space.wrap(self)
+
+    def error(self, msg):
+        space = self.space
+        msg = 'line %d: %s' % (self.line_num, msg)
+        w_module = space.getbuiltinmodule('_csv')
+        w_error = space.getattr(w_module, space.wrap('Error'))
+        raise OperationError(w_error, space.wrap(msg))
+    error._dont_inline_ = True
+
+    def add_char(self, field_builder, c):
+        assert field_builder is not None
+        if field_builder.getlength() >= field_limit.limit:
+            raise self.error("field larger than field limit")
+        field_builder.append(c)
+
+    def save_field(self, field_builder):
+        field = field_builder.build()
+        if self.numeric_field:
+            from pypy.objspace.std.strutil import ParseStringError
+            from pypy.objspace.std.strutil import string_to_float
+            self.numeric_field = False
+            try:
+                ff = string_to_float(field)
+            except ParseStringError, e:
+                raise OperationError(self.space.w_ValueError,
+                                     self.space.wrap(e.msg))
+            w_obj = self.space.wrap(ff)
+        else:
+            w_obj = self.space.wrap(field)
+        self.fields_w.append(w_obj)
+
+    def next_w(self):
+        space = self.space
+        dialect = self.dialect
+        self.fields_w = []
+        self.numeric_field = False
+        field_builder = None  # valid iff state not in [START_RECORD, EAT_CRNL]
+        state = START_RECORD
+        #
+        while True:
+            try:
+                w_line = space.next(self.w_iter)
+            except OperationError, e:
+                if e.match(space, space.w_StopIteration):
+                    if field_builder is not None:
+                        raise self.error("newline inside string")
+                raise
+            self.line_num += 1
+            line = space.str_w(w_line)
+            for c in line:
+                if c == '\0':
+                    raise self.error("line contains NULL byte")
+
+                if state == START_RECORD:
+                    if c == '\n' or c == '\r':
+                        state = EAT_CRNL
+                        continue
+                    # normal character - handle as START_FIELD
+                    state = START_FIELD
+                    # fall-through to the next case
+
+                if state == START_FIELD:
+                    field_builder = StringBuilder(64)
+                    # expecting field
+                    if c == '\n' or c == '\r':
+                        # save empty field
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif (c == dialect.quotechar and
+                              dialect.quoting != QUOTE_NONE):
+                        # start quoted field
+                        state = IN_QUOTED_FIELD
+                    elif c == dialect.escapechar:
+                        # possible escaped character
+                        state = ESCAPED_CHAR
+                    elif c == ' ' and dialect.skipinitialspace:
+                        # ignore space at start of field
+                        pass
+                    elif c == dialect.delimiter:
+                        # save empty field
+                        self.save_field(field_builder)
+                    else:
+                        # begin new unquoted field
+                        if dialect.quoting == QUOTE_NONNUMERIC:
+                            self.numeric_field = True
+                        self.add_char(field_builder, c)
+                        state = IN_FIELD
+
+                elif state == ESCAPED_CHAR:
+                    self.add_char(field_builder, c)
+                    state = IN_FIELD
+
+                elif state == IN_FIELD:
+                    # in unquoted field
+                    if c == '\n' or c == '\r':
+                        # end of line
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif c == dialect.escapechar:
+                        # possible escaped character
+                        state = ESCAPED_CHAR
+                    elif c == dialect.delimiter:
+                        # save field - wait for new field
+                        self.save_field(field_builder)
+                        state = START_FIELD
+                    else:
+                        # normal character - save in field
+                        self.add_char(field_builder, c)
+
+                elif state == IN_QUOTED_FIELD:
+                    # in quoted field
+                    if c == dialect.escapechar:
+                        # Possible escape character
+                        state = ESCAPE_IN_QUOTED_FIELD
+                    elif (c == dialect.quotechar and
+                              dialect.quoting != QUOTE_NONE):
+                        if dialect.doublequote:
+                            # doublequote; " represented by ""
+                            state = QUOTE_IN_QUOTED_FIELD
+                        else:
+                            # end of quote part of field
+                            state = IN_FIELD
+                    else:
+                        # normal character - save in field
+                        self.add_char(field_builder, c)
+
+                elif state == ESCAPE_IN_QUOTED_FIELD:
+                    self.add_char(field_builder, c)
+                    state = IN_QUOTED_FIELD
+
+                elif state == QUOTE_IN_QUOTED_FIELD:
+                    # doublequote - seen a quote in a quoted field
+                    if (dialect.quoting != QUOTE_NONE and
+                            c == dialect.quotechar):
+                        # save "" as "
+                        self.add_char(field_builder, c)
+                        state = IN_QUOTED_FIELD
+                    elif c == dialect.delimiter:
+                        # save field - wait for new field
+                        self.save_field(field_builder)
+                        state = START_FIELD
+                    elif c == '\n' or c == '\r':
+                        # end of line
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif not dialect.strict:
+                        self.add_char(field_builder, c)
+                        state = IN_FIELD
+                    else:
+                        # illegal
+                        raise self.error("'%s' expected after '%s'" % (
+                            dialect.delimiter, dialect.quotechar))
+
+                elif state == EAT_CRNL:
+                    if not (c == '\n' or c == '\r'):
+                        raise self.error("new-line character seen in unquoted "
+                                        "field - do you need to open the file "
+                                        "in universal-newline mode?")
+
+            if state == IN_FIELD or state == QUOTE_IN_QUOTED_FIELD:
+                self.save_field(field_builder)
+                break
+            elif state == ESCAPED_CHAR:
+                self.add_char(field_builder, '\n')
+                state = IN_FIELD
+            elif state == IN_QUOTED_FIELD:
+                pass
+            elif state == ESCAPE_IN_QUOTED_FIELD:
+                self.add_char(field_builder, '\n')
+                state = IN_QUOTED_FIELD
+            elif state == START_FIELD:
+                # save empty field
+                field_builder = StringBuilder(1)
+                self.save_field(field_builder)
+                break
+            else:
+                break
+        #
+        w_result = space.newlist(self.fields_w)
+        self.fields_w = None
+        return w_result
+
+
+def csv_reader(space, w_iterator, w_dialect=NoneNotWrapped,
+                  w_delimiter        = NoneNotWrapped,
+                  w_doublequote      = NoneNotWrapped,
+                  w_escapechar       = NoneNotWrapped,
+                  w_lineterminator   = NoneNotWrapped,
+                  w_quotechar        = NoneNotWrapped,
+                  w_quoting          = NoneNotWrapped,
+                  w_skipinitialspace = NoneNotWrapped,
+                  w_strict           = NoneNotWrapped,
+                  ):
+    """
+    csv_reader = reader(iterable [, dialect='excel']
+                       [optional keyword args])
+    for row in csv_reader:
+        process(row)
+
+    The "iterable" argument can be any object that returns a line
+    of input for each iteration, such as a file object or a list.  The
+    optional \"dialect\" parameter is discussed below.  The function
+    also accepts optional keyword arguments which override settings
+    provided by the dialect.
+
+    The returned object is an iterator.  Each iteration returns a row
+    of the CSV file (which can span multiple input lines)"""
+    w_iter = space.iter(w_iterator)
+    dialect = _build_dialect(space, w_dialect, w_delimiter, w_doublequote,
+                             w_escapechar, w_lineterminator, w_quotechar,
+                             w_quoting, w_skipinitialspace, w_strict)
+    return W_Reader(space, dialect, w_iter)
+
+W_Reader.typedef = TypeDef(
+        'reader',
+        __module__ = '_csv',
+        dialect = interp_attrproperty_w('dialect', W_Reader),
+        line_num = interp_attrproperty('line_num', W_Reader),
+        __iter__ = interp2app(W_Reader.iter_w),
+        next = interp2app(W_Reader.next_w),
+        __doc__ = """CSV reader
+
+Reader objects are responsible for reading and parsing tabular data
+in CSV format.""")
+W_Reader.typedef.acceptable_as_base_class = False
+
+# ____________________________________________________________
+
+class FieldLimit:
+    limit = 128 * 1024   # max parsed field size
+field_limit = FieldLimit()
+
+@unwrap_spec(new_limit=int)
+def csv_field_size_limit(space, new_limit=-1):
+    old_limit = field_limit.limit
+    if new_limit >= 0:
+        field_limit.limit = new_limit
+    return space.wrap(old_limit)
diff --git a/pypy/module/_csv/interp_writer.py b/pypy/module/_csv/interp_writer.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/interp_writer.py
@@ -0,0 +1,172 @@
+from pypy.rlib.rstring import StringBuilder
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import NoneNotWrapped
+from pypy.interpreter.typedef import TypeDef, interp2app
+from pypy.interpreter.typedef import interp_attrproperty_w
+from pypy.module._csv.interp_csv import _build_dialect
+from pypy.module._csv.interp_csv import (QUOTE_MINIMAL, QUOTE_ALL,
+                                         QUOTE_NONNUMERIC, QUOTE_NONE)
+
+
+class W_Writer(Wrappable):
+
+    def __init__(self, space, dialect, w_fileobj):
+        self.space = space
+        self.dialect = dialect
+        self.w_filewrite = space.getattr(w_fileobj, space.wrap('write'))
+        # precompute this
+        special = dialect.delimiter + dialect.lineterminator
+        if dialect.escapechar != '\0': special += dialect.escapechar
+        if dialect.quotechar  != '\0': special += dialect.quotechar
+        self.special_characters = special
+
+    def error(self, msg):
+        space = self.space
+        w_module = space.getbuiltinmodule('_csv')
+        w_error = space.getattr(w_module, space.wrap('Error'))
+        raise OperationError(w_error, space.wrap(msg))
+    error._dont_inline_ = True
+
+    def writerow(self, w_fields):
+        """Construct and write a CSV record from a sequence of fields.
+        Non-string elements will be converted to string."""
+        space = self.space
+        fields_w = space.listview(w_fields)
+        dialect = self.dialect
+        rec = StringBuilder(80)
+        #
+        for field_index in range(len(fields_w)):
+            w_field = fields_w[field_index]
+            if space.is_w(w_field, space.w_None):
+                field = ""
+            elif space.isinstance_w(w_field, space.w_float):
+                field = space.str_w(space.repr(w_field))
+            else:
+                field = space.str_w(space.str(w_field))
+            #
+            if dialect.quoting == QUOTE_NONNUMERIC:
+                try:
+                    space.float_w(w_field)    # is it an int/long/float?
+                    quoted = False
+                except OperationError, e:
+                    if e.async(space):
+                        raise
+                    quoted = True
+            elif dialect.quoting == QUOTE_ALL:
+                quoted = True
+            elif dialect.quoting == QUOTE_MINIMAL:
+                # Find out if we really need quoting
+                special_characters = self.special_characters
+                for c in field:
+                    if c in special_characters:
+                        if c != dialect.quotechar or dialect.doublequote:
+                            quoted = True
+                            break
+                else:
+                    quoted = False
+            else:
+                quoted = False
+
+            # If field is empty check if it needs to be quoted
+            if len(field) == 0 and len(fields_w) == 1:
+                if dialect.quoting == QUOTE_NONE:
+                    raise self.error("single empty field record "
+                                     "must be quoted")
+                quoted = True
+
+            # If this is not the first field we need a field separator
+            if field_index > 0:
+                rec.append(dialect.delimiter)
+
+            # Handle preceding quote
+            if quoted:
+                rec.append(dialect.quotechar)
+
+            # Copy field data
+            special_characters = self.special_characters
+            for c in field:
+                if c in special_characters:
+                    if dialect.quoting == QUOTE_NONE:
+                        want_escape = True
+                    else:
+                        want_escape = False
+                        if c == dialect.quotechar:
+                            if dialect.doublequote:
+                                rec.append(dialect.quotechar)
+                            else:
+                                want_escape = True
+                    if want_escape:
+                        if dialect.escapechar == '\0':
+                            raise self.error("need to escape, "
+                                             "but no escapechar set")
+                        rec.append(dialect.escapechar)
+                    else:
+                        assert quoted
+                # Copy field character into record buffer
+                rec.append(c)
+
+            # Handle final quote
+            if quoted:
+                rec.append(dialect.quotechar)
+
+        # Add line terminator
+        rec.append(dialect.lineterminator)
+
+        line = rec.build()
+        return space.call_function(self.w_filewrite, space.wrap(line))
+
+    def writerows(self, w_seqseq):
+        """Construct and write a series of sequences to a csv file.
+        Non-string elements will be converted to string."""
+        space = self.space
+        w_iter = space.iter(w_seqseq)
+        while True:
+            try:
+                w_seq = space.next(w_iter)
+            except OperationError, e:
+                if e.match(space, space.w_StopIteration):
+                    break
+                raise
+            self.writerow(w_seq)
+
+
+def csv_writer(space, w_fileobj, w_dialect=NoneNotWrapped,
+                  w_delimiter        = NoneNotWrapped,
+                  w_doublequote      = NoneNotWrapped,
+                  w_escapechar       = NoneNotWrapped,
+                  w_lineterminator   = NoneNotWrapped,
+                  w_quotechar        = NoneNotWrapped,
+                  w_quoting          = NoneNotWrapped,
+                  w_skipinitialspace = NoneNotWrapped,
+                  w_strict           = NoneNotWrapped,
+                  ):
+    """
+    csv_writer = csv.writer(fileobj [, dialect='excel']
+                            [optional keyword args])
+    for row in sequence:
+        csv_writer.writerow(row)
+
+    [or]
+
+    csv_writer = csv.writer(fileobj [, dialect='excel']
+                            [optional keyword args])
+    csv_writer.writerows(rows)
+
+    The \"fileobj\" argument can be any object that supports the file API."""
+    dialect = _build_dialect(space, w_dialect, w_delimiter, w_doublequote,
+                             w_escapechar, w_lineterminator, w_quotechar,
+                             w_quoting, w_skipinitialspace, w_strict)
+    return W_Writer(space, dialect, w_fileobj)
+
+W_Writer.typedef = TypeDef(
+        'writer',
+        __module__ = '_csv',
+        dialect = interp_attrproperty_w('dialect', W_Writer),
+        writerow = interp2app(W_Writer.writerow),
+        writerows = interp2app(W_Writer.writerows),
+        __doc__ = """CSV writer
+
+Writer objects are responsible for generating tabular data
+in CSV format from sequence input.""")
+W_Writer.typedef.acceptable_as_base_class = False
diff --git a/pypy/module/_csv/test/test_dialect.py b/pypy/module/_csv/test/test_dialect.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/test/test_dialect.py
@@ -0,0 +1,107 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestDialect(object):
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_csv'])
+
+    def test_register_dialect(self):
+        import _csv
+
+        attrs = [('delimiter', ','),
+                 ('doublequote', True),
+                 ('escapechar', None),
+                 ('lineterminator', '\r\n'),
+                 ('quotechar', '"'),
+                 ('quoting', _csv.QUOTE_MINIMAL),
+                 ('skipinitialspace', False),
+                 ('strict', False),
+                 ]
+
+        for changeattr, newvalue in [('delimiter', ':'),
+                                     ('doublequote', False),
+                                     ('escapechar', '/'),
+                                     ('lineterminator', '---\n'),
+                                     ('quotechar', '%'),
+                                     ('quoting', _csv.QUOTE_NONNUMERIC),
+                                     ('skipinitialspace', True),
+                                     ('strict', True)]:
+            kwargs = {changeattr: newvalue}
+            _csv.register_dialect('foo1', **kwargs)
+            d = _csv.get_dialect('foo1')
+            assert d.__class__.__name__ == 'Dialect'
+            for attr, default in attrs:
+                if attr == changeattr:
+                    expected = newvalue
+                else:
+                    expected = default
+                assert getattr(d, attr) == expected
+
+    def test_register_dialect_base_1(self):
+        import _csv
+        _csv.register_dialect('foo1', escapechar='!')
+        _csv.register_dialect('foo2', 'foo1', strict=True)
+        d1 = _csv.get_dialect('foo1')
+        assert d1.escapechar == '!'
+        assert d1.strict == False
+        d2 = _csv.get_dialect('foo2')
+        assert d2.escapechar == '!'
+        assert d2.strict == True
+
+    def test_register_dialect_base_2(self):
+        import _csv
+        class Foo1:
+            escapechar = '?'
+        _csv.register_dialect('foo2', Foo1, strict=True)
+        d2 = _csv.get_dialect('foo2')
+        assert d2.escapechar == '?'
+        assert d2.strict == True
+
+    def test_typeerror(self):
+        import _csv
+        attempts = [("delimiter", '', 123),
+                    ("escapechar", Ellipsis, 'foo', 0),
+                    ("lineterminator", -132),
+                    ("quotechar", '', 25),
+                    ("quoting", 4, '', '\x00'),
+                    ]
+        for attempt in attempts:
+            name = attempt[0]
+            for value in attempt[1:]:
+                kwargs = {name: value}
+                raises(TypeError, _csv.register_dialect, 'foo1', **kwargs)
+
+    def test_bool_arg(self):
+        # boolean arguments take *any* object and use its truth-value
+        import _csv
+        _csv.register_dialect('foo1', doublequote=[])
+        assert _csv.get_dialect('foo1').doublequote == False
+        _csv.register_dialect('foo1', skipinitialspace=2)
+        assert _csv.get_dialect('foo1').skipinitialspace == True
+        _csv.register_dialect('foo1', strict=_csv)    # :-/
+        assert _csv.get_dialect('foo1').strict == True
+
+    def test_line_terminator(self):
+        # lineterminator can be the empty string
+        import _csv
+        _csv.register_dialect('foo1', lineterminator='')
+        assert _csv.get_dialect('foo1').lineterminator == ''
+
+    def test_unregister_dialect(self):
+        import _csv
+        _csv.register_dialect('foo1')
+        _csv.unregister_dialect('foo1')
+        raises(_csv.Error, _csv.get_dialect, 'foo1')
+        raises(_csv.Error, _csv.unregister_dialect, 'foo1')
+
+    def test_list_dialects(self):
+        import _csv
+        lst = _csv.list_dialects()
+        assert type(lst) is list
+        assert 'neverseen' not in lst
+        _csv.register_dialect('neverseen')
+        lst = _csv.list_dialects()
+        assert 'neverseen' in lst
+        _csv.unregister_dialect('neverseen')
+        lst = _csv.list_dialects()
+        assert 'neverseen' not in lst
diff --git a/pypy/module/_csv/test/test_reader.py b/pypy/module/_csv/test/test_reader.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/test/test_reader.py
@@ -0,0 +1,101 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestReader(object):
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_csv'])
+
+        w__read_test = cls.space.appexec([], r"""():
+            import _csv
+            def _read_test(input, expect, **kwargs):
+                reader = _csv.reader(input, **kwargs)
+                if expect == 'Error':
+                    raises(_csv.Error, list, reader)
+                    return
+                result = list(reader)
+                assert result == expect, 'result: %r\nexpect: %r' % (
+                    result, expect)
+            return _read_test
+        """)
+        if type(w__read_test) is type(lambda:0):
+            w__read_test = staticmethod(w__read_test)
+        cls.w__read_test = w__read_test
+
+    def test_simple_reader(self):
+        self._read_test(['foo:bar\n'], [['foo', 'bar']], delimiter=':')
+
+    def test_read_oddinputs(self):
+        self._read_test([], [])
+        self._read_test([''], [[]])
+        self._read_test(['"ab"c'], 'Error', strict = 1)
+        # cannot handle null bytes for the moment
+        self._read_test(['ab\0c'], 'Error', strict = 1)
+        self._read_test(['"ab"c'], [['abc']], doublequote = 0)
+
+    def test_read_eol(self):
+        self._read_test(['a,b'], [['a','b']])
+        self._read_test(['a,b\n'], [['a','b']])
+        self._read_test(['a,b\r\n'], [['a','b']])
+        self._read_test(['a,b\r'], [['a','b']])
+        self._read_test(['a,b\rc,d'], 'Error')
+        self._read_test(['a,b\nc,d'], 'Error')
+        self._read_test(['a,b\r\nc,d'], 'Error')
+
+    def test_read_escape(self):
+        self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
+        self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
+        self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
+        self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
+        self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
+        self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
+
+    def test_read_quoting(self):
+        import _csv as csv
+        self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
+        self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
+                        quotechar=None, escapechar='\\')
+        self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
+                        quoting=csv.QUOTE_NONE, escapechar='\\')
+        # will this fail where locale uses comma for decimals?
+        self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
+                        quoting=csv.QUOTE_NONNUMERIC)
+        self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
+        raises(ValueError, self._read_test,
+                          ['abc,3'], [[]],
+                          quoting=csv.QUOTE_NONNUMERIC)
+
+    def test_read_bigfield(self):
+        # This exercises the buffer realloc functionality and field size
+        # limits.
+        import _csv as csv
+        limit = csv.field_size_limit()
+        try:
+            size = 150
+            bigstring = 'X' * size
+            bigline = '%s,%s' % (bigstring, bigstring)
+            self._read_test([bigline], [[bigstring, bigstring]])
+            csv.field_size_limit(size)
+            self._read_test([bigline], [[bigstring, bigstring]])
+            assert csv.field_size_limit() == size
+            csv.field_size_limit(size-1)
+            self._read_test([bigline], 'Error')
+            raises(TypeError, csv.field_size_limit, None)
+            raises(TypeError, csv.field_size_limit, 1, None)
+        finally:
+            csv.field_size_limit(limit)
+
+    def test_read_linenum(self):
+        import _csv as csv
+        r = csv.reader(['line,1', 'line,2', 'line,3'])
+        assert r.line_num == 0
+        r.next()
+        assert r.line_num == 1
+        r.next()
+        assert r.line_num == 2
+        r.next()
+        assert r.line_num == 3
+        raises(StopIteration, r.next)
+        assert r.line_num == 3
+
+    def test_dubious_quote(self):
+        self._read_test(['12,12,1",'], [['12', '12', '1"', '']])


More information about the pypy-commit mailing list