[Python-checkins] r46476 - sandbox/trunk/hotbuffer/README.txt sandbox/trunk/hotbuffer/test_hotbuf.py

Sat May 27 18:14:47 CEST 2006

Author: martin.blais
Date: Sat May 27 18:14:47 2006
New Revision: 46476

Modified:
   sandbox/trunk/hotbuffer/README.txt
   sandbox/trunk/hotbuffer/test_hotbuf.py
Log:
Implemented use case for netstrings.

Modified: sandbox/trunk/hotbuffer/README.txt
==============================================================================

--- sandbox/trunk/hotbuffer/README.txt	(original)
+++ sandbox/trunk/hotbuffer/README.txt	Sat May 27 18:14:47 2006
@@ -21,37 +21,42 @@
 TODO
 ====
 
-* Should getbyterel() implement getting negative offsets from the end of the
-  window?
-
-  - BTW getbyterel() should be implemented using the slicing operators
-
-* Remove the advbytes argument to pop(), it is confusing.
-
-* Remove the mark, save() and restore().
-
-* Implement the entire string protocol, since that will be a fast to contents
+* Should getbyterel() implement getting negative offsets from the
+  end of the window rather than move backwards from the current
+  position?  I think it should.
+
+* Implement the entire string protocol, since that will be a fast
+  path to contents (no dict lookup is necessary)
+
+  * Also, getbyterel() should be implemented using the slicing
+    operators, but you need to check if they allow returning an int
+    rather than a string.
 
 * Make it possible to read from a file directly into a hotbuf
 
-  - implement the file protocol (read(), write()) on the hotbuf object
-  - euh, is there a file protocol?
-
-* Implement relative get/put methods that don't increment the position.
-
-* Implement absolute get/put methods.
+  - Implement fromfile() and tofile() on the hotbuf object
+  - Check if there is a tp_file protocol and if there is, use that
+    instead to provide the interface.
 
-* We need to select between PyObject_MALLOC and PyObject_MEMMALLOC
+* Implement absolute get/put methods (getabs(n), putabs(n, data))
 
-* Implement pack() in C
+* The hot buffer can unpack in C; similarly, implement pack() in C.
 
-* Add support for some of the other sequence methods.
+* Implement some of the other sequence methods.
 
-* Add methods to parse ints, longs, floats and doubles directly from the buffer
-  contents, without using a temporary string.
+* Add methods to parse ints, longs, floats and doubles directly
+  from/to the buffer contents, without using a temporary string.
+  getlong(), putlong(), etc.
 
-* Write a small PEP about this, when all is said and done.
+* Documentation: write a small PEP about this, when all is said and
+  done.
+  
+  - hot buffers are really just a fancy string that can change on top of a
+    fixed-allocated memory buffer, and should provide the same interface
 
+  - Note for common use cases: the buffer should have at least the
+    size of the minimal line/message that you may ever encounter,
+    otherwise you will have to write special parsing routines.
 
 
 Other Features

Modified: sandbox/trunk/hotbuffer/test_hotbuf.py
==============================================================================
--- sandbox/trunk/hotbuffer/test_hotbuf.py	(original)
+++ sandbox/trunk/hotbuffer/test_hotbuf.py	Sat May 27 18:14:47 2006
@@ -7,7 +7,7 @@
 #
 
 from hotbuf import hotbuf, BoundaryError
-from struct import Struct
+from struct import Struct, pack
 import unittest
 from cStringIO import StringIO
 from test import test_support
@@ -365,38 +365,11 @@
     on your application and the side-effects that may have occurred.
     """
 
-    data1 = """
-Most programming languages, including Lisp, are organized
-around computing the values of mathematical
-functions. Expression-oriented languages (such as Lisp,
-Fortran, and Algol) capitalize on the ``pun'' that an
-expression that describes the value of a function may also
-be interpreted as a means of computing that value. Because
-of this, most programming languages are strongly biased
-toward unidirectional computations (computations with
-well-defined inputs and outputs). There are, however,
-radically different programming languages that relax this
-bias. We saw one such example in section 3.3.5, where the
-objects of computation were arithmetic constraints. In a
-constraint system the direction and the order of
-computation are not so well specified; in carrying out a
-computation the system must therefore provide more detailed
-``how to'' knowledge than would be the case with an
-ordinary arithmetic computation.
-
-This does not mean, however, that the user is released
-altogether from the responsibility of providing imperative
-knowledge. There are many constraint networks that
-implement the same set of constraints, and the user must
-choose from the set of mathematically equivalent networks a
-suitable network to specify a particular computation."""
-    lines1 = map(str.strip, data1.splitlines())
-
     def parse_newline_delim( self, hot, read, process_line ):
         """
         Use case for newline-delimited data.
         """
-        newline, cr = ord('\n'), ord('\r')
+        cr = ord('\r')
 
         # Initiallly put some data into the buffer.
         hot.putstr(read(len(hot)))
@@ -439,8 +412,6 @@
                     break
 
             # Read more data in the buffer.
-## FIXME: we need to support reading from a file directly into the
-## buffer.
             hot.compact()
             s = read(len(hot))
             if not s:
@@ -453,29 +424,59 @@
         if hot:
             process_line(hot)
 
-
     def test_newline_delim_data( self ):
         """
         Test for newline-delimited data.
         """
-        inp = StringIO(self.data1)
+        inp = StringIO(self.data_nldelim)
         hot = hotbuf(256)
 
         lineidx = [0]
         def assert_lines( hot ):
             "Assert the lines we process are the ones we expect."
-            self.assertEquals(str(hot), self.lines1[lineidx[0]])
+            self.assertEquals(str(hot), self.lines_nldelim[lineidx[0]])
             lineidx[0] += 1
 
         self.parse_newline_delim(hot, inp.read, assert_lines)
 
+    data_nldelim = """
+Most programming languages, including Lisp, are organized
+around computing the values of mathematical
+functions. Expression-oriented languages (such as Lisp,
+Fortran, and Algol) capitalize on the ``pun'' that an
+expression that describes the value of a function may also
+be interpreted as a means of computing that value. Because
+of this, most programming languages are strongly biased
+toward unidirectional computations (computations with
+well-defined inputs and outputs). There are, however,
+radically different programming languages that relax this
+bias. We saw one such example in section 3.3.5, where the
+objects of computation were arithmetic constraints. In a
+constraint system the direction and the order of
+computation are not so well specified; in carrying out a
+computation the system must therefore provide more detailed
+``how to'' knowledge than would be the case with an
+ordinary arithmetic computation.
 
-#------------------------------------------------------------------------
-#
-    def _test_netstrings( self ):
+This does not mean, however, that the user is released
+altogether from the responsibility of providing imperative
+knowledge. There are many constraint networks that
+implement the same set of constraints, and the user must
+choose from the set of mathematically equivalent networks a
+suitable network to specify a particular computation."""
+
+    lines_nldelim = map(str.strip, data_nldelim.splitlines())
+
+
+    #---------------------------------------------------------------------------
+
+    def parse_netstrings( self, hot, read, process_msg ):
         """
         Use case for netstrings.
         """
+        # Initially put some data into the buffer.
+        hot.putstr(read(len(hot)))
+        hot.flip()
 
         # Loop over the entire input.
         while 1:
@@ -503,7 +504,7 @@
                 # - Exceptions will be programming errors.
                 # - You never need to deal with rollback of your transactions.
 
-                process_message(hot)
+                process_msg(hot)
 
                 # Advance beyond the message.
                 hot.position = limit
@@ -514,58 +515,60 @@
             s = read(len(hot))
             if not s:
                 break # Finished the input, exit.
+            hot.putstr(s)
+            hot.flip()
 
-## FIXME review the version with exceptions, would it be faster to just
-## hit the boundary?  I would prefer letting the boundary be
-
-        while 1:
-            # Catch when we hit the boundary.
-            try:
-                # Loop over all the messages in the current buffer.
-                while hot:
-                    # Read the length.
-                    length = hot.getbyte() # This never raises since
-                                           # we're hot.
-
-                    mark_position = hot.position
-                    mark_limit = hot.limit
-                    hot.limit = hot.position + length
-                    saved = True
-
-                    # Parse the message.
-                    #
-                    # - We are insured to be able to read all the message
-                    #   here because we checked for the length.
-                    # - Exceptions will be programming errors.
-                    # - You never need to deal with rollback of your
-                    #   transactions.
-
-                    process_message(hot)
-
-                    # Pop the message window and advance beyond the
-                    # length.
-                    hot.position = hot.limit
-                    saved = False
-                else:
-                    # Raise an exception, triggering a filling of the
-                    # buffer
-                    raise hotbuf.BoundaryError
-
-            except hotbuf.BoundaryError:
-                # Rollback the failed transaction, if there was one.
-                if saved:
-                    hot.position = mark_position
-                    hot.limit = mark_limit
-
-                # Compact and read the next chunk of the buffer.
-                hot.compact()
-                s = read(len(hot))
-                if not s:
-                    break # Finished the input, exit.
-
+    def test_netstrings( self ):
+        """
+        Test for parsing netstrings.
+        """
+        inp = StringIO(self.packed_netstrings)
+        hot = hotbuf(256)
 
+        msgidx = [0]
+        def assert_msg( hot ):
+            "Assert the messages we process are the ones we expect."
+            msg = str(hot)
+            l = len(hot)
+            msgtype = chr(hot.getbyte())
+            expected = self.expected_messages[msgidx[0]]
+            self.assertEquals(msg, expected)
+            msgidx[0] = (msgidx[0] + 1) % len(self.data_netstrings)
+            
+
+        self.parse_netstrings(hot, inp.read, assert_msg)
+
+    #
+    # Test data for netstrings.
+    #
+
+    # Formats for packing/unpacking.
+    data_fmts = dict( (x[0], Struct(x[1])) for x in (('A', 'h l f'),
+                                                     ('B', 'd d d'),
+                                                     ('C', 'I L l c')) )
+
+    # Expected data.
+    data_netstrings = (
+        ('A', (47, 23, 3.14159217)),
+        ('B', (1.23, 4.232, 6.433)),
+        ('A', (43, 239, 4.243232)),
+        ('C', (100, 101L, 12, 'b')),
+        ('B', (22.3232, 5.343, 4.3323)),
+        )
+
+    # Test data.
+    expected_messages, packed_netstrings = [], ''
+    for i in xrange(100):
+        for msgtype, data in data_netstrings:
+            stru = data_fmts[msgtype]
+            msg = pack('b', ord(msgtype)) + stru.pack(*data)
+            expected_messages.append(msg)
+            netstring = pack('b', len(msg)) + msg
+            packed_netstrings += netstring
 
 
+    #---------------------------------------------------------------------------
+    
     def _test_detect_boundary( self ):
         """
         Use case for arbitraty formats, where we do not set a limit for