[Numpy-svn] r5026 - in trunk/numpy/lib: . tests

numpy-svn at scipy.org numpy-svn at scipy.org
Sat Apr 12 19:18:45 EDT 2008


Author: stefan
Date: 2008-04-12 18:18:27 -0500 (Sat, 12 Apr 2008)
New Revision: 5026

Modified:
   trunk/numpy/lib/io.py
   trunk/numpy/lib/tests/test_io.py
Log:
Fix fromregex, add documentation and tests [patch by Pauli Virtanen].


Modified: trunk/numpy/lib/io.py
===================================================================
--- trunk/numpy/lib/io.py	2008-04-12 03:12:09 UTC (rev 5025)
+++ trunk/numpy/lib/io.py	2008-04-12 23:18:27 UTC (rev 5026)
@@ -362,22 +362,44 @@
         X.shape = origShape
 
 import re
-def fromregex(file, regexp, **kwds):
+def fromregex(file, regexp, dtype):
     """Construct a record array from a text file, using regular-expressions parsing.
     
-    Groups in the regular exespression are converted to fields. 
+    Array is constructed from all matches of the regular expression
+    in the file. Groups in the regular expression are converted to fields.
+
+    Parameters
+    ----------
+    file : str or file
+        File name or file object to read
+    regexp : str or regexp
+        Regular expression to use to parse the file
+    dtype : dtype or dtype list
+        Dtype for the record array
+
+    Example
+    -------
+    >>> import numpy as np
+    >>> f = open('test.dat', 'w')
+    >>> f.write("1312 foo\n1534  bar\n 444   qux")
+    >>> f.close()
+    >>> np.fromregex('test.dat', r"(\d+)\s+(...)", [('num', np.int64), ('key', 'S3')])
+    array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')], 
+          dtype=[('num', '<i8'), ('key', '|S3')])
+
     """
     if not hasattr(file, "read"):
         file = open(file,'r')
     if not hasattr(regexp, 'match'):
         regexp = re.compile(regexp)
-
+    if not isinstance(dtype, np.dtype):
+        dtype = np.dtype(dtype)
+    
     seq = regexp.findall(file.read())
-    dtypelist = []
-    for key, value in kwds.values():
-        dtypelist.append((key, value))
-    format = np.dtype(dtypelist)
-    output = array(seq, dtype=format)
+    if seq and not isinstance(seq[0], tuple):
+        # make sure np.array doesn't interpret strings as binary data
+        # by always producing a list of tuples
+        seq = [(x,) for x in seq]
+    output = np.array(seq, dtype=dtype)
     return output
-    
-    
+

Modified: trunk/numpy/lib/tests/test_io.py
===================================================================
--- trunk/numpy/lib/tests/test_io.py	2008-04-12 03:12:09 UTC (rev 5025)
+++ trunk/numpy/lib/tests/test_io.py	2008-04-12 23:18:27 UTC (rev 5026)
@@ -2,27 +2,29 @@
 import numpy as np
 import StringIO
 
-class Testsavetxt(NumpyTestCase):
+class TestSaveTxt(NumpyTestCase):
     def test_array(self):
         a =np.array( [[1,2],[3,4]], float)
         c = StringIO.StringIO()
         np.savetxt(c, a)
         c.seek(0)
-        assert(c.readlines(), ['1.000000000000000000e+00 2.000000000000000000e+00\n', '3.000000000000000000e+00 4.000000000000000000e+00\n'])
+        assert(c.readlines(),
+               ['1.000000000000000000e+00 2.000000000000000000e+00\n',
+                '3.000000000000000000e+00 4.000000000000000000e+00\n'])
 
         a =np.array( [[1,2],[3,4]], int)
         c = StringIO.StringIO()
         np.savetxt(c, a)
         c.seek(0)
         assert(c.readlines(), ['1 2\n', '3 4\n'])
-        
+
     def test_1D(self):
         a = np.array([1,2,3,4], int)
         c = StringIO.StringIO()
         np.savetxt(c, a, fmt='%d')
         c.seek(0)
         assert(c.readlines(), ['1\n', '2\n', '3\n', '4\n'])
-    
+
     def test_record(self):
         a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
         c = StringIO.StringIO()
@@ -30,9 +32,8 @@
         c.seek(0)
         assert(c.readlines(), ['1 2\n', '3 4\n'])
 
-        
 
-class Testloadtxt(NumpyTestCase):
+class TestLoadTxt(NumpyTestCase):
     def test_record(self):
         c = StringIO.StringIO()
         c.write('1 2\n3 4')
@@ -40,31 +41,33 @@
         x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)])
         a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
         assert_array_equal(x, a)
-        
+
         d = StringIO.StringIO()
         d.write('M 64.0 75.0\nF 25.0 60.0')
         d.seek(0)
 
-        mydescriptor = {'names': ('gender','age','weight'), 'formats': ('S1',
-                                'i4', 'f4')}
-        b = np.array([('M',    64.0, 75.0),('F', 25.0, 60.0)], dtype=mydescriptor)
+        mydescriptor = {'names': ('gender','age','weight'),
+                        'formats': ('S1',
+                                    'i4', 'f4')}
+        b = np.array([('M', 64.0, 75.0),
+                      ('F', 25.0, 60.0)], dtype=mydescriptor)
         y = np.loadtxt(d, dtype=mydescriptor)
         assert_array_equal(y, b)
-        
+
     def test_array(self):
         c = StringIO.StringIO()
         c.write('1 2\n3 4')
-        
+
         c.seek(0)
         x = np.loadtxt(c, dtype=int)
         a = np.array([[1,2],[3,4]], int)
         assert_array_equal(x, a)
-        
+
         c.seek(0)
         x = np.loadtxt(c, dtype=float)
         a = np.array([[1,2],[3,4]], float)
         assert_array_equal(x, a)
-        
+
     def test_1D(self):
         c = StringIO.StringIO()
         c.write('1\n2\n3\n4\n')
@@ -72,15 +75,15 @@
         x = np.loadtxt(c, dtype=int)
         a = np.array([1,2,3,4], int)
         assert_array_equal(x, a)
-        
+
         c = StringIO.StringIO()
         c.write('1,2,3,4\n')
         c.seek(0)
         x = np.loadtxt(c, dtype=int, delimiter=',')
         a = np.array([1,2,3,4], int)
         assert_array_equal(x, a)
-        
-        
+
+
     def test_missing(self):
         c = StringIO.StringIO()
         c.write('1,2,3,,5\n')
@@ -89,7 +92,38 @@
             converters={3:lambda s: int(s or -999)})
         a = np.array([1,2,3,-999,5], int)
         assert_array_equal(x, a)
-        
-    
+
+class Testfromregex(NumpyTestCase):
+    def test_record(self):
+        c = StringIO.StringIO()
+        c.write('1.312 foo\n1.534 bar\n4.444 qux')
+        c.seek(0)
+
+        dt = [('num', np.float64), ('val', 'S3')]
+        x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt)
+        a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')], dtype=dt)
+        assert_array_equal(x, a)
+
+    def test_record_2(self):
+        return # pass this test until #736 is resolved
+        c = StringIO.StringIO()
+        c.write('1312 foo\n1534 bar\n4444 qux')
+        c.seek(0)
+
+        dt = [('num', np.int32), ('val', 'S3')]
+        x = np.fromregex(c, r"(\d+)\s+(...)", dt)
+        a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')], dtype=dt)
+        assert_array_equal(x, a)
+
+    def test_record_3(self):
+        c = StringIO.StringIO()
+        c.write('1312 foo\n1534 bar\n4444 qux')
+        c.seek(0)
+
+        dt = [('num', np.float64)]
+        x = np.fromregex(c, r"(\d+)\s+...", dt)
+        a = np.array([(1312,), (1534,), (4444,)], dtype=dt)
+        assert_array_equal(x, a)
+
 if __name__ == "__main__":
     NumpyTest().run()




More information about the Numpy-svn mailing list