[Numpy-svn] r5026 - in trunk/numpy/lib: . tests
numpy-svn at scipy.org
numpy-svn at scipy.org
Sat Apr 12 19:18:45 EDT 2008
Author: stefan
Date: 2008-04-12 18:18:27 -0500 (Sat, 12 Apr 2008)
New Revision: 5026
Modified:
trunk/numpy/lib/io.py
trunk/numpy/lib/tests/test_io.py
Log:
Fix fromregex, add documentation and tests [patch by Pauli Virtanen].
Modified: trunk/numpy/lib/io.py
===================================================================
--- trunk/numpy/lib/io.py 2008-04-12 03:12:09 UTC (rev 5025)
+++ trunk/numpy/lib/io.py 2008-04-12 23:18:27 UTC (rev 5026)
@@ -362,22 +362,44 @@
X.shape = origShape
import re
-def fromregex(file, regexp, **kwds):
+def fromregex(file, regexp, dtype):
"""Construct a record array from a text file, using regular-expressions parsing.
- Groups in the regular exespression are converted to fields.
+ Array is constructed from all matches of the regular expression
+ in the file. Groups in the regular expression are converted to fields.
+
+ Parameters
+ ----------
+ file : str or file
+ File name or file object to read
+ regexp : str or regexp
+ Regular expression to use to parse the file
+ dtype : dtype or dtype list
+ Dtype for the record array
+
+ Example
+ -------
+ >>> import numpy as np
+ >>> f = open('test.dat', 'w')
+ >>> f.write("1312 foo\n1534 bar\n 444 qux")
+ >>> f.close()
+ >>> np.fromregex('test.dat', r"(\d+)\s+(...)", [('num', np.int64), ('key', 'S3')])
+ array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
+ dtype=[('num', '<i8'), ('key', '|S3')])
+
"""
if not hasattr(file, "read"):
file = open(file,'r')
if not hasattr(regexp, 'match'):
regexp = re.compile(regexp)
-
+ if not isinstance(dtype, np.dtype):
+ dtype = np.dtype(dtype)
+
seq = regexp.findall(file.read())
- dtypelist = []
- for key, value in kwds.values():
- dtypelist.append((key, value))
- format = np.dtype(dtypelist)
- output = array(seq, dtype=format)
+ if seq and not isinstance(seq[0], tuple):
+ # make sure np.array doesn't interpret strings as binary data
+ # by always producing a list of tuples
+ seq = [(x,) for x in seq]
+ output = np.array(seq, dtype=dtype)
return output
-
-
+
Modified: trunk/numpy/lib/tests/test_io.py
===================================================================
--- trunk/numpy/lib/tests/test_io.py 2008-04-12 03:12:09 UTC (rev 5025)
+++ trunk/numpy/lib/tests/test_io.py 2008-04-12 23:18:27 UTC (rev 5026)
@@ -2,27 +2,29 @@
import numpy as np
import StringIO
-class Testsavetxt(NumpyTestCase):
+class TestSaveTxt(NumpyTestCase):
def test_array(self):
a =np.array( [[1,2],[3,4]], float)
c = StringIO.StringIO()
np.savetxt(c, a)
c.seek(0)
- assert(c.readlines(), ['1.000000000000000000e+00 2.000000000000000000e+00\n', '3.000000000000000000e+00 4.000000000000000000e+00\n'])
+ assert(c.readlines(),
+ ['1.000000000000000000e+00 2.000000000000000000e+00\n',
+ '3.000000000000000000e+00 4.000000000000000000e+00\n'])
a =np.array( [[1,2],[3,4]], int)
c = StringIO.StringIO()
np.savetxt(c, a)
c.seek(0)
assert(c.readlines(), ['1 2\n', '3 4\n'])
-
+
def test_1D(self):
a = np.array([1,2,3,4], int)
c = StringIO.StringIO()
np.savetxt(c, a, fmt='%d')
c.seek(0)
assert(c.readlines(), ['1\n', '2\n', '3\n', '4\n'])
-
+
def test_record(self):
a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
c = StringIO.StringIO()
@@ -30,9 +32,8 @@
c.seek(0)
assert(c.readlines(), ['1 2\n', '3 4\n'])
-
-class Testloadtxt(NumpyTestCase):
+class TestLoadTxt(NumpyTestCase):
def test_record(self):
c = StringIO.StringIO()
c.write('1 2\n3 4')
@@ -40,31 +41,33 @@
x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)])
a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
assert_array_equal(x, a)
-
+
d = StringIO.StringIO()
d.write('M 64.0 75.0\nF 25.0 60.0')
d.seek(0)
- mydescriptor = {'names': ('gender','age','weight'), 'formats': ('S1',
- 'i4', 'f4')}
- b = np.array([('M', 64.0, 75.0),('F', 25.0, 60.0)], dtype=mydescriptor)
+ mydescriptor = {'names': ('gender','age','weight'),
+ 'formats': ('S1',
+ 'i4', 'f4')}
+ b = np.array([('M', 64.0, 75.0),
+ ('F', 25.0, 60.0)], dtype=mydescriptor)
y = np.loadtxt(d, dtype=mydescriptor)
assert_array_equal(y, b)
-
+
def test_array(self):
c = StringIO.StringIO()
c.write('1 2\n3 4')
-
+
c.seek(0)
x = np.loadtxt(c, dtype=int)
a = np.array([[1,2],[3,4]], int)
assert_array_equal(x, a)
-
+
c.seek(0)
x = np.loadtxt(c, dtype=float)
a = np.array([[1,2],[3,4]], float)
assert_array_equal(x, a)
-
+
def test_1D(self):
c = StringIO.StringIO()
c.write('1\n2\n3\n4\n')
@@ -72,15 +75,15 @@
x = np.loadtxt(c, dtype=int)
a = np.array([1,2,3,4], int)
assert_array_equal(x, a)
-
+
c = StringIO.StringIO()
c.write('1,2,3,4\n')
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',')
a = np.array([1,2,3,4], int)
assert_array_equal(x, a)
-
-
+
+
def test_missing(self):
c = StringIO.StringIO()
c.write('1,2,3,,5\n')
@@ -89,7 +92,38 @@
converters={3:lambda s: int(s or -999)})
a = np.array([1,2,3,-999,5], int)
assert_array_equal(x, a)
-
-
+
+class Testfromregex(NumpyTestCase):
+ def test_record(self):
+ c = StringIO.StringIO()
+ c.write('1.312 foo\n1.534 bar\n4.444 qux')
+ c.seek(0)
+
+ dt = [('num', np.float64), ('val', 'S3')]
+ x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt)
+ a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')], dtype=dt)
+ assert_array_equal(x, a)
+
+ def test_record_2(self):
+ return # pass this test until #736 is resolved
+ c = StringIO.StringIO()
+ c.write('1312 foo\n1534 bar\n4444 qux')
+ c.seek(0)
+
+ dt = [('num', np.int32), ('val', 'S3')]
+ x = np.fromregex(c, r"(\d+)\s+(...)", dt)
+ a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')], dtype=dt)
+ assert_array_equal(x, a)
+
+ def test_record_3(self):
+ c = StringIO.StringIO()
+ c.write('1312 foo\n1534 bar\n4444 qux')
+ c.seek(0)
+
+ dt = [('num', np.float64)]
+ x = np.fromregex(c, r"(\d+)\s+...", dt)
+ a = np.array([(1312,), (1534,), (4444,)], dtype=dt)
+ assert_array_equal(x, a)
+
if __name__ == "__main__":
NumpyTest().run()
More information about the Numpy-svn
mailing list