[Numpy-svn] r8179 - in trunk: doc numpy/lib numpy/lib/tests

Sat Feb 20 13:17:14 EST 2010

Author: ptvirtan
Date: 2010-02-20 12:17:14 -0600 (Sat, 20 Feb 2010)
New Revision: 8179

Modified:
   trunk/doc/Py3K.txt
   trunk/numpy/lib/_iotools.py
   trunk/numpy/lib/function_base.py
   trunk/numpy/lib/io.py
   trunk/numpy/lib/tests/test__iotools.py
   trunk/numpy/lib/tests/test_io.py
Log:
3K: lib: fix some bytes vs. str issues in _iotools.py and io.py -- mainly genfromtxt

Modified: trunk/doc/Py3K.txt
===================================================================

--- trunk/doc/Py3K.txt	2010-02-20 18:16:52 UTC (rev 8178)
+++ trunk/doc/Py3K.txt	2010-02-20 18:17:14 UTC (rev 8179)
@@ -211,6 +211,16 @@
    Check if I missed something here.
 
 
+numpy.loadtxt et al
+-------------------
+
+These routines are difficult to duck-type to read both Unicode and
+Bytes input.
+
+I assume they are meant for reading Bytes streams -- this is probably
+the far more common use case with scientific data.
+
+
 C Code
 ======
 

Modified: trunk/numpy/lib/_iotools.py
===================================================================
--- trunk/numpy/lib/_iotools.py	2010-02-20 18:16:52 UTC (rev 8178)
+++ trunk/numpy/lib/_iotools.py	2010-02-20 18:17:14 UTC (rev 8179)
@@ -1,11 +1,22 @@
 """A collection of functions designed to help I/O with ascii files."""
 __docformat__ = "restructuredtext en"
 
+import sys
 import numpy as np
 import numpy.core.numeric as nx
 from __builtin__ import bool, int, long, float, complex, object, unicode, str
 
+from numpy.compat import asbytes, bytes
 
+if sys.version_info[0] >= 3:
+    def _bytes_to_complex(s):
+        return complex(s.decode('ascii'))
+    def _bytes_to_name(s):
+        return s.decode('ascii')
+else:
+    _bytes_to_complex = complex
+    _bytes_to_name = str
+
 def _is_string_like(obj):
     """
     Check whether obj behaves like a string.
@@ -16,7 +27,17 @@
         return False
     return True
 
+def _is_bytes_like(obj):
+    """
+    Check whether obj behaves like a bytes object.
+    """
+    try:
+        obj + asbytes('')
+    except (TypeError, ValueError):
+        return False
+    return True
 
+
 def _to_filehandle(fname, flag='r', return_opened=False):
     """
     Returns the filehandle corresponding to a string or a file.
@@ -157,10 +178,12 @@
         """
         return lambda input: [_.strip() for _ in method(input)]
     #
-    def __init__(self, delimiter=None, comments='#', autostrip=True):
+    def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
         self.comments = comments
         # Delimiter is a character
-        if (delimiter is None) or _is_string_like(delimiter):
+        if isinstance(delimiter, unicode):
+            delimiter = delimiter.encode('ascii')
+        if (delimiter is None) or _is_bytes_like(delimiter):
             delimiter = delimiter or None
             _handyman = self._delimited_splitter
         # Delimiter is a list of field widths
@@ -180,7 +203,7 @@
             self._handyman = _handyman
     #
     def _delimited_splitter(self, line):
-        line = line.split(self.comments)[0].strip(" \r\n")
+        line = line.split(self.comments)[0].strip(asbytes(" \r\n"))
         if not line:
             return []
         return line.split(self.delimiter)
@@ -382,9 +405,9 @@
 
     """
     value = value.upper()
-    if value == 'TRUE':
+    if value == asbytes('TRUE'):
         return True
-    elif value == 'FALSE':
+    elif value == asbytes('FALSE'):
         return False
     else:
         raise ValueError("Invalid boolean")
@@ -468,8 +491,8 @@
     _mapper = [(nx.bool_, str2bool, False),
                (nx.integer, int, -1),
                (nx.floating, float, nx.nan),
-               (complex, complex, nx.nan + 0j),
-               (nx.string_, str, '???')]
+               (complex, _bytes_to_complex, nx.nan + 0j),
+               (nx.string_, bytes, asbytes('???'))]
     (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
     #
     @classmethod
@@ -570,11 +593,11 @@
                 self.func = lambda x : int(float(x))
         # Store the list of strings corresponding to missing values.
         if missing_values is None:
-            self.missing_values = set([''])
+            self.missing_values = set([asbytes('')])
         else:
             if isinstance(missing_values, basestring):
-                missing_values = missing_values.split(",")
-            self.missing_values = set(list(missing_values) + [''])
+                missing_values = missing_values.split(asbytes(","))
+            self.missing_values = set(list(missing_values) + [asbytes('')])
         #
         self._callingfunction = self._strict_call
         self.type = ttype
@@ -672,7 +695,8 @@
             self._status = _status
             self.iterupgrade(value)
 
-    def update(self, func, default=None, missing_values='', locked=False):
+    def update(self, func, default=None, missing_values=asbytes(''),
+               locked=False):
         """
         Set StringConverter attributes directly.
 
@@ -711,7 +735,7 @@
             self.type = self._getsubdtype(tester)
         # Add the missing values to the existing set
         if missing_values is not None:
-            if _is_string_like(missing_values):
+            if _is_bytes_like(missing_values):
                 self.missing_values.add(missing_values)
             elif hasattr(missing_values, '__iter__'):
                 for val in missing_values:

Modified: trunk/numpy/lib/function_base.py
===================================================================
--- trunk/numpy/lib/function_base.py	2010-02-20 18:16:52 UTC (rev 8178)
+++ trunk/numpy/lib/function_base.py	2010-02-20 18:17:14 UTC (rev 8179)
@@ -13,6 +13,7 @@
 import warnings
 
 import types
+import sys
 import numpy.core.numeric as _nx
 from numpy.core import linspace
 from numpy.core.numeric import ones, zeros, arange, concatenate, array, \
@@ -1596,7 +1597,18 @@
 def _get_nargs(obj):
     if not callable(obj):
         raise TypeError, "Object is not callable."
-    if hasattr(obj,'func_code'):
+    if sys.version_info[0] >= 3:
+        import inspect
+        spec = inspect.getargspec(obj)
+        nargs = len(spec.args)
+        if spec.defaults:
+            ndefaults = len(spec.defaults)
+        else:
+            ndefaults = 0
+        if inspect.ismethod(obj):
+            nargs -= 1
+        return nargs, ndefaults
+    elif hasattr(obj,'func_code'):
         fcode = obj.func_code
         nargs = fcode.co_argcount
         if obj.func_defaults is not None:

Modified: trunk/numpy/lib/io.py
===================================================================
--- trunk/numpy/lib/io.py	2010-02-20 18:16:52 UTC (rev 8178)
+++ trunk/numpy/lib/io.py	2010-02-20 18:17:14 UTC (rev 8179)
@@ -21,7 +21,7 @@
 from _iotools import LineSplitter, NameValidator, StringConverter, \
                      ConverterError, ConverterLockError, ConversionWarning, \
                      _is_string_like, has_nested_fields, flatten_dtype, \
-                     easy_dtype
+                     easy_dtype, _bytes_to_name
 
 from numpy.compat import asbytes
 
@@ -478,8 +478,8 @@
 
 
 
-def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
-            skiprows=0, usecols=None, unpack=False):
+def loadtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
+            converters=None, skiprows=0, usecols=None, unpack=False):
     """
     Load data from a text file.
 
@@ -613,7 +613,7 @@
         first_vals = None
         while not first_vals:
             first_line = fh.readline()
-            if first_line == '': # EOF reached
+            if not first_line: # EOF reached
                 raise IOError('End-of-file reached before encountering data.')
             first_vals = split_line(first_line)
         N = len(usecols or first_vals)
@@ -891,9 +891,9 @@
 
 
 
-def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
+def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
                skiprows=0, skip_header=0, skip_footer=0, converters=None,
-               missing='', missing_values=None, filling_values=None,
+               missing=asbytes(''), missing_values=None, filling_values=None,
                usecols=None, names=None, excludelist=None, deletechars=None,
                autostrip=False, case_sensitive=True, defaultfmt="f%i",
                unpack=None, usemask=False, loose=True, invalid_raise=True):
@@ -1065,11 +1065,11 @@
     first_values = None
     while not first_values:
         first_line = fhd.readline()
-        if first_line == '':
+        if not first_line:
             raise IOError('End-of-file reached before encountering data.')
         if names is True:
             if comments in first_line:
-                first_line = ''.join(first_line.split(comments)[1])
+                first_line = asbytes('').join(first_line.split(comments)[1:])
         first_values = split_line(first_line)
     # Should we take the first values as names ?
     if names is True:
@@ -1090,8 +1090,9 @@
 
     # Check the names and overwrite the dtype.names if needed
     if names is True:
-        names = validate_names([_.strip() for _ in first_values])
-        first_line = ''
+        names = validate_names([_bytes_to_name(_.strip())
+                                for _ in first_values])
+        first_line = asbytes('')
     elif _is_string_like(names):
         names = validate_names([_.strip() for _ in names.split(',')])
     elif names:
@@ -1127,7 +1128,7 @@
     user_missing_values = missing_values or ()
 
     # Define the list of missing_values (one column: one list)
-    missing_values = [list(['']) for _ in range(nbcols)]
+    missing_values = [list([asbytes('')]) for _ in range(nbcols)]
 
     # We have a dictionary: process it field by field
     if isinstance(user_missing_values, dict):
@@ -1176,7 +1177,7 @@
             entry.extend([str(user_missing_values)])
 
     # Process the deprecated `missing`
-    if missing != '':
+    if missing != asbytes(''):
         warnings.warn("The use of `missing` is deprecated.\n"\
                       "Please use `missing_values` instead.",
                       DeprecationWarning)
@@ -1451,7 +1452,8 @@
     names = output.dtype.names
     if usemask and names:
         for (name, conv) in zip(names or (), converters):
-            missing_values = [conv(_) for _ in conv.missing_values if _ != '']
+            missing_values = [conv(_) for _ in conv.missing_values
+                              if _ != asbytes('')]
             for mval in missing_values:
                 outputmask[name] |= (output[name] == mval)
     # Construct the final array

Modified: trunk/numpy/lib/tests/test__iotools.py
===================================================================
--- trunk/numpy/lib/tests/test__iotools.py	2010-02-20 18:16:52 UTC (rev 8178)
+++ trunk/numpy/lib/tests/test__iotools.py	2010-02-20 18:17:14 UTC (rev 8179)
@@ -1,71 +1,78 @@
+import sys
+if sys.version_info[0] >= 3:
+    from io import BytesIO
+    def StringIO(s=""):
+        return BytesIO(asbytes(s))
+else:
+    from StringIO import StringIO
 
-import StringIO
-
 import numpy as np
 from numpy.lib._iotools import LineSplitter, NameValidator, StringConverter,\
                                has_nested_fields, easy_dtype
 from numpy.testing import *
 
+from numpy.compat import asbytes, asbytes_nested
+
 class TestLineSplitter(TestCase):
     "Tests the LineSplitter class."
     #
     def test_no_delimiter(self):
         "Test LineSplitter w/o delimiter"
-        strg = " 1 2 3 4  5 # test"
+        strg = asbytes(" 1 2 3 4  5 # test")
         test = LineSplitter()(strg)
-        assert_equal(test, ['1', '2', '3', '4', '5'])
+        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5']))
         test = LineSplitter('')(strg)
-        assert_equal(test, ['1', '2', '3', '4', '5'])
+        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5']))
 
     def test_space_delimiter(self):
         "Test space delimiter"
-        strg = " 1 2 3 4  5 # test"
-        test = LineSplitter(' ')(strg)
-        assert_equal(test, ['1', '2', '3', '4', '', '5'])
-        test = LineSplitter('  ')(strg)
-        assert_equal(test, ['1 2 3 4', '5'])
+        strg = asbytes(" 1 2 3 4  5 # test")
+        test = LineSplitter(asbytes(' '))(strg)
+        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
+        test = LineSplitter(asbytes('  '))(strg)
+        assert_equal(test, asbytes_nested(['1 2 3 4', '5']))
 
     def test_tab_delimiter(self):
         "Test tab delimiter"
-        strg= " 1\t 2\t 3\t 4\t 5  6"
-        test = LineSplitter('\t')(strg)
-        assert_equal(test, ['1', '2', '3', '4', '5  6'])
-        strg= " 1  2\t 3  4\t 5  6"
-        test = LineSplitter('\t')(strg)
-        assert_equal(test, ['1  2', '3  4', '5  6'])
+        strg= asbytes(" 1\t 2\t 3\t 4\t 5  6")
+        test = LineSplitter(asbytes('\t'))(strg)
+        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5  6']))
+        strg= asbytes(" 1  2\t 3  4\t 5  6")
+        test = LineSplitter(asbytes('\t'))(strg)
+        assert_equal(test, asbytes_nested(['1  2', '3  4', '5  6']))
 
     def test_other_delimiter(self):
         "Test LineSplitter on delimiter"
-        strg = "1,2,3,4,,5"
-        test = LineSplitter(',')(strg)
-        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+        strg = asbytes("1,2,3,4,,5")
+        test = LineSplitter(asbytes(','))(strg)
+        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
         #
-        strg = " 1,2,3,4,,5 # test"
-        test = LineSplitter(',')(strg)
-        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+        strg = asbytes(" 1,2,3,4,,5 # test")
+        test = LineSplitter(asbytes(','))(strg)
+        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
 
     def test_constant_fixed_width(self):
         "Test LineSplitter w/ fixed-width fields"
-        strg = "  1  2  3  4     5   # test"
+        strg = asbytes("  1  2  3  4     5   # test")
         test = LineSplitter(3)(strg)
-        assert_equal(test, ['1', '2', '3', '4', '', '5', ''])
+        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5', '']))
         #
-        strg = "  1     3  4  5  6# test"
+        strg = asbytes("  1     3  4  5  6# test")
         test = LineSplitter(20)(strg)
-        assert_equal(test, ['1     3  4  5  6'])
+        assert_equal(test, asbytes_nested(['1     3  4  5  6']))
         #
-        strg = "  1     3  4  5  6# test"
+        strg = asbytes("  1     3  4  5  6# test")
         test = LineSplitter(30)(strg)
-        assert_equal(test, ['1     3  4  5  6'])
+        assert_equal(test, asbytes_nested(['1     3  4  5  6']))
 
     def test_variable_fixed_width(self):
-        strg = "  1     3  4  5  6# test"
+        strg = asbytes("  1     3  4  5  6# test")
         test = LineSplitter((3,6,6,3))(strg)
-        assert_equal(test, ['1', '3', '4  5', '6'])
+        assert_equal(test, asbytes_nested(['1', '3', '4  5', '6']))
         #
-        strg = "  1     3  4  5  6# test"
+        strg = asbytes("  1     3  4  5  6# test")
         test = LineSplitter((6,6,9))(strg)
-        assert_equal(test, ['1', '3  4', '5  6'])
+        assert_equal(test, asbytes_nested(['1', '3  4', '5  6']))
 
 
 #-------------------------------------------------------------------------------
@@ -136,23 +143,24 @@
         "Tests the upgrade method."
         converter = StringConverter()
         assert_equal(converter._status, 0)
-        converter.upgrade('0')
+        converter.upgrade(asbytes('0'))
         assert_equal(converter._status, 1)
-        converter.upgrade('0.')
+        converter.upgrade(asbytes('0.'))
         assert_equal(converter._status, 2)
-        converter.upgrade('0j')
+        converter.upgrade(asbytes('0j'))
         assert_equal(converter._status, 3)
-        converter.upgrade('a')
+        converter.upgrade(asbytes('a'))
         assert_equal(converter._status, len(converter._mapper)-1)
     #
     def test_missing(self):
         "Tests the use of missing values."
-        converter = StringConverter(missing_values=('missing','missed'))
-        converter.upgrade('0')
-        assert_equal(converter('0'), 0)
-        assert_equal(converter(''), converter.default)
-        assert_equal(converter('missing'), converter.default)
-        assert_equal(converter('missed'), converter.default)
+        converter = StringConverter(missing_values=(asbytes('missing'),
+                                                    asbytes('missed')))
+        converter.upgrade(asbytes('0'))
+        assert_equal(converter(asbytes('0')), 0)
+        assert_equal(converter(asbytes('')), converter.default)
+        assert_equal(converter(asbytes('missing')), converter.default)
+        assert_equal(converter(asbytes('missed')), converter.default)
         try:
             converter('miss')
         except ValueError:
@@ -162,7 +170,11 @@
         "Tests updatemapper"
         from datetime import date
         import time
-        dateparser = lambda s : date(*time.strptime(s, "%Y-%m-%d")[:3])
+        if sys.version_info[0] >= 3:
+            dateparser = lambda s : date(*time.strptime(s.decode('latin1'),
+                                                        "%Y-%m-%d")[:3])
+        else:
+            dateparser = lambda s : date(*time.strptime(s, "%Y-%m-%d")[:3])
         StringConverter.upgrade_mapper(dateparser, date(2000,1,1))
         convert = StringConverter(dateparser, date(2000, 1, 1))
         test = convert('2001-01-01')
@@ -182,25 +194,28 @@
     #
     def test_keep_default(self):
         "Make sure we don't lose an explicit default"
-        converter = StringConverter(None, missing_values='', default=-999)
-        converter.upgrade('3.14159265')
+        converter = StringConverter(None, missing_values=asbytes(''),
+                                    default=-999)
+        converter.upgrade(asbytes('3.14159265'))
         assert_equal(converter.default, -999)
         assert_equal(converter.type, np.dtype(float))
         #
-        converter = StringConverter(None, missing_values='', default=0)
-        converter.upgrade('3.14159265')
+        converter = StringConverter(None, missing_values=asbytes(''), default=0)
+        converter.upgrade(asbytes('3.14159265'))
         assert_equal(converter.default, 0)
         assert_equal(converter.type, np.dtype(float))
     #
     def test_keep_default_zero(self):
         "Check that we don't lose a default of 0"
-        converter = StringConverter(int, default=0, missing_values="N/A")
+        converter = StringConverter(int, default=0,
+                                    missing_values=asbytes("N/A"))
         assert_equal(converter.default, 0)
     #
     def test_keep_missing_values(self):
         "Check that we're not losing missing values"
-        converter = StringConverter(int, default=0, missing_values="N/A")
-        assert_equal(converter.missing_values, set(['', 'N/A']))
+        converter = StringConverter(int, default=0,
+                                    missing_values=asbytes("N/A"))
+        assert_equal(converter.missing_values, set(asbytes_nested(['', 'N/A'])))
 
 #-------------------------------------------------------------------------------
 

Modified: trunk/numpy/lib/tests/test_io.py
===================================================================
--- trunk/numpy/lib/tests/test_io.py	2010-02-20 18:16:52 UTC (rev 8178)
+++ trunk/numpy/lib/tests/test_io.py	2010-02-20 18:17:14 UTC (rev 8179)
@@ -5,11 +5,6 @@
 
 import sys
 
-if sys.version_info[0] >= 3:
-    from io import BytesIO as StringIO
-else:
-    from StringIO import StringIO
-
 import gzip
 import os
 import threading
@@ -20,7 +15,14 @@
 
 from numpy.lib._iotools import ConverterError, ConverterLockError, \
                                ConversionWarning
+from numpy.compat import asbytes
 
+if sys.version_info[0] >= 3:
+    from io import BytesIO
+    def StringIO(s=""):
+        return BytesIO(asbytes(s))
+else:
+    from StringIO import StringIO
 
 MAJVER, MINVER = sys.version_info[:2]
 
@@ -193,7 +195,7 @@
     def test_delimiter(self):
         a = np.array([[1., 2.], [3., 4.]])
         c = StringIO()
-        np.savetxt(c, a, delimiter=',', fmt='%d')
+        np.savetxt(c, a, delimiter=asbytes(','), fmt='%d')
         c.seek(0)
         assert_equal(c.readlines(), ['1,2\n', '3,4\n'])
 
@@ -440,7 +442,7 @@
     #
     def test_record(self):
         "Test w/ explicit dtype"
-        data = StringIO('1 2\n3 4')
+        data = StringIO(asbytes('1 2\n3 4'))
 #        data.seek(0)
         test = np.ndfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)])
         control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
@@ -476,7 +478,7 @@
         assert_array_equal(test, control)
         #
         data = StringIO('1,2,3,4\n')
-        test = np.ndfromtxt(data, dtype=int, delimiter=',')
+        test = np.ndfromtxt(data, dtype=int, delimiter=asbytes(','))
         assert_array_equal(test, control)
 
     def test_comments(self):
@@ -484,17 +486,17 @@
         control = np.array([1, 2, 3, 5], int)
         # Comment on its own line
         data = StringIO('# comment\n1,2,3,5\n')
-        test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#')
+        test = np.ndfromtxt(data, dtype=int, delimiter=asbytes(','), comments=asbytes('#'))
         assert_equal(test, control)
         # Comment at the end of a line
         data = StringIO('1,2,3,5# comment\n')
-        test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#')
+        test = np.ndfromtxt(data, dtype=int, delimiter=asbytes(','), comments=asbytes('#'))
         assert_equal(test, control)
 
     def test_skiprows(self):
         "Test row skipping"
         control = np.array([1, 2, 3, 5], int)
-        kwargs = dict(dtype=int, delimiter=',')
+        kwargs = dict(dtype=int, delimiter=asbytes(','))
         #
         data = StringIO('comment\n1,2,3,5\n')
         test = np.ndfromtxt(data, skip_header=1, **kwargs)
@@ -510,7 +512,7 @@
         data.extend(["%i,%3.1f,%03s" % (i, i, i) for i in range(51)])
         data[-1] = "99,99"
         kwargs = dict(delimiter=",", names=True, skip_header=5, skip_footer=10)
-        test = np.genfromtxt(StringIO("\n".join(data)), **kwargs)
+        test = np.genfromtxt(StringIO(asbytes("\n".join(data))), **kwargs)
         ctrl = np.array([("%f" % i, "%f" % i, "%f" % i) for i in range(40)],
                         dtype=[(_, float) for _ in "ABC"])
         assert_equal(test, ctrl)