[Scipy-svn] r2954 - trunk/Lib/io
scipy-svn at scipy.org
scipy-svn at scipy.org
Thu May 3 02:18:02 EDT 2007
Author: oliphant
Date: 2007-05-03 01:17:49 -0500 (Thu, 03 May 2007)
New Revision: 2954
Added:
trunk/Lib/io/netcdf.py
Modified:
trunk/Lib/io/__init__.py
Log:
Add netcdf file reader to scipy.io
Modified: trunk/Lib/io/__init__.py
===================================================================
--- trunk/Lib/io/__init__.py 2007-05-02 14:30:15 UTC (rev 2953)
+++ trunk/Lib/io/__init__.py 2007-05-03 06:17:49 UTC (rev 2954)
@@ -7,7 +7,10 @@
from numpyio import packbits, unpackbits, bswap, fread, fwrite, \
convert_objectarray
+# matfile read and write
from mio import *
+# netCDF file support
+from netcdf import *
from npfile import npfile
from recaster import sctype_attributes, Recaster
from array_import import *
Added: trunk/Lib/io/netcdf.py
===================================================================
--- trunk/Lib/io/netcdf.py 2007-05-02 14:30:15 UTC (rev 2953)
+++ trunk/Lib/io/netcdf.py 2007-05-03 06:17:49 UTC (rev 2954)
@@ -0,0 +1,259 @@
+"""NetCDF file reader.
+
+This is adapted from Roberto De Almeida's Pupynere PUre PYthon NEtcdf REader.
+
+classes changed to underscore_separated instead of CamelCase
+
+TODO:
+
+ Add write capability.
+"""
+
+#__author__ = "Roberto De Almeida <rob at pydap.org>"
+
+
+__all__ = ['netcdf_file', 'netcdf_variable']
+
+import struct
+import itertools
+import mmap
+
+from numpy import ndarray, zeros, array
+
+
# Tags and type codes that appear in a NetCDF header.  They are byte
# strings because they are compared with data read from a file opened in
# binary mode (a text literal never equals ``bytes`` on Python 3).
ABSENT = b'\x00' * 8
ZERO = b'\x00' * 4
NC_BYTE = b'\x00\x00\x00\x01'
NC_CHAR = b'\x00\x00\x00\x02'
NC_SHORT = b'\x00\x00\x00\x03'
NC_INT = b'\x00\x00\x00\x04'
NC_FLOAT = b'\x00\x00\x00\x05'
NC_DOUBLE = b'\x00\x00\x00\x06'
NC_DIMENSION = b'\x00\x00\x00\n'
NC_VARIABLE = b'\x00\x00\x00\x0b'
NC_ATTRIBUTE = b'\x00\x00\x00\x0c'


class _netcdf_format_error(ValueError, AssertionError):
    """Raised when the data being parsed is not a valid NetCDF header.

    It also derives from ``AssertionError`` because these checks used to
    be ``assert`` statements; code that caught ``AssertionError`` keeps
    working, and the checks are no longer stripped by ``python -O``.
    """


class netcdf_file(object):
    """A NetCDF file parser.

    Opening a file parses its header and fills in:

    ``version_byte``
        The byte that follows the ``CDF`` magic.
    ``dimensions``
        Dict mapping dimension name to length (``None`` for the record
        dimension).
    ``attributes``
        Dict of global attributes.  Each attribute is also set as an
        instance attribute unless that name is already in use on the
        object.
    ``variables``
        Dict mapping variable name to ``netcdf_variable``.

    Parameters
    ----------
    file : str
        Path of the file to open.

    Raises
    ------
    ValueError (also an AssertionError)
        If the header is malformed.
    """

    def __init__(self, file):
        self._buffer = open(file, 'rb')
        try:
            self._parse()
        except Exception:
            # Do not leak the file handle when the header is invalid.
            self._buffer.close()
            raise

    def read(self, size=-1):
        """Alias for reading the file buffer."""
        return self._buffer.read(size)

    def close(self):
        """Close the underlying file."""
        self._buffer.close()

    # -- header sections ---------------------------------------------------

    def _parse(self):
        """Initial parsing of the header."""
        if self.read(3) != b'CDF':
            raise _netcdf_format_error(
                "not a NetCDF file: missing 'CDF' magic bytes")

        # Read version byte.
        self.version_byte = struct.unpack('>b', self.read(1))[0]

        # Read header info.
        self._numrecs()
        self._dim_array()
        self._gatt_array()
        self._var_array()

    def _numrecs(self):
        """Read number of records."""
        self._nrecs = self._unpack_int()

    def _dim_array(self):
        """Read a dict with dimensions names and sizes."""
        self._check_tag(NC_DIMENSION, 'dimension')
        count = self._unpack_int()

        self.dimensions = {}
        self._dims = []
        for _ in range(count):
            name = self._read_string()
            length = self._unpack_int()
            if length == 0:
                length = None  # record dimension
            self.dimensions[name] = length
            self._dims.append(name)  # preserve dim order

    def _gatt_array(self):
        """Read global attributes and expose them on the instance."""
        self.attributes = self._att_array()

        # Attribute-style access for compatibility with S.IO.N.  Names that
        # already exist on the object are skipped so that a file attribute
        # called e.g. ``read`` or ``_buffer`` cannot break the parser.
        for name, value in self.attributes.items():
            if not hasattr(self, name):
                setattr(self, name, value)

    def _att_array(self):
        """Read a dict with attributes."""
        self._check_tag(NC_ATTRIBUTE, 'attribute')
        count = self._unpack_int()

        attributes = {}
        for _ in range(count):
            name = self._read_string()
            nc_type = self._unpack_int()
            n = self._unpack_int()
            attributes[name] = self._read_values(n, nc_type)
        return attributes

    def _var_array(self):
        """Read all variables."""
        self._check_tag(NC_VARIABLE, 'variable')

        # Read size of each record, in bytes.
        self._read_recsize()

        # Read variables.
        self.variables = {}
        count = self._unpack_int()
        for _ in range(count):
            name = self._read_string()
            self.variables[name] = self._read_var()

    def _read_recsize(self):
        """Scan the variable list, sum the record sizes, then rewind.

        Stores in ``self._recsize`` the sum of ``vsize`` over all record
        variables and restores the file position it started from.
        """
        pos = self._buffer.tell()

        recsize = 0
        count = self._unpack_int()
        for _ in range(count):
            self._read_string()  # variable name, not needed here
            header = self._read_var_header()
            isrec, vsize = header[2], header[5]
            if isrec:
                recsize += vsize

        self._recsize = recsize
        self._buffer.seek(pos)

    def _read_var(self):
        """Read one variable entry and wrap it in a ``netcdf_variable``."""
        (dimensions, shape, isrec,
         attributes, nc_type, vsize, begin) = self._read_var_header()
        return netcdf_variable(self._buffer.fileno(), nc_type, vsize, begin,
                               shape, dimensions, attributes, isrec,
                               self._recsize)

    def _read_var_header(self):
        """Read the header fields that follow a variable's name.

        Returns the tuple ``(dimensions, shape, isrec, attributes, nc_type,
        vsize, begin)``.  A variable is a record variable when its first
        dimension is the record dimension; that axis gets the number of
        records as its length.
        """
        dimensions = []
        shape = []
        isrec = False
        ndims = self._unpack_int()
        for i in range(ndims):
            dimid = self._unpack_int()
            if not 0 <= dimid < len(self._dims):
                # A negative id would otherwise silently index from the end.
                raise _netcdf_format_error(
                    "invalid dimension id: %r" % (dimid,))
            name = self._dims[dimid]
            dimensions.append(name)
            dim = self.dimensions[name]
            if dim is None and i == 0:
                dim = self._nrecs
                isrec = True
            shape.append(dim)

        attributes = self._att_array()
        nc_type = self._unpack_int()
        vsize = self._unpack_int()
        begin = self._unpack_begin()
        return (tuple(dimensions), tuple(shape), isrec,
                attributes, nc_type, vsize, begin)

    # -- primitive readers -------------------------------------------------

    def _check_tag(self, expected, what):
        """Consume a 4-byte tag that must be ``expected`` or ``ZERO``."""
        tag = self.read(4)
        if tag not in (ZERO, expected):
            raise _netcdf_format_error(
                "invalid %s list tag: %r" % (what, tag))

    def _read_values(self, n, nc_type):
        """Read ``n`` values of type ``nc_type`` plus their padding.

        Char data (``nc_type == 2``) is returned as a byte string with one
        trailing NUL removed; every other type is returned as a numpy
        array.
        """
        sizes = [1, 1, 2, 4, 4, 8]
        typecodes = ['b', 'c', 'h', 'i', 'f', 'd']
        if not 1 <= nc_type <= len(sizes):
            # Type 0 would otherwise silently pick the last table entry.
            raise _netcdf_format_error(
                "unknown NetCDF type code: %r" % (nc_type,))
        if n < 0:
            # read() with a negative size would consume the whole file.
            raise _netcdf_format_error("negative value count: %r" % (n,))

        count = n * sizes[nc_type - 1]
        values = self.read(count)
        self.read(-count % 4)  # skip padding up to a 4-byte boundary

        if nc_type == 2:  # char
            # Remove EOL terminator.
            if values.endswith(b'\x00'):
                values = values[:-1]
            return values

        typecode = typecodes[nc_type - 1]
        values = struct.unpack('>%d%s' % (n, typecode), values)
        return array(values, dtype=typecode)

    def _unpack_int(self):
        """Read a big-endian 32-bit signed integer."""
        return struct.unpack('>i', self.read(4))[0]
    _unpack_int32 = _unpack_int

    def _unpack_int64(self):
        """Read a big-endian 64-bit signed integer."""
        return struct.unpack('>q', self.read(8))[0]

    def _unpack_begin(self):
        """Read a variable offset: 32-bit for version 1, 64-bit for 2."""
        if self.version_byte == 1:
            return self._unpack_int()
        if self.version_byte == 2:
            return self._unpack_int64()
        raise _netcdf_format_error(
            "unsupported NetCDF version byte: %r" % (self.version_byte,))

    def _read_string(self):
        """Read a length-prefixed, padded name and return it as ``str``."""
        count = self._unpack_int()
        if count < 0:
            # read() with a negative size would consume the whole file.
            raise _netcdf_format_error(
                "negative string length: %r" % (count,))
        s = self.read(count)
        # Remove EOL terminator.
        if s.endswith(b'\x00'):
            s = s[:-1]
        self.read(-count % 4)  # skip padding up to a 4-byte boundary
        if not isinstance(s, str):
            # Python 3: names become text so they work as dict keys and
            # attribute names.  latin-1 maps every byte, so this cannot
            # fail.  On Python 2 ``s`` is already ``str`` and is kept.
            s = s.decode('latin1')
        return s
+
+
class netcdf_variable(object):
    """Array-like access to the data of one variable in a NetCDF file.

    Parameters
    ----------
    fileno : int
        File descriptor of the open NetCDF file; it is memory-mapped
        read-only.
    nc_type : int
        Type code, 1 to 6 (byte, char, short, int, float, double).
    vsize : int
        Size in bytes of the variable's data (per record for a record
        variable).
    begin : int
        Offset in the file where the variable's data starts.
    shape : tuple
        Shape of the data; for a record variable ``shape[0]`` is the
        number of records.
    dimensions : tuple
        Dimension names, one per axis.
    attributes : dict
        Variable attributes.  Kept in ``self.attributes``; each one is also
        set as an instance attribute unless that name is already in use on
        the object.
    isrec : bool
        True if this is a record variable.
    recsize : int
        Distance in bytes between consecutive records in the file.
    """

    def __init__(self, fileno, nc_type, vsize, begin, shape, dimensions, attributes, isrec=False, recsize=0):
        self._nc_type = nc_type
        self._vsize = vsize
        self._begin = begin
        self.shape = shape
        self.dimensions = dimensions
        self.attributes = attributes  # for ``dap.plugins.netcdf``
        self._is_record = isrec

        # Number of bytes and type.
        self._bytes = [1, 1, 2, 4, 4, 8][nc_type - 1]
        kind = ['i', 'S', 'i', 'i', 'f', 'f'][nc_type - 1]
        dtype = '>%s%d' % (kind, self._bytes)
        length = begin + vsize  # bytes of the file that must be mapped

        if isrec:
            # Record variables are not stored contiguously on disk, so each
            # record is copied separately into an in-memory array.
            data = zeros(shape, dtype)
            nrecs = shape[0]
            if nrecs:
                length += (nrecs - 1) * recsize
                # One mapping serves every record.
                mm = mmap.mmap(fileno, length, access=mmap.ACCESS_READ)
                for n in range(nrecs):
                    data[n] = ndarray(shape[1:], dtype=dtype, buffer=mm,
                                      offset=begin + n * recsize, order='C')
            self.__array_data__ = data
        else:
            # The array is a view on the memory-mapped file.
            mm = mmap.mmap(fileno, length, access=mmap.ACCESS_READ)
            self.__array_data__ = ndarray(shape, dtype=dtype, buffer=mm,
                                          offset=begin, order='C')

        # N-D array interface
        self.__array_interface__ = {'shape': shape,
                                    'typestr': dtype,
                                    'data': self.__array_data__,
                                    'version': 3,
                                    }

        # Attribute-style access to the NetCDF attributes.  Done last, and
        # only for unused names, so that an attribute called e.g. ``shape``
        # or ``typecode`` cannot overwrite the object's own members.
        for name, value in attributes.items():
            if not hasattr(self, name):
                setattr(self, name, value)

    def __getitem__(self, index):
        """Index the underlying data array."""
        return self.__array_data__.__getitem__(index)

    def getValue(self):
        """For scalars."""
        return self.__array_data__.item()

    def typecode(self):
        """Return the one-letter type code for this variable's type."""
        return ['b', 'c', 'h', 'i', 'f', 'd'][self._nc_type - 1]
+
+
def _test():
    """Run the doctests of the module this function is executed from."""
    from doctest import testmod
    testmod()
More information about the Scipy-svn
mailing list