memory error

questions anon questions.anon at gmail.com
Thu Sep 29 02:16:13 EDT 2011


Hello All,
I am still having trouble with memory errors when I try to process many
netcdf files.
Originally I would get the memory error as mentioned in the previous post
but when I added gc.collect() after each for loop I receive the error:
GEOS_ERROR: bad allocation
with no additional information!
The error use to occur at the point when a new netcdf file was to be opened
and plotted but with the things I have 'fixed' thanks to suggestions from
this list it seems to happen while processing the second file.
I am just trying to plot 3hourly data for each file and each file contains
hourly data for a month and I am trying to do this for many months.
It seems like I cannot close down the last file properly so the computer has
a clean memory to start the next one.
Any feedback will be greatly appreciated.
My latest version of the code:

######################

from netCDF4 import Dataset
import numpy as N
import matplotlib.pyplot as plt
from numpy import ma as MA
from mpl_toolkits.basemap import Basemap
from netcdftime import utime
from datetime import datetime
import os


shapefile1="E:/DSE_BushfireClimatologyProject/griddeddatasamples/test_GIS/DSE_REGIONS"
OutputFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/OutputsforValidation"

def plotrawdata(variable):
        if variable=='TSFC':
                ncvariablename='T_SFC'

MainFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/T_SFC/"
                ticks=[-5,0,5,10,15,20,25,30,35,40,45,50]
                Title='Surface Temperature'
                cmap=plt.cm.jet

        elif variable=='RHSFC':
                ncvariablename='RH_SFC'

MainFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/RH_SFC/"
                ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
                Title='Surface RH'
                cmap=plt.cm.jet_r


fileforlatlon=Dataset("E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/T_SFC/TSFC_1974_01/IDZ00026_VIC_ADFD_T_SFC.nc",
'r+', 'NETCDF4')
        LAT=fileforlatlon.variables['latitude'][:]
        LON=fileforlatlon.variables['longitude'][:]

        startperiod=raw_input("Start slice (e.g. 1 ): ")
        endperiod=raw_input("End slice (e.g. 2): ")
        skipperiod=raw_input("skip slice (e.g. 1): ")
        if startperiod == "":
                startperiod = None
        else:
                startperiod = int(startperiod)
        if endperiod == "":
                endperiod = None
        else:
                endperiod = int(endperiod)
        if skipperiod == "":
                skipperiod = None
        else:
                skipperiod= int(skipperiod)

        for (path, dirs, files) in os.walk(MainFolder):
                        for dir in dirs:
                                print dir
                        path=path+'/'

                        for ncfile in files:
                                if ncfile[-3:]=='.nc':
                                    print "dealing with ncfiles:",
path+ncfile
                                    ncfile=os.path.join(path,ncfile)
                                    ncfile=Dataset(ncfile, 'r+', 'NETCDF4')
                                    #global TSFC

variable=ncfile.variables[ncvariablename][startperiod:endperiod:skipperiod]

TIME=ncfile.variables['time'][startperiod:endperiod:skipperiod]

fillvalue=ncfile.variables[ncvariablename]._FillValue
                                    ncfile.close()

                                    for variable, TIME in
zip((variable[:]),(TIME[:])):
                                    #for variable, TIME in
zip((variable[sliceperiod]),(TIME[sliceperiod])):

                                            cdftime=utime('seconds since
1970-01-01 00:00:00')

ncfiletime=cdftime.num2date(TIME)
                                            print ncfiletime
                                            timestr=str(ncfiletime)
                                            d = datetime.strptime(timestr,
'%Y-%m-%d %H:%M:%S')
                                            date_string =
d.strftime('%Y%m%d_%H%M')
                                            #Set up basemap using mercator
projection
http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html
                                            map =
Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=-33,

llcrnrlon=139.0,urcrnrlon=151.0,lat_ts=0,resolution='i')
                                            x,y=map(*N.meshgrid(LON,LAT))

map.drawcoastlines(linewidth=0.5)
                                            map.readshapefile(shapefile1,
'DSE_REGIONS')
                                            map.drawstates()

                                            plt.title(Title+' %s
UTC'%ncfiletime)

                                            CS = map.contourf(x,y,variable,
ticks, cmap=cmap)
                                            l,b,w,h =0.1,0.1,0.8,0.8
                                            cax = plt.axes([l+w+0.025, b,
0.025, h], )
                                            cbar=plt.colorbar(CS, cax=cax,
drawedges=True)

                                            #save map as *.png and plot
netcdf file

plt.savefig((os.path.join(OutputFolder,
ncvariablename+date_string+'UTC.png')))
                                            #plt.show()
                                            plt.close()


######################



On Wed, Sep 14, 2011 at 4:08 PM, questions anon <questions.anon at gmail.com>wrote:

> Hello All,
> I keep coming across a memory error when processing many netcdf files. I
> assume it has something to do with how I loop things and maybe need to close
> things off properly.
> In the code below I am looping through a bunch of netcdf files (each file
> is hourly data for one month) and within each netcdf file I am outputting a
> *png file every three hours.
> This works for one netcdf file but when it begins to process the next
> netcdf file I receive this memory error:
>
> *Traceback (most recent call last):
>   File
> "d:/plot_netcdf_merc_multiplot_across_multifolders_mkdirs_memoryerror.py",
> line 44, in <module>
>     TSFC=ncfile.variables['T_SFC'][:]
>   File "netCDF4.pyx", line 2473, in netCDF4.Variable.__getitem__
> (netCDF4.c:23094)
> MemoryError*
>
> To reduce processing requirements I have tried making the LAT and LON to
> only use [0] but I also receive an error:
>
> *Traceback (most recent call last):
>   File
> "d:/plot_netcdf_merc_multiplot_across_multifolders_mkdirs_memoryerror.py",
> line 75, in <module>
>     x,y=map(*N.meshgrid(LON,LAT))
>   File "C:\Python27\lib\site-packages\numpy\lib\function_base.py", line
> 3256, in meshgrid
>     numRows, numCols = len(y), len(x)  # yes, reversed
> TypeError: len() of unsized object*
>
> finally I have added gc.collect() in a couple of places but that doesn't
> seem to do anything to help.
> I am using :*Python 2.7.2 |EPD 7.1-2 (32-bit)| (default, Jul  3 2011,
> 15:13:59) [MSC v.1500 32 bit (Intel)] on win32*
> Any feedback will be greatly appreciated!
>
>
> from netCDF4 import Dataset
> import numpy
> import numpy as N
> import matplotlib.pyplot as plt
> from numpy import ma as MA
> from mpl_toolkits.basemap import Basemap
> from netcdftime import utime
> from datetime import datetime
> import os
> import gc
>
> print "start processing...."
>
> inputpath=r'E:/GriddedData/Input/'
> outputpath=r'E:/GriddedData/Validation/'
> shapefile1="E:/test_GIS/DSE_REGIONS"
> for (path, dirs, files) in os.walk(inputpath):
>     for dir in dirs:
>         print dir
>         sourcepath=os.path.join(path,dir)
>         relativepath=os.path.relpath(sourcepath,inputpath)
>         newdir=os.path.join(outputpath,relativepath)
>         if not os.path.exists(newdir):
>             os.makedirs(newdir)
>
>     for ncfile in files:
>         if ncfile[-3:]=='.nc':
>             print "dealing with ncfiles:", ncfile
>             ncfile=os.path.join(sourcepath,ncfile)
>             #print ncfile
>             ncfile=Dataset(ncfile, 'r+', 'NETCDF4')
>             TSFC=ncfile.variables['T_SFC'][:,:,:]
>             TIME=ncfile.variables['time'][:]
>             LAT=ncfile.variables['latitude'][:]
>             LON=ncfile.variables['longitude'][:]
>             fillvalue=ncfile.variables['T_SFC']._FillValue
>             TSFC=MA.masked_values(TSFC, fillvalue)
>             ncfile.close()
>             gc.collect()
>             print "garbage collected"
>
>
>             for TSFC, TIME in zip((TSFC[1::3]),(TIME[1::3])):
>                 print TSFC, TIME
>             #convert time from numbers to date and prepare it to have no
> symbols for saving to filename
>                 cdftime=utime('seconds since 1970-01-01 00:00:00')
>                 ncfiletime=cdftime.num2date(TIME)
>                 print ncfiletime
>                 timestr=str(ncfiletime)
>                 d = datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')
>                 date_string = d.strftime('%Y%m%d_%H%M')
>
>                 #Set up basemap using mercator projection
> http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html
>                 map =
> Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=-33,
>
> llcrnrlon=139.0,urcrnrlon=151.0,lat_ts=0,resolution='i')
>
>             # compute map projection coordinates for lat/lon grid.
>                 x,y=map(*N.meshgrid(LON,LAT))
>                 map.drawcoastlines(linewidth=0.5)
>                 map.readshapefile(shapefile1, 'DSE_REGIONS')
>                 map.drawstates()
>
>                 plt.title('Surface temperature at %s UTC'%ncfiletime)
>                 ticks=[-5,0,5,10,15,20,25,30,35,40,45,50]
>                 CS = map.contourf(x,y,TSFC, ticks, cmap=plt.cm.jet)
>                 l,b,w,h =0.1,0.1,0.8,0.8
>                 cax = plt.axes([l+w+0.025, b, 0.025, h], )
>                 cbar=plt.colorbar(CS, cax=cax, drawedges=True)
>
>             #save map as *.png and plot netcdf file
>
> plt.savefig((os.path.join(newdir,'TSFC'+date_string+'UTC.png')))
>                 plt.close()
>                 gc.collect()
>                 print "garbage collected again"
> print "end of processing"
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20110929/98b1acc3/attachment-0001.html>


More information about the Python-list mailing list