Convert timeseries GDAL raster format (vrt) to NetCDF

milad.khakpour at gmail.com milad.khakpour at gmail.com
Thu Jan 14 11:32:50 EST 2016


I tried to package a timeseries for vrt file to NetCDF. The problem is, it converts layer by layer and save it to NetCDF which it took much time (also with memory allocation).
I wanted to do is to save it chunk by chunk which I do not have any idea how to do it !

Does someone have any solution for that !?

it take for 1Gb (25 images) around 90 second !!

here is my code:

    def tiff_to_netcdf(imagesPath, output, chunksize=None):
    
        img_txt = open('img_path.txt', 'w')
        for paths, subdirs, files in os.walk(imagesPath):
            for name in files:
                if name.endswith('.tif'):
                    img_txt.write(os.path.join(paths, name) + '\n')
        img_txt.close()
    
        # Build vrt files
        os.system('gdalbuildvrt -separate -input_file_list img_path.txt images.vrt')
    
        ds = gdal.Open('images.vrt')
        band1 = ds.GetRasterBand(1)
        a = band1.ReadAsArray()
    
        nlat, nlon = np.shape(a)
    
        b = ds.GetGeoTransform()  # bbox, interval
        lon = np.arange(nlon)*b[1]+b[0]
        lat = np.arange(nlat)*b[5]+b[3]
    
        basedate = dt.datetime(2006, 01, 11, 0, 0, 0)
    
        # Create NetCDF file
        # Clobber- Overwrite any existing file with the same name
        nco = netCDF4.Dataset(output+'.nc', 'w', clobber=True)
    
        # Create dimensions, variables and attributes:
        nco.createDimension('lon', nlon)
        nco.createDimension('lat', nlat)
        nco.createDimension('time', None)
    
        timeo = nco.createVariable('time', 'f4', ('time',))
        timeo.units = 'days since 2006-01-11 00:00:00'
        timeo.standard_name = 'time'
    
        lono = nco.createVariable('lon', 'f4', ('lon',))
        lono.standard_name = 'longitude'
    
        lato = nco.createVariable('lat', 'f4', ('lat',))
        lato.standard_name = 'latitude'
    
        # Create container variable for CRS: lon/lat WGS84 datum
        crso = nco.createVariable('crs', 'i4')
        crso.long_name = 'Lon/Lat Coords in WGS84'
        crso.grid_mapping_name = 'latitude_longitude'
        crso.longitude_of_prime_meridian = 0.0
        crso.semi_major_axis = 6378137.0
        crso.inverse_flattening = 298.257223563
    
        # Create short integer variable with chunking
        tmno = nco.createVariable('tmn', 'i2',  ('time', 'lat', 'lon'),
                                  zlib=True, chunksizes=chunksize, fill_value=-9999)
        tmno.scale_factor = 0.01
        tmno.add_offset = 0.00
        tmno.grid_mapping = 'crs'
        tmno.set_auto_maskandscale(False)
    
        nco.Conventions = 'CF-1.6'
    
        # Write lon,lat
        lono[:] = lon
        lato[:] = lat
    
        pat = re.compile(r'^(\w{1})(\d{4})(\d{2})(\d{2})_\d{6}___SIG0.*.tif$')
        itime = 0
        written = 0
        data = np.empty((3, 8000, 8000), dtype=np.float)
    
        # Step through data, writing time and data to NetCDF
        for root, dirs, files in os.walk(imagesPath):
            dirs.sort()
            files.sort()
            for f in files:
    
                match = re.match(pat, f)
                if match:
    
                    if itime  % 3 == 0 and itime != 0:
                        tmno[written:itime, :, :] = data
                        written = itime
                    # read the time values by parsing the filename
                    year = int(match.group(2))
                    mon = int(match.group(3))
                    day = int(match.group(4))
                    date = dt.datetime(year, mon, day, 0, 0, 0)
                    print(date)
                    dtime = (date-basedate).total_seconds()/86400.
                    timeo[itime] = dtime
                    tmn_path = os.path.join(root, f)
                    print(tmn_path)
                    tmn = gdal.Open(tmn_path)
                    a = tmn.ReadAsArray()  # data
                    data[itime%3, :, :] = a
                    itime = itime+1
    
    
        nco.close()



More information about the Python-list mailing list