Convert timeseries GDAL raster format (vrt) to NetCDF
milad.khakpour at gmail.com
milad.khakpour at gmail.com
Thu Jan 14 11:32:50 EST 2016
I tried to package a timeseries for vrt file to NetCDF. The problem is, it converts layer by layer and save it to NetCDF which it took much time (also with memory allocation).
I wanted to do is to save it chunk by chunk which I do not have any idea how to do it !
Does someone have any solution for that !?
it take for 1Gb (25 images) around 90 second !!
here is my code:
def tiff_to_netcdf(imagesPath, output, chunksize=None):
img_txt = open('img_path.txt', 'w')
for paths, subdirs, files in os.walk(imagesPath):
for name in files:
if name.endswith('.tif'):
img_txt.write(os.path.join(paths, name) + '\n')
img_txt.close()
# Build vrt files
os.system('gdalbuildvrt -separate -input_file_list img_path.txt images.vrt')
ds = gdal.Open('images.vrt')
band1 = ds.GetRasterBand(1)
a = band1.ReadAsArray()
nlat, nlon = np.shape(a)
b = ds.GetGeoTransform() # bbox, interval
lon = np.arange(nlon)*b[1]+b[0]
lat = np.arange(nlat)*b[5]+b[3]
basedate = dt.datetime(2006, 01, 11, 0, 0, 0)
# Create NetCDF file
# Clobber- Overwrite any existing file with the same name
nco = netCDF4.Dataset(output+'.nc', 'w', clobber=True)
# Create dimensions, variables and attributes:
nco.createDimension('lon', nlon)
nco.createDimension('lat', nlat)
nco.createDimension('time', None)
timeo = nco.createVariable('time', 'f4', ('time',))
timeo.units = 'days since 2006-01-11 00:00:00'
timeo.standard_name = 'time'
lono = nco.createVariable('lon', 'f4', ('lon',))
lono.standard_name = 'longitude'
lato = nco.createVariable('lat', 'f4', ('lat',))
lato.standard_name = 'latitude'
# Create container variable for CRS: lon/lat WGS84 datum
crso = nco.createVariable('crs', 'i4')
crso.long_name = 'Lon/Lat Coords in WGS84'
crso.grid_mapping_name = 'latitude_longitude'
crso.longitude_of_prime_meridian = 0.0
crso.semi_major_axis = 6378137.0
crso.inverse_flattening = 298.257223563
# Create short integer variable with chunking
tmno = nco.createVariable('tmn', 'i2', ('time', 'lat', 'lon'),
zlib=True, chunksizes=chunksize, fill_value=-9999)
tmno.scale_factor = 0.01
tmno.add_offset = 0.00
tmno.grid_mapping = 'crs'
tmno.set_auto_maskandscale(False)
nco.Conventions = 'CF-1.6'
# Write lon,lat
lono[:] = lon
lato[:] = lat
pat = re.compile(r'^(\w{1})(\d{4})(\d{2})(\d{2})_\d{6}___SIG0.*.tif$')
itime = 0
written = 0
data = np.empty((3, 8000, 8000), dtype=np.float)
# Step through data, writing time and data to NetCDF
for root, dirs, files in os.walk(imagesPath):
dirs.sort()
files.sort()
for f in files:
match = re.match(pat, f)
if match:
if itime % 3 == 0 and itime != 0:
tmno[written:itime, :, :] = data
written = itime
# read the time values by parsing the filename
year = int(match.group(2))
mon = int(match.group(3))
day = int(match.group(4))
date = dt.datetime(year, mon, day, 0, 0, 0)
print(date)
dtime = (date-basedate).total_seconds()/86400.
timeo[itime] = dtime
tmn_path = os.path.join(root, f)
print(tmn_path)
tmn = gdal.Open(tmn_path)
a = tmn.ReadAsArray() # data
data[itime%3, :, :] = a
itime = itime+1
nco.close()
More information about the Python-list
mailing list