#!/usr/bin/env python3 ############################################################################### # $Id: gdal2xyz.py 2be8649aadb3f869590306cd5a18a3387a650581 2021-04-23 12:08:46 +0300 Idan Miara $ # # Project: GDAL # Purpose: Script to translate GDAL supported raster into XYZ ASCII # point stream. # Author: Frank Warmerdam, warmerdam@pobox.com # ############################################################################### # Copyright (c) 2002, Frank Warmerdam # Copyright (c) 2020-2021, Idan Miara # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. 
###############################################################################

import sys
import textwrap
from argparse import RawDescriptionHelpFormatter
from numbers import Number, Real
from typing import Optional, Union, Sequence, Tuple

import numpy as np

from osgeo import gdal
from osgeo_utils.auxiliary.base import PathLikeOrStr
from osgeo_utils.auxiliary.progress import get_progress_callback, OptionalProgressCallback
from osgeo_utils.auxiliary.util import PathOrDS, get_bands, open_ds
from osgeo_utils.auxiliary.numpy_util import GDALTypeCodeAndNumericTypeCodeFromDataSet
from osgeo_utils.auxiliary.gdal_argparse import GDALArgumentParser


def gdal2xyz(srcfile: PathOrDS, dstfile: Optional[PathLikeOrStr] = None,
             srcwin: Optional[Sequence[int]] = None,
             skip: Union[int, Sequence[int]] = 1,
             band_nums: Optional[Sequence[int]] = None, delim: str = ' ',
             skip_nodata: bool = False,
             src_nodata: Optional[Union[Sequence, Number]] = None,
             dst_nodata: Optional[Union[Sequence, Number]] = None,
             return_np_arrays: bool = False, pre_allocate_np_arrays: bool = True,
             progress_callback: OptionalProgressCallback = ...) -> Optional[Tuple]:
    """
    translates a raster file (or dataset) into xyz format

    skip - how many rows/cols to skip each iteration; a single int is used for
        both axes, a sequence is unpacked as (x_skip, y_skip)
    srcwin (xoff, yoff, xsize, ysize) - Selects a subwindow from the source image for copying based on pixel/line location.
        default (`None`) - the whole raster.
    band_nums - selected input bands to process, None to process all.
    delim - the delimiter to use between values in a line
    skip_nodata - Exclude the output lines with nodata value (as determined by srcnodata)
    src_nodata - The nodata value of the dataset (for skipping or replacing)
        default (`None`) - Use the dataset NoDataValue (ignored if any selected band has none);
        `Sequence`/`Number` - use the given nodata value (per band or per dataset).
    dst_nodata - Replace source nodata with a given nodata. Has an effect only if not setting `-skipnodata`
        default(`None`) - use srcnodata, no replacement;
        `Sequence`/`Number` - replace the `srcnodata` with the given nodata value (per band or per dataset).
    srcfile - The source dataset filename or dataset object
    dstfile - The output dataset filename; for dstfile=None - if return_np_arrays=False then output will be printed to stdout
    return_np_arrays - return numpy arrays of the result, otherwise returns None
    pre_allocate_np_arrays - pre-allocated result arrays.
        Should be faster unless skip_nodata and the input is very sparse thus most data points will be skipped.
    progress_callback - progress callback function. use None for quiet or Ellipsis for using the default callback

    Returns None, or - when return_np_arrays is set - the tuple
    (geo_x, geo_y, data, nodata) where `data` is indexed as [band][point] and
    `nodata` is None when nodata points were skipped, the destination nodata
    when they were replaced, otherwise the source nodata.

    Raises Exception if the source dataset could not be opened.
    """

    result = None

    progress_callback = get_progress_callback(progress_callback)

    # Open source file.
    ds = open_ds(srcfile, access_mode=gdal.GA_ReadOnly)
    if ds is None:
        raise Exception(f'Could not open {srcfile}.')

    bands = get_bands(ds, band_nums)
    band_count = len(bands)

    gt = ds.GetGeoTransform()

    if srcwin is None:
        srcwin = (0, 0, ds.RasterXSize, ds.RasterYSize)

    dt, np_dt = GDALTypeCodeAndNumericTypeCodeFromDataSet(ds)

    # Text is emitted when a destination file is given, or to stdout when the
    # caller did not ask for numpy arrays instead.
    write_text = (dstfile is not None) or (not return_np_arrays)

    if write_text:
        value_format = "%d" if dt in (gdal.GDT_Int32, gdal.GDT_UInt32) else "%g"
        # join() places the delimiter only between values; stripping a
        # trailing delimiter afterwards would treat it as a character set.
        band_format = delim.join([value_format] * band_count) + '\n'

        # Setup an appropriate print format: more significant digits when the
        # georeferenced extent looks like degrees, fixed 3 decimals otherwise.
        if abs(gt[0]) < 180 and abs(gt[3]) < 180 \
                and abs(ds.RasterXSize * gt[1]) < 180 \
                and abs(ds.RasterYSize * gt[5]) < 180:
            frmt = '%.10g' + delim + '%.10g' + delim + '%s'
        else:
            frmt = '%.3f' + delim + '%.3f' + delim + '%s'

    # Normalize the source nodata into one value per selected band.
    if isinstance(src_nodata, Number):
        src_nodata = [src_nodata] * band_count
    elif src_nodata is None:
        src_nodata = [band.GetNoDataValue() for band in bands]
        if None in src_nodata:
            src_nodata = None
    if src_nodata is not None:
        src_nodata = np.asarray(src_nodata, dtype=np_dt)

    # Replacement is only meaningful when there is a source nodata to match.
    if isinstance(dst_nodata, Number):
        dst_nodata = [dst_nodata] * band_count
    if (dst_nodata is None) or (None in dst_nodata) or (src_nodata is None):
        dst_nodata = None
    if dst_nodata is not None:
        dst_nodata = np.asarray(dst_nodata, dtype=np_dt)

    skip_nodata = skip_nodata and (src_nodata is not None)
    replace_nodata = (not skip_nodata) and (dst_nodata is not None)
    process_nodata = skip_nodata or replace_nodata

    if isinstance(skip, Sequence):
        x_skip, y_skip = skip
    else:
        x_skip = y_skip = skip

    x_off, y_off, x_size, y_size = srcwin

    # The exact sets of visited rows/columns. Their lengths give the true
    # number of visited pixels, which sizes both the progress reporting and
    # the pre-allocated output arrays (x_size/y_size are already sizes, so the
    # offsets must not be subtracted, and partial steps must be counted).
    rows = range(y_off, y_off + y_size, y_skip)
    cols = range(0, x_size, x_skip)
    progress_end = len(rows) * len(cols)
    progress_curr = 0
    progress_prev = -1
    progress_parts = 100

    if return_np_arrays:
        size = progress_end if pre_allocate_np_arrays else 0
        all_geo_x = np.empty(size)
        all_geo_y = np.empty(size)
        all_data = np.empty((size, band_count), dtype=np_dt)

    # Open the output file as late as possible so that it is always closed
    # again by the `finally` below; stdout is never closed.
    dst_fh = None
    close_dst_fh = False
    if dstfile is not None:
        dst_fh = open(dstfile, 'wt')
        close_dst_fh = True
    elif not return_np_arrays:
        dst_fh = sys.stdout

    try:
        # Loop emitting data.
        idx = 0  # number of points actually emitted (nodata skips excluded)
        for y in rows:

            # One raster line for every selected band, dims: (band_count, x_size)
            data = np.empty((band_count, x_size), dtype=np_dt)
            for i_bnd, band in enumerate(bands):
                data[i_bnd] = band.ReadAsArray(x_off, y, x_size, 1)[0]

            for x_i in cols:

                progress_curr += 1
                if progress_callback:
                    progress_frac = progress_curr / progress_end
                    progress = int(progress_frac * progress_parts)
                    # Only report when the integer percentage advances.
                    if progress > progress_prev:
                        progress_prev = progress
                        progress_callback(progress_frac)

                x_i_data = data[:, x_i]  # single pixel, dims: (bands)
                if process_nodata and np.array_equal(src_nodata, x_i_data):
                    if skip_nodata:
                        continue
                    elif replace_nodata:
                        x_i_data = dst_nodata

                x = x_i + x_off

                # Georeferenced coordinates of the pixel center.
                geo_x = gt[0] + (x + 0.5) * gt[1] + (y + 0.5) * gt[2]
                geo_y = gt[3] + (x + 0.5) * gt[4] + (y + 0.5) * gt[5]

                if dst_fh is not None:
                    band_str = band_format % tuple(x_i_data)
                    line = frmt % (float(geo_x), float(geo_y), band_str)
                    dst_fh.write(line)
                if return_np_arrays:
                    if pre_allocate_np_arrays:
                        all_geo_x[idx] = geo_x
                        all_geo_y[idx] = geo_y
                        all_data[idx] = x_i_data
                    else:
                        all_geo_x = np.append(all_geo_x, geo_x)
                        all_geo_y = np.append(all_geo_y, geo_y)
                        all_data = np.append(all_data, [x_i_data], axis=0)
                idx += 1
    finally:
        if close_dst_fh:
            dst_fh.close()

    if return_np_arrays:
        nodata = None if skip_nodata else dst_nodata if replace_nodata else src_nodata
        # Drop the unused tail of pre-allocated arrays (points skipped as
        # nodata); a no-op when every slot was filled.
        all_geo_x = all_geo_x[:idx]
        all_geo_y = all_geo_y[:idx]
        all_data = all_data[:idx, :]
        result = all_geo_x, all_geo_y, all_data.transpose(), nodata

    return result


def main(argv):
    """
    Command line entry point: parses `argv` (argv[0] is the program name) and
    runs gdal2xyz with the parsed options.

    Returns the value returned by gdal2xyz, or 1 if an IOError was raised.
    """
    parser = GDALArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            The gdal2xyz utility can be used to translate a raster file into xyz format.
            It can be used as an alternative to gdal_translate of=xyz,
            But supporting other options, for example:
            * Select more then one band;
            * Skip or replace nodata value;
            * Return the output as numpy arrays.'''))

    # -skip takes a count, so it must parse an integer rather than be a flag.
    parser.add_argument("-skip", dest="skip", type=int, default=1,
                        help="How many rows/cols to skip in each iteration.")

    # Window values are pixel/line counts that are fed to range(): integers.
    parser.add_argument("-srcwin", metavar=('xoff', 'yoff', 'xsize', 'ysize'), dest="srcwin", type=int, nargs=4,
                        help="Selects a subwindow from the source image for copying based on pixel/line location")

    parser.add_argument("-b", "-band", "--band", dest="band_nums", metavar="band", type=int, nargs='+',
                        help="Select bands from the input spectral bands for output. "
                             "Bands are numbered from 1 in the order spectral bands are specified. "
                             "Multiple -b switches may be used. When no -b switch is used, the first band will be used."
                             "In order to use all input bands set -allbands or -b 0..")

    parser.add_argument("-allbands", "--allbands", dest="allbands", action="store_true",
                        help="Select all input bands.")

    parser.add_argument("-csv", dest="delim", const=',', default=' ', action="store_const",
                        help="Use comma instead of space as a delimiter.")

    parser.add_argument("-skipnodata", "--skipnodata", "-skip_nodata", dest="skip_nodata", action="store_true",
                        help="Exclude the output lines with nodata value (as determined by srcnodata).")

    # numbers.Real is an abstract class and cannot convert a string, so the
    # nodata values are parsed as float.
    parser.add_argument("-srcnodata", '-nodatavalue', dest="src_nodata", type=float, nargs='*',
                        help="The nodata value of the dataset (for skipping or replacing) "
                             "Default (None) - Use the dataset nodata value; "
                             "Sequence/Number - Use the given nodata value (per band or per dataset).")

    parser.add_argument("-dstnodata", dest="dst_nodata", type=float, nargs='*',
                        help="Replace source nodata with a given nodata. "
                             "Has an effect only if not setting -skipnodata. "
                             "Default(None) - Use srcnodata, no replacement; "
                             "Sequence/Number - Replace the srcnodata with the given nodata value "
                             "(per band or per dataset).")

    parser.add_argument("srcfile", metavar="src_dataset", type=str,
                        help="The source dataset name. It can be either file name, "
                             "URL of data source or subdataset name for multi-dataset files.")

    parser.add_argument("dstfile", metavar="dst_dataset", type=str,
                        help="The destination file name.")

    args = parser.parse_args(argv[1:])
    if args.allbands:
        args.band_nums = None
    elif not args.band_nums:
        args.band_nums = 1

    # nargs='*' always yields a list. A single value means "per dataset" and
    # is passed as a scalar so it applies to every band; no value at all is
    # treated the same as not giving the option.
    for nodata_name in ("src_nodata", "dst_nodata"):
        values = getattr(args, nodata_name)
        if values is not None and len(values) <= 1:
            setattr(args, nodata_name, values[0] if values else None)

    kwargs = vars(args)
    del kwargs["allbands"]  # not a gdal2xyz parameter
    try:
        return gdal2xyz(**kwargs)
    except IOError as e:
        print(e)
        return 1


if __name__ == '__main__':
    sys.exit(main(sys.argv))