diff --git a/.circleci/config.yml b/.circleci/config.yml index a3aded6..98bb5bd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,7 +23,7 @@ jobs: pwd mkdir config build install . /opt/conda/bin/activate root - conda install --yes cython gdal h5py libgdal pytest numpy fftw scipy basemap scons opencv hdf4 hdf5 netcdf4 libgcc libstdcxx-ng cmake + conda install --yes cython gdal h5py libgdal pytest numpy fftw scipy basemap scons opencv hdf4 hdf5 netcdf4 libgcc libstdcxx-ng cmake astropy yum install -y uuid-devel x11-devel motif-devel jq gcc-gfortran ln -s /opt/conda/bin/cython /opt/conda/bin/cython3 cd /opt/conda/lib diff --git a/README.md b/README.md index b8984d9..412a0ac 100644 --- a/README.md +++ b/README.md @@ -623,7 +623,23 @@ between three files as follows: 20061231 ``` - +### rtcApp.xml +The inputs are Sentinel GRD zipfiles +```xml + + /Users/data/sentinel1 + + 20 + sentinel1 + + $dir$/rtcApp/data/S1A_IW_GRDH_1SDV_20181221T225104_20181221T225129_025130_02C664_B46C.zip + $dir$/orbits + $dir$/rtcApp/output + [VV, VH] + + + +``` ----- ## Component Configurability diff --git a/applications/imageMath.py b/applications/imageMath.py index 3977a89..432e262 100755 --- a/applications/imageMath.py +++ b/applications/imageMath.py @@ -293,6 +293,7 @@ def main(args, files): #######Determine number of input and output bands bandList = [] + iMath['equations'] = [] for ii,expr in enumerate(args.equation.split(';')): #####Now parse the equation to get the file names used @@ -319,7 +320,11 @@ def main(args, files): ######Create input memmaps for ii,infile in enumerate(fileList): - fstr, files = parseInputFile(infile, files) + if type(files) == list: + fstr, files = parseInputFile(infile, files) + else: + fstr = getattr(files, infile) + logger.debug('Input string for File %d: %s: %s'%(ii, infile, fstr)) if len(fstr.split(';')) > 1: @@ -341,8 +346,9 @@ def main(args, files): if bbox is not None: iMath['bboxes'].append(bbox) - if len(files): - raise IOError('Unused input variables set:\n'+ ' '.join(files)) + if type(files) == list: + if len(files): + raise IOError('Unused input variables set:\n'+ ' '.join(files)) #######Some debugging logger.debug('List of available bands: ' + str(iMath['inBands'].keys())) diff --git a/applications/rtcApp.py b/applications/rtcApp.py index ab45a7e..836151e 100755 --- a/applications/rtcApp.py +++ b/applications/rtcApp.py @@ -155,7 +155,7 @@ NUMBER_RANGE_LOOKS = Application.Parameter('numberRangeLooks', ) POSTING = Application.Parameter('posting', - public_name='azimuth looks', + public_name='posting', default = 20.0, type = float, mandatory = False, @@ -363,6 +363,7 @@ class GRDSAR(Application): self.verifyDEM = RtcProc.createVerifyDEM(self) self.multilook = RtcProc.createLooks(self) self.runTopo = RtcProc.createTopo(self) + self.runNormalize = RtcProc.createNormalize(self) # self.runGeocode = RtcProc.createGeocode(self) return None @@ -392,6 +393,9 @@ class GRDSAR(Application): ##Run topo for each bursts self.step('topo', func=self.runTopo) + ##Run normalize to get gamma0 + self.step('normalize', func=self.runNormalize) + # Geocode # self.step('geocode', func=self.runGeocode, # args=(self.geocode_list, self.do_unwrap, self.geocode_bbox)) @@ -416,6 +420,9 @@ class GRDSAR(Application): ##Run topo for each burst self.runTopo() + + ##Run normalize to get gamma0 + self.runNormalize() ###Compute covariance # self.runEstimateCovariance() diff --git a/applications/stripmapApp.py b/applications/stripmapApp.py index fbbeed5..938cafb 100755 --- a/applications/stripmapApp.py +++ b/applications/stripmapApp.py @@ -242,14 +242,20 @@ FILTER_STRENGTH = Application.Parameter('filterStrength', mandatory=False, doc='') - -DO_RUBBERSHEETING = Application.Parameter('doRubbersheeting', - public_name='do rubbersheeting', +############################################## Modified by V.Brancato 10.07.2019 +DO_RUBBERSHEETINGAZIMUTH = Application.Parameter('doRubbersheetingAzimuth', + public_name='do rubbersheetingAzimuth', default=False, type=bool, mandatory=False, doc='') - +DO_RUBBERSHEETINGRANGE = Application.Parameter('doRubbersheetingRange', + public_name='do rubbersheetingRange', + default=False, + type=bool, + mandatory=False, + doc='') +################################################################################# RUBBERSHEET_SNR_THRESHOLD = Application.Parameter('rubberSheetSNRThreshold', public_name='rubber sheet SNR Threshold', default = 5.0, @@ -533,7 +539,8 @@ class _RoiBase(Application, FrameMixin): GEOCODE_BOX, REGION_OF_INTEREST, HEIGHT_RANGE, - DO_RUBBERSHEETING, + DO_RUBBERSHEETINGRANGE, #Modified by V. Brancato 10.07.2019 + DO_RUBBERSHEETINGAZIMUTH, #Modified by V. Brancato 10.07.2019 RUBBERSHEET_SNR_THRESHOLD, RUBBERSHEET_FILTER_SIZE, DO_DENSEOFFSETS, @@ -724,7 +731,8 @@ class _RoiBase(Application, FrameMixin): self.runResampleSlc = StripmapProc.createResampleSlc(self) self.runRefineSlaveTiming = StripmapProc.createRefineSlaveTiming(self) self.runDenseOffsets = StripmapProc.createDenseOffsets(self) - self.runRubbersheet = StripmapProc.createRubbersheet(self) + self.runRubbersheetRange = StripmapProc.createRubbersheetRange(self) #Modified by V. Brancato 10.07.2019 + self.runRubbersheetAzimuth =StripmapProc.createRubbersheetAzimuth(self) #Modified by V. Brancato 10.07.2019 self.runResampleSubbandSlc = StripmapProc.createResampleSubbandSlc(self) self.runInterferogram = StripmapProc.createInterferogram(self) self.runFilter = StripmapProc.createFilter(self) @@ -774,8 +782,11 @@ class _RoiBase(Application, FrameMixin): args=('refined',)) self.step('dense_offsets', func=self.runDenseOffsets) - - self.step('rubber_sheet', func=self.runRubbersheet) +######################################################################## Modified by V. Brancato 10.07.2019 + self.step('rubber_sheet_range', func=self.runRubbersheetRange) + + self.step('rubber_sheet_azimuth',func=self.runRubbersheetAzimuth) +######################################################################### self.step('fine_resample', func=self.runResampleSlc, args=('fine',)) @@ -852,10 +863,14 @@ class _RoiBase(Application, FrameMixin): # run dense offsets self.runDenseOffsets() - - # adding the azimuth offsets computed from cross correlation to geometry offsets - self.runRubbersheet() - + +############ Modified by V. Brancato 10.07.2019 + # adding the azimuth offsets computed from cross correlation to geometry offsets + self.runRubbersheetAzimuth() + + # adding the range offsets computed from cross correlation to geometry offsets + self.runRubbersheetRange() +#################################################################################### # resampling using rubbersheeted offsets # which include geometry + constant range + constant azimuth # + dense azimuth offsets diff --git a/components/isceobj/Orbit/Orbit.py b/components/isceobj/Orbit/Orbit.py index 548ade6..e3cdc1f 100644 --- a/components/isceobj/Orbit/Orbit.py +++ b/components/isceobj/Orbit/Orbit.py @@ -1061,7 +1061,7 @@ class Orbit(Component): ###This wont break the old interface but could cause ###issues at midnight crossing if reference is None: - reference = self.minTime() + reference = self.minTime refEpoch = reference.replace(hour=0, minute=0, second=0, microsecond=0) diff --git a/components/isceobj/RtcProc/Factories.py b/components/isceobj/RtcProc/Factories.py index c779864..038f1f7 100644 --- a/components/isceobj/RtcProc/Factories.py +++ b/components/isceobj/RtcProc/Factories.py @@ -46,5 +46,6 @@ createPreprocessor = _factory("runPreprocessor") createVerifyDEM = _factory("runVerifyDEM") createLooks = _factory("runLooks") createTopo = _factory("runTopo") +createNormalize = _factory("runNormalize") #createGeocode = _factory("runGeocode") diff --git a/components/isceobj/RtcProc/RtcProc.py b/components/isceobj/RtcProc/RtcProc.py index f529106..aac8b9a 100644 --- a/components/isceobj/RtcProc/RtcProc.py +++ b/components/isceobj/RtcProc/RtcProc.py @@ -69,7 +69,7 @@ INC_FILENAME = Component.Parameter( GAMMA0_FILENAME = Component.Parameter( 'gamma0FileName', public_name='Gamma0 backscatter file', - default = 'gamma0.rdr', + default = 'gamma0.img', type = str, mandatory = False, doc = 'Unmasked gamma0 backscatter file') diff --git a/components/isceobj/RtcProc/runNormalize.py b/components/isceobj/RtcProc/runNormalize.py index ce1e6c9..70ce9cb 100644 --- a/components/isceobj/RtcProc/runNormalize.py +++ b/components/isceobj/RtcProc/runNormalize.py @@ -1,4 +1,4 @@ -# +#!/usr/bin/env python3 # Author: Piyush Agram # Copyright 2016 # @@ -6,19 +6,23 @@ import logging import isceobj import mroipac +from .runTopo import filenameWithLooks +from .runLooks import takeLooks import os +import itertools import numpy as np from isceobj.Util.decorators import use_api +from applications import imageMath logger = logging.getLogger('isce.grdsar.looks') - +class Dummy: + pass def runNormalize(self): ''' Make sure that a DEM is available for processing the given data. ''' - refPol = self._grd.polarizations[0] master = self._grd.loadProduct( os.path.join(self._grd.outputFolder, 'beta_{0}.xml'.format(refPol))) @@ -26,17 +30,31 @@ def runNormalize(self): azlooks, rglooks = self._grd.getLooks( self.posting, master.groundRangePixelSize, master.azimuthPixelSize, self.numberAzimuthLooks, self.numberRangeLooks) - if (azlooks == 1) and (rglooks == 1): - return - - slantRange = False for pol in self._grd.polarizations: - inname = os.path.join( self._grd.outputFolder, 'beta_{0}.img'.format(pol) ) - takeLooks(inname, azlooks, rglooks) + if (azlooks == 1) and (rglooks == 1): + inname = os.path.join( self._grd.outputFolder, 'beta_{0}.img'.format(pol)) + else: + inname = os.path.join( self._grd.outputFolder, filenameWithLooks('beta_{0}.img'.format(pol), azlooks, rglooks)) - if not slantRange: - inname = master.slantRangeImage.filename - takeLooks(inname, azlooks, rglooks) - slantRange = True + basefolder, output = os.path.split(self._grd.outputFolder) + incname = os.path.join(basefolder, self._grd.geometryFolder, self._grd.incFileName) + outname = os.path.join(self._grd.outputFolder, filenameWithLooks('gamma_{0}'.format(pol)+'.img', azlooks, rglooks)) + maskname = os.path.join(basefolder, self._grd.geometryFolder, self._grd.slMaskFileName) + + args = imageMath.createNamespace() + args.equation = 'a*cos(b_0*PI/180.)/cos(b_1*PI/180.) * (c==0)' + args.dtype = np.float32 + args.scheme = 'BIL' + args.out = outname + #args.debug = True + + files = Dummy() + files.a = inname + files.b = incname + files.c = maskname + + + + imageMath.main(args, files) return diff --git a/components/isceobj/Sensor/GRD/Sentinel1.py b/components/isceobj/Sensor/GRD/Sentinel1.py index 45e648b..b910885 100755 --- a/components/isceobj/Sensor/GRD/Sentinel1.py +++ b/components/isceobj/Sensor/GRD/Sentinel1.py @@ -261,10 +261,10 @@ class Sentinel1(Component): self.validateUserInputs() - if self.xml.startswith('/vsizip'): #Read from zip file + if '.zip' in self.xml: try: parts = self.xml.split(os.path.sep) - zipname = os.path.join(*(parts[2:-3])) + zipname = os.path.join('/',*(parts[:-3])) fname = os.path.join(*(parts[-3:])) with zipfile.ZipFile(zipname, 'r') as zf: @@ -283,23 +283,22 @@ class Sentinel1(Component): self.populateMetadata() self.populateBbox() - ####Tru and locate an orbit file + ####Try and locate an orbit file if self.orbitFile is None: if self.orbitDir is not None: self.orbitFile = self.findOrbitFile() + print('Found this orbitfile: %s' %self.orbitFile) - ####Read in the orbits - if self.orbitFile: + if '_POEORB_' in self.orbitFile: orb = self.extractPreciseOrbit() - else: + elif '_RESORB_' in self.orbitFile: orb = self.extractOrbit() - + self.product.orbit.setOrbitSource('Header') for sv in orb: self.product.orbit.addStateVector(sv) - self.populateIPFVersion() self.extractBetaLUT() self.extractNoiseLUT() @@ -423,10 +422,11 @@ class Sentinel1(Component): nsp = "{http://www.esa.int/safe/sentinel-1.0}" - if self.manifest.startswith('/vsizip'): + if '.zip' in self.manifest: + import zipfile parts = self.manifest.split(os.path.sep) - zipname = os.path.join(*(parts[2:-2])) + zipname = os.path.join('/',*(parts[:-2])) fname = os.path.join(*(parts[-2:])) try: @@ -462,38 +462,40 @@ class Sentinel1(Component): datefmt = "%Y%m%dT%H%M%S" types = ['POEORB', 'RESORB'] + filelist = [] match = [] - timeStamp = self.product.sensingMid - + timeStamp = self.product.sensingStart+(self.product.sensingStop - self.product.sensingStart)/2. + for orbType in types: files = glob.glob( os.path.join(self.orbitDir, 'S1A_OPER_AUX_' + orbType + '_OPOD*')) - + filelist.extend(files) ###List all orbit files - for result in files: - fields = result.split('_') - taft = datetime.datetime.strptime(fields[-1][0:15], datefmt) - tbef = datetime.datetime.strptime(fields[-2][1:16], datefmt) - - #####Get all files that span the acquisition - if (tbef <= timeStamp) and (taft >= timeStamp): - tmid = tbef + 0.5 * (taft - tbef) - match.append((result, abs((timeStamp-tmid).total_seconds()))) - #####Return the file with the image is aligned best to the middle of the file - if len(match) != 0: - bestmatch = min(match, key = lambda x: x[1]) - return bestmatch[0] + for result in filelist: + fields = result.split('_') + taft = datetime.datetime.strptime(fields[-1][0:15], datefmt) + tbef = datetime.datetime.strptime(fields[-2][1:16], datefmt) + print(taft, tbef) + + #####Get all files that span the acquisition + if (tbef <= timeStamp) and (taft >= timeStamp): + tmid = tbef + 0.5 * (taft - tbef) + match.append((result, abs((timeStamp-tmid).total_seconds()))) + #####Return the file with the image is aligned best to the middle of the file + if len(match) != 0: + bestmatch = min(match, key = lambda x: x[1]) + return bestmatch[0] - if len(match) == 0: - raise Exception('No suitable orbit file found. If you want to process anyway - unset the orbitdir parameter') + if len(match) == 0: + raise Exception('No suitable orbit file found. If you want to process anyway - unset the orbitdir parameter') def extractOrbit(self): ''' Extract orbit information from xml node. ''' node = self._xml_root.find('generalAnnotation/orbitList') - + print('Extracting orbit from annotation XML file') frameOrbit = Orbit() frameOrbit.configure() @@ -516,13 +518,7 @@ class Sentinel1(Component): vec.setVelocity(vel) frameOrbit.addStateVector(vec) - - orbExt = OrbitExtender(planet=Planet(pname='Earth')) - orbExt.configure() - newOrb = orbExt.extendOrbit(frameOrbit) - - - return newOrb + return frameOrbit def extractPreciseOrbit(self): ''' @@ -534,11 +530,10 @@ class Sentinel1(Component): print("IOError: %s" % strerr) return - _xml_root = ElementTree(file=fp).getroot() - + _xml_root = ElementTree.ElementTree(file=fp).getroot() + node = _xml_root.find('Data_Block/List_of_OSVs') - print('Extracting orbit from Orbit File: ', self.orbitFile) orb = Orbit() orb.configure() @@ -582,10 +577,10 @@ class Sentinel1(Component): if self.calibrationXml is None: raise Exception('No calibration file provided') - if self.calibrationXml.startswith('/vsizip'): + if '.zip' in self.calibrationXml: import zipfile parts = self.calibrationXml.split(os.path.sep) - zipname = os.path.join(*(parts[2:-4])) + zipname = os.path.join('/',*(parts[:-4])) fname = os.path.join(*(parts[-4:])) try: @@ -723,7 +718,7 @@ class Sentinel1(Component): print('Extracting normalized image ....') - src = gdal.Open(self.tiff.strip(), gdal.GA_ReadOnly) + src = gdal.Open('/vsizip//'+self.tiff.strip(), gdal.GA_ReadOnly) band = src.GetRasterBand(1) if self.product.numberOfSamples != src.RasterXSize: diff --git a/components/isceobj/Sensor/src/ALOS_pre_process/readOrbitPulse.f b/components/isceobj/Sensor/src/ALOS_pre_process/readOrbitPulse.f index 7e1114d..8743e98 100644 --- a/components/isceobj/Sensor/src/ALOS_pre_process/readOrbitPulse.f +++ b/components/isceobj/Sensor/src/ALOS_pre_process/readOrbitPulse.f @@ -21,6 +21,7 @@ c get alos position and times integer*1 indata(32768) integer statb(13),stat integer numdata,rowPos,colPos,eof + integer*4 unpackBytes c read the leader file descriptor record !!!!!!!!!!!!!!!!!! @@ -106,12 +107,9 @@ c read in the raw data file line by line do i=1,nlines ! jng ierr=ioread(ichandata,indata,len) call getLineSequential(rawAccessor,indata,eof) - iyear=iand(indata(40),255)*256*256*256+iand(indata(39),255)*256*256+ - $ iand(indata(38),255)*256+iand(indata(37),255) - idoy=iand(indata(44),255)*256*256*256+iand(indata(43),255)*256*256+ - $ iand(indata(42),255)*256+iand(indata(41),255) - ims=iand(indata(48),255)*256*256*256+iand(indata(47),255)*256*256+ - $ iand(indata(46),255)*256+iand(indata(45),255) + iyear = unpackBytes(indata(40), indata(39), indata(38), indata(37)) + idoy = unpackBytes(indata(44), indata(43), indata(42), indata(41)) + ims = unpackBytes(indata(48), indata(47), indata(46), indata(45)) ddate(2) = ims*1000.0 !we save days in the year and microsec in the day ddate(1) = 1.*idoy call setLineSequential(auxAccessor,ddate) @@ -144,3 +142,9 @@ c print *,val return end + + integer*4 function unpackBytes(i1, i2, i3, i4) + integer*4 i1, i2, i3, i4 + unpackBytes = iand(i1, 255)*256*256*256 + iand(i2, 255)*256*256 + + $ iand(i3, 255)*256 + iand(i4, 255) + end function diff --git a/components/isceobj/StripmapProc/Factories.py b/components/isceobj/StripmapProc/Factories.py index 7727f48..27497d8 100644 --- a/components/isceobj/StripmapProc/Factories.py +++ b/components/isceobj/StripmapProc/Factories.py @@ -112,7 +112,8 @@ createResampleSlc = _factory("runResampleSlc") createResampleSubbandSlc = _factory("runResampleSubbandSlc") createRefineSlaveTiming = _factory("runRefineSlaveTiming") createDenseOffsets = _factory("runDenseOffsets") -createRubbersheet = _factory("runRubbersheet") +createRubbersheetAzimuth = _factory("runRubbersheetAzimuth") # Modified by V. Brancato (10.07.2019) +createRubbersheetRange = _factory("runRubbersheetRange") # Modified by V. Brancato (10.07.2019) createInterferogram = _factory("runInterferogram") createCoherence = _factory("runCoherence") createFilter = _factory("runFilter") diff --git a/components/isceobj/StripmapProc/SConscript b/components/isceobj/StripmapProc/SConscript index 2926124..89adcb2 100644 --- a/components/isceobj/StripmapProc/SConscript +++ b/components/isceobj/StripmapProc/SConscript @@ -49,7 +49,7 @@ listFiles = ['StripmapProc.py', 'runPreprocessor.py', 'runSplitSpectrum.py', 'Factories.py' , 'runDenseOffsets.py', 'runResampleSlc.py' , 'runUnwrapGrass.py', '__init__.py' , 'runDispersive.py' , 'runResampleSubbandSlc.py', 'runUnwrapIcu.py', 'runFilter.py' , 'runROI.py' , 'runUnwrapSnaphu.py', 'runCrop.py', - 'runGeo2rdr.py', 'runRubbersheet.py', '__StripmapProc.py' , 'runInterferogram.py', + 'runGeo2rdr.py', 'runRubbersheetRange.py', 'runRubbersheetAzimuth.py', '__StripmapProc.py' , 'runInterferogram.py', 'runVerifyDEM.py', 'runGeocode.py', 'Sensor.py' ] diff --git a/components/isceobj/StripmapProc/StripmapProc.py b/components/isceobj/StripmapProc/StripmapProc.py index ea9df09..c0f5344 100755 --- a/components/isceobj/StripmapProc/StripmapProc.py +++ b/components/isceobj/StripmapProc/StripmapProc.py @@ -325,14 +325,21 @@ AZIMUTH_OFFSET_FILENAME = Component.Parameter('azimuthOffsetFilename', doc='') - +# Modified by V. Brancato 10.07.2019 AZIMUTH_RUBBERSHEET_FILENAME = Component.Parameter('azimuthRubbersheetFilename', public_name='azimuth Rubbersheet Image Name', default = 'azimuth_sheet.off', type=str, mandatory=False, doc='') - + +RANGE_RUBBERSHEET_FILENAME = Component.Parameter('rangeRubbersheetFilename', + public_name='range Rubbersheet Image Name', + default = 'range_sheet.off', + type=str, + mandatory=False, + doc='') +# End of modification MISREG_FILENAME = Component.Parameter('misregFilename', public_name='misreg file name', default='misreg', @@ -346,14 +353,21 @@ DENSE_OFFSET_FILENAME = Component.Parameter('denseOffsetFilename', type=str, mandatory=False, doc='file name of dense offsets computed from cross correlating two SLC images') - +# Modified by V. Brancato 10.07.2019 FILT_AZIMUTH_OFFSET_FILENAME = Component.Parameter('filtAzimuthOffsetFilename', public_name='filtered azimuth offset filename', default='filtAzimuth.off', type=str, mandatory=False, doc='Filtered azimuth dense offsets') - + +FILT_RANGE_OFFSET_FILENAME = Component.Parameter('filtRangeOffsetFilename', + public_name='filtered range offset filename', + default='filtRange.off', + type=str, + mandatory=False, + doc='Filtered range dense offsets') +# End of modification DISPERSIVE_FILENAME = Component.Parameter('dispersiveFilename', public_name = 'dispersive phase filename', default='dispersive.bil', @@ -470,8 +484,10 @@ class StripmapProc(Component, FrameMixin): LOS_FILENAME, RANGE_OFFSET_FILENAME, AZIMUTH_OFFSET_FILENAME, - AZIMUTH_RUBBERSHEET_FILENAME, - FILT_AZIMUTH_OFFSET_FILENAME, + AZIMUTH_RUBBERSHEET_FILENAME, # Added by V. Brancato 10.07.2019 + RANGE_RUBBERSHEET_FILENAME, # Added by V. Brancato 10.07.2019 + FILT_AZIMUTH_OFFSET_FILENAME, # Added by V. Brancato 10.07.2019 + FILT_RANGE_OFFSET_FILENAME, # Added by V. Brancato 10.07.2019 DENSE_OFFSET_FILENAME, MISREG_FILENAME, DISPERSIVE_FILENAME, diff --git a/components/isceobj/StripmapProc/runInterferogram.py b/components/isceobj/StripmapProc/runInterferogram.py index 08bc98a..fbbde4f 100644 --- a/components/isceobj/StripmapProc/runInterferogram.py +++ b/components/isceobj/StripmapProc/runInterferogram.py @@ -1,14 +1,73 @@ + # # Author: Heresh Fattahi, 2017 -# +# Modified by V. Brancato (10.2019) +# (Included flattening when rubbersheeting in range is turned on import isceobj import logging from components.stdproc.stdproc import crossmul from iscesys.ImageUtil.ImageUtil import ImageUtil as IU import os +import gdal +import numpy as np + logger = logging.getLogger('isce.insar.runInterferogram') +# Added by V. Brancato 10.09.2019 +def write_xml(fileName,width,length,bands,dataType,scheme): + + img = isceobj.createImage() + img.setFilename(fileName) + img.setWidth(width) + img.setLength(length) + img.setAccessMode('READ') + img.bands = bands + img.dataType = dataType + img.scheme = scheme + img.renderHdr() + img.renderVRT() + + return None + + +def compute_FlatEarth(self,ifgFilename,width,length,radarWavelength): + from imageMath import IML + import logging + + # If rubbersheeting has been performed add back the range sheet offsets + + info = self._insar.loadProduct(self._insar.slaveSlcCropProduct) + #radarWavelength = info.getInstrument().getRadarWavelength() + rangePixelSize = info.getInstrument().getRangePixelSize() + fact = 4 * np.pi* rangePixelSize / radarWavelength + + cJ = np.complex64(-1j) + + # Open the range sheet offset + rngOff = os.path.join(self.insar.offsetsDirname, self.insar.rangeOffsetFilename ) + + print(rngOff) + if os.path.exists(rngOff): + rng2 = np.memmap(rngOff, dtype=np.float64, mode='r', shape=(length,width)) + else: + print('No range offsets provided') + rng2 = np.zeros((length,width)) + + # Open the interferogram + #ifgFilename= os.path.join(self.insar.ifgDirname, self.insar.ifgFilename) + intf = np.memmap(ifgFilename+'.full',dtype=np.complex64,mode='r+',shape=(length,width)) + + for ll in range(length): + intf[ll,:] *= np.exp(cJ*fact*rng2[ll,:]) + + del rng2 + del intf + + return + + + def multilook(infile, outname=None, alks=5, rlks=15): ''' Take looks. @@ -66,8 +125,9 @@ def computeCoherence(slc1name, slc2name, corname, virtual=True): slc2.finalizeImage() return - -def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): +# Modified by V. Brancato on 10.09.2019 (added self) +# Modified by V. Brancato on 11.13.2019 (added radar wavelength for low and high band flattening +def generateIgram(self,imageSlc1, imageSlc2, resampName, azLooks, rgLooks,radarWavelength): objSlc1 = isceobj.createSlcImage() IU.copyAttributes(imageSlc1, objSlc1) objSlc1.setAccessMode('read') @@ -79,8 +139,13 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): objSlc2.createImage() slcWidth = imageSlc1.getWidth() - intWidth = int(slcWidth / rgLooks) - + + + if not self.doRubbersheetingRange: + intWidth = int(slcWidth/rgLooks) # Modified by V. Brancato intWidth = int(slcWidth / rgLooks) + else: + intWidth = int(slcWidth) + lines = min(imageSlc1.getLength(), imageSlc2.getLength()) if '.flat' in resampName: @@ -93,7 +158,7 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): resampInt = resampName objInt = isceobj.createIntImage() - objInt.setFilename(resampInt) + objInt.setFilename(resampInt+'.full') objInt.setWidth(intWidth) imageInt = isceobj.createIntImage() IU.copyAttributes(objInt, imageInt) @@ -101,28 +166,48 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): objInt.createImage() objAmp = isceobj.createAmpImage() - objAmp.setFilename(resampAmp) + objAmp.setFilename(resampAmp+'.full') objAmp.setWidth(intWidth) imageAmp = isceobj.createAmpImage() IU.copyAttributes(objAmp, imageAmp) objAmp.setAccessMode('write') objAmp.createImage() + + if not self.doRubbersheetingRange: + print('Rubbersheeting in range is off, interferogram is already flattened') + objCrossmul = crossmul.createcrossmul() + objCrossmul.width = slcWidth + objCrossmul.length = lines + objCrossmul.LooksDown = azLooks + objCrossmul.LooksAcross = rgLooks - objCrossmul = crossmul.createcrossmul() - objCrossmul.width = slcWidth - objCrossmul.length = lines - objCrossmul.LooksDown = azLooks - objCrossmul.LooksAcross = rgLooks - - objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp) - + objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp) + else: + # Modified by V. Brancato 10.09.2019 (added option to add Range Rubber sheet Flat-earth back) + print('Rubbersheeting in range is on, removing flat-Earth phase') + objCrossmul = crossmul.createcrossmul() + objCrossmul.width = slcWidth + objCrossmul.length = lines + objCrossmul.LooksDown = 1 + objCrossmul.LooksAcross = 1 + objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp) + + # Remove Flat-Earth component + compute_FlatEarth(self,resampInt,intWidth,lines,radarWavelength) + + # Perform Multilook + multilook(resampInt+'.full', outname=resampInt, alks=azLooks, rlks=rgLooks) #takeLooks(objAmp,azLooks,rgLooks) + multilook(resampAmp+'.full', outname=resampAmp, alks=azLooks, rlks=rgLooks) #takeLooks(objInt,azLooks,rgLooks) + + #os.system('rm ' + resampInt+'.full* ' + resampAmp + '.full* ') + # End of modification for obj in [objInt, objAmp, objSlc1, objSlc2]: obj.finalizeImage() return imageInt, imageAmp -def subBandIgram(self, masterSlc, slaveSlc, subBandDir): +def subBandIgram(self, masterSlc, slaveSlc, subBandDir,radarWavelength): img1 = isceobj.createImage() img1.load(masterSlc + '.xml') @@ -142,7 +227,7 @@ def subBandIgram(self, masterSlc, slaveSlc, subBandDir): interferogramName = os.path.join(ifgDir , self.insar.ifgFilename) - generateIgram(img1, img2, interferogramName, azLooks, rgLooks) + generateIgram(self,img1, img2, interferogramName, azLooks, rgLooks,radarWavelength) return interferogramName @@ -175,9 +260,9 @@ def runSubBandInterferograms(self): slaveHighBandSlc = os.path.join(coregDir , os.path.basename(slaveSlc)) ########## - interferogramName = subBandIgram(self, masterLowBandSlc, slaveLowBandSlc, self.insar.lowBandSlcDirname) + interferogramName = subBandIgram(self, masterLowBandSlc, slaveLowBandSlc, self.insar.lowBandSlcDirname,self.insar.lowBandRadarWavelength) - interferogramName = subBandIgram(self, masterHighBandSlc, slaveHighBandSlc, self.insar.highBandSlcDirname) + interferogramName = subBandIgram(self, masterHighBandSlc, slaveHighBandSlc, self.insar.highBandSlcDirname,self.insar.highBandRadarWavelength) def runFullBandInterferogram(self): logger.info("Generating interferogram") @@ -185,7 +270,7 @@ def runFullBandInterferogram(self): masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct) masterSlc = masterFrame.getImage().filename - if self.doRubbersheeting: + if (self.doRubbersheetingRange | self.doRubbersheetingAzimuth): slaveSlc = os.path.join(self._insar.coregDirname, self._insar.fineCoregFilename) else: slaveSlc = os.path.join(self._insar.coregDirname, self._insar.refinedCoregFilename) @@ -211,8 +296,11 @@ def runFullBandInterferogram(self): os.makedirs(ifgDir) interferogramName = os.path.join(ifgDir , self.insar.ifgFilename) - - generateIgram(img1, img2, interferogramName, azLooks, rgLooks) + + info = self._insar.loadProduct(self._insar.slaveSlcCropProduct) + radarWavelength = info.getInstrument().getRadarWavelength() + + generateIgram(self,img1, img2, interferogramName, azLooks, rgLooks,radarWavelength) ###Compute coherence @@ -221,7 +309,7 @@ def runFullBandInterferogram(self): multilook(cohname+'.full', outname=cohname, alks=azLooks, rlks=rgLooks) - ###Multilook relevant geometry products + ##Multilook relevant geometry products for fname in [self.insar.latFilename, self.insar.lonFilename, self.insar.losFilename]: inname = os.path.join(self.insar.geometryDirname, fname) multilook(inname + '.full', outname= inname, alks=azLooks, rlks=rgLooks) diff --git a/components/isceobj/StripmapProc/runResampleSlc.py b/components/isceobj/StripmapProc/runResampleSlc.py index cdc4a34..c108c98 100644 --- a/components/isceobj/StripmapProc/runResampleSlc.py +++ b/components/isceobj/StripmapProc/runResampleSlc.py @@ -23,7 +23,7 @@ def runResampleSlc(self, kind='coarse'): raise Exception('Unknown operation type {0} in runResampleSlc'.format(kind)) if kind == 'fine': - if not self.doRubbersheeting: + if not (self.doRubbersheetingRange | self.doRubbersheetingAzimuth): # Modified by V. Brancato 10.10.2019 print('Rubber sheeting not requested, skipping resampling ....') return @@ -68,12 +68,25 @@ def runResampleSlc(self, kind='coarse'): #Since the app is based on geometry module we expect pixel-by-pixel offset #field offsetsDir = self.insar.offsetsDirname - rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename) + + # Modified by V. Brancato 10.10.2019 + #rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename) + if kind in ['coarse', 'refined']: azname = os.path.join(offsetsDir, self.insar.azimuthOffsetFilename) + rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename) else: azname = os.path.join(offsetsDir, self.insar.azimuthRubbersheetFilename) - + if self.doRubbersheetingRange: + print('Rubbersheeting in range is turned on, taking the cross-correlation offsets') + print('Setting Flattening to False') + rgname = os.path.join(offsetsDir, self.insar.rangeRubbersheetFilename) + flatten=False + else: + print('Rubbersheeting in range is turned off, taking range geometric offsets') + rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename) + flatten=True + rngImg = isceobj.createImage() rngImg.load(rgname + '.xml') rngImg.setAccessMode('READ') @@ -85,8 +98,8 @@ def runResampleSlc(self, kind='coarse'): width = rngImg.getWidth() length = rngImg.getLength() - - flatten = True +# Modified by V. Brancato 10.10.2019 + #flatten = True rObj.flatten = flatten rObj.outputWidth = width rObj.outputLines = length diff --git a/components/isceobj/StripmapProc/runResampleSubbandSlc.py b/components/isceobj/StripmapProc/runResampleSubbandSlc.py index e3f7ff7..157764b 100644 --- a/components/isceobj/StripmapProc/runResampleSubbandSlc.py +++ b/components/isceobj/StripmapProc/runResampleSubbandSlc.py @@ -14,7 +14,8 @@ import shelve logger = logging.getLogger('isce.insar.runResampleSubbandSlc') -def resampleSlc(masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir, +# Modified by V. Brancato 10.14.2019 added "self" as input parameter of resampleSLC +def resampleSlc(self,masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir, azoffname, rgoffname, azpoly = None, rgpoly = None, misreg=False): logger.info("Resampling slave SLC") @@ -56,8 +57,17 @@ def resampleSlc(masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir, width = rngImg.getWidth() length = rngImg.getLength() - - flatten = True +# Modified by V. Brancato on 10.14.2019 (if Rubbersheeting in range is turned on, flatten the interferogram during cross-correlation) + if not self.doRubbersheetingRange: + print('Rubber sheeting in range is turned off, flattening the interferogram during resampling') + flatten = True + print(flatten) + else: + print('Rubber sheeting in range is turned on, flattening the interferogram during interferogram formation') + flatten=False + print(flatten) +# end of Modification + rObj.flatten = flatten rObj.outputWidth = width rObj.outputLines = length @@ -105,15 +115,25 @@ def runResampleSubbandSlc(self, misreg=False): masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct) slaveFrame = self._insar.loadProduct( self._insar.slaveSlcCropProduct) - if self.doRubbersheeting: - print('Using rubber sheeted offsets for resampling sub-bands') +# Modified by V. Brancato 10.14.2019 + + if self.doRubbersheetingAzimuth: + print('Using rubber in azimuth sheeted offsets for resampling sub-bands') azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthRubbersheetFilename) else: print('Using refined offsets for resampling sub-bands') azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthOffsetFilename) - rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename) + if self.doRubbersheetingRange: + print('Using rubber in range sheeted offsets for resampling sub-bands') + rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeRubbersheetFilename) + else: + print('Using refined offsets for resampling sub-bands') + rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename) +# ****************** End of Modification + + # rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename) azpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_az.xml') rgpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_rg.xml') @@ -124,7 +144,7 @@ def runResampleSubbandSlc(self, misreg=False): wvlL = self.insar.lowBandRadarWavelength coregDir = os.path.join(self.insar.coregDirname, self.insar.lowBandSlcDirname) - lowbandCoregFilename = resampleSlc(masterFrame, slaveFrame, imageSlc2, wvlL, coregDir, + lowbandCoregFilename = resampleSlc(self,masterFrame, slaveFrame, imageSlc2, wvlL, coregDir, azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly,misreg=False) imageSlc2 = os.path.join(self.insar.splitSpectrumDirname, self.insar.highBandSlcDirname, @@ -132,7 +152,7 @@ def runResampleSubbandSlc(self, misreg=False): wvlH = self.insar.highBandRadarWavelength coregDir = os.path.join(self.insar.coregDirname, self.insar.highBandSlcDirname) - highbandCoregFilename = resampleSlc(masterFrame, slaveFrame, imageSlc2, wvlH, coregDir, + highbandCoregFilename = resampleSlc(self,masterFrame, slaveFrame, imageSlc2, wvlH, coregDir, azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly, misreg=False) self.insar.lowBandSlc2 = lowbandCoregFilename diff --git a/components/isceobj/StripmapProc/runRubbersheet.py b/components/isceobj/StripmapProc/runRubbersheet.py index 3e10381..b16a58e 100644 --- a/components/isceobj/StripmapProc/runRubbersheet.py +++ b/components/isceobj/StripmapProc/runRubbersheet.py @@ -168,6 +168,7 @@ def runRubbersheet(self): # filtAzOffsetFile to it. resampleOffset(filtAzOffsetFile, geometryAzimuthOffset, sheetOffset) + print("I'm here") return None diff --git a/components/isceobj/StripmapProc/runRubbersheetAzimuth.py b/components/isceobj/StripmapProc/runRubbersheetAzimuth.py new file mode 100755 index 0000000..75583f1 --- /dev/null +++ b/components/isceobj/StripmapProc/runRubbersheetAzimuth.py @@ -0,0 +1,276 @@ +# +# Author: Heresh Fattahi +# Copyright 2017 +# +# Modified by V. Brancato +# Included offset filtering with no SNR +# + +import isce +import isceobj +from osgeo import gdal +from scipy import ndimage +from astropy.convolution import convolve +import numpy as np +import os + +def mask_filterNoSNR(denseOffsetFile,filterSize,outName): + # Masking the offsets with a data-based approach + + # Open the offsets + ds = gdal.Open(denseOffsetFile+'.vrt',gdal.GA_ReadOnly) + off_az = ds.GetRasterBand(1).ReadAsArray() + off_rg = ds.GetRasterBand(2).ReadAsArray() + ds = None + + # Remove missing values from ampcor + off_rg[np.where(off_rg < -9999)]=0 + off_az[np.where(off_az < -9999)]=0 + + + # Store the offsets in a complex variable + off = off_rg + 1j*off_az + + # Mask the azimuth offsets based on the MAD + mask = off_masking(off,filterSize,thre=3) + + xoff_masked = np.ma.array(off.real,mask=mask) + yoff_masked = np.ma.array(off.imag,mask=mask) + + # Delete unused variables + mask = None + off = None + + # Remove residual noisy spots with a median filter on the azimuth offmap + yoff_masked.mask = yoff_masked.mask | \ + (ndimage.median_filter(xoff_masked.filled(fill_value=0),3) == 0) | \ + (ndimage.median_filter(yoff_masked.filled(fill_value=0),3) == 0) + + # Fill the data by iteratively using smoothed values + data = yoff_masked.data + data[yoff_masked.mask]=np.nan + + off_az_filled = fill_with_smoothed(data,filterSize) + + # Apply median filter to smooth the azimuth offset map + off_az_filled = ndimage.median_filter(off_az_filled,filterSize) + + # Save the filtered offsets + length, width = off_az_filled.shape + + # writing the masked and filtered offsets to a file + print ('writing masked and filtered offsets to: ', outName) + + ##Write array to offsetfile + off_az_filled.tofile(outName) + + # write the xml file + img = isceobj.createImage() + img.setFilename(outName) + img.setWidth(width) + img.setAccessMode('READ') + img.bands = 1 + img.dataType = 'FLOAT' + img.scheme = 'BIP' + img.renderHdr() + + + return + + +def off_masking(off,filterSize,thre=2): + # Define the mask to fill the offsets + vram = ndimage.median_filter(off.real, filterSize) + vazm = ndimage.median_filter(off.imag, filterSize) + + mask = (np.abs(off.real-vram) > thre) | (np.abs(off.imag-vazm) > thre) | (off.imag == 0) | (off.real == 0) + + return mask + +def fill(data, invalid=None): + """ + Replace the value of invalid 'data' cells (indicated by 'invalid') + by the value of the nearest valid data cell + + Input: + data: numpy array of any dimension + invalid: a binary array of same shape as 'data'. + data value are replaced where invalid is True + If None (default), use: invalid = np.isnan(data) + + Output: + Return a filled array. + """ + if invalid is None: invalid = np.isnan(data) + + ind = ndimage.distance_transform_edt(invalid, + return_distances=False, + return_indices=True) + return data[tuple(ind)] + + +def mask_filter(denseOffsetFile, snrFile, band, snrThreshold, filterSize, outName): + #masking and Filtering + + ##Read in the offset file + ds = gdal.Open(denseOffsetFile + '.vrt', gdal.GA_ReadOnly) + Offset = ds.GetRasterBand(band).ReadAsArray() + ds = None + + ##Read in the SNR file + ds = gdal.Open(snrFile + '.vrt', gdal.GA_ReadOnly) + snr = ds.GetRasterBand(1).ReadAsArray() + ds = None + + # Masking the dense offsets based on SNR + print ('masking the dense offsets with SNR threshold: ', snrThreshold) + Offset[snr thre) | (np.abs(off.imag-vazm) > thre) | (off.imag == 0) | (off.real == 0) + + return mask + + +def fill(data, invalid=None): + """ + Replace the value of invalid 'data' cells (indicated by 'invalid') + by the value of the nearest valid data cell + + Input: + data: numpy array of any dimension + invalid: a binary array of same shape as 'data'. + data value are replaced where invalid is True + If None (default), use: invalid = np.isnan(data) + + Output: + Return a filled array. + """ + if invalid is None: invalid = np.isnan(data) + + ind = ndimage.distance_transform_edt(invalid, + return_distances=False, + return_indices=True) + return data[tuple(ind)] + +def fill_with_smoothed(off,filterSize): + + off_2filt=np.copy(off) + kernel = np.ones((filterSize,filterSize),np.float32)/(filterSize*filterSize) + loop = 0 + cnt2=1 + + while (cnt2 !=0 & loop<100): + loop += 1 + idx2= np.isnan(off_2filt) + cnt2 = np.sum(np.count_nonzero(np.isnan(off_2filt))) + print(cnt2) + if cnt2 != 0: + off_filt= convolve(off_2filt,kernel,boundary='extend',nan_treatment='interpolate') + off_2filt[idx2]=off_filt[idx2] + idx3 = np.where(off_filt == 0) + off_2filt[idx3]=np.nan + off_filt=None + + return off_2filt + +def mask_filter(denseOffsetFile, snrFile, band, snrThreshold, filterSize, outName): + #masking and Filtering + + ##Read in the offset file + ds = gdal.Open(denseOffsetFile + '.vrt', gdal.GA_ReadOnly) + Offset = ds.GetRasterBand(band).ReadAsArray() + ds = None + + ##Read in the SNR file + ds = gdal.Open(snrFile + '.vrt', gdal.GA_ReadOnly) + snr = ds.GetRasterBand(1).ReadAsArray() + ds = None + + # Masking the dense offsets based on SNR + print ('masking the dense offsets with SNR threshold: ', snrThreshold) + Offset[snr + +#include +#include +#include +#include +#include +#include +#include "cudaError.h" +#include +#include + + +/** + * \brief Constructor + * + * @param filename a std::string with the raster image file name + */ + +GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap) + : _useMmap(useMmap) +{ + // open the file as dataset + _poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly ); + // if something is wrong, throw an exception + // GDAL reports the error message + if(!_poDataset) + throw; + + // check the band info + int count = _poDataset->GetRasterCount(); + if(band > count) + { + std::cout << "The desired band " << band << " is greated than " << count << " bands available"; + throw; + } + + // get the desired band + _poBand = _poDataset->GetRasterBand(band); + if(!_poBand) + throw; + + // get the width(x), and height(y) + _width = _poBand->GetXSize(); + _height = _poBand->GetYSize(); + + _dataType = _poBand->GetRasterDataType(); + // determine the image type + _isComplex = GDALDataTypeIsComplex(_dataType); + // determine the pixel size in bytes + _pixelSize = GDALGetDataTypeSize(_dataType); + + _bufferSize = 1024*1024*cacheSizeInGB; + + // checking whether using memory map + if(_useMmap) { + + char **papszOptions = NULL; + // if cacheSizeInGB = 0, use default + // else set the option + if(cacheSizeInGB > 0) + papszOptions = CSLSetNameValue( papszOptions, + "CACHE_SIZE", + std::to_string(_bufferSize).c_str()); + + // space between two lines + GIntBig pnLineSpace; + // set up the virtual mem buffer + _poBandVirtualMem = GDALGetVirtualMemAuto( + static_cast(_poBand), + GF_Read, + &_pixelSize, + &pnLineSpace, + papszOptions); + + // check it + if(!_poBandVirtualMem) + throw; + + // get the starting pointer + _memPtr = CPLVirtualMemGetAddr(_poBandVirtualMem); + } + else { // use a buffer + checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize)); + } + + // make sure memPtr is not Null + if (!_memPtr) + throw; + + // all done +} + + +/// load a tile of data h_tile x w_tile from CPU (mmap) to GPU +/// @param dArray pointer for array in device memory +/// @param h_offset Down/Height offset +/// @param w_offset Across/Width offset +/// @param h_tile Down/Height tile size +/// @param w_tile Across/Width tile size +/// @param stream CUDA stream for copying +void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream) +{ + size_t tileStartOffset = (h_offset*_width + w_offset)*_pixelSize; + + char * startPtr = (char *)_memPtr ; + startPtr += tileStartOffset; + + // @note + // We assume down/across directions as rows/cols. Therefore, SLC mmap and device array are both row major. + // cuBlas assumes both source and target arrays are column major. + // To use cublasSetMatrix, we need to switch w_tile/h_tile for rows/cols + // checkCudaErrors(cublasSetMatrixAsync(w_tile, h_tile, sizeof(float2), startPtr, width, dArray, w_tile, stream)); + if (_useMmap) + checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile*_pixelSize, startPtr, _width*_pixelSize, + w_tile*_pixelSize, h_tile, cudaMemcpyHostToDevice,stream)); + else { + // get the total tile size in bytes + size_t tileSize = h_tile*w_tile*_pixelSize; + // if the size is bigger than existing buffer, reallocate + if (tileSize > _bufferSize) { + // maybe we need to make it to fit the pagesize + _bufferSize = tileSize; + checkCudaErrors(cudaFree(_memPtr)); + checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize)); + } + // copy from file to buffer + CPLErr err = _poBand->RasterIO(GF_Read, //eRWFlag + w_offset, h_offset, //nXOff, nYOff + w_tile, h_tile, // nXSize, nYSize + _memPtr, // pData + w_tile*h_tile, 1, // nBufXSize, nBufYSize + _dataType, //eBufType + 0, 0, //nPixelSpace, nLineSpace in pData + NULL //psExtraArg extra resampling callback + ); + + if(err != CE_None) + throw; + // copy from buffer to gpu + checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream)); + } +} + +GDALImage::~GDALImage() +{ + // free the virtual memory + CPLVirtualMemFree(_poBandVirtualMem), + // free the GDAL Dataset, close the file + delete _poDataset; +} + +// end of file diff --git a/contrib/PyCuAmpcor/src/GDALImage.h b/contrib/PyCuAmpcor/src/GDALImage.h new file mode 100644 index 0000000..3322a2a --- /dev/null +++ b/contrib/PyCuAmpcor/src/GDALImage.h @@ -0,0 +1,79 @@ +// -*- c++ -*- +/** + * \brief Class for an image described GDAL vrt + * + * only complex (pixelOffset=8) or real(pixelOffset=4) images are supported, such as SLC and single-precision TIFF + */ + +#ifndef __GDALIMAGE_H +#define __GDALIMAGE_H + +#include +#include +#include +#include + +class GDALImage{ + +public: + using size_t = std::size_t; + +private: + size_t _fileSize; + int _height; + int _width; + + // buffer pointer + void * _memPtr = NULL; + + int _pixelSize; //in bytes + + int _isComplex; + + size_t _bufferSize; + int _useMmap; + + GDALDataType _dataType; + CPLVirtualMem * _poBandVirtualMem = NULL; + GDALDataset * _poDataset = NULL; + GDALRasterBand * _poBand = NULL; + +public: + GDALImage() = delete; + GDALImage(std::string fn, int band=1, int cacheSizeInGB=0, int useMmap=1); + + void * getmemPtr() + { + return(_memPtr); + } + + size_t getFileSize() + { + return (_fileSize); + } + + size_t getHeight() { + return (_height); + } + + size_t getWidth() + { + return (_width); + } + + int getPixelSize() + { + return _pixelSize; + } + + bool isComplex() + { + return _isComplex; + } + + void loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream); + ~GDALImage(); + +}; + +#endif //__GDALIMAGE_H diff --git a/contrib/PyCuAmpcor/src/Makefile b/contrib/PyCuAmpcor/src/Makefile index 2138093..b789a59 100644 --- a/contrib/PyCuAmpcor/src/Makefile +++ b/contrib/PyCuAmpcor/src/Makefile @@ -3,23 +3,24 @@ PROJECT = CUAMPCOR LDFLAGS = -lcuda -lcudart -lcufft -lcublas CXXFLAGS = -std=c++11 -fpermissive -fPIC -shared NVCCFLAGS = -ccbin g++ -m64 \ - -gencode arch=compute_35,code=sm_35 \ + -gencode arch=compute_35,code=sm_35 \ + -gencode arch=compute_60,code=sm_60 \ -Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \ -ftz=false -prec-div=true -prec-sqrt=true CXX=g++ NVCC=nvcc -DEPS = cudaUtil.h cudaError.h cuArrays.h SlcImage.h cuAmpcorParameter.h -OBJS = SlcImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \ +DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h +OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \ cuSincOverSampler.o cuDeramp.o cuOffset.o \ cuCorrNormalization.o cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \ cuAmpcorChunk.o cuAmpcorController.o cuEstimateStats.o -all: cuampcor +all: pyampcor -SlcImage.o: SlcImage.cu $(DEPS) - $(NVCC) $(NVCCFLAGS) -c -o $@ SlcImage.cu +GDALImage.o: GDALImage.cu $(DEPS) + $(NVCC) $(NVCCFLAGS) -c -o $@ GDALImage.cu cuArrays.o: cuArrays.cu $(DEPS) $(NVCC) $(NVCCFLAGS) -c -o $@ cuArrays.cu @@ -45,8 +46,8 @@ cuOffset.o: cuOffset.cu $(DEPS) cuCorrNormalization.o: cuCorrNormalization.cu $(DEPS) $(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalization.cu -cuAmpcorParameter.o: cuAmpcorParameter.cu - $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu +cuAmpcorParameter.o: cuAmpcorParameter.cu + $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS) $(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrTimeDomain.cu @@ -54,8 +55,8 @@ cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS) cuCorrFrequency.o: cuCorrFrequency.cu $(DEPS) cuCorrFrequency.h $(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrFrequency.cu -cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS) - $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu +cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS) + $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu cuAmpcorController.o: cuAmpcorController.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorController.cu @@ -64,8 +65,8 @@ cuEstimateStats.o: cuEstimateStats.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuEstimateStats.cu -cuampcor: $(OBJS) +pyampcor: $(OBJS) rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace clean: - rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat + rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat diff --git a/contrib/PyCuAmpcor/src/PyCuAmpcor.pyx b/contrib/PyCuAmpcor/src/PyCuAmpcor.pyx index d5f6c8d..857966d 100644 --- a/contrib/PyCuAmpcor/src/PyCuAmpcor.pyx +++ b/contrib/PyCuAmpcor/src/PyCuAmpcor.pyx @@ -1,6 +1,6 @@ -# +# # PYX file to control Python module interface to underlying CUDA-Ampcor code -# +# from libcpp.string cimport string import numpy as np cimport numpy as np @@ -9,13 +9,13 @@ cimport numpy as np cdef extern from "cudaUtil.h": int gpuDeviceInit(int) void gpuDeviceList() - int gpuGetMaxGflopsDeviceId() + int gpuGetMaxGflopsDeviceId() def listGPU(): gpuDeviceList() def findGPU(): - return gpuGetMaxGflopsDeviceId() + return gpuGetMaxGflopsDeviceId() def setGPU(int id): return gpuDeviceInit(id) @@ -24,90 +24,92 @@ def setGPU(int id): cdef extern from "cuAmpcorParameter.h": cdef cppclass cuAmpcorParameter: cuAmpcorParameter() except + - int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain - int deviceID ## Targeted GPU device ID: use -1 to auto select - int nStreams ## Number of streams to asynchonize data transfers and compute kernels + int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain + int deviceID ## Targeted GPU device ID: use -1 to auto select + int nStreams ## Number of streams to asynchonize data transfers and compute kernels int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient - + ## chip or window size for raw data int windowSizeHeightRaw ## Template window height (original size) - int windowSizeWidthRaw ## Template window width (original size) - int searchWindowSizeHeightRaw ## Search window height (original size) + int windowSizeWidthRaw ## Template window width (original size) + int searchWindowSizeHeightRaw ## Search window height (original size) int searchWindowSizeWidthRaw ## Search window width (orignal size) - int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2 + int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2 int halfSearchRangeAcrossRaw ##(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2 ## chip or window size after oversampling int rawDataOversamplingFactor ## Raw data overampling factor (from original size to oversampled size) - - ## strides between chips/windows + + ## strides between chips/windows int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size) int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size) - + ## Zoom in region near location of max correlation - int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions) + int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions) int oversamplingFactor ## Oversampling factor for interpolating correlation surface - int oversamplingMethod - - float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data - + int oversamplingMethod + + float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data + ##master image string masterImageName ## master SLC image name int imageDataType1 ## master image data type, 2=cfloat=complex=float2 1=float - int masterImageHeight ## master image height + int masterImageHeight ## master image height int masterImageWidth ## master image width - + ##slave image string slaveImageName ## slave SLC image name int imageDataType2 ## slave image data type, 2=cfloat=complex=float2 1=float - int slaveImageHeight ## slave image height + int slaveImageHeight ## slave image height int slaveImageWidth ## slave image width - - int mmapSizeInGB ## mmap buffer size in unit of Gigabytes - + + int useMmap ## whether to use mmap + int mmapSizeInGB ## mmap buffer size in unit of Gigabytes (if not mmmap, the buffer size) + ## total number of chips/windows int numberWindowDown ## number of total windows (down) int numberWindowAcross ## number of total windows (across) int numberWindows ## numberWindowDown*numberWindowAcross - + ## number of chips/windows in a batch/chunk int numberWindowDownInChunk ## number of windows processed in a chunk (down) int numberWindowAcrossInChunk ## number of windows processed in a chunk (across) int numberWindowsInChunk ## numberWindowDownInChunk*numberWindowAcrossInChunk int numberChunkDown ## number of chunks (down) int numberChunkAcross ## number of chunks (across) - int numberChunks + int numberChunks - int *masterStartPixelDown ## master starting pixels for each window (down) + int *masterStartPixelDown ## master starting pixels for each window (down) int *masterStartPixelAcross ## master starting pixels for each window (across) - int *slaveStartPixelDown ## slave starting pixels for each window (down) - int *slaveStartPixelAcross ## slave starting pixels for each window (across) + int *slaveStartPixelDown ## slave starting pixels for each window (down) + int *slaveStartPixelAcross ## slave starting pixels for each window (across) int *grossOffsetDown ## Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel - int *grossOffsetAcross ## Gross offsets between master and slave windows (across) + int *grossOffsetAcross ## Gross offsets between master and slave windows (across) int grossOffsetDown0 ## constant gross offset (down) int grossOffsetAcross0 ## constant gross offset (across) - int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset + int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset int masterStartPixelAcross0 ## the first pixel of master image (across) int *masterChunkStartPixelDown ## array of starting pixels for all master chunks (down) int *masterChunkStartPixelAcross ## array of starting pixels for all master chunks (across) int *slaveChunkStartPixelDown ## array of starting pixels for all slave chunks (down) int *slaveChunkStartPixelAcross ## array of starting pixels for all slave chunks (across) - int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU + int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU int *masterChunkWidth ## array of width of all master chunks int *slaveChunkHeight ## array of width of all master chunks int *slaveChunkWidth ## array of width of all slave chunks - int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU - int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU + int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU + int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU int maxSlaveChunkHeight int maxSlaveChunkWidth - - string grossOffsetImageName - string offsetImageName ## Output Offset fields filename + + string grossOffsetImageName + string offsetImageName ## Output Offset fields filename string snrImageName ## Output SNR filename - void setStartPixels(int*, int*, int*, int*) - void setStartPixels(int, int, int*, int*) - void setStartPixels(int, int, int, int) - void checkPixelInImageRange() ## check whether - + string covImageName ## Output COV filename + void setStartPixels(int*, int*, int*, int*) + void setStartPixels(int, int, int*, int*) + void setStartPixels(int, int, int, int) + void checkPixelInImageRange() ## check whether + void setupParameters() ## Process other parameters after Python Inpu cdef extern from "cuAmpcorController.h": @@ -115,34 +117,40 @@ cdef extern from "cuAmpcorController.h": cuAmpcorController() except + cuAmpcorParameter *param void runAmpcor() - + cdef class PyCuAmpcor(object): ''' - Python interface for cuda Ampcor + Python interface for cuda Ampcor ''' cdef cuAmpcorController c_cuAmpcor def __cinit__(self): - return - + return + @property def algorithm(self): - return self.c_cuAmpcor.param.algorithm + return self.c_cuAmpcor.param.algorithm @algorithm.setter def algorithm(self, int a): self.c_cuAmpcor.param.algorithm = a @property def deviceID(self): - return self.c_cuAmpcor.param.deviceID + return self.c_cuAmpcor.param.deviceID @deviceID.setter def deviceID(self, int a): self.c_cuAmpcor.param.deviceID = a @property def nStreams(self): - return self.c_cuAmpcor.param.nStreams + return self.c_cuAmpcor.param.nStreams @nStreams.setter def nStreams(self, int a): self.c_cuAmpcor.param.nStreams = a - @property + @property + def useMmap(self): + return self.c_cuAmpcor.param.useMmap + @useMmap.setter + def useMmap(self, int a): + self.c_cuAmpcor.param.useMmap = a + @property def mmapSize(self): return self.c_cuAmpcor.param.mmapSizeInGB @mmapSize.setter @@ -150,19 +158,19 @@ cdef class PyCuAmpcor(object): self.c_cuAmpcor.param.mmapSizeInGB = a @property def derampMethod(self): - return self.c_cuAmpcor.param.derampMethod + return self.c_cuAmpcor.param.derampMethod @derampMethod.setter def derampMethod(self, int a): self.c_cuAmpcor.param.derampMethod = a @property def windowSizeHeight(self): - return self.c_cuAmpcor.param.windowSizeHeightRaw + return self.c_cuAmpcor.param.windowSizeHeightRaw @windowSizeHeight.setter def windowSizeHeight(self, int a): self.c_cuAmpcor.param.windowSizeHeightRaw = a @property def windowSizeWidth(self): - return self.c_cuAmpcor.param.windowSizeWidthRaw + return self.c_cuAmpcor.param.windowSizeWidthRaw @windowSizeWidth.setter def windowSizeWidth(self, int a): self.c_cuAmpcor.param.windowSizeWidthRaw = a @@ -200,7 +208,7 @@ cdef class PyCuAmpcor(object): @skipSampleAcross.setter def skipSampleAcross(self, int a): self.c_cuAmpcor.param.skipSampleAcrossRaw = a - + @property def rawDataOversamplingFactor(self): """anti-aliasing oversampling factor""" @@ -229,7 +237,7 @@ cdef class PyCuAmpcor(object): @corrSufaceOverSamplingMethod.setter def corrSufaceOverSamplingMethod(self, int a): self.c_cuAmpcor.param.oversamplingMethod = a - @property + @property def masterImageName(self): return self.c_cuAmpcor.param.masterImageName @masterImageName.setter @@ -241,12 +249,12 @@ cdef class PyCuAmpcor(object): @slaveImageName.setter def slaveImageName(self, str a): self.c_cuAmpcor.param.slaveImageName = a.encode() - @property + @property def masterImageName(self): return self.c_cuAmpcor.param.masterImageName @masterImageName.setter def masterImageName(self, str a): - self.c_cuAmpcor.param.masterImageName = a.encode() + self.c_cuAmpcor.param.masterImageName = a.encode() @property def masterImageHeight(self): return self.c_cuAmpcor.param.masterImageHeight @@ -258,7 +266,7 @@ cdef class PyCuAmpcor(object): return self.c_cuAmpcor.param.masterImageWidth @masterImageWidth.setter def masterImageWidth(self, int a): - self.c_cuAmpcor.param.masterImageWidth=a + self.c_cuAmpcor.param.masterImageWidth=a @property def slaveImageHeight(self): return self.c_cuAmpcor.param.slaveImageHeight @@ -270,8 +278,8 @@ cdef class PyCuAmpcor(object): return self.c_cuAmpcor.param.slaveImageWidth @slaveImageWidth.setter def slaveImageWidth(self, int a): - self.c_cuAmpcor.param.slaveImageWidth=a - + self.c_cuAmpcor.param.slaveImageWidth=a + @property def numberWindowDown(self): return self.c_cuAmpcor.param.numberWindowDown @@ -283,11 +291,11 @@ cdef class PyCuAmpcor(object): return self.c_cuAmpcor.param.numberWindowAcross @numberWindowAcross.setter def numberWindowAcross(self, int a): - self.c_cuAmpcor.param.numberWindowAcross = a + self.c_cuAmpcor.param.numberWindowAcross = a @property def numberWindows(self): return self.c_cuAmpcor.param.numberWindows - + @property def numberWindowDownInChunk(self): return self.c_cuAmpcor.param.numberWindowDownInChunk @@ -299,7 +307,7 @@ cdef class PyCuAmpcor(object): return self.c_cuAmpcor.param.numberWindowAcrossInChunk @numberWindowAcrossInChunk.setter def numberWindowAcrossInChunk(self, int a): - self.c_cuAmpcor.param.numberWindowAcrossInChunk = a + self.c_cuAmpcor.param.numberWindowAcrossInChunk = a @property def numberChunkDown(self): return self.c_cuAmpcor.param.numberChunkDown @@ -309,9 +317,9 @@ cdef class PyCuAmpcor(object): @property def numberChunks(self): return self.c_cuAmpcor.param.numberChunks - - - ## gross offets + + + ## gross offets @property def grossOffsetImageName(self): return self.c_cuAmpcor.param.grossOffsetImageName @@ -324,13 +332,21 @@ cdef class PyCuAmpcor(object): @offsetImageName.setter def offsetImageName(self, str a): self.c_cuAmpcor.param.offsetImageName = a.encode() + @property def snrImageName(self): return self.c_cuAmpcor.param.snrImageName @snrImageName.setter def snrImageName(self, str a): self.c_cuAmpcor.param.snrImageName = a.encode() - + + @property + def covImageName(self): + return self.c_cuAmpcor.param.covImageName + @covImageName.setter + def covImageName(self, str a): + self.c_cuAmpcor.param.covImageName = a.encode() + @property def masterStartPixelDownStatic(self): return self.c_cuAmpcor.param.masterStartPixelDown0 @@ -342,20 +358,20 @@ cdef class PyCuAmpcor(object): return self.c_cuAmpcor.param.masterStartPixelAcross0 @masterStartPixelAcrossStatic.setter def masterStartPixelAcrossStatic(self, int a): - self.c_cuAmpcor.param.masterStartPixelAcross0 = a + self.c_cuAmpcor.param.masterStartPixelAcross0 = a @property def grossOffsetDownStatic(self): return self.c_cuAmpcor.param.grossOffsetDown0 @grossOffsetDownStatic.setter def grossOffsetDownStatic(self, int a): - self.c_cuAmpcor.param.grossOffsetDown0 =a + self.c_cuAmpcor.param.grossOffsetDown0 =a @property def grossOffsetAcrossStatic(self): return self.c_cuAmpcor.param.grossOffsetAcross0 @grossOffsetAcrossStatic.setter def grossOffsetAcrossStatic(self, int a): - self.c_cuAmpcor.param.grossOffsetAcross0 =a - + self.c_cuAmpcor.param.grossOffsetAcross0 =a + @property def grossOffsetDownDynamic(self): cdef int *c_data @@ -366,12 +382,12 @@ cdef class PyCuAmpcor(object): return p_data @grossOffsetDownDynamic.setter def grossOffsetDownDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa): - cdef int *c_data + cdef int *c_data cdef int *p_data c_data = self.c_cuAmpcor.param.grossOffsetDown p_data = pa.data for i in range (self.numberWindows): - c_data[i] = p_data[i] + c_data[i] = p_data[i] @property def grossOffsetAcrossDynamic(self): cdef int *c_data @@ -382,23 +398,23 @@ cdef class PyCuAmpcor(object): return p_data @grossOffsetAcrossDynamic.setter def grossOffsetAcrossDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa): - cdef int *c_data + cdef int *c_data cdef int *p_data c_data = self.c_cuAmpcor.param.grossOffsetAcross p_data = pa.data for i in range (self.numberWindows): - c_data[i] = p_data[i] + c_data[i] = p_data[i] return - + def setConstantGrossOffset(self, int goDown, int goAcross): - """ + """ constant gross offsets param goDown gross offset in azimuth direction param goAcross gross offset in range direction """ self.c_cuAmpcor.param.setStartPixels(self.masterStartPixelDownStatic, self.masterStartPixelAcrossStatic, goDown, goAcross) - + def setVaryingGrossOffset(self, np.ndarray[np.int32_t,ndim=1,mode="c"] vD, np.ndarray[np.int32_t,ndim=1,mode="c"] vA): """ varying gross offsets for each window @@ -411,21 +427,21 @@ cdef class PyCuAmpcor(object): def checkPixelInImageRange(self): """ check whether each window is with image range """ self.c_cuAmpcor.param.checkPixelInImageRange() - + def setupParams(self): """ set up constant parameters and allocate array parameters (offsets) should be called after number of windows is set and before setting varying gross offsets """ - self.c_cuAmpcor.param.setupParameters() + self.c_cuAmpcor.param.setupParameters() def runAmpcor(self): """ main procedure to run ampcor """ self.c_cuAmpcor.runAmpcor() - - - - - - + + + + + + diff --git a/contrib/PyCuAmpcor/src/SConscript b/contrib/PyCuAmpcor/src/SConscript index 1bb8d59..5de06c7 100644 --- a/contrib/PyCuAmpcor/src/SConscript +++ b/contrib/PyCuAmpcor/src/SConscript @@ -6,7 +6,7 @@ package = envPyCuAmpcor['PACKAGE'] project = envPyCuAmpcor['PROJECT'] build = envPyCuAmpcor['PRJ_LIB_DIR'] install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project -listFiles = ['SlcImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu', +listFiles = ['GDALImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu', 'cuArraysPadding.cu', 'cuOverSampler.cu', 'cuSincOverSampler.cu', 'cuDeramp.cu', 'cuOffset.cu', 'cuCorrNormalization.cu', diff --git a/contrib/PyCuAmpcor/src/cuAmpcorChunk.cu b/contrib/PyCuAmpcor/src/cuAmpcorChunk.cu index 4f0b43d..ef911e7 100644 --- a/contrib/PyCuAmpcor/src/cuAmpcorChunk.cu +++ b/contrib/PyCuAmpcor/src/cuAmpcorChunk.cu @@ -2,58 +2,74 @@ #include "cuAmpcorUtil.h" /** - * Run ampcor process for a batch of images (a chunk) + * Run ampcor process for a batch of images (a chunk) * @param[in] idxDown_ index oIDIVUP(i,j) ((i+j-1)/j)f the chunk along Down/Azimuth direction * @param[in] idxAcross_ index of the chunk along Across/Range direction - */ + */ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) { // set chunk index setIndex(idxDown_, idxAcross_); - - // load master image chunk - loadMasterChunk(); - + + // load master image chunk + loadMasterChunk(); + //std::cout << "load master chunk ok\n"; - + cuArraysAbs(c_masterBatchRaw, r_masterBatchRaw, stream); cuArraysSubtractMean(r_masterBatchRaw, stream); // load slave image chunk loadSlaveChunk(); cuArraysAbs(c_slaveBatchRaw, r_slaveBatchRaw, stream); - + //std::cout << "load slave chunk ok\n"; - - + + //cross correlation for none-oversampled data if(param->algorithm == 0) { cuCorrFreqDomain->execute(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw); } else { cuCorrTimeDomain(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream); //time domain cross correlation - } + } cuCorrNormalize(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream); - //find the maximum location of none-oversampled correlation - cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream); -// Estimate SNR (Minyan Zhong) - //std::cout<< "flag stats 1" <outputToFile("offsetInit1", stream); - //std::cout<< "flag stats 3" <outputToFile("r_maxval",stream); + r_corrBatchRaw->outputToFile("r_corrBatchRaw",stream); + r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawZoomIn",stream); + i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid",stream); + + // Summation of correlation and data point values + cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream); + + // SNR + cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream); + + // Variance + // cuEstimateVariance(r_corrBatchRaw, offsetInit, r_maxval, r_covValue, stream); + + // Using the approximate estimation to adjust slave image (half search window size becomes only 4 pixels) //offsetInit->debuginfo(stream); // determine the starting pixel to extract slave images around the max location - cuDetermineSlaveExtractOffset(offsetInit, + cuDetermineSlaveExtractOffset(offsetInit, param->halfSearchRangeDownRaw, // old range - param->halfSearchRangeAcrossRaw, + param->halfSearchRangeAcrossRaw, param->halfZoomWindowSizeRaw, // new range param->halfZoomWindowSizeRaw, stream); @@ -63,58 +79,67 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) masterBatchOverSampler->execute(c_masterBatchRaw, c_masterBatchOverSampled, param->derampMethod); cuArraysAbs(c_masterBatchOverSampled, r_masterBatchOverSampled, stream); cuArraysSubtractMean(r_masterBatchOverSampled, stream); - + // extract slave and oversample cuArraysCopyExtract(c_slaveBatchRaw, c_slaveBatchZoomIn, offsetInit, stream); slaveBatchOverSampler->execute(c_slaveBatchZoomIn, c_slaveBatchOverSampled, param->derampMethod); cuArraysAbs(c_slaveBatchOverSampled, r_slaveBatchOverSampled, stream); - + // correlate oversampled images if(param->algorithm == 0) { cuCorrFreqDomain_OverSampled->execute(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn); } else { - cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream); - } + cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream); + } cuCorrNormalize(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream); - + //std::cout << "debug correlation oversample\n"; //std::cout << r_masterBatchOverSampled->height << " " << r_masterBatchOverSampled->width << "\n"; //std::cout << r_slaveBatchOverSampled->height << " " << r_slaveBatchOverSampled->width << "\n"; //std::cout << r_corrBatchZoomIn->height << " " << r_corrBatchZoomIn->width << "\n"; - - // oversample the correlation surface + + // oversample the correlation surface cuArraysCopyExtract(r_corrBatchZoomIn, r_corrBatchZoomInAdjust, make_int2(0,0), stream); - + //std::cout << "debug oversampling " << r_corrBatchZoomInAdjust << " " << r_corrBatchZoomInOverSampled << "\n"; - + if(param->oversamplingMethod) { corrSincOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled); } else { - corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled); + corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled); } - + //find the max again - + cuArraysMaxloc2D(r_corrBatchZoomInOverSampled, offsetZoomIn, corrMaxValue, stream); - - // determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel) - cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal, - param->oversamplingFactor, param->rawDataOversamplingFactor, + + // determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel) + cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal, + param->oversamplingFactor, param->rawDataOversamplingFactor, param->halfSearchRangeDownRaw, param->halfSearchRangeAcrossRaw, param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw, stream); //offsetInit->debuginfo(stream); //offsetZoomIn->debuginfo(stream); - //offsetFinal->debuginfo(stream); - + //offsetFinal->debuginfo(stream); + + // Do insertion. + // Offsetfields. cuArraysCopyInsert(offsetFinal, offsetImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); - // Minyan Zhong - //cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); - //cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); + // Debugging matrix. + cuArraysCopyInsert(r_corrBatchSum, floatImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); + cuArraysCopyInsert(i_corrBatchValidCount, intImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); + // Old: save max correlation coefficients. + //cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); + // New: save SNR + cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); + + // Variance. + cuArraysCopyInsert(r_covValue, covImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); } void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_) @@ -122,14 +147,14 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_) idxChunkDown = idxDown_; idxChunkAcross = idxAcross_; idxChunk = idxChunkAcross + idxChunkDown*param->numberChunkAcross; - + if(idxChunkDown == param->numberChunkDown -1) { nWindowsDown = param->numberWindowDown - param->numberWindowDownInChunk*(param->numberChunkDown -1); } else { nWindowsDown = param->numberWindowDownInChunk; } - + if(idxChunkAcross == param->numberChunkAcross -1) { nWindowsAcross = param->numberWindowAcross - param->numberWindowAcrossInChunk*(param->numberChunkAcross -1); } @@ -137,20 +162,20 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_) nWindowsAcross = param->numberWindowAcrossInChunk; } //std::cout << "DEBUG setIndex" << idxChunk << " " << nWindowsDown << " " << nWindowsAcross << "\n"; - + } /// obtain the starting pixels for each chip -/// @param[in] oStartPixel +/// @param[in] oStartPixel /// void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff) { for(int i=0; inumberWindowDownInChunk; ++i) { int iDown = i; - if(i>=nWindowsDown) iDown = nWindowsDown-1; + if(i>=nWindowsDown) iDown = nWindowsDown-1; for(int j=0; jnumberWindowAcrossInChunk; ++j){ int iAcross = j; - if(j>=nWindowsAcross) iAcross = nWindowsAcross-1; + if(j>=nWindowsAcross) iAcross = nWindowsAcross-1; int idxInChunk = iDown*param->numberWindowAcrossInChunk+iAcross; int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross + idxChunkAcross*param->numberWindowAcrossInChunk+iAcross; @@ -158,108 +183,179 @@ void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, //fprintf(stderr, "relative offset %d %d %d %d\n", i, j, rStartPixel[idxInChunk], diff); } } -} +} void cuAmpcorChunk::loadMasterChunk() { - //load a chunk from mmap to gpu - int startD = param->masterChunkStartPixelDown[idxChunk]; - int startA = param->masterChunkStartPixelAcross[idxChunk]; - int height = param->masterChunkHeight[idxChunk]; - int width = param->masterChunkWidth[idxChunk]; - masterImage->loadToDevice(c_masterChunkRaw->devData, startD, startA, height, width, stream); - std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n"; - //copy the chunk to a batch of images format (nImages, height, width) - //use cpu for some simple math + + // we first load the whole chunk of image from cpu to a gpu buffer c(r)_masterChunkRaw + // then copy to a batch of windows with (nImages, height, width) (leading dimension on the right) + + // get the chunk size to be loaded to gpu + int startD = param->masterChunkStartPixelDown[idxChunk]; //start pixel down (along height) + int startA = param->masterChunkStartPixelAcross[idxChunk]; // start pixel across (along width) + int height = param->masterChunkHeight[idxChunk]; // number of pixels along height + int width = param->masterChunkWidth[idxChunk]; // number of pixels along width + + //use cpu to compute the starting positions for each window getRelativeOffset(ChunkOffsetDown->hostData, param->masterStartPixelDown, param->masterChunkStartPixelDown[idxChunk]); + // copy the positions to gpu ChunkOffsetDown->copyToDevice(stream); + // same for the across direction getRelativeOffset(ChunkOffsetAcross->hostData, param->masterStartPixelAcross, param->masterChunkStartPixelAcross[idxChunk]); ChunkOffsetAcross->copyToDevice(stream); - // if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data - if(param->derampMethod == 0) { - cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk], - c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + + // check whether the image is complex (e.g., SLC) or real( e.g. TIFF) + if(masterImage->isComplex()) + { + // allocate a gpu buffer to load data from cpu/file + // try allocate/deallocate the buffer on the fly to save gpu memory 07/09/19 + c_masterChunkRaw = new cuArrays (param->maxMasterChunkHeight, param->maxMasterChunkWidth); + c_masterChunkRaw->allocate(); + + // load the data from cpu + masterImage->loadToDevice((void *)c_masterChunkRaw->devData, startD, startA, height, width, stream); + //std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n"; + + //copy the chunk to a batch format (nImages, height, width) + // if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data + if(param->derampMethod == 0) { + cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk], + c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + } + else { + cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk], + c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + } + // deallocate the gpu buffer + delete c_masterChunkRaw; } + // if the image is real else { - cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk], - c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + r_masterChunkRaw = new cuArrays (param->maxMasterChunkHeight, param->maxMasterChunkWidth); + r_masterChunkRaw->allocate(); + + // load the data from cpu + masterImage->loadToDevice((void *)r_masterChunkRaw->devData, startD, startA, height, width, stream); + + // copy the chunk (real) to a batch format (complex) + cuArraysCopyToBatchWithOffsetR2C(r_masterChunkRaw, param->masterChunkWidth[idxChunk], + c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + // deallocate the gpu buffer + delete r_masterChunkRaw; } + + } void cuAmpcorChunk::loadSlaveChunk() { - //load a chunk from mmap to gpu - slaveImage->loadToDevice(c_slaveChunkRaw->devData, - param->slaveChunkStartPixelDown[idxChunk], - param->slaveChunkStartPixelAcross[idxChunk], - param->slaveChunkHeight[idxChunk], - param->slaveChunkWidth[idxChunk], - stream); + //copy to a batch format (nImages, height, width) getRelativeOffset(ChunkOffsetDown->hostData, param->slaveStartPixelDown, param->slaveChunkStartPixelDown[idxChunk]); ChunkOffsetDown->copyToDevice(stream); getRelativeOffset(ChunkOffsetAcross->hostData, param->slaveStartPixelAcross, param->slaveChunkStartPixelAcross[idxChunk]); ChunkOffsetAcross->copyToDevice(stream); - if(param->derampMethod == 0) { - cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk], - c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); - } - else + + if(slaveImage->isComplex()) { - cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk], - c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); - } + c_slaveChunkRaw = new cuArrays (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth); + c_slaveChunkRaw->allocate(); + + //load a chunk from mmap to gpu + slaveImage->loadToDevice(c_slaveChunkRaw->devData, + param->slaveChunkStartPixelDown[idxChunk], + param->slaveChunkStartPixelAcross[idxChunk], + param->slaveChunkHeight[idxChunk], + param->slaveChunkWidth[idxChunk], + stream); + + if(param->derampMethod == 0) { + cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk], + c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + } + else { + cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk], + c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + } + delete c_slaveChunkRaw; + } + else { //real image + //allocate the gpu buffer + r_slaveChunkRaw = new cuArrays (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth); + r_slaveChunkRaw->allocate(); + + //load a chunk from mmap to gpu + slaveImage->loadToDevice(r_slaveChunkRaw->devData, + param->slaveChunkStartPixelDown[idxChunk], + param->slaveChunkStartPixelAcross[idxChunk], + param->slaveChunkHeight[idxChunk], + param->slaveChunkWidth[idxChunk], + stream); + + // convert to the batch format + cuArraysCopyToBatchWithOffsetR2C(r_slaveChunkRaw, param->slaveChunkWidth[idxChunk], + c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); + delete r_slaveChunkRaw; + } } -cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_, - cuArrays *offsetImage_, cuArrays *snrImage_, cudaStream_t stream_) +cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_, + cuArrays *offsetImage_, cuArrays *snrImage_, cuArrays *covImage_, cuArrays *intImage1_, cuArrays *floatImage1_, cudaStream_t stream_) + { param = param_; masterImage = master_; - slaveImage = slave_; + slaveImage = slave_; offsetImage = offsetImage_; snrImage = snrImage_; + covImage = covImage_; + + intImage1 = intImage1_; + floatImage1 = floatImage1_; + stream = stream_; - - std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n"; - c_masterChunkRaw = new cuArrays (param->maxMasterChunkHeight, param->maxMasterChunkWidth); - c_masterChunkRaw->allocate(); - - c_slaveChunkRaw = new cuArrays (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth); - c_slaveChunkRaw->allocate(); - + + // std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n"; + // try allocate/deallocate on the fly to save gpu memory 07/09/19 + // c_masterChunkRaw = new cuArrays (param->maxMasterChunkHeight, param->maxMasterChunkWidth); + // c_masterChunkRaw->allocate(); + + // c_slaveChunkRaw = new cuArrays (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth); + // c_slaveChunkRaw->allocate(); + ChunkOffsetDown = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); ChunkOffsetDown->allocate(); ChunkOffsetDown->allocateHost(); ChunkOffsetAcross = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); ChunkOffsetAcross->allocate(); ChunkOffsetAcross->allocateHost(); - + c_masterBatchRaw = new cuArrays ( param->windowSizeHeightRaw, param->windowSizeWidthRaw, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); c_masterBatchRaw->allocate(); - + c_slaveBatchRaw = new cuArrays ( param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); c_slaveBatchRaw->allocate(); - + r_masterBatchRaw = new cuArrays ( param->windowSizeHeightRaw, param->windowSizeWidthRaw, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); r_masterBatchRaw->allocate(); - + r_slaveBatchRaw = new cuArrays ( param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); r_slaveBatchRaw->allocate(); - + c_slaveBatchZoomIn = new cuArrays ( param->searchWindowSizeHeightRawZoomIn, param->searchWindowSizeWidthRawZoomIn, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); c_slaveBatchZoomIn->allocate(); - + c_masterBatchOverSampled = new cuArrays ( param->windowSizeHeight, param->windowSizeWidth, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); @@ -269,7 +365,7 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm param->searchWindowSizeHeight, param->searchWindowSizeWidth, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); c_slaveBatchOverSampled->allocate(); - + r_masterBatchOverSampled = new cuArrays ( param->windowSizeHeight, param->windowSizeWidth, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); @@ -279,66 +375,114 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm param->searchWindowSizeHeight, param->searchWindowSizeWidth, param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); r_slaveBatchOverSampled->allocate(); - + masterBatchOverSampler = new cuOverSamplerC2C( c_masterBatchRaw->height, c_masterBatchRaw->width, //orignal size - c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size + c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size c_masterBatchRaw->count, stream); - - slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width, + + slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width, c_slaveBatchOverSampled->height, c_slaveBatchOverSampled->width, c_slaveBatchRaw->count, stream); - + r_corrBatchRaw = new cuArrays ( - param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1, - param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1, - param->numberWindowDownInChunk, + param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1, + param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1, + param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); r_corrBatchRaw->allocate(); - + r_corrBatchZoomIn = new cuArrays ( - param->searchWindowSizeHeight - param->windowSizeHeight+1, - param->searchWindowSizeWidth - param->windowSizeWidth+1, - param->numberWindowDownInChunk, + param->searchWindowSizeHeight - param->windowSizeHeight+1, + param->searchWindowSizeWidth - param->windowSizeWidth+1, + param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); r_corrBatchZoomIn->allocate(); - + r_corrBatchZoomInAdjust = new cuArrays ( - param->searchWindowSizeHeight - param->windowSizeHeight, - param->searchWindowSizeWidth - param->windowSizeWidth, - param->numberWindowDownInChunk, + param->searchWindowSizeHeight - param->windowSizeHeight, + param->searchWindowSizeWidth - param->windowSizeWidth, + param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); r_corrBatchZoomInAdjust->allocate(); - - + + r_corrBatchZoomInOverSampled = new cuArrays ( - param->zoomWindowSize * param->oversamplingFactor, param->zoomWindowSize * param->oversamplingFactor, - param->numberWindowDownInChunk, + param->zoomWindowSize * param->oversamplingFactor, + param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); r_corrBatchZoomInOverSampled->allocate(); - + offsetInit = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); offsetInit->allocate(); - + offsetZoomIn = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); offsetZoomIn->allocate(); - + offsetFinal = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); offsetFinal->allocate(); - + corrMaxValue = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); corrMaxValue->allocate(); - + + + // new arrays due to snr estimation + std::cout<< "corrRawZoomInHeight: " << param->corrRawZoomInHeight << "\n"; + std::cout<< "corrRawZoomInWidth: " << param->corrRawZoomInWidth << "\n"; + + r_corrBatchRawZoomIn = new cuArrays ( + param->corrRawZoomInHeight, + param->corrRawZoomInWidth, + param->numberWindowDownInChunk, + param->numberWindowAcrossInChunk); + r_corrBatchRawZoomIn->allocate(); + + i_corrBatchZoomInValid = new cuArrays ( + param->corrRawZoomInHeight, + param->corrRawZoomInWidth, + param->numberWindowDownInChunk, + param->numberWindowAcrossInChunk); + i_corrBatchZoomInValid->allocate(); + + + r_corrBatchSum = new cuArrays ( + param->numberWindowDownInChunk, + param->numberWindowAcrossInChunk); + r_corrBatchSum->allocate(); + + i_corrBatchValidCount = new cuArrays ( + param->numberWindowDownInChunk, + param->numberWindowAcrossInChunk); + i_corrBatchValidCount->allocate(); + + i_maxloc = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); + + i_maxloc->allocate(); + + r_maxval = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); + + r_maxval->allocate(); + + r_snrValue = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); + + r_snrValue->allocate(); + + r_covValue = new cuArrays (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); + + r_covValue->allocate(); + + // end of new arrays + if(param->oversamplingMethod) { corrSincOverSampler = new cuSincOverSamplerR2R(param->zoomWindowSize, param->oversamplingFactor, stream); } - else { + else { corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize, - (param->zoomWindowSize)*param->oversamplingFactor, + (param->zoomWindowSize)*param->oversamplingFactor, (param->zoomWindowSize)*param->oversamplingFactor, - param->numberWindowDownInChunk*param->numberWindowAcrossInChunk, - stream); - } + param->numberWindowDownInChunk*param->numberWindowAcrossInChunk, + stream); + } if(param->algorithm == 0) { cuCorrFreqDomain = new cuFreqCorrelator( param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw, @@ -347,10 +491,10 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm cuCorrFreqDomain_OverSampled = new cuFreqCorrelator( param->searchWindowSizeHeight, param->searchWindowSizeWidth, param->numberWindowDownInChunk*param->numberWindowAcrossInChunk, - stream); + stream); } - + debugmsg("all objects in chunk are created ...\n"); diff --git a/contrib/PyCuAmpcor/src/cuAmpcorChunk.h b/contrib/PyCuAmpcor/src/cuAmpcorChunk.h index 0de0aed..9ca2766 100644 --- a/contrib/PyCuAmpcor/src/cuAmpcorChunk.h +++ b/contrib/PyCuAmpcor/src/cuAmpcorChunk.h @@ -1,4 +1,4 @@ -/* +/* * cuAmpcorChunk.h * Purpose: a group of chips processed at the same time */ @@ -6,7 +6,7 @@ #ifndef __CUAMPCORCHUNK_H #define __CUAMPCORCHUNK_H -#include "SlcImage.h" +#include "GDALImage.h" #include "cuArrays.h" #include "cuAmpcorParameter.h" #include "cuOverSampler.h" @@ -22,64 +22,81 @@ private: int nWindowsAcross; int devId; - cudaStream_t stream; - - SlcImage *masterImage; - SlcImage *slaveImage; + cudaStream_t stream; + + GDALImage *masterImage; + GDALImage *slaveImage; cuAmpcorParameter *param; cuArrays *offsetImage; cuArrays *snrImage; - - cuArrays * c_masterChunkRaw, * c_slaveChunkRaw; + cuArrays *covImage; + + // added for test + cuArrays *intImage1; + cuArrays *floatImage1; + + // gpu buffer + cuArrays * c_masterChunkRaw, * c_slaveChunkRaw; + cuArrays * r_masterChunkRaw, * r_slaveChunkRaw; + + // gpu windows raw data cuArrays * c_masterBatchRaw, * c_slaveBatchRaw, * c_slaveBatchZoomIn; cuArrays * r_masterBatchRaw, * r_slaveBatchRaw; - cuArrays * c_masterBatchOverSampled, * c_slaveBatchOverSampled; + + // gpu windows oversampled data + cuArrays * c_masterBatchOverSampled, * c_slaveBatchOverSampled; cuArrays * r_masterBatchOverSampled, * r_slaveBatchOverSampled; cuArrays * r_corrBatchRaw, * r_corrBatchZoomIn, * r_corrBatchZoomInOverSampled, * r_corrBatchZoomInAdjust; - + cuArrays *ChunkOffsetDown, *ChunkOffsetAcross; - + cuOverSamplerC2C *masterBatchOverSampler, *slaveBatchOverSampler; - + cuOverSamplerR2R *corrOverSampler; - cuSincOverSamplerR2R *corrSincOverSampler; - + cuSincOverSamplerR2R *corrSincOverSampler; + //for frequency domain cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled; - + cuArrays *offsetInit; cuArrays *offsetZoomIn; cuArrays *offsetFinal; + cuArrays *corrMaxValue; + + + //SNR estimation + + cuArrays *r_corrBatchRawZoomIn; + cuArrays *r_corrBatchSum; + cuArrays *i_corrBatchZoomInValid, *i_corrBatchValidCount; + + cuArrays *r_snrValue; - //corr statistics cuArrays *i_maxloc; cuArrays *r_maxval; - - cuArrays *r_corrBatchSum; - cuArrays *i_corrBatchZoomInValid, *i_corrBatchValidCount; - - cuArrays *corrMaxValue; - cuArrays *r_snrValue; - + + // Varince estimation. + cuArrays *r_covValue; + public: cuAmpcorChunk() {} //cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_); - + void setIndex(int idxDown_, int idxAcross_); + cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_, cuArrays *offsetImage_, + cuArrays *snrImage_, cuArrays *covImage_, cuArrays *intImage1_, cuArrays *floatImage1_, cudaStream_t stream_); + - cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_, cuArrays *offsetImage_, - cuArrays *snrImage_, cudaStream_t stream_); - void loadMasterChunk(); void loadSlaveChunk(); void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff); - - ~cuAmpcorChunk(); - - void run(int, int); + + ~cuAmpcorChunk(); + + void run(int, int); }; -#endif +#endif diff --git a/contrib/PyCuAmpcor/src/cuAmpcorController.cu b/contrib/PyCuAmpcor/src/cuAmpcorController.cu index 4798716..e9a6c33 100644 --- a/contrib/PyCuAmpcor/src/cuAmpcorController.cu +++ b/contrib/PyCuAmpcor/src/cuAmpcorController.cu @@ -1,113 +1,142 @@ // Implementation of cuAmpcorController #include "cuAmpcorController.h" -#include "SlcImage.h" +#include "GDALImage.h" #include "cuArrays.h" #include "cudaUtil.h" #include "cuAmpcorChunk.h" #include "cuAmpcorUtil.h" #include -cuAmpcorController::cuAmpcorController() { param = new cuAmpcorParameter();} -cuAmpcorController::~cuAmpcorController() { delete param; } +cuAmpcorController::cuAmpcorController() { param = new cuAmpcorParameter();} +cuAmpcorController::~cuAmpcorController() { delete param; } -void cuAmpcorController::runAmpcor() { - +void cuAmpcorController::runAmpcor() { + + // set the gpu id param->deviceID = gpuDeviceInit(param->deviceID); - SlcImage *masterImage; - SlcImage *slaveImage; - + // initialize the gdal driver + GDALAllRegister(); + // master and slave images; use band=1 as default + // TODO: selecting band + GDALImage *masterImage = new GDALImage(param->masterImageName, 1, param->mmapSizeInGB); + GDALImage *slaveImage = new GDALImage(param->slaveImageName, 1, param->mmapSizeInGB); + cuArrays *offsetImage, *offsetImageRun; cuArrays *snrImage, *snrImageRun; - - -// cuArrays *floatImage; -// cuArrays *intImage; + cuArrays *covImage, *covImageRun; + + // For debugging. + cuArrays *intImage1; + cuArrays *floatImage1; + + int nWindowsDownRun = param->numberChunkDown * param->numberWindowDownInChunk; + int nWindowsAcrossRun = param->numberChunkAcross * param->numberWindowAcrossInChunk; - masterImage = new SlcImage(param->masterImageName, param->masterImageHeight, param->masterImageWidth, param->mmapSizeInGB); - slaveImage = new SlcImage(param->slaveImageName, param->slaveImageHeight, param->slaveImageWidth, param->mmapSizeInGB); - - int nWindowsDownRun = param->numberChunkDown*param->numberWindowDownInChunk; - int nWindowsAcrossRun = param->numberChunkAcross*param->numberWindowAcrossInChunk; - std::cout << "Debug " << nWindowsDownRun << " " << param->numberWindowDown << "\n"; - + offsetImageRun = new cuArrays(nWindowsDownRun, nWindowsAcrossRun); - snrImageRun = new cuArrays(nWindowsDownRun, nWindowsAcrossRun); offsetImageRun->allocate(); + + snrImageRun = new cuArrays(nWindowsDownRun, nWindowsAcrossRun); snrImageRun->allocate(); - + + covImageRun = new cuArrays(nWindowsDownRun, nWindowsAcrossRun); + covImageRun->allocate(); + + // intImage 1 and floatImage 1 are added for debugging issues + + intImage1 = new cuArrays(nWindowsDownRun, nWindowsAcrossRun); + intImage1->allocate(); + + floatImage1 = new cuArrays(nWindowsDownRun, nWindowsAcrossRun); + floatImage1->allocate(); + + // Offsetfields. offsetImage = new cuArrays(param->numberWindowDown, param->numberWindowAcross); - snrImage = new cuArrays(param->numberWindowDown, param->numberWindowAcross); offsetImage->allocate(); + + // SNR. + snrImage = new cuArrays(param->numberWindowDown, param->numberWindowAcross); snrImage->allocate(); -// Minyan Zhong -// floatImage = new cuArrays(param->numberWindowDown, param->numberWindowAcross); -// intImage = new cuArrays(param->numberWindowDown, param->numberWindowAcross); + // Variance. + covImage = new cuArrays(param->numberWindowDown, param->numberWindowAcross); + covImage->allocate(); -// floatImage->allocate(); -// intImage->allocate(); -// cudaStream_t streams[param->nStreams]; cuAmpcorChunk *chunk[param->nStreams]; - for(int ist=0; istnStreams; ist++) + for(int ist=0; istnStreams; ist++) { cudaStreamCreate(&streams[ist]); - chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, streams[ist]); + chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, covImageRun, intImage1, floatImage1, streams[ist]); + } - + int nChunksDown = param->numberChunkDown; - int nChunksAcross = param->numberChunkAcross; - + int nChunksAcross = param->numberChunkAcross; + std::cout << "Total number of windows (azimuth x range): " <numberWindowDown << " x " << param->numberWindowAcross << std::endl; std::cout << "to be processed in the number of chunks: " <nStreams) { //std::cout << "Processing chunk(" << i <<", " << j <<")" << std::endl; for(int ist = 0; istnStreams; ist++) - { + { if(j+ist < nChunksAcross) { - + chunk[ist]->run(i, j+ist); } - } + } } } - + cudaDeviceSynchronize(); - + + // Do extraction. cuArraysCopyExtract(offsetImageRun, offsetImage, make_int2(0,0), streams[0]); - cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]); - + cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]); + cuArraysCopyExtract(covImageRun, covImage, make_int2(0,0), streams[0]); + offsetImage->outputToFile(param->offsetImageName, streams[0]); snrImage->outputToFile(param->snrImageName, streams[0]); + covImage->outputToFile(param->covImageName, streams[0]); -// Minyan Zhong -// floatImage->allocate(); -// intImage->allocate(); -// + // Output debugging arrays. + intImage1->outputToFile("intImage1", streams[0]); + floatImage1->outputToFile("floatImage1", streams[0]); outputGrossOffsets(); + + // Delete arrays. delete offsetImage; delete snrImage; + delete covImage; + + delete intImage1; + delete floatImage1; + delete offsetImageRun; delete snrImageRun; + delete covImageRun; + for (int ist=0; istnStreams; ist++) delete chunk[ist]; + delete masterImage; - delete slaveImage; -} + delete slaveImage; + +} void cuAmpcorController::outputGrossOffsets() { cuArrays *grossOffsets = new cuArrays(param->numberWindowDown, param->numberWindowAcross); grossOffsets->allocateHost(); - + for(int i=0; i< param->numberWindows; i++) grossOffsets->hostData[i] = make_float2(param->grossOffsetDown[i], param->grossOffsetAcross[i]); grossOffsets->outputHostToFile(param->grossOffsetImageName); @@ -176,7 +205,7 @@ void cuAmpcorController::setGrossOffsets(int *in, int size) { param->grossOffsets = (int *)malloc(size*sizeof(int)); mempcpy(param->grossOffsets, in, size*sizeof(int)); fprintf(stderr, "copy grossOffsets %d\n", size); -} +} void cuAmpcorController::setOffsetImageName(std::string s) { param->offsetImageName = s; } void cuAmpcorController::setSNRImageName(std::string s) { param->snrImageName = s; } //void cuAmpcorController::setMargin(int n) { param->margin = n; } diff --git a/contrib/PyCuAmpcor/src/cuAmpcorParameter.cu b/contrib/PyCuAmpcor/src/cuAmpcorParameter.cu index 1f42340..d50f5d3 100644 --- a/contrib/PyCuAmpcor/src/cuAmpcorParameter.cu +++ b/contrib/PyCuAmpcor/src/cuAmpcorParameter.cu @@ -1,6 +1,6 @@ /** * cuAmpcorParameter.cu - * Input parameters for ampcor + * Input parameters for ampcor */ #include "cuAmpcorParameter.h" @@ -11,17 +11,19 @@ #endif /// -/// Constructor for cuAmpcorParameter class +/// Constructor for cuAmpcorParameter class /// also sets the default/initial values of various parameters /// - + cuAmpcorParameter::cuAmpcorParameter() { - algorithm = 0; //0 freq; 1 time - deviceID = 0; - nStreams = 1; + // default settings + // will be changed if they are set by python scripts + algorithm = 0; //0 freq; 1 time + deviceID = 0; + nStreams = 1; derampMethod = 1; - + windowSizeWidthRaw = 64; windowSizeHeightRaw = 64; halfSearchRangeDownRaw = 20; @@ -31,9 +33,9 @@ cuAmpcorParameter::cuAmpcorParameter() skipSampleDownRaw = 64; rawDataOversamplingFactor = 2; zoomWindowSize = 8; - oversamplingFactor = 16; - oversamplingMethod = 0; - + oversamplingFactor = 16; + oversamplingMethod = 0; + masterImageName = "master.slc"; masterImageWidth = 1000; masterImageHeight = 1000; @@ -43,50 +45,58 @@ cuAmpcorParameter::cuAmpcorParameter() offsetImageName = "DenseOffset.off"; grossOffsetImageName = "GrossOffset.off"; snrImageName = "snr.snr"; + covImageName = "cov.cov"; numberWindowDown = 1; - numberWindowAcross = 1; + numberWindowAcross = 1; numberWindowDownInChunk = 1; - numberWindowAcrossInChunk = 1 ; - + numberWindowAcrossInChunk = 1 ; + masterStartPixelDown0 = 0; masterStartPixelAcross0 = 0; + + corrRawZoomInHeight = 17; // 8*2+1 + corrRawZoomInWidth = 17; + + useMmap = 1; // use mmap + mmapSizeInGB = 1; + } /** - * To determine other process parameters after reading essential parameters from python - */ + * To determine other process parameters after reading essential parameters from python + */ void cuAmpcorParameter::setupParameters() -{ +{ zoomWindowSize *= rawDataOversamplingFactor; //8 * 2 - halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4 - + halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4 + windowSizeWidth = windowSizeWidthRaw*rawDataOversamplingFactor; // windowSizeHeight = windowSizeHeightRaw*rawDataOversamplingFactor; - searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw; + searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw; searchWindowSizeHeightRaw = windowSizeHeightRaw + 2*halfSearchRangeAcrossRaw; - + searchWindowSizeWidthRawZoomIn = windowSizeWidthRaw + 2*halfZoomWindowSizeRaw; searchWindowSizeHeightRawZoomIn = windowSizeHeightRaw + 2*halfZoomWindowSizeRaw; - + searchWindowSizeWidth = searchWindowSizeWidthRawZoomIn*rawDataOversamplingFactor; searchWindowSizeHeight = searchWindowSizeHeightRawZoomIn*rawDataOversamplingFactor; - + numberWindows = numberWindowDown*numberWindowAcross; if(numberWindows <=0) { fprintf(stderr, "Incorrect number of windows! (%d, %d)\n", numberWindowDown, numberWindowAcross); exit(EXIT_FAILURE); - } - + } + // modified 02/12/2018 to include one more chunk // e.g. numberWindowDownInChunk=102, numberWindowDown=10, results in numberChunkDown=11 - // the last chunk will include 2 windows, numberWindowDownInChunkRun = 2. - + // the last chunk will include 2 windows, numberWindowDownInChunkRun = 2. + numberChunkDown = IDIVUP(numberWindowDown, numberWindowDownInChunk); numberChunkAcross = IDIVUP(numberWindowAcross, numberWindowAcrossInChunk); numberChunks = numberChunkDown*numberChunkAcross; - allocateArrays(); + allocateArrays(); } @@ -99,7 +109,7 @@ void cuAmpcorParameter::allocateArrays() masterStartPixelAcross = (int *)malloc(arraySize); slaveStartPixelDown = (int *)malloc(arraySize); slaveStartPixelAcross = (int *)malloc(arraySize); - + int arraySizeChunk = numberChunks*sizeof(int); masterChunkStartPixelDown = (int *)malloc(arraySizeChunk); masterChunkStartPixelAcross = (int *)malloc(arraySizeChunk); @@ -130,18 +140,18 @@ void cuAmpcorParameter::deallocateArrays() } -/// Set starting pixels for master and slave windows from arrays +/// Set starting pixels for master and slave windows from arrays /// set also gross offsets between master and slave windows -/// +/// void cuAmpcorParameter::setStartPixels(int *mStartD, int *mStartA, int *gOffsetD, int *gOffsetA) { for(int i=0; i vpixel) mChunkSD = vpixel; + if(mChunkSD > vpixel) mChunkSD = vpixel; if(mChunkED < vpixel) mChunkED = vpixel; vpixel = masterStartPixelAcross[idxWindow]; - if(mChunkSA > vpixel) mChunkSA = vpixel; + if(mChunkSA > vpixel) mChunkSA = vpixel; if(mChunkEA < vpixel) mChunkEA = vpixel; vpixel = slaveStartPixelDown[idxWindow]; - if(sChunkSD > vpixel) sChunkSD = vpixel; + if(sChunkSD > vpixel) sChunkSD = vpixel; if(sChunkED < vpixel) sChunkED = vpixel; vpixel = slaveStartPixelAcross[idxWindow]; - if(sChunkSA > vpixel) sChunkSA = vpixel; + if(sChunkSA > vpixel) sChunkSA = vpixel; if(sChunkEA < vpixel) sChunkEA = vpixel; } } @@ -261,58 +271,58 @@ void cuAmpcorParameter::checkPixelInImageRange() for(int col = 0; col < numberWindowAcross; col++) { int i = row*numberWindowAcross + col; - if(masterStartPixelDown[i] <0) + if(masterStartPixelDown[i] <0) { fprintf(stderr, "Master Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, masterStartPixelDown[i]); exit(EXIT_FAILURE); //or raise range error - } - if(masterStartPixelAcross[i] <0) + } + if(masterStartPixelAcross[i] <0) { fprintf(stderr, "Master Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, masterStartPixelAcross[i]); exit(EXIT_FAILURE); - } - endPixel = masterStartPixelDown[i] + windowSizeHeightRaw; - if(endPixel >= masterImageHeight) + } + endPixel = masterStartPixelDown[i] + windowSizeHeightRaw; + if(endPixel >= masterImageHeight) { fprintf(stderr, "Master Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel); exit(EXIT_FAILURE); - } - endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw; - if(endPixel >= masterImageWidth) + } + endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw; + if(endPixel >= masterImageWidth) { fprintf(stderr, "Master Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel); exit(EXIT_FAILURE); - } + } //slave - if(slaveStartPixelDown[i] <0) + if(slaveStartPixelDown[i] <0) { fprintf(stderr, "Slave Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, slaveStartPixelDown[i]); - exit(EXIT_FAILURE); - } - if(slaveStartPixelAcross[i] <0) + exit(EXIT_FAILURE); + } + if(slaveStartPixelAcross[i] <0) { fprintf(stderr, "Slave Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, slaveStartPixelAcross[i]); exit(EXIT_FAILURE); - } - endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw; - if(endPixel >= slaveImageHeight) + } + endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw; + if(endPixel >= slaveImageHeight) { fprintf(stderr, "Slave Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel); exit(EXIT_FAILURE); - } - endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw; - if(endPixel >= slaveImageWidth) + } + endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw; + if(endPixel >= slaveImageWidth) { fprintf(stderr, "Slave Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel); exit(EXIT_FAILURE); - } - - } + } + + } } } -cuAmpcorParameter::~cuAmpcorParameter() +cuAmpcorParameter::~cuAmpcorParameter() { deallocateArrays(); } diff --git a/contrib/PyCuAmpcor/src/cuAmpcorParameter.h b/contrib/PyCuAmpcor/src/cuAmpcorParameter.h index 6c258b7..eed1027 100644 --- a/contrib/PyCuAmpcor/src/cuAmpcorParameter.h +++ b/contrib/PyCuAmpcor/src/cuAmpcorParameter.h @@ -1,7 +1,7 @@ /** * cuAmpcorParameter.h * Header file for Ampcor Parameter Class - * + * * Author: Lijun Zhu @ Seismo Lab, Caltech * March 2017 */ @@ -12,13 +12,13 @@ #include /// Class container for all parameters -/// +/// /// @note -/// The dimension/direction names used are: +/// The dimension/direction names used are: /// The inner-most dimension: x, row, height, down, azimuth, along the track. /// The outer-most dimension: y, column, width, across, range, along the sight. -/// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j] -/// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH] +/// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j] +/// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH] /// @note /// Common procedures to use cuAmpcorParameter @@ -27,72 +27,74 @@ /// 3. Call setupParameters() to determine related parameters and allocate starting pixels for each window: param->setupParameters() /// 4. Provide/set Master window starting pixel(s), and gross offset(s): param->setStartPixels(masterStartDown, masterStartAcross, grossOffsetDown, grossOffsetAcross) /// 4a. Optionally, check the range of windows is within the SLC image range: param->checkPixelInImageRange() -/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used +/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used class cuAmpcorParameter{ public: - int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain - int deviceID; /// Targeted GPU device ID: use -1 to auto select - int nStreams; /// Number of streams to asynchonize data transfers and compute kernels + int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain + int deviceID; /// Targeted GPU device ID: use -1 to auto select + int nStreams; /// Number of streams to asynchonize data transfers and compute kernels int derampMethod; /// Method for deramping 0=None, 1=average, 2=phase gradient - + // chip or window size for raw data int windowSizeHeightRaw; /// Template window height (original size) - int windowSizeWidthRaw; /// Template window width (original size) - int searchWindowSizeHeightRaw; /// Search window height (original size) + int windowSizeWidthRaw; /// Template window width (original size) + int searchWindowSizeHeightRaw; /// Search window height (original size) int searchWindowSizeWidthRaw; /// Search window width (orignal size) - - int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2 + + int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2 int halfSearchRangeAcrossRaw; ///(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2 // search range is (-halfSearchRangeRaw, halfSearchRangeRaw) - + int searchWindowSizeHeightRawZoomIn; int searchWindowSizeWidthRawZoomIn; - - + + int corrRawZoomInHeight; // window to estimate snr + int corrRawZoomInWidth; + // chip or window size after oversampling int rawDataOversamplingFactor; /// Raw data overampling factor (from original size to oversampled size) int windowSizeHeight; /// Template window length (oversampled size) int windowSizeWidth; /// Template window width (original size) - int searchWindowSizeHeight; /// Search window height (oversampled size) - int searchWindowSizeWidth; /// Search window width (oversampled size) - - // strides between chips/windows + int searchWindowSizeHeight; /// Search window height (oversampled size) + int searchWindowSizeWidth; /// Search window width (oversampled size) + + // strides between chips/windows int skipSampleDownRaw; /// Skip size between neighboring windows in Down direction (original size) int skipSampleAcrossRaw; /// Skip size between neighboring windows in across direction (original size) //int skipSampleDown; /// Skip size between neighboring windows in Down direction (oversampled size) //int skipSampleAcross; /// Skip size between neighboring windows in Across direction (oversampled size) - + // Zoom in region near location of max correlation - int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions) - int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor - - - + int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions) + int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor + + + int oversamplingFactor; /// Oversampling factor for interpolating correlation surface - int oversamplingMethod; /// 0 = fft (default) 1 = sinc - - - float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data - + int oversamplingMethod; /// 0 = fft (default) 1 = sinc + + + float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data + //master image std::string masterImageName; /// master SLC image name int imageDataType1; /// master image data type, 2=cfloat=complex=float2 1=float - int masterImageHeight; /// master image height + int masterImageHeight; /// master image height int masterImageWidth; /// master image width - + //slave image std::string slaveImageName; /// slave SLC image name int imageDataType2; /// slave image data type, 2=cfloat=complex=float2 1=float - int slaveImageHeight; /// slave image height + int slaveImageHeight; /// slave image height int slaveImageWidth; /// slave image width - + // total number of chips/windows int numberWindowDown; /// number of total windows (down) int numberWindowAcross; /// number of total windows (across) int numberWindows; /// numberWindowDown*numberWindowAcross - + // number of chips/windows in a batch/chunk int numberWindowDownInChunk; /// number of windows processed in a chunk (down) int numberWindowAcrossInChunk; /// number of windows processed in a chunk (across) @@ -100,20 +102,21 @@ public: int numberChunkDown; /// number of chunks (down) int numberChunkAcross; /// number of chunks (across) int numberChunks; - - int mmapSizeInGB; - + + int useMmap; /// whether to use mmap 0=not 1=yes (default = 0) + int mmapSizeInGB; /// size for mmap buffer(useMmap=1) or a cpu memory buffer (useMmap=0) + int masterStartPixelDown0; int masterStartPixelAcross0; - int *masterStartPixelDown; /// master starting pixels for each window (down) + int *masterStartPixelDown; /// master starting pixels for each window (down) int *masterStartPixelAcross;/// master starting pixels for each window (across) - int *slaveStartPixelDown; /// slave starting pixels for each window (down) - int *slaveStartPixelAcross; /// slave starting pixels for each window (across) + int *slaveStartPixelDown; /// slave starting pixels for each window (down) + int *slaveStartPixelAcross; /// slave starting pixels for each window (across) int grossOffsetDown0; int grossOffsetAcross0; int *grossOffsetDown; /// Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel - int *grossOffsetAcross; /// Gross offsets between master and slave windows (across) - + int *grossOffsetAcross; /// Gross offsets between master and slave windows (across) + int *masterChunkStartPixelDown; int *masterChunkStartPixelAcross; int *slaveChunkStartPixelDown; @@ -124,18 +127,19 @@ public: int *slaveChunkWidth; int maxMasterChunkHeight, maxMasterChunkWidth; int maxSlaveChunkHeight, maxSlaveChunkWidth; - - std::string grossOffsetImageName; - std::string offsetImageName; /// Output Offset fields filename + + std::string grossOffsetImageName; + std::string offsetImageName; /// Output Offset fields filename std::string snrImageName; /// Output SNR filename - + std::string covImageName; + cuAmpcorParameter(); /// Class constructor and default parameters setter - ~cuAmpcorParameter(); /// Class descontructor - + ~cuAmpcorParameter(); /// Class descontructor + void allocateArrays(); /// Allocate various arrays after the number of Windows is given void deallocateArrays(); /// Deallocate arrays on exit - + /// Three methods to set master/slave starting pixels and gross offsets from input master start pixel(s) and gross offset(s) /// 1 (int *, int *, int *, int *): varying master start pixels and gross offsets /// 2 (int, int, int *, int *): fixed master start pixel (first window) and varying gross offsets @@ -144,7 +148,7 @@ public: void setStartPixels(int, int, int*, int*); void setStartPixels(int, int, int, int); void setChunkStartPixels(); - void checkPixelInImageRange(); /// check whether + void checkPixelInImageRange(); /// check whether void setupParameters(); /// Process other parameters after Python Input }; diff --git a/contrib/PyCuAmpcor/src/cuAmpcorUtil.h b/contrib/PyCuAmpcor/src/cuAmpcorUtil.h index a8c8a2e..fe20b19 100644 --- a/contrib/PyCuAmpcor/src/cuAmpcorUtil.h +++ b/contrib/PyCuAmpcor/src/cuAmpcorUtil.h @@ -1,10 +1,10 @@ -/* +/* * cuAmpcorUtil.h * header file to include the various routines for ampcor - * serves as an index + * serves as an index */ - - + + #ifndef __CUAMPCORUTIL_H #define __CUMAPCORUTIL_H @@ -18,20 +18,27 @@ //in cuArraysCopy.cu: various utitlies for copy images file in gpu memory void cuArraysCopyToBatch(cuArrays *image1, cuArrays *image2, int strideH, int strideW, cudaStream_t stream); -void cuArraysCopyToBatchWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, +void cuArraysCopyToBatchWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, const int *offsetH, const int* offsetW, cudaStream_t stream); -void cuArraysCopyToBatchAbsWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, +void cuArraysCopyToBatchAbsWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, + const int *offsetH, const int* offsetW, cudaStream_t stream); +void cuArraysCopyToBatchWithOffsetR2C(cuArrays *image1, const int lda1, cuArrays *image2, const int *offsetH, const int* offsetW, cudaStream_t stream); void cuArraysCopyC2R(cuArrays *image1, cuArrays *image2, int strideH, int strideW, cudaStream_t stream); +// same routine name overloaded for different data type void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, cuArrays *offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, int2 offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, int2 offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, int2 offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, cuArrays *offsets, cudaStream_t stream); +void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, int2 offset, cudaStream_t stream); void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, int offsetX, int offersetY, cudaStream_t stream); +void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, int offsetX, int offersetY, cudaStream_t stream); void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, int offsetX, int offsetY, cudaStream_t stream); +void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, int offsetX, int offersetY, cudaStream_t stream); + void cuArraysCopyInversePadded(cuArrays *imageIn, cuArrays *imageOut,cudaStream_t stream); void cuArraysCopyPadded(cuArrays *imageIn, cuArrays *imageOut,cudaStream_t stream); @@ -46,8 +53,8 @@ void cuDerampMethod2(cuArrays *images, cudaStream_t stream); void cpuDerampMethod3(cuArrays *images, cudaStream_t stream); //in cuArraysPadding.cu: various utilities for oversampling padding -void cuArraysPadding(cuArrays *image1, cuArrays *image2, cudaStream_t stream); -void cuArraysPaddingMany(cuArrays *image1, cuArrays *image2, cudaStream_t stream); +void cuArraysPadding(cuArrays *image1, cuArrays *image2, cudaStream_t stream); +void cuArraysPaddingMany(cuArrays *image1, cuArrays *image2, cudaStream_t stream); void cuArraysR2C(cuArrays *image1, cuArrays *image2, cudaStream_t stream); void cuArraysC2R(cuArrays *image1, cuArrays *image2, cudaStream_t stream); void cuArraysAbs(cuArrays *image1, cuArrays *image2, cudaStream_t stream); @@ -57,21 +64,21 @@ void cuArraysSubtractMean(cuArrays *images, cudaStream_t stream); void cuCorrNormalize(cuArrays *templates, cuArrays *images, cuArrays *results, cudaStream_t stream); //in cuOffset.cu: utitilies for determining the max locaiton of cross correlations or the offset -void cuArraysMaxloc2D(cuArrays *images, cuArrays *maxloc, cuArrays *maxval, cudaStream_t stream); -void cuArraysMaxloc2D(cuArrays *images, cuArrays *maxloc, cudaStream_t stream); +void cuArraysMaxloc2D(cuArrays *images, cuArrays *maxloc, cuArrays *maxval, cudaStream_t stream); +void cuArraysMaxloc2D(cuArrays *images, cuArrays *maxloc, cudaStream_t stream); void cuSubPixelOffset(cuArrays *offsetInit, cuArrays *offsetZoomIn, cuArrays *offsetFinal, int OverSampleRatioZoomin, int OverSampleRatioRaw, int xHalfRangeInit, int yHalfRangeInit, int xHalfRangeZoomIn, int yHalfRangeZoomIn, cudaStream_t stream); - + void cuDetermineInterpZone(cuArrays *maxloc, cuArrays *zoomInOffset, cuArrays *corrOrig, cuArrays *corrZoomIn, cudaStream_t stream); void cuDetermineSlaveExtractOffset(cuArrays *maxLoc, int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream); //in cuCorrTimeDomain.cu: cross correlation in time domain -void cuCorrTimeDomain(cuArrays *templates, cuArrays *images, cuArrays *results, cudaStream_t stream); +void cuCorrTimeDomain(cuArrays *templates, cuArrays *images, cuArrays *results, cudaStream_t stream); //in cuCorrFrequency.cu: cross correlation in freq domain, also include fft correlatior class -void cuArraysElementMultiply(cuArrays *image1, cuArrays *image2, cudaStream_t stream); +void cuArraysElementMultiply(cuArrays *image1, cuArrays *image2, cudaStream_t stream); void cuArraysElementMultiplyConjugate(cuArrays *image1, cuArrays *image2, float coef, cudaStream_t stream); @@ -80,7 +87,11 @@ void cuArraysElementMultiplyConjugate(cuArrays *image1, cuArrays void cuArraysCopyExtractCorr(cuArrays *imagesIn, cuArrays *imagesOut, cuArrays *imagesValid, cuArrays *maxloc, cudaStream_t stream); // implemented in cuCorrNormalization.cu void cuArraysSumCorr(cuArrays *images, cuArrays *imagesValid, cuArrays *imagesSum, cuArrays *imagesValidCount, cudaStream_t stream); + // implemented in cuEstimateStats.cu void cuEstimateSnr(cuArrays *corrSum, cuArrays *corrValidCount, cuArrays *maxval, cuArrays *snrValue, cudaStream_t stream); -#endif +// implemented in cuEstimateStats.cu +void cuEstimateVariance(cuArrays *corrBatchRaw, cuArrays *maxloc, cuArrays *maxval, cuArrays *covValue, cudaStream_t stream); + +#endif diff --git a/contrib/PyCuAmpcor/src/cuArrays.cu b/contrib/PyCuAmpcor/src/cuArrays.cu index 7417e3c..82dfe51 100644 --- a/contrib/PyCuAmpcor/src/cuArrays.cu +++ b/contrib/PyCuAmpcor/src/cuArrays.cu @@ -154,8 +154,21 @@ file.write((char *)data, size*count*sizeof(float2)); file.close(); } + + template<> + void cuArrays::outputToFile(std::string fn, cudaStream_t stream) + { + float *data; + data = (float *)malloc(size*count*sizeof(float3)); + checkCudaErrors(cudaMemcpyAsync(data, devData, size*count*sizeof(float3), cudaMemcpyDeviceToHost, stream)); + std::ofstream file; + file.open(fn.c_str(), std::ios_base::binary); + file.write((char *)data, size*count*sizeof(float3)); + file.close(); + } template class cuArrays; template class cuArrays; + template class cuArrays; template class cuArrays; template class cuArrays; diff --git a/contrib/PyCuAmpcor/src/cuArraysCopy.cu b/contrib/PyCuAmpcor/src/cuArraysCopy.cu index dd7a42f..822b3d2 100644 --- a/contrib/PyCuAmpcor/src/cuArraysCopy.cu +++ b/contrib/PyCuAmpcor/src/cuArraysCopy.cu @@ -4,7 +4,7 @@ * Lijun Zhu @ Seismo Lab, Caltech * v1.0 Jan 2017 */ - + #include "cuArrays.h" #include "cudaUtil.h" #include "cudaError.h" @@ -16,14 +16,14 @@ inline __device__ float cuAbs(float2 a) return sqrtf(a.x*a.x+a.y*a.y); }*/ -//copy a chunk into a series of chips -__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY, - float2 *imageOut, const int outNX, const int outNY, - const int nImagesX, const int nImagesY, +// copy a chunk into a batch of chips for a given stride +__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY, + float2 *imageOut, const int outNX, const int outNY, + const int nImagesX, const int nImagesY, const int strideX, const int strideY) { int idxImage = blockIdx.z; - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return; int idxOut = idxImage*outNX*outNY + outx*outNY + outy; @@ -33,8 +33,7 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX imageOut[idxOut] = imageIn[idxIn]; } -//tested -void cuArraysCopyToBatch(cuArrays *image1, cuArrays *image2, +void cuArraysCopyToBatch(cuArrays *image1, cuArrays *image2, int strideH, int strideW, cudaStream_t stream) { const int nthreads = NTHREADS2D; @@ -48,12 +47,14 @@ void cuArraysCopyToBatch(cuArrays *image1, cuArrays *image2, getLastCudaError("cuArraysCopyToBatch_kernel"); } -__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY, - float2 *imageOut, const int outNX, const int outNY, const int nImages, + +// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex +__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY, + float2 *imageOut, const int outNX, const int outNY, const int nImages, const int *offsetX, const int *offsetY) { int idxImage = blockIdx.z; - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; if(idxImage>=nImages || outx >= outNX || outy >= outNY) return; int idxOut = idxImage*outNX*outNY + outx*outNY + outy; @@ -61,11 +62,8 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons imageOut[idxOut] = imageIn[idxIn]; } -/// @param[in] image1 input image in a large chunk -/// @param[in] lda1 width of image 1 -/// @param[out] image2 output image with a batch of small windows - -void cuArraysCopyToBatchWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, +// lda1 (inNY) is the leading dimension of image1, usually, its width +void cuArraysCopyToBatchWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, const int *offsetH, const int* offsetW, cudaStream_t stream) { const int nthreads = 16; @@ -79,12 +77,13 @@ void cuArraysCopyToBatchWithOffset(cuArrays *image1, const int lda1, cuA getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel"); } -__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY, - float2 *imageOut, const int outNX, const int outNY, const int nImages, +// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes) +__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY, + float2 *imageOut, const int outNX, const int outNY, const int nImages, const int *offsetX, const int *offsetY) { int idxImage = blockIdx.z; - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; if(idxImage>=nImages || outx >= outNX || outy >= outNY) return; int idxOut = idxImage*outNX*outNY + outx*outNY + outy; @@ -92,7 +91,7 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0); } -void cuArraysCopyToBatchAbsWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, +void cuArraysCopyToBatchAbsWithOffset(cuArrays *image1, const int lda1, cuArrays *image2, const int *offsetH, const int* offsetW, cudaStream_t stream) { const int nthreads = 16; @@ -106,14 +105,42 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays *image1, const int lda1, getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel"); } +// copy a chunk into a batch of chips for a set of offsets (varying strides), from real to complex(to real part) +__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY, + float2 *imageOut, const int outNX, const int outNY, const int nImages, + const int *offsetX, const int *offsetY) +{ + int idxImage = blockIdx.z; + int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outy = threadIdx.y + blockDim.y*blockIdx.y; + if(idxImage>=nImages || outx >= outNX || outy >= outNY) return; + int idxOut = idxImage*outNX*outNY + outx*outNY + outy; + int idxIn = (offsetX[idxImage]+outx)*inNY + offsetY[idxImage] + outy; + imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f); +} + +void cuArraysCopyToBatchWithOffsetR2C(cuArrays *image1, const int lda1, cuArrays *image2, + const int *offsetH, const int* offsetW, cudaStream_t stream) +{ + const int nthreads = 16; + dim3 blockSize(nthreads, nthreads, 1); + dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count); + //fprintf(stderr, "copy tile to batch, %d %d\n", lda1, image2->count); + cuArraysCopyToBatchWithOffsetR2C_kernel<<>> ( + image1->devData, lda1, + image2->devData, image2->height, image2->width, image2->count, + offsetH, offsetW); + getLastCudaError("cuArraysCopyToBatchWithOffsetR2C_kernel"); +} + //copy a chunk into a series of chips -__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY, - float *imageOut, const int outNX, const int outNY, - const int nImagesX, const int nImagesY, +__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY, + float *imageOut, const int outNX, const int outNY, + const int nImagesX, const int nImagesY, const int strideX, const int strideY, const float factor) { int idxImage = blockIdx.z; - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return; int idxOut = idxImage*outNX*outNY + outx*outNY + outy; @@ -121,17 +148,17 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co int idxImageY = idxImage%nImagesY; int idxIn = (idxImageX*strideX+outx)*inNY + idxImageY*strideY+outy; imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor; - //printf( "%d\n", idxOut); + //printf( "%d\n", idxOut); } //tested -void cuArraysCopyC2R(cuArrays *image1, cuArrays *image2, +void cuArraysCopyC2R(cuArrays *image1, cuArrays *image2, int strideH, int strideW, cudaStream_t stream) { const int nthreads = 16; dim3 blockSize(nthreads, nthreads, 1); dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count); - float factor = 1.0f/image1->size; //the FFT factor + float factor = 1.0f/image1->size; //the FFT factor cuArraysCopyC2R_kernel<<>> ( image1->devData, image1->height, image1->width, image2->devData, image2->height, image2->width, @@ -141,22 +168,22 @@ void cuArraysCopyC2R(cuArrays *image1, cuArrays *image2, } __global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int inNX, const int inNY, - float *imageOut, const int outNX, const int outNY, const int nImages, + float *imageOut, const int outNX, const int outNY, const int nImages, const int2 *offsets) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) { - int idxImage = blockIdx.z; + int idxImage = blockIdx.z; int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; - int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y; + int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y; imageOut[idxOut] = imageIn[idxIn]; } } -/* copy a tile of images to another image, with starting pixels offsets +/* copy a tile of images to another image, with starting pixels offsets * param[in] imageIn inut images * param[out] imageOut output images of dimension nImages*outNX*outNY */ @@ -166,24 +193,24 @@ void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, const int nthreads = 16; dim3 threadsperblock(nthreads, nthreads,1); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); - cuArraysCopyExtractVaryingOffset<<>>(imagesIn->devData, imagesIn->height, imagesIn->width, + cuArraysCopyExtractVaryingOffset<<>>(imagesIn->devData, imagesIn->height, imagesIn->width, imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData); getLastCudaError("cuArraysCopyExtract error"); } __global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, const int inNX, const int inNY, - float2 *imageOut, const int outNX, const int outNY, const int nImages, + float2 *imageOut, const int outNX, const int outNY, const int nImages, const int2 *offsets) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) { - int idxImage = blockIdx.z; + int idxImage = blockIdx.z; int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; - int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y; + int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y; imageOut[idxOut] = imageIn[idxIn]; } } @@ -194,7 +221,7 @@ void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut const int nthreads = 16; dim3 threadsperblock(nthreads, nthreads,1); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); - cuArraysCopyExtractVaryingOffset_C2C<<>>(imagesIn->devData, imagesIn->height, imagesIn->width, + cuArraysCopyExtractVaryingOffset_C2C<<>>(imagesIn->devData, imagesIn->height, imagesIn->width, imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData); getLastCudaError("cuArraysCopyExtractC2C error"); @@ -202,26 +229,29 @@ void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut // correlation surface extraction (Minyan Zhong) __global__ void cuArraysCopyExtractVaryingOffsetCorr(const float *imageIn, const int inNX, const int inNY, - float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages, + float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages, const int2 *maxloc) { int idxImage = blockIdx.z; - int outx = threadIdx.x + blockDim.x*blockIdx.x; + // One thread per out point. Find the coordinates within the current image. + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; + // Find the correponding input. int inx = outx + maxloc[idxImage].x - outNX/2; int iny = outy + maxloc[idxImage].y - outNY/2; - - if (outx < outNX && outy < outNY) + + if (outx < outNX && outy < outNY) { + // Find the location in full array. int idxOut = ( blockIdx.z * outNX + outx ) * outNY + outy; int idxIn = ( blockIdx.z * inNX + inx ) * inNY + iny; if (inx>=0 && iny>=0 && inx *imagesIn, cuArrays *imagesO __global__ void cuArraysCopyExtractFixedOffset(const float *imageIn, const int inNX, const int inNY, - float *imageOut, const int outNX, const int outNY, const int nImages, + float *imageOut, const int outNX, const int outNY, const int nImages, const int offsetX, const int offsetY) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) - { + { int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; - int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; + int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; imageOut[idxOut] = imageIn[idxIn]; } } -/* copy a tile of images to another image, with starting pixels offsets +/* copy a tile of images to another image, with starting pixels offsets * param[in] imageIn inut images * param[out] imageOut output images of dimension nImages*outNX*outNY */ @@ -279,23 +309,24 @@ void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, const int nthreads = 16; dim3 threadsperblock(nthreads, nthreads,1); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); - cuArraysCopyExtractFixedOffset<<>>(imagesIn->devData, imagesIn->height, imagesIn->width, + cuArraysCopyExtractFixedOffset<<>>(imagesIn->devData, imagesIn->height, imagesIn->width, imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y); getLastCudaError("cuArraysCopyExtract error"); } +// __global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY, - float2 *imageOut, const int outNX, const int outNY, const int nImages, + float2 *imageOut, const int outNX, const int outNY, const int nImages, const int offsetX, const int offsetY) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) - { + { int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; - int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; + int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; imageOut[idxOut] = imageIn[idxIn]; } } @@ -311,27 +342,64 @@ void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut //imagesIn->debuginfo(stream); //imagesOut->debuginfo(stream); cuArraysCopyExtract_C2C_FixedOffset<<>> - (imagesIn->devData, imagesIn->height, imagesIn->width, + (imagesIn->devData, imagesIn->height, imagesIn->width, imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y); getLastCudaError("cuArraysCopyExtractC2C error"); } +// -__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY, - float *imageOut, const int outNX, const int outNY, const int nImages, +// float3 +__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY, + float3 *imageOut, const int outNX, const int outNY, const int nImages, const int offsetX, const int offsetY) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) - { + { int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; - int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; + int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; + imageOut[idxOut] = imageIn[idxIn]; + } +} + + +void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, int2 offset, cudaStream_t stream) +{ + //assert(imagesIn->height >= imagesOut && inNY >= outNY); + const int nthreads = NTHREADS2D; + dim3 threadsperblock(nthreads, nthreads,1); + dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); + //std::cout << "debug copyExtract" << imagesOut->width << imagesOut->height << "\n"; + //imagesIn->debuginfo(stream); + //imagesOut->debuginfo(stream); + cuArraysCopyExtract_C2C_FixedOffset<<>> + (imagesIn->devData, imagesIn->height, imagesIn->width, + imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y); + getLastCudaError("cuArraysCopyExtractFloat3 error"); +} + +// + + +__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY, + float *imageOut, const int outNX, const int outNY, const int nImages, + const int offsetX, const int offsetY) +{ + int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outy = threadIdx.y + blockDim.y*blockIdx.y; + + if(outx < outNX && outy < outNY) + { + int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; + int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; imageOut[idxOut] = imageIn[idxIn].x; } } + void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, int2 offset, cudaStream_t stream) { //assert(imagesIn->height >= imagesOut && inNY >= outNY); @@ -339,16 +407,16 @@ void cuArraysCopyExtract(cuArrays *imagesIn, cuArrays *imagesOut, dim3 threadsperblock(nthreads, nthreads,1); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); cuArraysCopyExtract_C2R_FixedOffset<<>> - (imagesIn->devData, imagesIn->height, imagesIn->width, + (imagesIn->devData, imagesIn->height, imagesIn->width, imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y); getLastCudaError("cuArraysCopyExtractC2C error"); } - +// __global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY, float2* imageOut, const int outNY, const int offsetX, const int offsetY) { - int inx = threadIdx.x + blockDim.x*blockIdx.x; + int inx = threadIdx.x + blockDim.x*blockIdx.x; int iny = threadIdx.y + blockDim.y*blockIdx.y; if(inx < inNX && iny < inNY) { int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY); @@ -363,16 +431,40 @@ void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, i const int nthreads = 16; dim3 threadsperblock(nthreads, nthreads); dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads)); - cuArraysCopyInsert_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, + cuArraysCopyInsert_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, + imageOut->devData, imageOut->width, offsetX, offsetY); + getLastCudaError("cuArraysCopyInsert error"); +} +// +// float3 +__global__ void cuArraysCopyInsert_kernel(const float3* imageIn, const int inNX, const int inNY, + float3* imageOut, const int outNY, const int offsetX, const int offsetY) +{ + int inx = threadIdx.x + blockDim.x*blockIdx.x; + int iny = threadIdx.y + blockDim.y*blockIdx.y; + if(inx < inNX && iny < inNY) { + int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY); + int idxIn = IDX2R(inx, iny, inNY); + imageOut[idxOut] = make_float3(imageIn[idxIn].x, imageIn[idxIn].y, imageIn[idxIn].z); + } +} + +void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, int offsetX, int offsetY, cudaStream_t stream) +{ + const int nthreads = 16; + dim3 threadsperblock(nthreads, nthreads); + dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads)); + cuArraysCopyInsert_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, imageOut->devData, imageOut->width, offsetX, offsetY); getLastCudaError("cuArraysCopyInsert error"); } +// __global__ void cuArraysCopyInsert_kernel(const float* imageIn, const int inNX, const int inNY, float* imageOut, const int outNY, const int offsetX, const int offsetY) { - int inx = threadIdx.x + blockDim.x*blockIdx.x; + int inx = threadIdx.x + blockDim.x*blockIdx.x; int iny = threadIdx.y + blockDim.y*blockIdx.y; if(inx < inNX && iny < inNY) { int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY); @@ -387,18 +479,44 @@ void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, int const int nthreads = 16; dim3 threadsperblock(nthreads, nthreads); dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads)); - cuArraysCopyInsert_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, + cuArraysCopyInsert_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, imageOut->devData, imageOut->width, offsetX, offsetY); getLastCudaError("cuArraysCopyInsert Float error"); } +// + +__global__ void cuArraysCopyInsert_kernel(const int* imageIn, const int inNX, const int inNY, + int* imageOut, const int outNY, const int offsetX, const int offsetY) +{ + int inx = threadIdx.x + blockDim.x*blockIdx.x; + int iny = threadIdx.y + blockDim.y*blockIdx.y; + if(inx < inNX && iny < inNY) { + int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY); + int idxIn = IDX2R(inx, iny, inNY); + imageOut[idxOut] = imageIn[idxIn]; + } +} + + +void cuArraysCopyInsert(cuArrays *imageIn, cuArrays *imageOut, int offsetX, int offsetY, cudaStream_t stream) +{ + const int nthreads = 16; + dim3 threadsperblock(nthreads, nthreads); + dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads)); + cuArraysCopyInsert_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, + imageOut->devData, imageOut->width, offsetX, offsetY); + getLastCudaError("cuArraysCopyInsert Integer error"); +} +// + __global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int inNY, int sizeIn, float *imageOut, int outNX, int outNY, int sizeOut, int nImages) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) { int idxImage = blockIdx.z; @@ -409,27 +527,27 @@ __global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int i } else { imageOut[idxOut] = 0.0f; } - } + } } void cuArraysCopyInversePadded(cuArrays *imageIn, cuArrays *imageOut,cudaStream_t stream) { const int nthreads = 16; - int nImages = imageIn->count; + int nImages = imageIn->count; dim3 blockSize(nthreads, nthreads,1); dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages); cuArraysCopyInversePadded_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size, imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages); - getLastCudaError("cuArraysCopyInversePadded error"); + getLastCudaError("cuArraysCopyInversePadded error"); } __global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY, int sizeIn, float *imageOut, int outNX, int outNY, int sizeOut, int nImages) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) { int idxImage = blockIdx.z; @@ -440,26 +558,26 @@ __global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY } else { imageOut[idxOut] = 0.0f; } - } + } } void cuArraysCopyPadded(cuArrays *imageIn, cuArrays *imageOut,cudaStream_t stream) { const int nthreads = 16; - int nImages = imageIn->count; + int nImages = imageIn->count; dim3 blockSize(nthreads, nthreads,1); dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages); cuArraysCopyPadded_R2R_kernel<<>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size, imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages); - getLastCudaError("cuArraysCopyPaddedR2R error"); + getLastCudaError("cuArraysCopyPaddedR2R error"); } __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inNY, int sizeIn, float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) { int idxImage = blockIdx.z; @@ -468,31 +586,31 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn; imageOut[idxOut] = imageIn[idxIn]; } - else{ - imageOut[idxOut] = make_float2(0.0f, 0.0f); + else{ + imageOut[idxOut] = make_float2(0.0f, 0.0f); } - } + } } void cuArraysCopyPadded(cuArrays *imageIn, cuArrays *imageOut,cudaStream_t stream) { const int nthreads = NTHREADS2D; - int nImages = imageIn->count; + int nImages = imageIn->count; dim3 blockSize(nthreads, nthreads,1); dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages); cuArraysCopyPadded_C2C_kernel<<>> (imageIn->devData, imageIn->height, imageIn->width, imageIn->size, imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages); - getLastCudaError("cuArraysCopyInversePadded error"); + getLastCudaError("cuArraysCopyInversePadded error"); } __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY, int sizeIn, float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages) { - int outx = threadIdx.x + blockDim.x*blockIdx.x; + int outx = threadIdx.x + blockDim.x*blockIdx.x; int outy = threadIdx.y + blockDim.y*blockIdx.y; - + if(outx < outNX && outy < outNY) { int idxImage = blockIdx.z; @@ -501,42 +619,42 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn; imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f); } - else{ - imageOut[idxOut] = make_float2(0.0f, 0.0f); + else{ + imageOut[idxOut] = make_float2(0.0f, 0.0f); } - } + } } void cuArraysCopyPadded(cuArrays *imageIn, cuArrays *imageOut,cudaStream_t stream) { const int nthreads = NTHREADS2D; - int nImages = imageIn->count; + int nImages = imageIn->count; dim3 blockSize(nthreads, nthreads,1); dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages); cuArraysCopyPadded_R2C_kernel<<>> (imageIn->devData, imageIn->height, imageIn->width, imageIn->size, imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages); - getLastCudaError("cuArraysCopyPadded error"); + getLastCudaError("cuArraysCopyPadded error"); } __global__ void cuArraysSetConstant_kernel(float *image, int size, float value) { - int idx = threadIdx.x + blockDim.x*blockIdx.x; - + int idx = threadIdx.x + blockDim.x*blockIdx.x; + if(idx < size) { image[idx] = value; - } + } } void cuArraysSetConstant(cuArrays *imageIn, float value, cudaStream_t stream) { const int nthreads = 256; - int size = imageIn->getSize(); + int size = imageIn->getSize(); cuArraysSetConstant_kernel<<>> (imageIn->devData, imageIn->size, value); - getLastCudaError("cuArraysCopyPadded error"); + getLastCudaError("cuArraysCopyPadded error"); } diff --git a/contrib/PyCuAmpcor/src/cuCorrNormalization.cu b/contrib/PyCuAmpcor/src/cuCorrNormalization.cu index 83dc1cf..7c9c339 100644 --- a/contrib/PyCuAmpcor/src/cuCorrNormalization.cu +++ b/contrib/PyCuAmpcor/src/cuCorrNormalization.cu @@ -195,7 +195,6 @@ __device__ float2 partialSums(const float v, volatile float* shmem, const int st return make_float2(Sum, Sum2); } -__forceinline__ __device__ int __mul(const int a, const int b) { return a*b; } template __global__ void cuCorrNormalize_kernel( @@ -232,7 +231,7 @@ __global__ void cuCorrNormalize_kernel( templateSum += templateD[i]; } templateSum = sumReduceBlock(templateSum, shmem); - + __syncthreads(); float templateSum2 = 0.0f; for (int i = tid; i < templateSize; i += Nthreads) @@ -241,11 +240,12 @@ __global__ void cuCorrNormalize_kernel( templateSum2 += t*t; } templateSum2 = sumReduceBlock(templateSum2, shmem); + __syncthreads(); //if(tid ==0) printf("template sum %d %g %g \n", imageIdx, templateSum, templateSum2); /*********/ - shmem[tid] = shmem[tid + Nthreads] = 0.0f; + shmem[tid] = shmem[tid + Nthreads] = shmem[tid + 2*Nthreads] = 0.0f; __syncthreads(); float imageSum = 0.0f; @@ -281,7 +281,7 @@ __global__ void cuCorrNormalize_kernel( if (tid < resultNY) { const int ix = iaddr/imageNY; - const int addr = __mul(ix-templateNX, resultNY); + const int addr = (ix-templateNX)*resultNY; //printf("test norm %d %d %d %d %f\n", tid, ix, addr, addr+tid, resultD[addr + tid]); diff --git a/contrib/PyCuAmpcor/src/cuEstimateStats.cu b/contrib/PyCuAmpcor/src/cuEstimateStats.cu index 8fa7e45..49c1d71 100644 --- a/contrib/PyCuAmpcor/src/cuEstimateStats.cu +++ b/contrib/PyCuAmpcor/src/cuEstimateStats.cu @@ -25,7 +25,7 @@ __global__ void cudaKernel_estimateSnr(const float* corrSum, const int* corrVali float mean = (corrSum[idx] - maxval[idx] * maxval[idx]) / (corrValidCount[idx] - 1); - snrValue[idx] = maxval[idx] / mean; + snrValue[idx] = maxval[idx] * maxval[idx] / mean; } void cuEstimateSnr(cuArrays *corrSum, cuArrays *corrValidCount, cuArrays *maxval, cuArrays *snrValue, cudaStream_t stream) @@ -55,7 +55,7 @@ void cuEstimateSnr(cuArrays *corrSum, cuArrays *corrValidCount, cuAr //for (int i=0; ihostData[i]<hostData[i]< *corrSum, cuArrays *corrValidCount, cuAr getLastCudaError("cuda kernel estimate stats error\n"); } + + +template // number of threads per block. +__global__ void cudaKernel_estimateVar(const float* corrBatchRaw, const int NX, const int NY, const int2* maxloc, const float* maxval, float3* covValue, const int size) +{ + + // Find image id. + int idxImage = threadIdx.x + blockDim.x*blockIdx.x; + + if (idxImage >= size) return; + + // Preparation. + int px = maxloc[idxImage].x; + int py = maxloc[idxImage].y; + float peak = maxval[idxImage]; + + // Check if maxval is on the margin. + if (px-1 < 0 || py-1 <0 || px + 1 >=NX || py+1 >=NY) { + + covValue[idxImage] = make_float3(99.0, 99.0, 99.0); + + } + else { + int offset = NX * NY * idxImage; + int idx00 = offset + (px - 1) * NY + py - 1; + int idx01 = offset + (px - 1) * NY + py ; + int idx02 = offset + (px - 1) * NY + py + 1; + int idx10 = offset + (px ) * NY + py - 1; + int idx11 = offset + (px ) * NY + py ; + int idx12 = offset + (px ) * NY + py + 1; + int idx20 = offset + (px + 1) * NY + py - 1; + int idx21 = offset + (px + 1) * NY + py ; + int idx22 = offset + (px + 1) * NY + py + 1; + + float dxx = - ( corrBatchRaw[idx21] + corrBatchRaw[idx01] - 2*corrBatchRaw[idx11] ) * 0.5; + float dyy = - ( corrBatchRaw[idx12] + corrBatchRaw[idx10] - 2*corrBatchRaw[idx11] ) * 0.5; + float dxy = - ( corrBatchRaw[idx22] + corrBatchRaw[idx00] - corrBatchRaw[idx20] - corrBatchRaw[idx02] ) *0.25; + + float n2 = fmaxf(1 - peak, 0.0); + + int winSize = NX*NY; + + dxx = dxx * winSize; + dyy = dyy * winSize; + dxy = dxy * winSize; + + float n4 = n2*n2; + n2 = n2 * 2; + n4 = n4 * 0.5 * winSize; + + float u = dxy * dxy - dxx * dyy; + float u2 = u*u; + + if (fabsf(u) < 1e-2) { + + covValue[idxImage] = make_float3(99.0, 99.0, 99.0); + + } + else { + float cov_xx = (- n2 * u * dyy + n4 * ( dyy*dyy + dxy*dxy) ) / u2; + float cov_yy = (- n2 * u * dxx + n4 * ( dxx*dxx + dxy*dxy) ) / u2; + float cov_xy = ( n2 * u * dxy - n4 * ( dxx + dyy ) * dxy ) / u2; + covValue[idxImage] = make_float3(cov_xx, cov_yy, cov_xy); + } + } +} + +void cuEstimateVariance(cuArrays *corrBatchRaw, cuArrays *maxloc, cuArrays *maxval, cuArrays *covValue, cudaStream_t stream) +{ + + int size = corrBatchRaw->count; + + // One dimensional launching parameters to loop over every correlation surface. + cudaKernel_estimateVar<<< IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>> + (corrBatchRaw->devData, corrBatchRaw->height, corrBatchRaw->width, maxloc->devData, maxval->devData, covValue->devData, size); + getLastCudaError("cudaKernel_estimateVar error\n"); +} diff --git a/contrib/PyCuAmpcor/src/setup.py b/contrib/PyCuAmpcor/src/setup.py index 6384c91..aa34441 100644 --- a/contrib/PyCuAmpcor/src/setup.py +++ b/contrib/PyCuAmpcor/src/setup.py @@ -7,20 +7,21 @@ from distutils.core import setup from distutils.extension import Extension from Cython.Build import cythonize -import os -os.environ["CC"] = "g++" +import numpy setup( name = 'PyCuAmpcor', ext_modules = cythonize(Extension( "PyCuAmpcor", sources=['PyCuAmpcor.pyx'], - include_dirs=['/usr/local/cuda/include'], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS + include_dirs=['/usr/local/cuda/include', numpy.get_include()], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS extra_compile_args=['-fPIC','-fpermissive'], - extra_objects=['SlcImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o', + extra_objects=['GDALImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o', 'cuCorrNormalization.o','cuCorrTimeDomain.o','cuArraysCopy.o', 'cuArrays.o','cuArraysPadding.o','cuOffset.o','cuOverSampler.o', - 'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o'], - extra_link_args=['-L/usr/local/cuda/lib64','-lcuda','-lcudart','-lcufft','-lcublas'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES + 'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o','cuEstimateStats.o'], + extra_link_args=['-L/usr/local/cuda/lib64', + '-L/usr/lib64/nvidia', + '-lcuda','-lcudart','-lcufft','-lcublas','-lgdal'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES language='c++' ))) diff --git a/contrib/SConscript b/contrib/SConscript index fb91670..ecc0baf 100644 --- a/contrib/SConscript +++ b/contrib/SConscript @@ -78,3 +78,6 @@ SConscript(rfi) SConscript('PyCuAmpcor/SConscript') SConscript('splitSpectrum/SConscript') SConscript('alos2proc/SConscript') + +if os.path.exists('geo_autoRIFT'): + SConscript('geo_autoRIFT/SConscript') diff --git a/contrib/stack/stripmapStack/baselineGrid.py b/contrib/stack/stripmapStack/baselineGrid.py index 21fded7..ea316df 100755 --- a/contrib/stack/stripmapStack/baselineGrid.py +++ b/contrib/stack/stripmapStack/baselineGrid.py @@ -139,7 +139,7 @@ def main(iargs=None): direction = np.sign(np.dot( np.cross(targxyz-mxyz, sxyz-mxyz), mvel)) Bperp[ii,jj] = direction*perp - Bperp.tofile(fid) + Bperp.tofile(fid) fid.close() ####Write XML diff --git a/contrib/stack/stripmapStack/crossmul.py b/contrib/stack/stripmapStack/crossmul.py index 8ffeeef..7b07716 100755 --- a/contrib/stack/stripmapStack/crossmul.py +++ b/contrib/stack/stripmapStack/crossmul.py @@ -33,6 +33,8 @@ def cmdLineParse(iargs = None): def run(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): objSlc1 = isceobj.createSlcImage() + #right now imageSlc1 and 2 are just text files, need to open them as image + IU.copyAttributes(imageSlc1, objSlc1) objSlc1.setAccessMode('read') objSlc1.createImage() @@ -81,7 +83,6 @@ def run(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): def main(iargs=None): - inps = cmdLineParse(iargs) img1 = isceobj.createImage() @@ -96,9 +97,8 @@ def main(iargs=None): run(img1, img2, inps.prefix, inps.azlooks, inps.rglooks) if __name__ == '__main__': - + + main() ''' Main driver. ''' - - diff --git a/scons_tools/cuda.py b/scons_tools/cuda.py index fe2f6d0..9dce35b 100644 --- a/scons_tools/cuda.py +++ b/scons_tools/cuda.py @@ -52,7 +52,7 @@ def generate(env): # default flags for the NVCC compiler env['STATICNVCCFLAGS'] = '' env['SHAREDNVCCFLAGS'] = '' - env['ENABLESHAREDNVCCFLAG'] = '-arch=sm_35 -shared -Xcompiler -fPIC' + env['ENABLESHAREDNVCCFLAG'] = '-shared -Xcompiler -fPIC' # default NVCC commands env['STATICNVCCCMD'] = '$NVCC -o $TARGET -c $NVCCFLAGS $STATICNVCCFLAGS $SOURCES' @@ -153,7 +153,7 @@ def generate(env): #env.Append(LIBPATH=[cudaSDKPath + '/lib', cudaSDKPath + '/common/lib' + cudaSDKSubLibDir, cudaToolkitPath + '/lib']) env.Append(CUDACPPPATH=[cudaToolkitPath + '/include']) - env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64']) + env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64', '/lib64']) env.Append(CUDALIBS=['cudart']) def exists(env): diff --git a/setup/setup.py b/setup/setup.py index e25db37..3e01849 100755 --- a/setup/setup.py +++ b/setup/setup.py @@ -12,7 +12,7 @@ from __future__ import print_function import sys import os -import urllib2 +import urllib import getopt import re import shutil @@ -57,7 +57,7 @@ def print2log(msg, withtime=True, cmd=False): if withtime: now = datetime.datetime.today() msg = "%s >> %s" % (now.isoformat(), msg) - LOGFILE.write(msg + '\n') + LOGFILE.write((msg + '\n').encode('utf-8')) LOGFILE.flush() os.fsync(LOGFILE) @@ -157,9 +157,9 @@ def downloadfile(url, fname, repeat=1): counter = 0 while counter < repeat: try: - response = urllib2.urlopen(url) + response = urllib.request.urlopen(url) break - except urllib2.URLError, e: + except urllib.request.URLError as e: counter += 1 if hasattr(e, 'reason'): print2log("Failed to reach server. Reason: %s" % e.reason) @@ -851,7 +851,7 @@ class ISCEDeps(object): f = open(self.config, 'rb') lines = f.readlines() for line in lines: - m = re.match("([^#].*?)=([^#]+?)$", line.strip()) + m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8')) if m: var = m.group(1).strip() val = m.group(2).strip() @@ -867,7 +867,7 @@ def readSetupConfig(setup_config): f = open(setup_config, 'rb') lines = f.readlines() for line in lines: - m = re.match("([^#].*?)=([^#]+?)$", line.strip()) + m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8')) if m: var = m.group(1).strip() val = m.group(2).strip().replace('"', '') @@ -885,7 +885,7 @@ def checkArgs(args): """ try: opts, args = getopt.getopt(args, "h", ["help", "prefix=", "ping=", "config=", "uname=", "download=", "unpack=", "install=", "gcc=", "gpp=", "verbose"]) - except getopt.GetoptError, err: + except getopt.GetoptError as err: print2log("ProgError: %s" % str(err)) usage() sys.exit(2)