Merge branch 'master' of https://github.com/isce-framework/isce2 into UAVSAR

LT1AB
Eric J. Fielding 2019-12-03 16:41:41 -08:00
commit 36e7012e66
49 changed files with 2239 additions and 742 deletions

View File

@ -23,7 +23,7 @@ jobs:
pwd pwd
mkdir config build install mkdir config build install
. /opt/conda/bin/activate root . /opt/conda/bin/activate root
conda install --yes cython gdal h5py libgdal pytest numpy fftw scipy basemap scons opencv hdf4 hdf5 netcdf4 libgcc libstdcxx-ng cmake conda install --yes cython gdal h5py libgdal pytest numpy fftw scipy basemap scons opencv hdf4 hdf5 netcdf4 libgcc libstdcxx-ng cmake astropy
yum install -y uuid-devel x11-devel motif-devel jq gcc-gfortran yum install -y uuid-devel x11-devel motif-devel jq gcc-gfortran
ln -s /opt/conda/bin/cython /opt/conda/bin/cython3 ln -s /opt/conda/bin/cython /opt/conda/bin/cython3
cd /opt/conda/lib cd /opt/conda/lib

View File

@ -623,7 +623,23 @@ between three files as follows:
<property name="OUTPUT">20061231</property> <property name="OUTPUT">20061231</property>
</component> </component>
``` ```
### rtcApp.xml
The inputs are Sentinel-1 GRD zip files.
```xml
<rtcApp>
<constant name="dir">/Users/data/sentinel1 </constant>
<component name="rtcApp">
<property name="posting">20</property>
<property name="sensor name">sentinel1</property>
<component name="master">
<property name="safe">$dir$/rtcApp/data/S1A_IW_GRDH_1SDV_20181221T225104_20181221T225129_025130_02C664_B46C.zip</property>
<property name="orbit directory">$dir$/orbits</property>
<property name="output directory">$dir$/rtcApp/output</property>
<property name="polarization">[VV, VH]</property>
</component>
</component>
</rtcApp>
```
----- -----
## Component Configurability ## Component Configurability

View File

@ -293,6 +293,7 @@ def main(args, files):
#######Determine number of input and output bands #######Determine number of input and output bands
bandList = [] bandList = []
iMath['equations'] = []
for ii,expr in enumerate(args.equation.split(';')): for ii,expr in enumerate(args.equation.split(';')):
#####Now parse the equation to get the file names used #####Now parse the equation to get the file names used
@ -319,7 +320,11 @@ def main(args, files):
######Create input memmaps ######Create input memmaps
for ii,infile in enumerate(fileList): for ii,infile in enumerate(fileList):
fstr, files = parseInputFile(infile, files) if type(files) == list:
fstr, files = parseInputFile(infile, files)
else:
fstr = getattr(files, infile)
logger.debug('Input string for File %d: %s: %s'%(ii, infile, fstr)) logger.debug('Input string for File %d: %s: %s'%(ii, infile, fstr))
if len(fstr.split(';')) > 1: if len(fstr.split(';')) > 1:
@ -341,8 +346,9 @@ def main(args, files):
if bbox is not None: if bbox is not None:
iMath['bboxes'].append(bbox) iMath['bboxes'].append(bbox)
if len(files): if type(files) == list:
raise IOError('Unused input variables set:\n'+ ' '.join(files)) if len(files):
raise IOError('Unused input variables set:\n'+ ' '.join(files))
#######Some debugging #######Some debugging
logger.debug('List of available bands: ' + str(iMath['inBands'].keys())) logger.debug('List of available bands: ' + str(iMath['inBands'].keys()))

View File

@ -155,7 +155,7 @@ NUMBER_RANGE_LOOKS = Application.Parameter('numberRangeLooks',
) )
POSTING = Application.Parameter('posting', POSTING = Application.Parameter('posting',
public_name='azimuth looks', public_name='posting',
default = 20.0, default = 20.0,
type = float, type = float,
mandatory = False, mandatory = False,
@ -363,6 +363,7 @@ class GRDSAR(Application):
self.verifyDEM = RtcProc.createVerifyDEM(self) self.verifyDEM = RtcProc.createVerifyDEM(self)
self.multilook = RtcProc.createLooks(self) self.multilook = RtcProc.createLooks(self)
self.runTopo = RtcProc.createTopo(self) self.runTopo = RtcProc.createTopo(self)
self.runNormalize = RtcProc.createNormalize(self)
# self.runGeocode = RtcProc.createGeocode(self) # self.runGeocode = RtcProc.createGeocode(self)
return None return None
@ -392,6 +393,9 @@ class GRDSAR(Application):
##Run topo for each bursts ##Run topo for each bursts
self.step('topo', func=self.runTopo) self.step('topo', func=self.runTopo)
##Run normalize to get gamma0
self.step('normalize', func=self.runNormalize)
# Geocode # Geocode
# self.step('geocode', func=self.runGeocode, # self.step('geocode', func=self.runGeocode,
# args=(self.geocode_list, self.do_unwrap, self.geocode_bbox)) # args=(self.geocode_list, self.do_unwrap, self.geocode_bbox))
@ -416,6 +420,9 @@ class GRDSAR(Application):
##Run topo for each burst ##Run topo for each burst
self.runTopo() self.runTopo()
##Run normalize to get gamma0
self.runNormalize()
###Compute covariance ###Compute covariance
# self.runEstimateCovariance() # self.runEstimateCovariance()

View File

@ -242,14 +242,20 @@ FILTER_STRENGTH = Application.Parameter('filterStrength',
mandatory=False, mandatory=False,
doc='') doc='')
############################################## Modified by V.Brancato 10.07.2019
DO_RUBBERSHEETING = Application.Parameter('doRubbersheeting', DO_RUBBERSHEETINGAZIMUTH = Application.Parameter('doRubbersheetingAzimuth',
public_name='do rubbersheeting', public_name='do rubbersheetingAzimuth',
default=False, default=False,
type=bool, type=bool,
mandatory=False, mandatory=False,
doc='') doc='')
DO_RUBBERSHEETINGRANGE = Application.Parameter('doRubbersheetingRange',
public_name='do rubbersheetingRange',
default=False,
type=bool,
mandatory=False,
doc='')
#################################################################################
RUBBERSHEET_SNR_THRESHOLD = Application.Parameter('rubberSheetSNRThreshold', RUBBERSHEET_SNR_THRESHOLD = Application.Parameter('rubberSheetSNRThreshold',
public_name='rubber sheet SNR Threshold', public_name='rubber sheet SNR Threshold',
default = 5.0, default = 5.0,
@ -533,7 +539,8 @@ class _RoiBase(Application, FrameMixin):
GEOCODE_BOX, GEOCODE_BOX,
REGION_OF_INTEREST, REGION_OF_INTEREST,
HEIGHT_RANGE, HEIGHT_RANGE,
DO_RUBBERSHEETING, DO_RUBBERSHEETINGRANGE, #Modified by V. Brancato 10.07.2019
DO_RUBBERSHEETINGAZIMUTH, #Modified by V. Brancato 10.07.2019
RUBBERSHEET_SNR_THRESHOLD, RUBBERSHEET_SNR_THRESHOLD,
RUBBERSHEET_FILTER_SIZE, RUBBERSHEET_FILTER_SIZE,
DO_DENSEOFFSETS, DO_DENSEOFFSETS,
@ -724,7 +731,8 @@ class _RoiBase(Application, FrameMixin):
self.runResampleSlc = StripmapProc.createResampleSlc(self) self.runResampleSlc = StripmapProc.createResampleSlc(self)
self.runRefineSlaveTiming = StripmapProc.createRefineSlaveTiming(self) self.runRefineSlaveTiming = StripmapProc.createRefineSlaveTiming(self)
self.runDenseOffsets = StripmapProc.createDenseOffsets(self) self.runDenseOffsets = StripmapProc.createDenseOffsets(self)
self.runRubbersheet = StripmapProc.createRubbersheet(self) self.runRubbersheetRange = StripmapProc.createRubbersheetRange(self) #Modified by V. Brancato 10.07.2019
self.runRubbersheetAzimuth =StripmapProc.createRubbersheetAzimuth(self) #Modified by V. Brancato 10.07.2019
self.runResampleSubbandSlc = StripmapProc.createResampleSubbandSlc(self) self.runResampleSubbandSlc = StripmapProc.createResampleSubbandSlc(self)
self.runInterferogram = StripmapProc.createInterferogram(self) self.runInterferogram = StripmapProc.createInterferogram(self)
self.runFilter = StripmapProc.createFilter(self) self.runFilter = StripmapProc.createFilter(self)
@ -774,8 +782,11 @@ class _RoiBase(Application, FrameMixin):
args=('refined',)) args=('refined',))
self.step('dense_offsets', func=self.runDenseOffsets) self.step('dense_offsets', func=self.runDenseOffsets)
######################################################################## Modified by V. Brancato 10.07.2019
self.step('rubber_sheet', func=self.runRubbersheet) self.step('rubber_sheet_range', func=self.runRubbersheetRange)
self.step('rubber_sheet_azimuth',func=self.runRubbersheetAzimuth)
#########################################################################
self.step('fine_resample', func=self.runResampleSlc, self.step('fine_resample', func=self.runResampleSlc,
args=('fine',)) args=('fine',))
@ -852,10 +863,14 @@ class _RoiBase(Application, FrameMixin):
# run dense offsets # run dense offsets
self.runDenseOffsets() self.runDenseOffsets()
# adding the azimuth offsets computed from cross correlation to geometry offsets ############ Modified by V. Brancato 10.07.2019
self.runRubbersheet() # adding the azimuth offsets computed from cross correlation to geometry offsets
self.runRubbersheetAzimuth()
# adding the range offsets computed from cross correlation to geometry offsets
self.runRubbersheetRange()
####################################################################################
# resampling using rubbersheeted offsets # resampling using rubbersheeted offsets
# which include geometry + constant range + constant azimuth # which include geometry + constant range + constant azimuth
# + dense azimuth offsets # + dense azimuth offsets

View File

@ -1061,7 +1061,7 @@ class Orbit(Component):
###This wont break the old interface but could cause ###This wont break the old interface but could cause
###issues at midnight crossing ###issues at midnight crossing
if reference is None: if reference is None:
reference = self.minTime() reference = self.minTime
refEpoch = reference.replace(hour=0, minute=0, second=0, microsecond=0) refEpoch = reference.replace(hour=0, minute=0, second=0, microsecond=0)

View File

@ -46,5 +46,6 @@ createPreprocessor = _factory("runPreprocessor")
createVerifyDEM = _factory("runVerifyDEM") createVerifyDEM = _factory("runVerifyDEM")
createLooks = _factory("runLooks") createLooks = _factory("runLooks")
createTopo = _factory("runTopo") createTopo = _factory("runTopo")
createNormalize = _factory("runNormalize")
#createGeocode = _factory("runGeocode") #createGeocode = _factory("runGeocode")

View File

@ -69,7 +69,7 @@ INC_FILENAME = Component.Parameter(
GAMMA0_FILENAME = Component.Parameter( GAMMA0_FILENAME = Component.Parameter(
'gamma0FileName', 'gamma0FileName',
public_name='Gamma0 backscatter file', public_name='Gamma0 backscatter file',
default = 'gamma0.rdr', default = 'gamma0.img',
type = str, type = str,
mandatory = False, mandatory = False,
doc = 'Unmasked gamma0 backscatter file') doc = 'Unmasked gamma0 backscatter file')

View File

@ -1,4 +1,4 @@
# #!/usr/bin/env python3
# Author: Piyush Agram # Author: Piyush Agram
# Copyright 2016 # Copyright 2016
# #
@ -6,19 +6,23 @@
import logging import logging
import isceobj import isceobj
import mroipac import mroipac
from .runTopo import filenameWithLooks
from .runLooks import takeLooks
import os import os
import itertools
import numpy as np import numpy as np
from isceobj.Util.decorators import use_api from isceobj.Util.decorators import use_api
from applications import imageMath
logger = logging.getLogger('isce.grdsar.looks') logger = logging.getLogger('isce.grdsar.looks')
class Dummy:
pass
def runNormalize(self): def runNormalize(self):
''' '''
Make sure that a DEM is available for processing the given data. Make sure that a DEM is available for processing the given data.
''' '''
refPol = self._grd.polarizations[0] refPol = self._grd.polarizations[0]
master = self._grd.loadProduct( os.path.join(self._grd.outputFolder, 'beta_{0}.xml'.format(refPol))) master = self._grd.loadProduct( os.path.join(self._grd.outputFolder, 'beta_{0}.xml'.format(refPol)))
@ -26,17 +30,31 @@ def runNormalize(self):
azlooks, rglooks = self._grd.getLooks( self.posting, master.groundRangePixelSize, master.azimuthPixelSize, self.numberAzimuthLooks, self.numberRangeLooks) azlooks, rglooks = self._grd.getLooks( self.posting, master.groundRangePixelSize, master.azimuthPixelSize, self.numberAzimuthLooks, self.numberRangeLooks)
if (azlooks == 1) and (rglooks == 1):
return
slantRange = False
for pol in self._grd.polarizations: for pol in self._grd.polarizations:
inname = os.path.join( self._grd.outputFolder, 'beta_{0}.img'.format(pol) ) if (azlooks == 1) and (rglooks == 1):
takeLooks(inname, azlooks, rglooks) inname = os.path.join( self._grd.outputFolder, 'beta_{0}.img'.format(pol))
else:
inname = os.path.join( self._grd.outputFolder, filenameWithLooks('beta_{0}.img'.format(pol), azlooks, rglooks))
if not slantRange: basefolder, output = os.path.split(self._grd.outputFolder)
inname = master.slantRangeImage.filename incname = os.path.join(basefolder, self._grd.geometryFolder, self._grd.incFileName)
takeLooks(inname, azlooks, rglooks) outname = os.path.join(self._grd.outputFolder, filenameWithLooks('gamma_{0}'.format(pol)+'.img', azlooks, rglooks))
slantRange = True maskname = os.path.join(basefolder, self._grd.geometryFolder, self._grd.slMaskFileName)
args = imageMath.createNamespace()
args.equation = 'a*cos(b_0*PI/180.)/cos(b_1*PI/180.) * (c==0)'
args.dtype = np.float32
args.scheme = 'BIL'
args.out = outname
#args.debug = True
files = Dummy()
files.a = inname
files.b = incname
files.c = maskname
imageMath.main(args, files)
return return

View File

@ -261,10 +261,10 @@ class Sentinel1(Component):
self.validateUserInputs() self.validateUserInputs()
if self.xml.startswith('/vsizip'): #Read from zip file if '.zip' in self.xml:
try: try:
parts = self.xml.split(os.path.sep) parts = self.xml.split(os.path.sep)
zipname = os.path.join(*(parts[2:-3])) zipname = os.path.join('/',*(parts[:-3]))
fname = os.path.join(*(parts[-3:])) fname = os.path.join(*(parts[-3:]))
with zipfile.ZipFile(zipname, 'r') as zf: with zipfile.ZipFile(zipname, 'r') as zf:
@ -283,23 +283,22 @@ class Sentinel1(Component):
self.populateMetadata() self.populateMetadata()
self.populateBbox() self.populateBbox()
####Tru and locate an orbit file ####Try and locate an orbit file
if self.orbitFile is None: if self.orbitFile is None:
if self.orbitDir is not None: if self.orbitDir is not None:
self.orbitFile = self.findOrbitFile() self.orbitFile = self.findOrbitFile()
print('Found this orbitfile: %s' %self.orbitFile)
####Read in the orbits ####Read in the orbits
if self.orbitFile: if '_POEORB_' in self.orbitFile:
orb = self.extractPreciseOrbit() orb = self.extractPreciseOrbit()
else: elif '_RESORB_' in self.orbitFile:
orb = self.extractOrbit() orb = self.extractOrbit()
self.product.orbit.setOrbitSource('Header') self.product.orbit.setOrbitSource('Header')
for sv in orb: for sv in orb:
self.product.orbit.addStateVector(sv) self.product.orbit.addStateVector(sv)
self.populateIPFVersion() self.populateIPFVersion()
self.extractBetaLUT() self.extractBetaLUT()
self.extractNoiseLUT() self.extractNoiseLUT()
@ -423,10 +422,11 @@ class Sentinel1(Component):
nsp = "{http://www.esa.int/safe/sentinel-1.0}" nsp = "{http://www.esa.int/safe/sentinel-1.0}"
if self.manifest.startswith('/vsizip'): if '.zip' in self.manifest:
import zipfile import zipfile
parts = self.manifest.split(os.path.sep) parts = self.manifest.split(os.path.sep)
zipname = os.path.join(*(parts[2:-2])) zipname = os.path.join('/',*(parts[:-2]))
fname = os.path.join(*(parts[-2:])) fname = os.path.join(*(parts[-2:]))
try: try:
@ -462,38 +462,40 @@ class Sentinel1(Component):
datefmt = "%Y%m%dT%H%M%S" datefmt = "%Y%m%dT%H%M%S"
types = ['POEORB', 'RESORB'] types = ['POEORB', 'RESORB']
filelist = []
match = [] match = []
timeStamp = self.product.sensingMid timeStamp = self.product.sensingStart+(self.product.sensingStop - self.product.sensingStart)/2.
for orbType in types: for orbType in types:
files = glob.glob( os.path.join(self.orbitDir, 'S1A_OPER_AUX_' + orbType + '_OPOD*')) files = glob.glob( os.path.join(self.orbitDir, 'S1A_OPER_AUX_' + orbType + '_OPOD*'))
filelist.extend(files)
###List all orbit files ###List all orbit files
for result in files:
fields = result.split('_')
taft = datetime.datetime.strptime(fields[-1][0:15], datefmt)
tbef = datetime.datetime.strptime(fields[-2][1:16], datefmt)
#####Get all files that span the acquisition
if (tbef <= timeStamp) and (taft >= timeStamp):
tmid = tbef + 0.5 * (taft - tbef)
match.append((result, abs((timeStamp-tmid).total_seconds())))
#####Return the file with the image is aligned best to the middle of the file for result in filelist:
if len(match) != 0: fields = result.split('_')
bestmatch = min(match, key = lambda x: x[1]) taft = datetime.datetime.strptime(fields[-1][0:15], datefmt)
return bestmatch[0] tbef = datetime.datetime.strptime(fields[-2][1:16], datefmt)
print(taft, tbef)
#####Get all files that span the acquisition
if (tbef <= timeStamp) and (taft >= timeStamp):
tmid = tbef + 0.5 * (taft - tbef)
match.append((result, abs((timeStamp-tmid).total_seconds())))
#####Return the orbit file whose midpoint is closest to the middle of the acquisition
if len(match) != 0:
bestmatch = min(match, key = lambda x: x[1])
return bestmatch[0]
if len(match) == 0: if len(match) == 0:
raise Exception('No suitable orbit file found. If you want to process anyway - unset the orbitdir parameter') raise Exception('No suitable orbit file found. If you want to process anyway - unset the orbitdir parameter')
def extractOrbit(self): def extractOrbit(self):
''' '''
Extract orbit information from xml node. Extract orbit information from xml node.
''' '''
node = self._xml_root.find('generalAnnotation/orbitList') node = self._xml_root.find('generalAnnotation/orbitList')
print('Extracting orbit from annotation XML file') print('Extracting orbit from annotation XML file')
frameOrbit = Orbit() frameOrbit = Orbit()
frameOrbit.configure() frameOrbit.configure()
@ -516,13 +518,7 @@ class Sentinel1(Component):
vec.setVelocity(vel) vec.setVelocity(vel)
frameOrbit.addStateVector(vec) frameOrbit.addStateVector(vec)
return frameOrbit
orbExt = OrbitExtender(planet=Planet(pname='Earth'))
orbExt.configure()
newOrb = orbExt.extendOrbit(frameOrbit)
return newOrb
def extractPreciseOrbit(self): def extractPreciseOrbit(self):
''' '''
@ -534,11 +530,10 @@ class Sentinel1(Component):
print("IOError: %s" % strerr) print("IOError: %s" % strerr)
return return
_xml_root = ElementTree(file=fp).getroot() _xml_root = ElementTree.ElementTree(file=fp).getroot()
node = _xml_root.find('Data_Block/List_of_OSVs') node = _xml_root.find('Data_Block/List_of_OSVs')
print('Extracting orbit from Orbit File: ', self.orbitFile)
orb = Orbit() orb = Orbit()
orb.configure() orb.configure()
@ -582,10 +577,10 @@ class Sentinel1(Component):
if self.calibrationXml is None: if self.calibrationXml is None:
raise Exception('No calibration file provided') raise Exception('No calibration file provided')
if self.calibrationXml.startswith('/vsizip'): if '.zip' in self.calibrationXml:
import zipfile import zipfile
parts = self.calibrationXml.split(os.path.sep) parts = self.calibrationXml.split(os.path.sep)
zipname = os.path.join(*(parts[2:-4])) zipname = os.path.join('/',*(parts[:-4]))
fname = os.path.join(*(parts[-4:])) fname = os.path.join(*(parts[-4:]))
try: try:
@ -723,7 +718,7 @@ class Sentinel1(Component):
print('Extracting normalized image ....') print('Extracting normalized image ....')
src = gdal.Open(self.tiff.strip(), gdal.GA_ReadOnly) src = gdal.Open('/vsizip//'+self.tiff.strip(), gdal.GA_ReadOnly)
band = src.GetRasterBand(1) band = src.GetRasterBand(1)
if self.product.numberOfSamples != src.RasterXSize: if self.product.numberOfSamples != src.RasterXSize:

View File

@ -21,6 +21,7 @@ c get alos position and times
integer*1 indata(32768) integer*1 indata(32768)
integer statb(13),stat integer statb(13),stat
integer numdata,rowPos,colPos,eof integer numdata,rowPos,colPos,eof
integer*4 unpackBytes
c read the leader file descriptor record c read the leader file descriptor record
!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!
@ -106,12 +107,9 @@ c read in the raw data file line by line
do i=1,nlines do i=1,nlines
! jng ierr=ioread(ichandata,indata,len) ! jng ierr=ioread(ichandata,indata,len)
call getLineSequential(rawAccessor,indata,eof) call getLineSequential(rawAccessor,indata,eof)
iyear=iand(indata(40),255)*256*256*256+iand(indata(39),255)*256*256+ iyear = unpackBytes(indata(40), indata(39), indata(38), indata(37))
$ iand(indata(38),255)*256+iand(indata(37),255) idoy = unpackBytes(indata(44), indata(43), indata(42), indata(41))
idoy=iand(indata(44),255)*256*256*256+iand(indata(43),255)*256*256+ ims = unpackBytes(indata(48), indata(47), indata(46), indata(45))
$ iand(indata(42),255)*256+iand(indata(41),255)
ims=iand(indata(48),255)*256*256*256+iand(indata(47),255)*256*256+
$ iand(indata(46),255)*256+iand(indata(45),255)
ddate(2) = ims*1000.0 !we save days in the year and microsec in the day ddate(2) = ims*1000.0 !we save days in the year and microsec in the day
ddate(1) = 1.*idoy ddate(1) = 1.*idoy
call setLineSequential(auxAccessor,ddate) call setLineSequential(auxAccessor,ddate)
@ -144,3 +142,9 @@ c print *,val
return return
end end
integer*4 function unpackBytes(i1, i2, i3, i4)
integer*4 i1, i2, i3, i4
unpackBytes = iand(i1, 255)*256*256*256 + iand(i2, 255)*256*256 +
$ iand(i3, 255)*256 + iand(i4, 255)
end function

View File

@ -112,7 +112,8 @@ createResampleSlc = _factory("runResampleSlc")
createResampleSubbandSlc = _factory("runResampleSubbandSlc") createResampleSubbandSlc = _factory("runResampleSubbandSlc")
createRefineSlaveTiming = _factory("runRefineSlaveTiming") createRefineSlaveTiming = _factory("runRefineSlaveTiming")
createDenseOffsets = _factory("runDenseOffsets") createDenseOffsets = _factory("runDenseOffsets")
createRubbersheet = _factory("runRubbersheet") createRubbersheetAzimuth = _factory("runRubbersheetAzimuth") # Modified by V. Brancato (10.07.2019)
createRubbersheetRange = _factory("runRubbersheetRange") # Modified by V. Brancato (10.07.2019)
createInterferogram = _factory("runInterferogram") createInterferogram = _factory("runInterferogram")
createCoherence = _factory("runCoherence") createCoherence = _factory("runCoherence")
createFilter = _factory("runFilter") createFilter = _factory("runFilter")

View File

@ -49,7 +49,7 @@ listFiles = ['StripmapProc.py', 'runPreprocessor.py', 'runSplitSpectrum.py',
'Factories.py' , 'runDenseOffsets.py', 'runResampleSlc.py' , 'runUnwrapGrass.py', 'Factories.py' , 'runDenseOffsets.py', 'runResampleSlc.py' , 'runUnwrapGrass.py',
'__init__.py' , 'runDispersive.py' , 'runResampleSubbandSlc.py', 'runUnwrapIcu.py', '__init__.py' , 'runDispersive.py' , 'runResampleSubbandSlc.py', 'runUnwrapIcu.py',
'runFilter.py' , 'runROI.py' , 'runUnwrapSnaphu.py', 'runCrop.py', 'runFilter.py' , 'runROI.py' , 'runUnwrapSnaphu.py', 'runCrop.py',
'runGeo2rdr.py', 'runRubbersheet.py', '__StripmapProc.py' , 'runInterferogram.py', 'runGeo2rdr.py', 'runRubbersheetRange.py', 'runRubbersheetAzimuth.py', '__StripmapProc.py' , 'runInterferogram.py',
'runVerifyDEM.py', 'runGeocode.py', 'Sensor.py' 'runVerifyDEM.py', 'runGeocode.py', 'Sensor.py'
] ]

View File

@ -325,14 +325,21 @@ AZIMUTH_OFFSET_FILENAME = Component.Parameter('azimuthOffsetFilename',
doc='') doc='')
# Modified by V. Brancato 10.07.2019
AZIMUTH_RUBBERSHEET_FILENAME = Component.Parameter('azimuthRubbersheetFilename', AZIMUTH_RUBBERSHEET_FILENAME = Component.Parameter('azimuthRubbersheetFilename',
public_name='azimuth Rubbersheet Image Name', public_name='azimuth Rubbersheet Image Name',
default = 'azimuth_sheet.off', default = 'azimuth_sheet.off',
type=str, type=str,
mandatory=False, mandatory=False,
doc='') doc='')
RANGE_RUBBERSHEET_FILENAME = Component.Parameter('rangeRubbersheetFilename',
public_name='range Rubbersheet Image Name',
default = 'range_sheet.off',
type=str,
mandatory=False,
doc='')
# End of modification
MISREG_FILENAME = Component.Parameter('misregFilename', MISREG_FILENAME = Component.Parameter('misregFilename',
public_name='misreg file name', public_name='misreg file name',
default='misreg', default='misreg',
@ -346,14 +353,21 @@ DENSE_OFFSET_FILENAME = Component.Parameter('denseOffsetFilename',
type=str, type=str,
mandatory=False, mandatory=False,
doc='file name of dense offsets computed from cross correlating two SLC images') doc='file name of dense offsets computed from cross correlating two SLC images')
# Modified by V. Brancato 10.07.2019
FILT_AZIMUTH_OFFSET_FILENAME = Component.Parameter('filtAzimuthOffsetFilename', FILT_AZIMUTH_OFFSET_FILENAME = Component.Parameter('filtAzimuthOffsetFilename',
public_name='filtered azimuth offset filename', public_name='filtered azimuth offset filename',
default='filtAzimuth.off', default='filtAzimuth.off',
type=str, type=str,
mandatory=False, mandatory=False,
doc='Filtered azimuth dense offsets') doc='Filtered azimuth dense offsets')
FILT_RANGE_OFFSET_FILENAME = Component.Parameter('filtRangeOffsetFilename',
public_name='filtered range offset filename',
default='filtRange.off',
type=str,
mandatory=False,
doc='Filtered range dense offsets')
# End of modification
DISPERSIVE_FILENAME = Component.Parameter('dispersiveFilename', DISPERSIVE_FILENAME = Component.Parameter('dispersiveFilename',
public_name = 'dispersive phase filename', public_name = 'dispersive phase filename',
default='dispersive.bil', default='dispersive.bil',
@ -470,8 +484,10 @@ class StripmapProc(Component, FrameMixin):
LOS_FILENAME, LOS_FILENAME,
RANGE_OFFSET_FILENAME, RANGE_OFFSET_FILENAME,
AZIMUTH_OFFSET_FILENAME, AZIMUTH_OFFSET_FILENAME,
AZIMUTH_RUBBERSHEET_FILENAME, AZIMUTH_RUBBERSHEET_FILENAME, # Added by V. Brancato 10.07.2019
FILT_AZIMUTH_OFFSET_FILENAME, RANGE_RUBBERSHEET_FILENAME, # Added by V. Brancato 10.07.2019
FILT_AZIMUTH_OFFSET_FILENAME, # Added by V. Brancato 10.07.2019
FILT_RANGE_OFFSET_FILENAME, # Added by V. Brancato 10.07.2019
DENSE_OFFSET_FILENAME, DENSE_OFFSET_FILENAME,
MISREG_FILENAME, MISREG_FILENAME,
DISPERSIVE_FILENAME, DISPERSIVE_FILENAME,

View File

@ -1,14 +1,73 @@
# #
# Author: Heresh Fattahi, 2017 # Author: Heresh Fattahi, 2017
# # Modified by V. Brancato (10.2019)
# (Included flattening when rubbersheeting in range is turned on)
import isceobj import isceobj
import logging import logging
from components.stdproc.stdproc import crossmul from components.stdproc.stdproc import crossmul
from iscesys.ImageUtil.ImageUtil import ImageUtil as IU from iscesys.ImageUtil.ImageUtil import ImageUtil as IU
import os import os
import gdal
import numpy as np
logger = logging.getLogger('isce.insar.runInterferogram') logger = logging.getLogger('isce.insar.runInterferogram')
# Added by V. Brancato 10.09.2019
def write_xml(fileName,width,length,bands,dataType,scheme):
    """Describe an existing raster with an ISCE image object and render its metadata.

    An image object is created through ``isceobj.createImage()``, filled in
    with the supplied description, and then asked to render its header and
    VRT via ``renderHdr()`` and ``renderVRT()``.

    Parameters
    ----------
    fileName : file name assigned to the image object.
    width : image width passed to ``setWidth``.
    length : image length passed to ``setLength``.
    bands : value stored in the image's ``bands`` attribute.
    dataType : value stored in the image's ``dataType`` attribute.
    scheme : value stored in the image's ``scheme`` attribute.

    Returns
    -------
    None
    """
    image = isceobj.createImage()

    # Geometry and location of the raster being described.
    image.setFilename(fileName)
    image.setWidth(width)
    image.setLength(length)
    image.setAccessMode('READ')

    # Layout of the samples.
    image.bands = bands
    image.dataType = dataType
    image.scheme = scheme

    # Emit the metadata renderings for the image.
    image.renderHdr()
    image.renderVRT()
def compute_FlatEarth(self,ifgFilename,width,length,radarWavelength):
    """Apply a range-offset phase ramp, in place, to a full-resolution interferogram.

    Every row of the complex64 raster ``ifgFilename + '.full'`` is multiplied by
    ``exp(-1j * 4*pi * rangePixelSize / radarWavelength * rangeOffset)``, where
    ``rangeOffset`` is read row by row from the float64 raster
    ``<offsetsDirname>/<rangeOffsetFilename>``.

    Parameters
    ----------
    self : processing object exposing ``_insar`` / ``insar``; used to load the
        slave SLC crop product (for the range pixel size) and to locate the
        range offset file.
    ifgFilename : interferogram path without the ``.full`` suffix.
    width : number of samples per line in both rasters.
    length : number of lines in both rasters.
    radarWavelength : wavelength used to scale offsets to phase
        (presumably in the same length unit as the range pixel size -- confirm).

    Returns
    -------
    None. The interferogram file is modified in place. If the range offset
    file does not exist, the interferogram is left untouched.
    """
    info = self._insar.loadProduct(self._insar.slaveSlcCropProduct)
    rangePixelSize = info.getInstrument().getRangePixelSize()
    fact = 4 * np.pi * rangePixelSize / radarWavelength
    cJ = np.complex64(-1j)

    # Locate the range offsets
    rngOff = os.path.join(self.insar.offsetsDirname, self.insar.rangeOffsetFilename)
    print(rngOff)
    if not os.path.exists(rngOff):
        # A zero offset multiplies the interferogram by exp(0) = 1, so there
        # is nothing to do; skip allocating a full-size array of zeros.
        print('No range offsets provided')
        return

    rng2 = np.memmap(rngOff, dtype=np.float64, mode='r', shape=(length,width))

    # Open the interferogram for in-place update
    intf = np.memmap(ifgFilename+'.full', dtype=np.complex64, mode='r+', shape=(length,width))

    # Work one line at a time so only a single row is held in memory
    for ll in range(length):
        intf[ll,:] *= np.exp(cJ*fact*rng2[ll,:])

    # Make sure the modified samples reach the file before the maps are released
    intf.flush()
    del rng2
    del intf

    return
def multilook(infile, outname=None, alks=5, rlks=15): def multilook(infile, outname=None, alks=5, rlks=15):
''' '''
Take looks. Take looks.
@ -66,8 +125,9 @@ def computeCoherence(slc1name, slc2name, corname, virtual=True):
slc2.finalizeImage() slc2.finalizeImage()
return return
# Modified by V. Brancato on 10.09.2019 (added self)
def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): # Modified by V. Brancato on 11.13.2019 (added radar wavelength for low and high band flattening
def generateIgram(self,imageSlc1, imageSlc2, resampName, azLooks, rgLooks,radarWavelength):
objSlc1 = isceobj.createSlcImage() objSlc1 = isceobj.createSlcImage()
IU.copyAttributes(imageSlc1, objSlc1) IU.copyAttributes(imageSlc1, objSlc1)
objSlc1.setAccessMode('read') objSlc1.setAccessMode('read')
@ -79,8 +139,13 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
objSlc2.createImage() objSlc2.createImage()
slcWidth = imageSlc1.getWidth() slcWidth = imageSlc1.getWidth()
intWidth = int(slcWidth / rgLooks)
if not self.doRubbersheetingRange:
intWidth = int(slcWidth/rgLooks) # Modified by V. Brancato intWidth = int(slcWidth / rgLooks)
else:
intWidth = int(slcWidth)
lines = min(imageSlc1.getLength(), imageSlc2.getLength()) lines = min(imageSlc1.getLength(), imageSlc2.getLength())
if '.flat' in resampName: if '.flat' in resampName:
@ -93,7 +158,7 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
resampInt = resampName resampInt = resampName
objInt = isceobj.createIntImage() objInt = isceobj.createIntImage()
objInt.setFilename(resampInt) objInt.setFilename(resampInt+'.full')
objInt.setWidth(intWidth) objInt.setWidth(intWidth)
imageInt = isceobj.createIntImage() imageInt = isceobj.createIntImage()
IU.copyAttributes(objInt, imageInt) IU.copyAttributes(objInt, imageInt)
@ -101,28 +166,48 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
objInt.createImage() objInt.createImage()
objAmp = isceobj.createAmpImage() objAmp = isceobj.createAmpImage()
objAmp.setFilename(resampAmp) objAmp.setFilename(resampAmp+'.full')
objAmp.setWidth(intWidth) objAmp.setWidth(intWidth)
imageAmp = isceobj.createAmpImage() imageAmp = isceobj.createAmpImage()
IU.copyAttributes(objAmp, imageAmp) IU.copyAttributes(objAmp, imageAmp)
objAmp.setAccessMode('write') objAmp.setAccessMode('write')
objAmp.createImage() objAmp.createImage()
if not self.doRubbersheetingRange:
print('Rubbersheeting in range is off, interferogram is already flattened')
objCrossmul = crossmul.createcrossmul()
objCrossmul.width = slcWidth
objCrossmul.length = lines
objCrossmul.LooksDown = azLooks
objCrossmul.LooksAcross = rgLooks
objCrossmul = crossmul.createcrossmul() objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp)
objCrossmul.width = slcWidth else:
objCrossmul.length = lines # Modified by V. Brancato 10.09.2019 (added option to add Range Rubber sheet Flat-earth back)
objCrossmul.LooksDown = azLooks print('Rubbersheeting in range is on, removing flat-Earth phase')
objCrossmul.LooksAcross = rgLooks objCrossmul = crossmul.createcrossmul()
objCrossmul.width = slcWidth
objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp) objCrossmul.length = lines
objCrossmul.LooksDown = 1
objCrossmul.LooksAcross = 1
objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp)
# Remove Flat-Earth component
compute_FlatEarth(self,resampInt,intWidth,lines,radarWavelength)
# Perform Multilook
multilook(resampInt+'.full', outname=resampInt, alks=azLooks, rlks=rgLooks) #takeLooks(objAmp,azLooks,rgLooks)
multilook(resampAmp+'.full', outname=resampAmp, alks=azLooks, rlks=rgLooks) #takeLooks(objInt,azLooks,rgLooks)
#os.system('rm ' + resampInt+'.full* ' + resampAmp + '.full* ')
# End of modification
for obj in [objInt, objAmp, objSlc1, objSlc2]: for obj in [objInt, objAmp, objSlc1, objSlc2]:
obj.finalizeImage() obj.finalizeImage()
return imageInt, imageAmp return imageInt, imageAmp
def subBandIgram(self, masterSlc, slaveSlc, subBandDir): def subBandIgram(self, masterSlc, slaveSlc, subBandDir,radarWavelength):
img1 = isceobj.createImage() img1 = isceobj.createImage()
img1.load(masterSlc + '.xml') img1.load(masterSlc + '.xml')
@ -142,7 +227,7 @@ def subBandIgram(self, masterSlc, slaveSlc, subBandDir):
interferogramName = os.path.join(ifgDir , self.insar.ifgFilename) interferogramName = os.path.join(ifgDir , self.insar.ifgFilename)
generateIgram(img1, img2, interferogramName, azLooks, rgLooks) generateIgram(self,img1, img2, interferogramName, azLooks, rgLooks,radarWavelength)
return interferogramName return interferogramName
@ -175,9 +260,9 @@ def runSubBandInterferograms(self):
slaveHighBandSlc = os.path.join(coregDir , os.path.basename(slaveSlc)) slaveHighBandSlc = os.path.join(coregDir , os.path.basename(slaveSlc))
########## ##########
interferogramName = subBandIgram(self, masterLowBandSlc, slaveLowBandSlc, self.insar.lowBandSlcDirname) interferogramName = subBandIgram(self, masterLowBandSlc, slaveLowBandSlc, self.insar.lowBandSlcDirname,self.insar.lowBandRadarWavelength)
interferogramName = subBandIgram(self, masterHighBandSlc, slaveHighBandSlc, self.insar.highBandSlcDirname) interferogramName = subBandIgram(self, masterHighBandSlc, slaveHighBandSlc, self.insar.highBandSlcDirname,self.insar.highBandRadarWavelength)
def runFullBandInterferogram(self): def runFullBandInterferogram(self):
logger.info("Generating interferogram") logger.info("Generating interferogram")
@ -185,7 +270,7 @@ def runFullBandInterferogram(self):
masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct) masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct)
masterSlc = masterFrame.getImage().filename masterSlc = masterFrame.getImage().filename
if self.doRubbersheeting: if (self.doRubbersheetingRange | self.doRubbersheetingAzimuth):
slaveSlc = os.path.join(self._insar.coregDirname, self._insar.fineCoregFilename) slaveSlc = os.path.join(self._insar.coregDirname, self._insar.fineCoregFilename)
else: else:
slaveSlc = os.path.join(self._insar.coregDirname, self._insar.refinedCoregFilename) slaveSlc = os.path.join(self._insar.coregDirname, self._insar.refinedCoregFilename)
@ -211,8 +296,11 @@ def runFullBandInterferogram(self):
os.makedirs(ifgDir) os.makedirs(ifgDir)
interferogramName = os.path.join(ifgDir , self.insar.ifgFilename) interferogramName = os.path.join(ifgDir , self.insar.ifgFilename)
generateIgram(img1, img2, interferogramName, azLooks, rgLooks) info = self._insar.loadProduct(self._insar.slaveSlcCropProduct)
radarWavelength = info.getInstrument().getRadarWavelength()
generateIgram(self,img1, img2, interferogramName, azLooks, rgLooks,radarWavelength)
###Compute coherence ###Compute coherence
@ -221,7 +309,7 @@ def runFullBandInterferogram(self):
multilook(cohname+'.full', outname=cohname, alks=azLooks, rlks=rgLooks) multilook(cohname+'.full', outname=cohname, alks=azLooks, rlks=rgLooks)
###Multilook relevant geometry products ##Multilook relevant geometry products
for fname in [self.insar.latFilename, self.insar.lonFilename, self.insar.losFilename]: for fname in [self.insar.latFilename, self.insar.lonFilename, self.insar.losFilename]:
inname = os.path.join(self.insar.geometryDirname, fname) inname = os.path.join(self.insar.geometryDirname, fname)
multilook(inname + '.full', outname= inname, alks=azLooks, rlks=rgLooks) multilook(inname + '.full', outname= inname, alks=azLooks, rlks=rgLooks)

View File

@ -23,7 +23,7 @@ def runResampleSlc(self, kind='coarse'):
raise Exception('Unknown operation type {0} in runResampleSlc'.format(kind)) raise Exception('Unknown operation type {0} in runResampleSlc'.format(kind))
if kind == 'fine': if kind == 'fine':
if not self.doRubbersheeting: if not (self.doRubbersheetingRange | self.doRubbersheetingAzimuth): # Modified by V. Brancato 10.10.2019
print('Rubber sheeting not requested, skipping resampling ....') print('Rubber sheeting not requested, skipping resampling ....')
return return
@ -68,12 +68,25 @@ def runResampleSlc(self, kind='coarse'):
#Since the app is based on geometry module we expect pixel-by-pixel offset #Since the app is based on geometry module we expect pixel-by-pixel offset
#field #field
offsetsDir = self.insar.offsetsDirname offsetsDir = self.insar.offsetsDirname
rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
# Modified by V. Brancato 10.10.2019
#rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
if kind in ['coarse', 'refined']: if kind in ['coarse', 'refined']:
azname = os.path.join(offsetsDir, self.insar.azimuthOffsetFilename) azname = os.path.join(offsetsDir, self.insar.azimuthOffsetFilename)
rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
else: else:
azname = os.path.join(offsetsDir, self.insar.azimuthRubbersheetFilename) azname = os.path.join(offsetsDir, self.insar.azimuthRubbersheetFilename)
if self.doRubbersheetingRange:
print('Rubbersheeting in range is turned on, taking the cross-correlation offsets')
print('Setting Flattening to False')
rgname = os.path.join(offsetsDir, self.insar.rangeRubbersheetFilename)
flatten=False
else:
print('Rubbersheeting in range is turned off, taking range geometric offsets')
rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
flatten=True
rngImg = isceobj.createImage() rngImg = isceobj.createImage()
rngImg.load(rgname + '.xml') rngImg.load(rgname + '.xml')
rngImg.setAccessMode('READ') rngImg.setAccessMode('READ')
@ -85,8 +98,8 @@ def runResampleSlc(self, kind='coarse'):
width = rngImg.getWidth() width = rngImg.getWidth()
length = rngImg.getLength() length = rngImg.getLength()
# Modified by V. Brancato 10.10.2019
flatten = True #flatten = True
rObj.flatten = flatten rObj.flatten = flatten
rObj.outputWidth = width rObj.outputWidth = width
rObj.outputLines = length rObj.outputLines = length

View File

@ -14,7 +14,8 @@ import shelve
logger = logging.getLogger('isce.insar.runResampleSubbandSlc') logger = logging.getLogger('isce.insar.runResampleSubbandSlc')
def resampleSlc(masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir, # Modified by V. Brancato 10.14.2019 added "self" as input parameter of resampleSLC
def resampleSlc(self,masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir,
azoffname, rgoffname, azpoly = None, rgpoly = None, misreg=False): azoffname, rgoffname, azpoly = None, rgpoly = None, misreg=False):
logger.info("Resampling slave SLC") logger.info("Resampling slave SLC")
@ -56,8 +57,17 @@ def resampleSlc(masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir,
width = rngImg.getWidth() width = rngImg.getWidth()
length = rngImg.getLength() length = rngImg.getLength()
# Modified by V. Brancato on 10.14.2019 (if Rubbersheeting in range is turned on, flatten the interferogram during cross-correlation)
flatten = True if not self.doRubbersheetingRange:
print('Rubber sheeting in range is turned off, flattening the interferogram during resampling')
flatten = True
print(flatten)
else:
print('Rubber sheeting in range is turned on, flattening the interferogram during interferogram formation')
flatten=False
print(flatten)
# end of Modification
rObj.flatten = flatten rObj.flatten = flatten
rObj.outputWidth = width rObj.outputWidth = width
rObj.outputLines = length rObj.outputLines = length
@ -105,15 +115,25 @@ def runResampleSubbandSlc(self, misreg=False):
masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct) masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct)
slaveFrame = self._insar.loadProduct( self._insar.slaveSlcCropProduct) slaveFrame = self._insar.loadProduct( self._insar.slaveSlcCropProduct)
if self.doRubbersheeting: # Modified by V. Brancato 10.14.2019
print('Using rubber sheeted offsets for resampling sub-bands')
if self.doRubbersheetingAzimuth:
print('Using rubber in azimuth sheeted offsets for resampling sub-bands')
azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthRubbersheetFilename) azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthRubbersheetFilename)
else: else:
print('Using refined offsets for resampling sub-bands') print('Using refined offsets for resampling sub-bands')
azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthOffsetFilename) azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthOffsetFilename)
rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename) if self.doRubbersheetingRange:
print('Using rubber in range sheeted offsets for resampling sub-bands')
rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeRubbersheetFilename)
else:
print('Using refined offsets for resampling sub-bands')
rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename)
# ****************** End of Modification
# rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename)
azpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_az.xml') azpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_az.xml')
rgpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_rg.xml') rgpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_rg.xml')
@ -124,7 +144,7 @@ def runResampleSubbandSlc(self, misreg=False):
wvlL = self.insar.lowBandRadarWavelength wvlL = self.insar.lowBandRadarWavelength
coregDir = os.path.join(self.insar.coregDirname, self.insar.lowBandSlcDirname) coregDir = os.path.join(self.insar.coregDirname, self.insar.lowBandSlcDirname)
lowbandCoregFilename = resampleSlc(masterFrame, slaveFrame, imageSlc2, wvlL, coregDir, lowbandCoregFilename = resampleSlc(self,masterFrame, slaveFrame, imageSlc2, wvlL, coregDir,
azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly,misreg=False) azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly,misreg=False)
imageSlc2 = os.path.join(self.insar.splitSpectrumDirname, self.insar.highBandSlcDirname, imageSlc2 = os.path.join(self.insar.splitSpectrumDirname, self.insar.highBandSlcDirname,
@ -132,7 +152,7 @@ def runResampleSubbandSlc(self, misreg=False):
wvlH = self.insar.highBandRadarWavelength wvlH = self.insar.highBandRadarWavelength
coregDir = os.path.join(self.insar.coregDirname, self.insar.highBandSlcDirname) coregDir = os.path.join(self.insar.coregDirname, self.insar.highBandSlcDirname)
highbandCoregFilename = resampleSlc(masterFrame, slaveFrame, imageSlc2, wvlH, coregDir, highbandCoregFilename = resampleSlc(self,masterFrame, slaveFrame, imageSlc2, wvlH, coregDir,
azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly, misreg=False) azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly, misreg=False)
self.insar.lowBandSlc2 = lowbandCoregFilename self.insar.lowBandSlc2 = lowbandCoregFilename

View File

@ -168,6 +168,7 @@ def runRubbersheet(self):
# filtAzOffsetFile to it. # filtAzOffsetFile to it.
resampleOffset(filtAzOffsetFile, geometryAzimuthOffset, sheetOffset) resampleOffset(filtAzOffsetFile, geometryAzimuthOffset, sheetOffset)
print("I'm here")
return None return None

View File

@ -0,0 +1,276 @@
#
# Author: Heresh Fattahi
# Copyright 2017
#
# Modified by V. Brancato
# Included offset filtering with no SNR
#
import isce
import isceobj
from osgeo import gdal
from scipy import ndimage
from astropy.convolution import convolve
import numpy as np
import os
def mask_filterNoSNR(denseOffsetFile,filterSize,outName):
    """
    Mask, fill and median-filter the azimuth dense offsets without using SNR.

    Both bands of denseOffsetFile + '.vrt' are read (band 1 is used as the
    azimuth offset, band 2 as the range offset). Unreliable pixels are
    rejected with off_masking (threshold 3), the rejected azimuth pixels are
    refilled with fill_with_smoothed, and the result is median filtered and
    written to outName together with a single-band FLOAT ISCE header.

    Input:
        denseOffsetFile: path of the dense offset file (without '.vrt')
        filterSize: window size used for masking, filling and median filtering
        outName: path of the filtered azimuth offset file to write
    """
    # Open the offsets
    ds = gdal.Open(denseOffsetFile+'.vrt',gdal.GA_ReadOnly)
    off_az = ds.GetRasterBand(1).ReadAsArray()
    off_rg = ds.GetRasterBand(2).ReadAsArray()
    ds = None

    # Zero the missing values from ampcor (anything below -9999) so that the
    # exact-zero test in off_masking rejects them.
    off_rg[off_rg < -9999] = 0
    off_az[off_az < -9999] = 0

    # Store the offsets in a complex variable (real = range, imag = azimuth)
    off = off_rg + 1j*off_az

    # Mask pixels that deviate from the local median by more than the threshold
    mask = off_masking(off,filterSize,thre=3)
    xoff_masked = np.ma.array(off.real,mask=mask)
    yoff_masked = np.ma.array(off.imag,mask=mask)

    # Release variables that are no longer needed
    mask = None
    off = None

    # Remove residual noisy spots: also reject pixels whose 3x3 median of the
    # zero-filled range or azimuth map is exactly zero.
    yoff_masked.mask = yoff_masked.mask | \
        (ndimage.median_filter(xoff_masked.filled(fill_value=0),3) == 0) | \
        (ndimage.median_filter(yoff_masked.filled(fill_value=0),3) == 0)

    # Fill the rejected pixels by iteratively using smoothed values
    data = yoff_masked.data
    data[yoff_masked.mask]=np.nan
    off_az_filled = fill_with_smoothed(data,filterSize)

    # Apply median filter to smooth the azimuth offset map
    off_az_filled = ndimage.median_filter(off_az_filled,filterSize)

    length, width = off_az_filled.shape

    # writing the masked and filtered offsets to a file
    print ('writing masked and filtered offsets to: ', outName)

    ##Write array to offsetfile
    # NOTE(review): the header below declares FLOAT; this assumes the offsets
    # were read as 32-bit floats -- confirm against the dense offset product.
    off_az_filled.tofile(outName)

    # write the xml file
    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    # Record the number of lines as well, as resampleOffset does for its outputs
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'FLOAT'
    img.scheme = 'BIP'
    img.renderHdr()

    return
def off_masking(off,filterSize,thre=2):
    """
    Build the boolean rejection mask for a complex offset map.

    A pixel is flagged (True) when its real or imaginary part differs from
    the median of its filterSize neighbourhood by more than thre, or when
    either part is exactly zero.

    Input:
        off: complex array holding the two offset components
        filterSize: size of the median-filter window
        thre: maximum accepted deviation from the local median
    Output:
        Boolean array with the shape of off; True marks rejected pixels.
    """
    realPart = off.real
    imagPart = off.imag

    # Deviation of each component from its own local median
    realOutlier = np.abs(realPart - ndimage.median_filter(realPart, filterSize)) > thre
    imagOutlier = np.abs(imagPart - ndimage.median_filter(imagPart, filterSize)) > thre

    # Exact zeros are treated as invalid in either component
    hasZero = (imagPart == 0) | (realPart == 0)

    return realOutlier | imagOutlier | hasZero
def fill(data, invalid=None):
    """
    Overwrite the invalid cells of 'data' with the value of the closest
    valid cell.

    Input:
        data:    numpy array of any dimension
        invalid: boolean array with the same shape as 'data'; True marks
                 the cells to overwrite.
                 If None (default), np.isnan(data) is used.

    Output:
        Return a filled array.
    """
    toReplace = np.isnan(data) if invalid is None else invalid

    # Only the index of the nearest unflagged cell is needed, not the distance
    nearestValid = ndimage.distance_transform_edt(
        toReplace, return_distances=False, return_indices=True)

    return data[tuple(nearestValid)]
def mask_filter(denseOffsetFile, snrFile, band, snrThreshold, filterSize, outName):
    """
    Mask one band of the dense offsets by SNR, fill the gaps and median filter.

    Pixels whose SNR is below snrThreshold are replaced by the value of the
    nearest remaining pixel (see fill), the map is median filtered and the
    result is written to outName with a single-band FLOAT ISCE header.

    Input:
        denseOffsetFile: path of the dense offset file (without '.vrt')
        snrFile: path of the SNR file (without '.vrt'); its first band is used
        band: 1-based band of denseOffsetFile to process
        snrThreshold: pixels with a lower SNR are masked
        filterSize: size of the median-filter window
        outName: path of the filtered offset file to write
    """
    ##Read in the offset file
    ds = gdal.Open(denseOffsetFile + '.vrt', gdal.GA_ReadOnly)
    Offset = ds.GetRasterBand(band).ReadAsArray()
    ds = None

    ##Read in the SNR file
    ds = gdal.Open(snrFile + '.vrt', gdal.GA_ReadOnly)
    snr = ds.GetRasterBand(1).ReadAsArray()
    ds = None

    # Masking the dense offsets based on SNR
    print ('masking the dense offsets with SNR threshold: ', snrThreshold)
    Offset[snr<snrThreshold]=np.nan

    # Fill the masked region using valid neighboring pixels
    Offset = fill(Offset)

    # Median filtering the masked offsets
    print ('Filtering with median filter with size : ', filterSize)
    Offset = ndimage.median_filter(Offset, size=filterSize)
    length, width = Offset.shape

    # writing the masked and filtered offsets to a file
    print ('writing masked and filtered offsets to: ', outName)

    ##Write array to offsetfile
    Offset.tofile(outName)

    # write the xml file
    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    # Record the number of lines as well, as resampleOffset does for its outputs
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'FLOAT'
    img.scheme = 'BIP'
    img.renderHdr()

    return None
def fill_with_smoothed(off,filterSize,maxIterations=100):
    """
    Iteratively replace the NaN cells of 'off' with locally smoothed values.

    On every pass the array is convolved with a filterSize x filterSize
    averaging kernel (NaN cells are interpolated by the convolution) and the
    cells that were NaN take the smoothed value. Smoothed values that are
    exactly zero are set back to NaN so that a later pass refills them.
    Iteration stops when no NaN is left or after maxIterations passes.

    Input:
        off: 2D float array whose NaN cells have to be filled (not modified)
        filterSize: side of the square averaging kernel
        maxIterations: upper bound on the number of passes (default 100)
    Output:
        A filled copy of 'off'; NaNs can remain if the bound is reached.
    """
    off_2filt=np.copy(off)
    kernel = np.ones((filterSize,filterSize),np.float32)/(filterSize*filterSize)
    loop = 0
    cnt2=1

    # 'and', not '&': the bitwise operator binds tighter than the comparisons,
    # so 'cnt2!=0 & loop<100' reduced to 'cnt2 != 0' and the bound was ignored.
    while cnt2 != 0 and loop < maxIterations:
        loop += 1
        idx2= np.isnan(off_2filt)
        cnt2 = np.count_nonzero(idx2)
        print(cnt2)

        if cnt2 != 0:
            off_filt= convolve(off_2filt,kernel,boundary='extend',nan_treatment='interpolate')
            off_2filt[idx2]=off_filt[idx2]
            # Exact zeros in the smoothed map are flagged again for refilling
            idx3 = np.where(off_filt == 0)
            off_2filt[idx3]=np.nan
            off_filt=None

    return off_2filt
def resampleOffset(maskedFiltOffset, geometryOffset, outName):
    '''
    Oversample offset and add.

    The filtered offset map is oversampled with gdal_translate to the size of
    the geometry offset image, then added to it line by line. The sum is
    written to outName with a single-band DOUBLE ISCE header.

    Input:
        maskedFiltOffset: path of the filtered offsets (its '.vrt' is read);
                          the oversampled copy is written next to it with
                          the suffix '.resampled'
        geometryOffset: path of the geometry offset image (its '.xml' is read)
        outName: path of the output offset file

    Raises:
        subprocess.CalledProcessError: if gdal_translate exits with an error.
    '''
    from imageMath import IML
    import logging
    import subprocess

    resampledOffset = maskedFiltOffset + ".resampled"

    inimg = isceobj.createImage()
    inimg.load(geometryOffset + '.xml')
    length = inimg.getLength()
    width = inimg.getWidth()

    ###Currently making the assumption that top left of dense offsets and interferograms are the same.
    ###This is not true for now. We need to update DenseOffsets to have the ability to have same top left
    ###as the input images. Once that is implemented, the math here should all be consistent.
    ###However, this is not too far off since the skip for doing dense offsets is generally large.
    ###The offset is not too large to worry about right now. If the skip is decreased, this could be an issue.

    print('oversampling the filtered and masked offsets to the width and length:', width, ' ', length )

    # Argument list instead of a shell string: paths with spaces survive and a
    # failed conversion raises here rather than surfacing later as a missing file.
    cmd = ['gdal_translate', '-of', 'ENVI', '-ot', 'Float64',
           '-outsize', str(width), str(length),
           maskedFiltOffset + '.vrt', resampledOffset]
    print(' '.join(cmd))
    subprocess.run(cmd, check=True)

    img = isceobj.createImage()
    img.setFilename(resampledOffset)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    ###Adding the geometry offset and oversampled offset
    geomoff = IML.mmapFromISCE(geometryOffset, logging)
    osoff = IML.mmapFromISCE(resampledOffset, logging)

    # Binary mode, and closed even if a line fails to write
    with open(outName, 'wb') as fid:
        for ll in range(length):
            val = geomoff.bands[0][ll,:] + osoff.bands[0][ll,:]
            val.tofile(fid)

    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    return None
def runRubbersheetAzimuth(self):
    """
    Build the azimuth rubber-sheet offsets from the dense offsets.

    Does nothing when azimuth rubber sheeting is not requested. Otherwise the
    azimuth band of the dense offsets is masked and filtered (SNR-based mask
    when range rubber sheeting is off, data-based mask when it is on), and the
    filtered map is oversampled and added to the geometric azimuth offsets.
    """
    if not self.doRubbersheetingAzimuth:
        print('Rubber sheeting in azimuth not requested ... skipping')
        return

    # Dense offsets computed from cross-correlation, and the matching SNR file
    offsetStem = os.path.join(self.insar.denseOffsetsDirname , self.insar.denseOffsetFilename)
    snrPath = offsetStem + "_snr.bil"
    offsetPath = offsetStem + ".bil"

    # The azimuth offsets are the first band of the dense offset file
    azimuthBand = 1

    minSnr = self.rubberSheetSNRThreshold
    window = self.rubberSheetFilterSize
    filteredPath = os.path.join(self.insar.denseOffsetsDirname, self._insar.filtAzimuthOffsetFilename)

    # Masking and median filtering of the dense offsets
    if self.doRubbersheetingRange:
        print('Rubber sheeting in range is on, filtering the offsets with data-based mask')
        mask_filterNoSNR(offsetPath, window, filteredPath)
    else:
        print('Rubber sheeting in range is off, filtering the offsets with a SNR-based mask')
        mask_filter(offsetPath, snrPath, azimuthBand, minSnr, window, filteredPath)

    # Oversample the filtered offsets to the size of the geometric azimuth
    # offsets and write their sum as the rubber-sheeted azimuth offsets.
    geometryDir = self.insar.offsetsDirname
    geometricPath = os.path.join(geometryDir, self.insar.azimuthOffsetFilename)
    sheetPath = os.path.join(geometryDir, self.insar.azimuthRubbersheetFilename)
    resampleOffset(filteredPath, geometricPath, sheetPath)

    return None

View File

@ -0,0 +1,279 @@
#
# Author: Heresh Fattahi
# Copyright 2017
#
# Modified by V. Brancato (10.12.2019)
# Including offset filtering with no SNR masking
#
import isce
import isceobj
from osgeo import gdal
from scipy import ndimage
import numpy as np
import os
from astropy.convolution import convolve
def mask_filterNoSNR(denseOffsetFile,filterSize,outName):
    """
    Mask, fill and median-filter the range dense offsets without using SNR.

    Both bands of denseOffsetFile + '.vrt' are read (band 1 is used as the
    azimuth offset, band 2 as the range offset). Unreliable pixels are
    rejected with off_masking (threshold 3), the rejected range pixels are
    refilled with fill_with_smoothed, and the result is median filtered and
    written to outName together with a single-band FLOAT ISCE header.

    Input:
        denseOffsetFile: path of the dense offset file (without '.vrt')
        filterSize: window size used for masking, filling and median filtering
        outName: path of the filtered range offset file to write
    """
    # Open the offsets
    ds = gdal.Open(denseOffsetFile+'.vrt',gdal.GA_ReadOnly)
    off_az = ds.GetRasterBand(1).ReadAsArray()
    off_rg = ds.GetRasterBand(2).ReadAsArray()
    ds = None

    # Zero the values reported as missing data by ampcor (anything below
    # -9999) so that the exact-zero test in off_masking rejects them.
    off_rg[off_rg < -9999] = 0
    off_az[off_az < -9999] = 0

    # Store the offsets in a complex variable (real = range, imag = azimuth)
    off = off_rg + 1j*off_az

    # Mask pixels that deviate from the local median by more than the threshold
    mask = off_masking(off,filterSize,thre=3)
    xoff_masked = np.ma.array(off.real,mask=mask)
    yoff_masked = np.ma.array(off.imag,mask=mask)

    # Release variables that are no longer needed
    mask = None
    off = None

    # Remove residual noisy spots: also reject pixels whose 3x3 median of the
    # zero-filled range or azimuth map is exactly zero.
    xoff_masked.mask = xoff_masked.mask | \
        (ndimage.median_filter(xoff_masked.filled(fill_value=0),3) == 0) | \
        (ndimage.median_filter(yoff_masked.filled(fill_value=0),3) == 0)

    # Fill the range offset map iteratively with smoothed values
    data = xoff_masked.data
    data[xoff_masked.mask]=np.nan
    off_rg_filled = fill_with_smoothed(data,filterSize)

    # Apply the median filter on the offset
    off_rg_filled = ndimage.median_filter(off_rg_filled,filterSize)

    length, width = off_rg_filled.shape

    # writing the masked and filtered offsets to a file
    print ('writing masked and filtered offsets to: ', outName)

    ##Write array to offsetfile
    # NOTE(review): the header below declares FLOAT; this assumes the offsets
    # were read as 32-bit floats -- confirm against the dense offset product.
    off_rg_filled.tofile(outName)

    # write the xml file
    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    # Record the number of lines as well, as resampleOffset does for its outputs
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'FLOAT'
    img.scheme = 'BIP'
    img.renderHdr()

    return
def off_masking(off,filterSize,thre=2):
    """
    Build the boolean rejection mask for a complex offset map.

    A pixel is flagged (True) when its real or imaginary part differs from
    the median of its filterSize neighbourhood by more than thre, or when
    either part is exactly zero.

    Input:
        off: complex array holding the two offset components
        filterSize: size of the median-filter window
        thre: maximum accepted deviation from the local median
    Output:
        Boolean array with the shape of off; True marks rejected pixels.
    """
    realPart = off.real
    imagPart = off.imag

    # Deviation of each component from its own local median
    realOutlier = np.abs(realPart - ndimage.median_filter(realPart, filterSize)) > thre
    imagOutlier = np.abs(imagPart - ndimage.median_filter(imagPart, filterSize)) > thre

    # Exact zeros are treated as invalid in either component
    hasZero = (imagPart == 0) | (realPart == 0)

    return realOutlier | imagOutlier | hasZero
def fill(data, invalid=None):
    """
    Overwrite the invalid cells of 'data' with the value of the closest
    valid cell.

    Input:
        data:    numpy array of any dimension
        invalid: boolean array with the same shape as 'data'; True marks
                 the cells to overwrite.
                 If None (default), np.isnan(data) is used.

    Output:
        Return a filled array.
    """
    toReplace = np.isnan(data) if invalid is None else invalid

    # Only the index of the nearest unflagged cell is needed, not the distance
    nearestValid = ndimage.distance_transform_edt(
        toReplace, return_distances=False, return_indices=True)

    return data[tuple(nearestValid)]
def fill_with_smoothed(off,filterSize,maxIterations=100):
    """
    Iteratively replace the NaN cells of 'off' with locally smoothed values.

    On every pass the array is convolved with a filterSize x filterSize
    averaging kernel (NaN cells are interpolated by the convolution) and the
    cells that were NaN take the smoothed value. Smoothed values that are
    exactly zero are set back to NaN so that a later pass refills them.
    Iteration stops when no NaN is left or after maxIterations passes.

    Input:
        off: 2D float array whose NaN cells have to be filled (not modified)
        filterSize: side of the square averaging kernel
        maxIterations: upper bound on the number of passes (default 100)
    Output:
        A filled copy of 'off'; NaNs can remain if the bound is reached.
    """
    off_2filt=np.copy(off)
    kernel = np.ones((filterSize,filterSize),np.float32)/(filterSize*filterSize)
    loop = 0
    cnt2=1

    # 'and', not '&': the bitwise operator binds tighter than the comparisons,
    # so 'cnt2 !=0 & loop<100' reduced to 'cnt2 != 0' and the bound was ignored.
    while cnt2 != 0 and loop < maxIterations:
        loop += 1
        idx2= np.isnan(off_2filt)
        cnt2 = np.count_nonzero(idx2)
        print(cnt2)

        if cnt2 != 0:
            off_filt= convolve(off_2filt,kernel,boundary='extend',nan_treatment='interpolate')
            off_2filt[idx2]=off_filt[idx2]
            # Exact zeros in the smoothed map are flagged again for refilling
            idx3 = np.where(off_filt == 0)
            off_2filt[idx3]=np.nan
            off_filt=None

    return off_2filt
def mask_filter(denseOffsetFile, snrFile, band, snrThreshold, filterSize, outName):
    """
    Mask one band of the dense offsets by SNR, fill the gaps and median filter.

    Pixels whose SNR is below snrThreshold are replaced by the value of the
    nearest remaining pixel (see fill), the map is median filtered and the
    result is written to outName with a single-band FLOAT ISCE header.

    Input:
        denseOffsetFile: path of the dense offset file (without '.vrt')
        snrFile: path of the SNR file (without '.vrt'); its first band is used
        band: 1-based band of denseOffsetFile to process
        snrThreshold: pixels with a lower SNR are masked
        filterSize: size of the median-filter window
        outName: path of the filtered offset file to write
    """
    ##Read in the offset file
    ds = gdal.Open(denseOffsetFile + '.vrt', gdal.GA_ReadOnly)
    Offset = ds.GetRasterBand(band).ReadAsArray()
    ds = None

    ##Read in the SNR file
    ds = gdal.Open(snrFile + '.vrt', gdal.GA_ReadOnly)
    snr = ds.GetRasterBand(1).ReadAsArray()
    ds = None

    # Masking the dense offsets based on SNR
    print ('masking the dense offsets with SNR threshold: ', snrThreshold)
    Offset[snr<snrThreshold]=np.nan

    # Fill the masked region using valid neighboring pixels
    Offset = fill(Offset)

    # Median filtering the masked offsets
    print ('Filtering with median filter with size : ', filterSize)
    Offset = ndimage.median_filter(Offset, size=filterSize)
    length, width = Offset.shape

    # writing the masked and filtered offsets to a file
    print ('writing masked and filtered offsets to: ', outName)

    ##Write array to offsetfile
    Offset.tofile(outName)

    # write the xml file
    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    # Record the number of lines as well, as resampleOffset does for its outputs
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'FLOAT'
    img.scheme = 'BIP'
    img.renderHdr()

    return None
def resampleOffset(maskedFiltOffset, geometryOffset, outName):
    '''
    Oversample offset and add.

    The filtered offset map is oversampled with gdal_translate to the size of
    the geometry offset image, then added to it line by line. The sum is
    written to outName with a single-band DOUBLE ISCE header.

    Input:
        maskedFiltOffset: path of the filtered offsets (its '.vrt' is read);
                          the oversampled copy is written next to it with
                          the suffix '.resampled'
        geometryOffset: path of the geometry offset image (its '.xml' is read)
        outName: path of the output offset file

    Raises:
        subprocess.CalledProcessError: if gdal_translate exits with an error.
    '''
    from imageMath import IML
    import logging
    import subprocess

    resampledOffset = maskedFiltOffset + ".resampled"

    inimg = isceobj.createImage()
    inimg.load(geometryOffset + '.xml')
    length = inimg.getLength()
    width = inimg.getWidth()

    ###Currently making the assumption that top left of dense offsets and interferograms are the same.
    ###This is not true for now. We need to update DenseOffsets to have the ability to have same top left
    ###as the input images. Once that is implemented, the math here should all be consistent.
    ###However, this is not too far off since the skip for doing dense offsets is generally large.
    ###The offset is not too large to worry about right now. If the skip is decreased, this could be an issue.

    print('oversampling the filtered and masked offsets to the width and length:', width, ' ', length )

    # Argument list instead of a shell string: paths with spaces survive and a
    # failed conversion raises here rather than surfacing later as a missing file.
    cmd = ['gdal_translate', '-of', 'ENVI', '-ot', 'Float64',
           '-outsize', str(width), str(length),
           maskedFiltOffset + '.vrt', resampledOffset]
    print(' '.join(cmd))
    subprocess.run(cmd, check=True)

    img = isceobj.createImage()
    img.setFilename(resampledOffset)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    ###Adding the geometry offset and oversampled offset
    geomoff = IML.mmapFromISCE(geometryOffset, logging)
    osoff = IML.mmapFromISCE(resampledOffset, logging)

    # Binary mode, and closed even if a line fails to write
    with open(outName, 'wb') as fid:
        for ll in range(length):
            val = geomoff.bands[0][ll,:] + osoff.bands[0][ll,:]
            val.tofile(fid)

    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    return None
def runRubbersheetRange(self):
    """
    Build the range rubber-sheet offsets from the dense offsets.

    Does nothing when range rubber sheeting is not requested. Otherwise the
    dense offsets are masked with the data-based (no SNR) mask and filtered,
    and the filtered range map is oversampled and added to the geometric
    range offsets.
    """
    if not self.doRubbersheetingRange:
        # This guard tests the range switch, so the message must say "range"
        print('Rubber sheeting in range not requested ... skipping')
        return

    # denseOffset file name computed from cross-correlation
    denseOffsetFile = os.path.join(self.insar.denseOffsetsDirname , self.insar.denseOffsetFilename)
    denseOffsetFile = denseOffsetFile + ".bil"

    filterSize = self.rubberSheetFilterSize
    filtRgOffsetFile = os.path.join(self.insar.denseOffsetsDirname, self._insar.filtRangeOffsetFilename)

    # masking and median filtering the dense offsets.
    # Range rubber sheeting is necessarily on past the guard above, so the
    # data-based mask is always used; the former SNR-masking branch
    # ('if not self.doRubbersheetingRange') could never run and was removed.
    print('Rubber sheeting in range is on, applying a data-based offsets-masking')
    mask_filterNoSNR(denseOffsetFile,filterSize,filtRgOffsetFile)

    # range offsets computed from geometry
    offsetsDir = self.insar.offsetsDirname
    geometryRangeOffset = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
    RgsheetOffset = os.path.join(offsetsDir, self.insar.rangeRubbersheetFilename)

    # oversampling the filtRgOffsetFile to the same size of geometryRangeOffset
    # and then update the geometryRangeOffset by adding the oversampled
    # filtRgOffsetFile to it.
    resampleOffset(filtRgOffsetFile, geometryRangeOffset, RgsheetOffset)

    return None

View File

@ -54,7 +54,7 @@ class snaphu(Component):
self.azimuthLooks = obj.insar.topo.numberAzimuthLooks self.azimuthLooks = obj.insar.topo.numberAzimuthLooks
azres = obj.insar.masterFrame.platform.antennaLength/2.0 azres = obj.insar.masterFrame.platform.antennaLength/2.0
azfact = obj.insar.topo.numberAzimuthLooks *azres / obj.insar.topo.azimuthSpacing azfact = azres / obj.insar.topo.azimuthSpacing
rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope
rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW)) rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW))

View File

@ -54,7 +54,7 @@ class snaphu_mcf(Component):
self.azimuthLooks = obj.insar.topo.numberAzimuthLooks self.azimuthLooks = obj.insar.topo.numberAzimuthLooks
azres = obj.insar.masterFrame.platform.antennaLength/2.0 azres = obj.insar.masterFrame.platform.antennaLength/2.0
azfact = obj.insar.topo.numberAzimuthLooks *azres / obj.insar.topo.azimuthSpacing azfact = azres / obj.insar.topo.azimuthSpacing
rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope
rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW)) rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW))

View File

@ -2,19 +2,19 @@
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Copyright 2010 California Institute of Technology. ALL RIGHTS RESERVED. # Copyright 2010 California Institute of Technology. ALL RIGHTS RESERVED.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
# United States Government Sponsorship acknowledged. This software is subject to # United States Government Sponsorship acknowledged. This software is subject to
# U.S. export control laws and regulations and has been classified as 'EAR99 NLR' # U.S. export control laws and regulations and has been classified as 'EAR99 NLR'
# (No [Export] License Required except when exporting to an embargoed country, # (No [Export] License Required except when exporting to an embargoed country,
@ -49,7 +49,7 @@ if envGPUampcor['GPU_ACC_ENABLED']:
build_base += "-ccbin " + envGPUampcor['NVCC_CCBIN'] + " " build_base += "-ccbin " + envGPUampcor['NVCC_CCBIN'] + " "
else: else:
print('Assuming default system compiler for nvcc.') print('Assuming default system compiler for nvcc.')
build_base += "-arch=sm_35 -shared -Xcompiler -fPIC -O3 " build_base += "-shared -Xcompiler -fPIC -O3 "
build_cmd = build_base + "-dc -m64 -o $TARGET -c $SOURCE" build_cmd = build_base + "-dc -m64 -o $TARGET -c $SOURCE"
built_path = os.path.join(build, 'gpu-ampcor.o') built_path = os.path.join(build, 'gpu-ampcor.o')
linked_path = os.path.join(build, 'gpu-ampcor-linked.o') linked_path = os.path.join(build, 'gpu-ampcor-linked.o')

View File

@ -1,2 +1,2 @@
nvcc -arch=sm_35 -Xcompiler -fPIC -o gpu-topo.o -c Topo.cu nvcc -Xcompiler -fPIC -o gpu-topo.o -c Topo.cu
cp -f gpu-topo.o .. cp -f gpu-topo.o ..

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
import os import os
@ -28,7 +28,7 @@ if envPyCuAmpcor['GPU_ACC_ENABLED']:
if not os.path.exists(initFile): if not os.path.exists(initFile):
with open(initFile, 'w') as fout: with open(initFile, 'w') as fout:
fout.write("#!/usr/bin/env python") fout.write("#!/usr/bin/env python3")
listFiles = [initFile] listFiles = [initFile]
envPyCuAmpcor.Install(install, listFiles) envPyCuAmpcor.Install(install, listFiles)

View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
#
# Test program to run ampcor with GPU
# For two GeoTiff images
#
import argparse
import numpy as np
from PyCuAmpcor import PyCuAmpcor
def main():
    '''
    main program

    Configure a PyCuAmpcor processor for two real-valued GeoTiff images
    (master.tif / slave.tif) and run the GPU offset estimation.
    '''
    objOffset = PyCuAmpcor() # create the processor

    objOffset.algorithm = 0 # cross-correlation method 0=freq 1=time
    objOffset.deviceID = 0 # GPU device id to be used
    objOffset.nStreams = 2 # cudaStreams; multiple streams to overlap data transfer with gpu calculations
    objOffset.masterImageName = "master.tif"
    objOffset.masterImageHeight = 16480 # RasterYSize
    objOffset.masterImageWidth = 17000 # RasterXSize
    objOffset.slaveImageName = "slave.tif"
    objOffset.slaveImageHeight = 16480
    objOffset.slaveImageWidth = 17000
    objOffset.windowSizeWidth = 64 # template window size
    objOffset.windowSizeHeight = 64
    objOffset.halfSearchRangeDown = 20 # search range
    objOffset.halfSearchRangeAcross = 20
    objOffset.derampMethod = 1 # deramping for complex signal, set to 1 for real images

    objOffset.skipSampleDown = 128 # strides between windows
    objOffset.skipSampleAcross = 64
    # gpu processes several windows in one batch/Chunk
    # total windows in Chunk = numberWindowDownInChunk*numberWindowAcrossInChunk
    # the max number of windows depending on gpu memory and type
    objOffset.numberWindowDownInChunk = 1
    objOffset.numberWindowAcrossInChunk = 10
    objOffset.corrSurfaceOverSamplingFactor = 8 # oversampling factor for correlation surface
    objOffset.corrSurfaceZoomInWindow = 16 # area in correlation surface to be oversampled
    # NOTE(review): 'corrSuface' is kept as written; presumably it matches the
    # attribute name exposed by PyCuAmpcor -- confirm before renaming.
    objOffset.corrSufaceOverSamplingMethod = 1 # fft or sinc oversampler
    objOffset.useMmap = 1 # default using memory map as buffer, if having troubles, set to 0
    objOffset.mmapSize = 1 # mmap or buffer size used for transferring data from file to gpu, in GB

    objOffset.numberWindowDown = 40 # number of windows to be processed
    objOffset.numberWindowAcross = 100
    # if to process the whole image; some math needs to be done
    # margin = 0 # margins to be neglected
    #objOffset.numberWindowDown = (objOffset.slaveImageHeight - 2*margin - 2*objOffset.halfSearchRangeDown - objOffset.windowSizeHeight) // objOffset.skipSampleDown
    #objOffset.numberWindowAcross = (objOffset.slaveImageWidth - 2*margin - 2*objOffset.halfSearchRangeAcross - objOffset.windowSizeWidth) // objOffset.skipSampleAcross

    objOffset.setupParams()
    objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown # starting pixel offset
    # The across start follows the across search range. It was initialised
    # from halfSearchRangeDown, which only worked because both are 20 here.
    objOffset.masterStartPixelAcrossStatic = objOffset.halfSearchRangeAcross
    objOffset.setConstantGrossOffset(0, 0) # gross offset between master and slave images
    objOffset.checkPixelInImageRange() # presumably checks that the windows stay inside the images
    objOffset.runAmpcor()
if __name__ == '__main__':

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# #
# test_cuAmpcor.py # test_cuAmpcor.py
# Test program to run ampcor with GPU # Test program to run ampcor with GPU
# #
# #
import argparse import argparse
import numpy as np import numpy as np
#from PyCuAmpcor import PyCuAmpcor from PyCuAmpcor import PyCuAmpcor
from isce.components.contrib.PyCuAmpcor import PyCuAmpcor
def main(): def main():
''' '''
@ -20,10 +20,10 @@ def main():
objOffset.algorithm = 0 objOffset.algorithm = 0
objOffset.deviceID = 0 # -1:let system find the best GPU objOffset.deviceID = 0 # -1:let system find the best GPU
objOffset.nStreams = 2 #cudaStreams objOffset.nStreams = 2 #cudaStreams
objOffset.masterImageName = "master.slc" objOffset.masterImageName = "20131213.slc.vrt"
objOffset.masterImageHeight = 43008 objOffset.masterImageHeight = 43008
objOffset.masterImageWidth = 24320 objOffset.masterImageWidth = 24320
objOffset.slaveImageName = "slave.slc" objOffset.slaveImageName = "20131221.slc.vrt"
objOffset.slaveImageHeight = 43008 objOffset.slaveImageHeight = 43008
objOffset.slaveImageWidth = 24320 objOffset.slaveImageWidth = 24320
objOffset.windowSizeWidth = 64 objOffset.windowSizeWidth = 64
@ -38,8 +38,9 @@ def main():
objOffset.numberWindowDownInChunk = 10 objOffset.numberWindowDownInChunk = 10
objOffset.numberWindowAcrossInChunk = 10 objOffset.numberWindowAcrossInChunk = 10
objOffset.corrSurfaceOverSamplingFactor = 8 objOffset.corrSurfaceOverSamplingFactor = 8
objOffset.corrSurfaceZoomInWindow = 16 objOffset.corrSurfaceZoomInWindow = 16
objOffset.corrSufaceOverSamplingMethod = 1 objOffset.corrSufaceOverSamplingMethod = 1
objOffset.useMmap = 1
objOffset.mmapSize = 8 objOffset.mmapSize = 8
objOffset.setupParams() objOffset.setupParams()
@ -48,8 +49,8 @@ def main():
objOffset.setConstantGrossOffset(642, -30) objOffset.setConstantGrossOffset(642, -30)
objOffset.checkPixelInImageRange() objOffset.checkPixelInImageRange()
objOffset.runAmpcor() objOffset.runAmpcor()
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -1,27 +1,27 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# #
from PyCuAmpcor import PyCuAmpcor from PyCuAmpcor import PyCuAmpcor
import numpy as np import numpy as np
def main(): def main():
''' '''
Set parameters manually and run ampcor Set parameters manually and run ampcor
''' '''
objOffset = PyCuAmpcor() objOffset = PyCuAmpcor()
#step 1 set constant parameters #step 1 set constant parameters
objOffset.masterImageName = "master.slc" objOffset.masterImageName = "master.slc.vrt"
objOffset.masterImageHeight = 128 objOffset.masterImageHeight = 128
objOffset.masterImageWidth = 128 objOffset.masterImageWidth = 128
objOffset.slaveImageName = "slave.slc" objOffset.slaveImageName = "slave.slc.vrt"
objOffset.masterImageHeight = 128 objOffset.masterImageHeight = 128
objOffset.masterImageWidth = 128 objOffset.masterImageWidth = 128
objOffset.skipSampleDown = 2 objOffset.skipSampleDown = 2
objOffset.skipSampleAcross = 2 objOffset.skipSampleAcross = 2
objOffset.windowSizeHeight = 16 objOffset.windowSizeHeight = 16
objOffset.windowSizeWidth = 16 objOffset.windowSizeWidth = 16
objOffset.halfSearchRangeDown = 20 objOffset.halfSearchRangeDown = 20
objOffset.halfSearchRangeAcross = 20 objOffset.halfSearchRangeAcross = 20
objOffset.numberWindowDown = 2 objOffset.numberWindowDown = 2
objOffset.numberWindowAcross = 2 objOffset.numberWindowAcross = 2
@ -29,19 +29,19 @@ def main():
objOffset.numberWindowAcrossInChunk = 2 objOffset.numberWindowAcrossInChunk = 2
# 2 set other dependent parameters and allocate aray parameters # 2 set other dependent parameters and allocate aray parameters
objOffset.setupParams() objOffset.setupParams()
#3 set gross offsets: constant or varying #3 set gross offsets: constant or varying
objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown
objOffset.masterStartPixelAcrossStatic = objOffset.halfSearchRangeAcross objOffset.masterStartPixelAcrossStatic = objOffset.halfSearchRangeAcross
vD = np.random.randint(0, 10, size =objOffset.numberWindows, dtype=np.int32) vD = np.random.randint(0, 10, size =objOffset.numberWindows, dtype=np.int32)
vA = np.random.randint(0, 1, size = objOffset.numberWindows, dtype=np.int32) vA = np.random.randint(0, 1, size = objOffset.numberWindows, dtype=np.int32)
objOffset.setVaryingGrossOffset(vD, vA) objOffset.setVaryingGrossOffset(vD, vA)
objOffset.checkPixelInImageRange() objOffset.checkPixelInImageRange()
#4 run ampcor #4 run ampcor
objOffset.runAmpcor() objOffset.runAmpcor()
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -0,0 +1,154 @@
#include "GDALImage.h"
#include "cudaError.h"
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <iostream>
#include <stdexcept>
#include <string>
#include <cublas_v2.h>
/**
 * \brief Constructor: open one raster band through GDAL and set up the host-side buffer
 *
 * @param filename a std::string with the raster image file name
 * @param band index of the raster band to read (GDAL bands are numbered from 1)
 * @param cacheSizeInGB buffer size request; 0 keeps GDAL's default cache size in mmap mode
 * @param useMmap non-zero maps the band with GDALGetVirtualMemAuto, zero uses a pinned host staging buffer
 * @throws std::runtime_error when the dataset, the band or the buffer cannot be set up
 */
GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap)
    : _useMmap(useMmap)
{
    // open the file as dataset; GDAL reports the detailed error message itself
    _poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly);
    // a bare `throw;` with no exception in flight calls std::terminate,
    // so raise a real exception that callers can catch
    if(!_poDataset)
        throw std::runtime_error("GDALImage: unable to open " + filename);

    try {
        // check the band info
        int count = _poDataset->GetRasterCount();
        if(band < 1 || band > count)
        {
            std::string msg = "The desired band " + std::to_string(band)
                + " is outside the " + std::to_string(count) + " bands available";
            std::cout << msg << std::endl;
            throw std::runtime_error(msg);
        }

        // get the desired band
        _poBand = _poDataset->GetRasterBand(band);
        if(!_poBand)
            throw std::runtime_error("GDALImage: unable to fetch band " + std::to_string(band));

        // get the width(x), and height(y)
        _width = _poBand->GetXSize();
        _height = _poBand->GetYSize();

        _dataType = _poBand->GetRasterDataType();
        // determine the image type
        _isComplex = GDALDataTypeIsComplex(_dataType);
        // determine the pixel size in bytes; GDALGetDataTypeSize reports bits
        _pixelSize = GDALGetDataTypeSize(_dataType) / 8;

        // NOTE(review): the parameter is named "InGB" but the factor used here is 2^20 -- confirm the intended unit
        _bufferSize = 1024*1024*cacheSizeInGB;

        // checking whether using memory map
        if(_useMmap) {
            char **papszOptions = NULL;
            // if cacheSizeInGB = 0, use default
            // else set the option
            if(cacheSizeInGB > 0)
                papszOptions = CSLSetNameValue( papszOptions,
                    "CACHE_SIZE",
                    std::to_string(_bufferSize).c_str());

            // space between two lines
            GIntBig pnLineSpace;
            // set up the virtual mem buffer
            _poBandVirtualMem = GDALGetVirtualMemAuto(
                static_cast<GDALRasterBandH>(_poBand),
                GF_Read,
                &_pixelSize,
                &pnLineSpace,
                papszOptions);
            // the option list is only needed during the call; release it (NULL is accepted)
            CSLDestroy(papszOptions);

            // check it
            if(!_poBandVirtualMem)
                throw std::runtime_error("GDALImage: unable to map " + filename + " into virtual memory");

            // get the starting pointer
            _memPtr = CPLVirtualMemGetAddr(_poBandVirtualMem);
            // make sure memPtr is not Null
            if (!_memPtr)
                throw std::runtime_error("GDALImage: virtual memory mapping has no address");
        }
        else if (_bufferSize > 0) { // use a pinned host buffer
            checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
            if (!_memPtr)
                throw std::runtime_error("GDALImage: unable to allocate the host buffer");
        }
        // with a zero-size request in buffer mode nothing is allocated here;
        // loadToDevice grows the buffer to the tile size on first use
    }
    catch (...) {
        // the destructor does not run for a partially constructed object,
        // so release whatever was acquired before passing the error on
        if (_poBandVirtualMem)
            CPLVirtualMemFree(_poBandVirtualMem);
        else if (!_useMmap && _memPtr)
            cudaFreeHost(_memPtr);
        GDALClose(static_cast<GDALDatasetH>(_poDataset));
        _poBandVirtualMem = NULL;
        _memPtr = NULL;
        _poDataset = NULL;
        throw;
    }
    // all done
}
/// load a tile of data h_tile x w_tile from CPU (mmap) to GPU
/// @param dArray pointer for array in device memory
/// @param h_offset Down/Height offset
/// @param w_offset Across/Width offset
/// @param h_tile Down/Height tile size
/// @param w_tile Across/Width tile size
/// @param stream CUDA stream for copying
void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream)
{
size_t tileStartOffset = (h_offset*_width + w_offset)*_pixelSize;
char * startPtr = (char *)_memPtr ;
startPtr += tileStartOffset;
// @note
// We assume down/across directions as rows/cols. Therefore, SLC mmap and device array are both row major.
// cuBlas assumes both source and target arrays are column major.
// To use cublasSetMatrix, we need to switch w_tile/h_tile for rows/cols
// checkCudaErrors(cublasSetMatrixAsync(w_tile, h_tile, sizeof(float2), startPtr, width, dArray, w_tile, stream));
if (_useMmap)
checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile*_pixelSize, startPtr, _width*_pixelSize,
w_tile*_pixelSize, h_tile, cudaMemcpyHostToDevice,stream));
else {
// get the total tile size in bytes
size_t tileSize = h_tile*w_tile*_pixelSize;
// if the size is bigger than existing buffer, reallocate
if (tileSize > _bufferSize) {
// maybe we need to make it to fit the pagesize
_bufferSize = tileSize;
checkCudaErrors(cudaFree(_memPtr));
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
}
// copy from file to buffer
CPLErr err = _poBand->RasterIO(GF_Read, //eRWFlag
w_offset, h_offset, //nXOff, nYOff
w_tile, h_tile, // nXSize, nYSize
_memPtr, // pData
w_tile*h_tile, 1, // nBufXSize, nBufYSize
_dataType, //eBufType
0, 0, //nPixelSpace, nLineSpace in pData
NULL //psExtraArg extra resampling callback
);
if(err != CE_None)
throw;
// copy from buffer to gpu
checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream));
}
}
/// Destructor: release the host-side buffer and close the GDAL dataset
GDALImage::~GDALImage()
{
    if (_poBandVirtualMem) {
        // mmap mode: _memPtr points into the mapping, freeing the mapping releases it
        CPLVirtualMemFree(_poBandVirtualMem);
    }
    else if (!_useMmap && _memPtr) {
        // buffer mode: the staging buffer was obtained with cudaMallocHost
        cudaFreeHost(_memPtr);
    }
    _poBandVirtualMem = NULL;
    _memPtr = NULL;

    // free the GDAL Dataset, close the file
    if (_poDataset) {
        GDALClose(static_cast<GDALDatasetH>(_poDataset));
        _poDataset = NULL;
    }
}
// end of file

View File

@ -0,0 +1,79 @@
// -*- c++ -*-
/**
* \brief Class for a raster image opened through GDAL (for example, one described by a VRT file)
*
* Only complex (pixelOffset=8) or real (pixelOffset=4) images are supported, such as SLC and single-precision TIFF
*/
#ifndef __GDALIMAGE_H
#define __GDALIMAGE_H
#include <cublas_v2.h>
#include <string>
#include <gdal/gdal_priv.h>
#include <gdal/cpl_conv.h>
/// Wrapper around one band of a GDAL raster that can copy tiles to GPU memory.
/// NOTE(review): the class owns raw GDAL/CUDA resources but copying is not disabled;
/// copying an instance would release them twice -- confirm no caller copies it.
class GDALImage{

public:
    using size_t = std::size_t;

private:
    // not assigned in the constructor shown in GDALImage.cu; start at 0 rather than indeterminate
    size_t _fileSize = 0;
    int _height = 0;   // raster height (Y size of the band)
    int _width = 0;    // raster width (X size of the band)

    // buffer pointer
    void * _memPtr = NULL;

    int _pixelSize = 0; //in bytes
    int _isComplex = 0; // non-zero for complex data types

    size_t _bufferSize = 0; // size of the host buffer/cache request in bytes
    int _useMmap = 1;       // non-zero: GDAL virtual memory mapping, zero: staging buffer

    GDALDataType _dataType = GDT_Unknown;
    CPLVirtualMem * _poBandVirtualMem = NULL;
    GDALDataset * _poDataset = NULL;
    GDALRasterBand * _poBand = NULL;

public:
    GDALImage() = delete;
    /// Open band `band` of file `fn`; see GDALImage.cu for the meaning of the buffer options
    GDALImage(std::string fn, int band=1, int cacheSizeInGB=0, int useMmap=1);

    /// Host pointer to the mapped band or staging buffer
    void * getmemPtr() const
    {
        return(_memPtr);
    }

    /// Stored file size value
    size_t getFileSize() const
    {
        return (_fileSize);
    }

    /// Raster height
    size_t getHeight() const {
        return (_height);
    }

    /// Raster width
    size_t getWidth() const
    {
        return (_width);
    }

    /// Pixel size in bytes
    int getPixelSize() const
    {
        return _pixelSize;
    }

    /// Whether the band holds a complex data type
    bool isComplex() const
    {
        return _isComplex;
    }

    /// Copy an h_tile x w_tile tile starting at (h_offset, w_offset) to device memory on `stream`
    void loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream);
    ~GDALImage();

};
#endif //__GDALIMAGE_H

View File

@ -3,23 +3,24 @@ PROJECT = CUAMPCOR
LDFLAGS = -lcuda -lcudart -lcufft -lcublas LDFLAGS = -lcuda -lcudart -lcufft -lcublas
CXXFLAGS = -std=c++11 -fpermissive -fPIC -shared CXXFLAGS = -std=c++11 -fpermissive -fPIC -shared
NVCCFLAGS = -ccbin g++ -m64 \ NVCCFLAGS = -ccbin g++ -m64 \
-gencode arch=compute_35,code=sm_35 \ -gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_60,code=sm_60 \
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \ -Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
-ftz=false -prec-div=true -prec-sqrt=true -ftz=false -prec-div=true -prec-sqrt=true
CXX=g++ CXX=g++
NVCC=nvcc NVCC=nvcc
DEPS = cudaUtil.h cudaError.h cuArrays.h SlcImage.h cuAmpcorParameter.h DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h
OBJS = SlcImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \ OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
cuSincOverSampler.o cuDeramp.o cuOffset.o \ cuSincOverSampler.o cuDeramp.o cuOffset.o \
cuCorrNormalization.o cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \ cuCorrNormalization.o cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \
cuAmpcorChunk.o cuAmpcorController.o cuEstimateStats.o cuAmpcorChunk.o cuAmpcorController.o cuEstimateStats.o
all: cuampcor all: pyampcor
SlcImage.o: SlcImage.cu $(DEPS) GDALImage.o: GDALImage.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ SlcImage.cu $(NVCC) $(NVCCFLAGS) -c -o $@ GDALImage.cu
cuArrays.o: cuArrays.cu $(DEPS) cuArrays.o: cuArrays.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuArrays.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuArrays.cu
@ -45,8 +46,8 @@ cuOffset.o: cuOffset.cu $(DEPS)
cuCorrNormalization.o: cuCorrNormalization.cu $(DEPS) cuCorrNormalization.o: cuCorrNormalization.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalization.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalization.cu
cuAmpcorParameter.o: cuAmpcorParameter.cu cuAmpcorParameter.o: cuAmpcorParameter.cu
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu
cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS) cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrTimeDomain.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrTimeDomain.cu
@ -54,8 +55,8 @@ cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS)
cuCorrFrequency.o: cuCorrFrequency.cu $(DEPS) cuCorrFrequency.h cuCorrFrequency.o: cuCorrFrequency.cu $(DEPS) cuCorrFrequency.h
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrFrequency.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrFrequency.cu
cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS) cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu
cuAmpcorController.o: cuAmpcorController.cu cuAmpcorController.o: cuAmpcorController.cu
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorController.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorController.cu
@ -64,8 +65,8 @@ cuEstimateStats.o: cuEstimateStats.cu
$(NVCC) $(NVCCFLAGS) -c -o $@ cuEstimateStats.cu $(NVCC) $(NVCCFLAGS) -c -o $@ cuEstimateStats.cu
cuampcor: $(OBJS) pyampcor: $(OBJS)
rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace
clean: clean:
rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat

View File

@ -1,6 +1,6 @@
# #
# PYX file to control Python module interface to underlying CUDA-Ampcor code # PYX file to control Python module interface to underlying CUDA-Ampcor code
# #
from libcpp.string cimport string from libcpp.string cimport string
import numpy as np import numpy as np
cimport numpy as np cimport numpy as np
@ -9,13 +9,13 @@ cimport numpy as np
cdef extern from "cudaUtil.h": cdef extern from "cudaUtil.h":
int gpuDeviceInit(int) int gpuDeviceInit(int)
void gpuDeviceList() void gpuDeviceList()
int gpuGetMaxGflopsDeviceId() int gpuGetMaxGflopsDeviceId()
def listGPU(): def listGPU():
gpuDeviceList() gpuDeviceList()
def findGPU(): def findGPU():
return gpuGetMaxGflopsDeviceId() return gpuGetMaxGflopsDeviceId()
def setGPU(int id): def setGPU(int id):
return gpuDeviceInit(id) return gpuDeviceInit(id)
@ -24,90 +24,92 @@ def setGPU(int id):
cdef extern from "cuAmpcorParameter.h": cdef extern from "cuAmpcorParameter.h":
cdef cppclass cuAmpcorParameter: cdef cppclass cuAmpcorParameter:
cuAmpcorParameter() except + cuAmpcorParameter() except +
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
int deviceID ## Targeted GPU device ID: use -1 to auto select int deviceID ## Targeted GPU device ID: use -1 to auto select
int nStreams ## Number of streams to asynchonize data transfers and compute kernels int nStreams ## Number of streams to asynchonize data transfers and compute kernels
int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient
## chip or window size for raw data ## chip or window size for raw data
int windowSizeHeightRaw ## Template window height (original size) int windowSizeHeightRaw ## Template window height (original size)
int windowSizeWidthRaw ## Template window width (original size) int windowSizeWidthRaw ## Template window width (original size)
int searchWindowSizeHeightRaw ## Search window height (original size) int searchWindowSizeHeightRaw ## Search window height (original size)
int searchWindowSizeWidthRaw ## Search window width (orignal size) int searchWindowSizeWidthRaw ## Search window width (orignal size)
int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2 int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
int halfSearchRangeAcrossRaw ##(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2 int halfSearchRangeAcrossRaw ##(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
## chip or window size after oversampling ## chip or window size after oversampling
int rawDataOversamplingFactor ## Raw data overampling factor (from original size to oversampled size) int rawDataOversamplingFactor ## Raw data overampling factor (from original size to oversampled size)
## strides between chips/windows ## strides between chips/windows
int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size) int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size)
int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size) int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size)
## Zoom in region near location of max correlation ## Zoom in region near location of max correlation
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions) int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
int oversamplingFactor ## Oversampling factor for interpolating correlation surface int oversamplingFactor ## Oversampling factor for interpolating correlation surface
int oversamplingMethod int oversamplingMethod
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
##master image ##master image
string masterImageName ## master SLC image name string masterImageName ## master SLC image name
int imageDataType1 ## master image data type, 2=cfloat=complex=float2 1=float int imageDataType1 ## master image data type, 2=cfloat=complex=float2 1=float
int masterImageHeight ## master image height int masterImageHeight ## master image height
int masterImageWidth ## master image width int masterImageWidth ## master image width
##slave image ##slave image
string slaveImageName ## slave SLC image name string slaveImageName ## slave SLC image name
int imageDataType2 ## slave image data type, 2=cfloat=complex=float2 1=float int imageDataType2 ## slave image data type, 2=cfloat=complex=float2 1=float
int slaveImageHeight ## slave image height int slaveImageHeight ## slave image height
int slaveImageWidth ## slave image width int slaveImageWidth ## slave image width
int mmapSizeInGB ## mmap buffer size in unit of Gigabytes int useMmap ## whether to use mmap
int mmapSizeInGB ## mmap buffer size in unit of Gigabytes (if not mmmap, the buffer size)
## total number of chips/windows ## total number of chips/windows
int numberWindowDown ## number of total windows (down) int numberWindowDown ## number of total windows (down)
int numberWindowAcross ## number of total windows (across) int numberWindowAcross ## number of total windows (across)
int numberWindows ## numberWindowDown*numberWindowAcross int numberWindows ## numberWindowDown*numberWindowAcross
## number of chips/windows in a batch/chunk ## number of chips/windows in a batch/chunk
int numberWindowDownInChunk ## number of windows processed in a chunk (down) int numberWindowDownInChunk ## number of windows processed in a chunk (down)
int numberWindowAcrossInChunk ## number of windows processed in a chunk (across) int numberWindowAcrossInChunk ## number of windows processed in a chunk (across)
int numberWindowsInChunk ## numberWindowDownInChunk*numberWindowAcrossInChunk int numberWindowsInChunk ## numberWindowDownInChunk*numberWindowAcrossInChunk
int numberChunkDown ## number of chunks (down) int numberChunkDown ## number of chunks (down)
int numberChunkAcross ## number of chunks (across) int numberChunkAcross ## number of chunks (across)
int numberChunks int numberChunks
int *masterStartPixelDown ## master starting pixels for each window (down) int *masterStartPixelDown ## master starting pixels for each window (down)
int *masterStartPixelAcross ## master starting pixels for each window (across) int *masterStartPixelAcross ## master starting pixels for each window (across)
int *slaveStartPixelDown ## slave starting pixels for each window (down) int *slaveStartPixelDown ## slave starting pixels for each window (down)
int *slaveStartPixelAcross ## slave starting pixels for each window (across) int *slaveStartPixelAcross ## slave starting pixels for each window (across)
int *grossOffsetDown ## Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel int *grossOffsetDown ## Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel
int *grossOffsetAcross ## Gross offsets between master and slave windows (across) int *grossOffsetAcross ## Gross offsets between master and slave windows (across)
int grossOffsetDown0 ## constant gross offset (down) int grossOffsetDown0 ## constant gross offset (down)
int grossOffsetAcross0 ## constant gross offset (across) int grossOffsetAcross0 ## constant gross offset (across)
int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset
int masterStartPixelAcross0 ## the first pixel of master image (across) int masterStartPixelAcross0 ## the first pixel of master image (across)
int *masterChunkStartPixelDown ## array of starting pixels for all master chunks (down) int *masterChunkStartPixelDown ## array of starting pixels for all master chunks (down)
int *masterChunkStartPixelAcross ## array of starting pixels for all master chunks (across) int *masterChunkStartPixelAcross ## array of starting pixels for all master chunks (across)
int *slaveChunkStartPixelDown ## array of starting pixels for all slave chunks (down) int *slaveChunkStartPixelDown ## array of starting pixels for all slave chunks (down)
int *slaveChunkStartPixelAcross ## array of starting pixels for all slave chunks (across) int *slaveChunkStartPixelAcross ## array of starting pixels for all slave chunks (across)
int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU
int *masterChunkWidth ## array of width of all master chunks int *masterChunkWidth ## array of width of all master chunks
int *slaveChunkHeight ## array of width of all master chunks int *slaveChunkHeight ## array of width of all master chunks
int *slaveChunkWidth ## array of width of all slave chunks int *slaveChunkWidth ## array of width of all slave chunks
int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU
int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU
int maxSlaveChunkHeight int maxSlaveChunkHeight
int maxSlaveChunkWidth int maxSlaveChunkWidth
string grossOffsetImageName string grossOffsetImageName
string offsetImageName ## Output Offset fields filename string offsetImageName ## Output Offset fields filename
string snrImageName ## Output SNR filename string snrImageName ## Output SNR filename
void setStartPixels(int*, int*, int*, int*) string covImageName ## Output COV filename
void setStartPixels(int, int, int*, int*) void setStartPixels(int*, int*, int*, int*)
void setStartPixels(int, int, int, int) void setStartPixels(int, int, int*, int*)
void checkPixelInImageRange() ## check whether void setStartPixels(int, int, int, int)
void checkPixelInImageRange() ## check whether
void setupParameters() ## Process other parameters after Python Inpu void setupParameters() ## Process other parameters after Python Inpu
cdef extern from "cuAmpcorController.h": cdef extern from "cuAmpcorController.h":
@ -115,34 +117,40 @@ cdef extern from "cuAmpcorController.h":
cuAmpcorController() except + cuAmpcorController() except +
cuAmpcorParameter *param cuAmpcorParameter *param
void runAmpcor() void runAmpcor()
cdef class PyCuAmpcor(object): cdef class PyCuAmpcor(object):
''' '''
Python interface for cuda Ampcor Python interface for cuda Ampcor
''' '''
cdef cuAmpcorController c_cuAmpcor cdef cuAmpcorController c_cuAmpcor
def __cinit__(self): def __cinit__(self):
return return
@property @property
def algorithm(self): def algorithm(self):
return self.c_cuAmpcor.param.algorithm return self.c_cuAmpcor.param.algorithm
@algorithm.setter @algorithm.setter
def algorithm(self, int a): def algorithm(self, int a):
self.c_cuAmpcor.param.algorithm = a self.c_cuAmpcor.param.algorithm = a
@property @property
def deviceID(self): def deviceID(self):
return self.c_cuAmpcor.param.deviceID return self.c_cuAmpcor.param.deviceID
@deviceID.setter @deviceID.setter
def deviceID(self, int a): def deviceID(self, int a):
self.c_cuAmpcor.param.deviceID = a self.c_cuAmpcor.param.deviceID = a
@property @property
def nStreams(self): def nStreams(self):
return self.c_cuAmpcor.param.nStreams return self.c_cuAmpcor.param.nStreams
@nStreams.setter @nStreams.setter
def nStreams(self, int a): def nStreams(self, int a):
self.c_cuAmpcor.param.nStreams = a self.c_cuAmpcor.param.nStreams = a
@property @property
def useMmap(self):
return self.c_cuAmpcor.param.useMmap
@useMmap.setter
def useMmap(self, int a):
self.c_cuAmpcor.param.useMmap = a
@property
def mmapSize(self): def mmapSize(self):
return self.c_cuAmpcor.param.mmapSizeInGB return self.c_cuAmpcor.param.mmapSizeInGB
@mmapSize.setter @mmapSize.setter
@ -150,19 +158,19 @@ cdef class PyCuAmpcor(object):
self.c_cuAmpcor.param.mmapSizeInGB = a self.c_cuAmpcor.param.mmapSizeInGB = a
@property @property
def derampMethod(self): def derampMethod(self):
return self.c_cuAmpcor.param.derampMethod return self.c_cuAmpcor.param.derampMethod
@derampMethod.setter @derampMethod.setter
def derampMethod(self, int a): def derampMethod(self, int a):
self.c_cuAmpcor.param.derampMethod = a self.c_cuAmpcor.param.derampMethod = a
@property @property
def windowSizeHeight(self): def windowSizeHeight(self):
return self.c_cuAmpcor.param.windowSizeHeightRaw return self.c_cuAmpcor.param.windowSizeHeightRaw
@windowSizeHeight.setter @windowSizeHeight.setter
def windowSizeHeight(self, int a): def windowSizeHeight(self, int a):
self.c_cuAmpcor.param.windowSizeHeightRaw = a self.c_cuAmpcor.param.windowSizeHeightRaw = a
@property @property
def windowSizeWidth(self): def windowSizeWidth(self):
return self.c_cuAmpcor.param.windowSizeWidthRaw return self.c_cuAmpcor.param.windowSizeWidthRaw
@windowSizeWidth.setter @windowSizeWidth.setter
def windowSizeWidth(self, int a): def windowSizeWidth(self, int a):
self.c_cuAmpcor.param.windowSizeWidthRaw = a self.c_cuAmpcor.param.windowSizeWidthRaw = a
@ -200,7 +208,7 @@ cdef class PyCuAmpcor(object):
@skipSampleAcross.setter @skipSampleAcross.setter
def skipSampleAcross(self, int a): def skipSampleAcross(self, int a):
self.c_cuAmpcor.param.skipSampleAcrossRaw = a self.c_cuAmpcor.param.skipSampleAcrossRaw = a
@property @property
def rawDataOversamplingFactor(self): def rawDataOversamplingFactor(self):
"""anti-aliasing oversampling factor""" """anti-aliasing oversampling factor"""
@ -229,7 +237,7 @@ cdef class PyCuAmpcor(object):
@corrSufaceOverSamplingMethod.setter @corrSufaceOverSamplingMethod.setter
def corrSufaceOverSamplingMethod(self, int a): def corrSufaceOverSamplingMethod(self, int a):
self.c_cuAmpcor.param.oversamplingMethod = a self.c_cuAmpcor.param.oversamplingMethod = a
@property @property
def masterImageName(self): def masterImageName(self):
return self.c_cuAmpcor.param.masterImageName return self.c_cuAmpcor.param.masterImageName
@masterImageName.setter @masterImageName.setter
@ -241,12 +249,12 @@ cdef class PyCuAmpcor(object):
@slaveImageName.setter @slaveImageName.setter
def slaveImageName(self, str a): def slaveImageName(self, str a):
self.c_cuAmpcor.param.slaveImageName = <string> a.encode() self.c_cuAmpcor.param.slaveImageName = <string> a.encode()
@property @property
def masterImageName(self): def masterImageName(self):
return self.c_cuAmpcor.param.masterImageName return self.c_cuAmpcor.param.masterImageName
@masterImageName.setter @masterImageName.setter
def masterImageName(self, str a): def masterImageName(self, str a):
self.c_cuAmpcor.param.masterImageName = <string> a.encode() self.c_cuAmpcor.param.masterImageName = <string> a.encode()
@property @property
def masterImageHeight(self): def masterImageHeight(self):
return self.c_cuAmpcor.param.masterImageHeight return self.c_cuAmpcor.param.masterImageHeight
@ -258,7 +266,7 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.masterImageWidth return self.c_cuAmpcor.param.masterImageWidth
@masterImageWidth.setter @masterImageWidth.setter
def masterImageWidth(self, int a): def masterImageWidth(self, int a):
self.c_cuAmpcor.param.masterImageWidth=a self.c_cuAmpcor.param.masterImageWidth=a
@property @property
def slaveImageHeight(self): def slaveImageHeight(self):
return self.c_cuAmpcor.param.slaveImageHeight return self.c_cuAmpcor.param.slaveImageHeight
@ -270,8 +278,8 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.slaveImageWidth return self.c_cuAmpcor.param.slaveImageWidth
@slaveImageWidth.setter @slaveImageWidth.setter
def slaveImageWidth(self, int a): def slaveImageWidth(self, int a):
self.c_cuAmpcor.param.slaveImageWidth=a self.c_cuAmpcor.param.slaveImageWidth=a
@property @property
def numberWindowDown(self): def numberWindowDown(self):
return self.c_cuAmpcor.param.numberWindowDown return self.c_cuAmpcor.param.numberWindowDown
@ -283,11 +291,11 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.numberWindowAcross return self.c_cuAmpcor.param.numberWindowAcross
@numberWindowAcross.setter @numberWindowAcross.setter
def numberWindowAcross(self, int a): def numberWindowAcross(self, int a):
self.c_cuAmpcor.param.numberWindowAcross = a self.c_cuAmpcor.param.numberWindowAcross = a
@property @property
def numberWindows(self): def numberWindows(self):
return self.c_cuAmpcor.param.numberWindows return self.c_cuAmpcor.param.numberWindows
@property @property
def numberWindowDownInChunk(self): def numberWindowDownInChunk(self):
return self.c_cuAmpcor.param.numberWindowDownInChunk return self.c_cuAmpcor.param.numberWindowDownInChunk
@ -299,7 +307,7 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.numberWindowAcrossInChunk return self.c_cuAmpcor.param.numberWindowAcrossInChunk
@numberWindowAcrossInChunk.setter @numberWindowAcrossInChunk.setter
def numberWindowAcrossInChunk(self, int a): def numberWindowAcrossInChunk(self, int a):
self.c_cuAmpcor.param.numberWindowAcrossInChunk = a self.c_cuAmpcor.param.numberWindowAcrossInChunk = a
@property @property
def numberChunkDown(self): def numberChunkDown(self):
return self.c_cuAmpcor.param.numberChunkDown return self.c_cuAmpcor.param.numberChunkDown
@ -309,9 +317,9 @@ cdef class PyCuAmpcor(object):
@property @property
def numberChunks(self): def numberChunks(self):
return self.c_cuAmpcor.param.numberChunks return self.c_cuAmpcor.param.numberChunks
## gross offets ## gross offets
@property @property
def grossOffsetImageName(self): def grossOffsetImageName(self):
return self.c_cuAmpcor.param.grossOffsetImageName return self.c_cuAmpcor.param.grossOffsetImageName
@ -324,13 +332,21 @@ cdef class PyCuAmpcor(object):
@offsetImageName.setter @offsetImageName.setter
def offsetImageName(self, str a): def offsetImageName(self, str a):
self.c_cuAmpcor.param.offsetImageName = <string> a.encode() self.c_cuAmpcor.param.offsetImageName = <string> a.encode()
@property @property
def snrImageName(self): def snrImageName(self):
return self.c_cuAmpcor.param.snrImageName return self.c_cuAmpcor.param.snrImageName
@snrImageName.setter @snrImageName.setter
def snrImageName(self, str a): def snrImageName(self, str a):
self.c_cuAmpcor.param.snrImageName = <string> a.encode() self.c_cuAmpcor.param.snrImageName = <string> a.encode()
@property
def covImageName(self):
return self.c_cuAmpcor.param.covImageName
@covImageName.setter
def covImageName(self, str a):
self.c_cuAmpcor.param.covImageName = <string> a.encode()
@property @property
def masterStartPixelDownStatic(self): def masterStartPixelDownStatic(self):
return self.c_cuAmpcor.param.masterStartPixelDown0 return self.c_cuAmpcor.param.masterStartPixelDown0
@ -342,20 +358,20 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.masterStartPixelAcross0 return self.c_cuAmpcor.param.masterStartPixelAcross0
@masterStartPixelAcrossStatic.setter @masterStartPixelAcrossStatic.setter
def masterStartPixelAcrossStatic(self, int a): def masterStartPixelAcrossStatic(self, int a):
self.c_cuAmpcor.param.masterStartPixelAcross0 = a self.c_cuAmpcor.param.masterStartPixelAcross0 = a
@property @property
def grossOffsetDownStatic(self): def grossOffsetDownStatic(self):
return self.c_cuAmpcor.param.grossOffsetDown0 return self.c_cuAmpcor.param.grossOffsetDown0
@grossOffsetDownStatic.setter @grossOffsetDownStatic.setter
def grossOffsetDownStatic(self, int a): def grossOffsetDownStatic(self, int a):
self.c_cuAmpcor.param.grossOffsetDown0 =a self.c_cuAmpcor.param.grossOffsetDown0 =a
@property @property
def grossOffsetAcrossStatic(self): def grossOffsetAcrossStatic(self):
return self.c_cuAmpcor.param.grossOffsetAcross0 return self.c_cuAmpcor.param.grossOffsetAcross0
@grossOffsetAcrossStatic.setter @grossOffsetAcrossStatic.setter
def grossOffsetAcrossStatic(self, int a): def grossOffsetAcrossStatic(self, int a):
self.c_cuAmpcor.param.grossOffsetAcross0 =a self.c_cuAmpcor.param.grossOffsetAcross0 =a
@property @property
def grossOffsetDownDynamic(self): def grossOffsetDownDynamic(self):
cdef int *c_data cdef int *c_data
@ -366,12 +382,12 @@ cdef class PyCuAmpcor(object):
return p_data return p_data
@grossOffsetDownDynamic.setter @grossOffsetDownDynamic.setter
def grossOffsetDownDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa): def grossOffsetDownDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa):
cdef int *c_data cdef int *c_data
cdef int *p_data cdef int *p_data
c_data = self.c_cuAmpcor.param.grossOffsetDown c_data = self.c_cuAmpcor.param.grossOffsetDown
p_data = <int *> pa.data p_data = <int *> pa.data
for i in range (self.numberWindows): for i in range (self.numberWindows):
c_data[i] = p_data[i] c_data[i] = p_data[i]
@property @property
def grossOffsetAcrossDynamic(self): def grossOffsetAcrossDynamic(self):
cdef int *c_data cdef int *c_data
@ -382,23 +398,23 @@ cdef class PyCuAmpcor(object):
return p_data return p_data
@grossOffsetAcrossDynamic.setter @grossOffsetAcrossDynamic.setter
def grossOffsetAcrossDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa): def grossOffsetAcrossDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa):
cdef int *c_data cdef int *c_data
cdef int *p_data cdef int *p_data
c_data = self.c_cuAmpcor.param.grossOffsetAcross c_data = self.c_cuAmpcor.param.grossOffsetAcross
p_data = <int *> pa.data p_data = <int *> pa.data
for i in range (self.numberWindows): for i in range (self.numberWindows):
c_data[i] = p_data[i] c_data[i] = p_data[i]
return return
def setConstantGrossOffset(self, int goDown, int goAcross): def setConstantGrossOffset(self, int goDown, int goAcross):
""" """
constant gross offsets constant gross offsets
param goDown gross offset in azimuth direction param goDown gross offset in azimuth direction
param goAcross gross offset in range direction param goAcross gross offset in range direction
""" """
self.c_cuAmpcor.param.setStartPixels(<int>self.masterStartPixelDownStatic, <int>self.masterStartPixelAcrossStatic, goDown, goAcross) self.c_cuAmpcor.param.setStartPixels(<int>self.masterStartPixelDownStatic, <int>self.masterStartPixelAcrossStatic, goDown, goAcross)
def setVaryingGrossOffset(self, np.ndarray[np.int32_t,ndim=1,mode="c"] vD, np.ndarray[np.int32_t,ndim=1,mode="c"] vA): def setVaryingGrossOffset(self, np.ndarray[np.int32_t,ndim=1,mode="c"] vD, np.ndarray[np.int32_t,ndim=1,mode="c"] vA):
""" """
varying gross offsets for each window varying gross offsets for each window
@ -411,21 +427,21 @@ cdef class PyCuAmpcor(object):
def checkPixelInImageRange(self): def checkPixelInImageRange(self):
""" check whether each window is with image range """ """ check whether each window is with image range """
self.c_cuAmpcor.param.checkPixelInImageRange() self.c_cuAmpcor.param.checkPixelInImageRange()
def setupParams(self): def setupParams(self):
""" """
set up constant parameters and allocate array parameters (offsets) set up constant parameters and allocate array parameters (offsets)
should be called after number of windows is set and before setting varying gross offsets should be called after number of windows is set and before setting varying gross offsets
""" """
self.c_cuAmpcor.param.setupParameters() self.c_cuAmpcor.param.setupParameters()
def runAmpcor(self): def runAmpcor(self):
""" main procedure to run ampcor """ """ main procedure to run ampcor """
self.c_cuAmpcor.runAmpcor() self.c_cuAmpcor.runAmpcor()

View File

@ -6,7 +6,7 @@ package = envPyCuAmpcor['PACKAGE']
project = envPyCuAmpcor['PROJECT'] project = envPyCuAmpcor['PROJECT']
build = envPyCuAmpcor['PRJ_LIB_DIR'] build = envPyCuAmpcor['PRJ_LIB_DIR']
install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project
listFiles = ['SlcImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu', listFiles = ['GDALImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu',
'cuArraysPadding.cu', 'cuOverSampler.cu', 'cuArraysPadding.cu', 'cuOverSampler.cu',
'cuSincOverSampler.cu', 'cuDeramp.cu', 'cuSincOverSampler.cu', 'cuDeramp.cu',
'cuOffset.cu', 'cuCorrNormalization.cu', 'cuOffset.cu', 'cuCorrNormalization.cu',

View File

@ -2,58 +2,74 @@
#include "cuAmpcorUtil.h" #include "cuAmpcorUtil.h"
/** /**
* Run ampcor process for a batch of images (a chunk) * Run ampcor process for a batch of images (a chunk)
* @param[in] idxDown_ index of the chunk along Down/Azimuth direction * @param[in] idxDown_ index of the chunk along Down/Azimuth direction
* @param[in] idxAcross_ index of the chunk along Across/Range direction * @param[in] idxAcross_ index of the chunk along Across/Range direction
*/ */
void cuAmpcorChunk::run(int idxDown_, int idxAcross_) void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
{ {
// set chunk index // set chunk index
setIndex(idxDown_, idxAcross_); setIndex(idxDown_, idxAcross_);
// load master image chunk // load master image chunk
loadMasterChunk(); loadMasterChunk();
//std::cout << "load master chunk ok\n"; //std::cout << "load master chunk ok\n";
cuArraysAbs(c_masterBatchRaw, r_masterBatchRaw, stream); cuArraysAbs(c_masterBatchRaw, r_masterBatchRaw, stream);
cuArraysSubtractMean(r_masterBatchRaw, stream); cuArraysSubtractMean(r_masterBatchRaw, stream);
// load slave image chunk // load slave image chunk
loadSlaveChunk(); loadSlaveChunk();
cuArraysAbs(c_slaveBatchRaw, r_slaveBatchRaw, stream); cuArraysAbs(c_slaveBatchRaw, r_slaveBatchRaw, stream);
//std::cout << "load slave chunk ok\n"; //std::cout << "load slave chunk ok\n";
//cross correlation for non-oversampled data //cross correlation for non-oversampled data
if(param->algorithm == 0) { if(param->algorithm == 0) {
cuCorrFreqDomain->execute(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw); cuCorrFreqDomain->execute(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw);
} }
else { else {
cuCorrTimeDomain(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream); //time domain cross correlation cuCorrTimeDomain(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream); //time domain cross correlation
} }
cuCorrNormalize(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream); cuCorrNormalize(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream);
//find the maximum location of none-oversampled correlation
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream);
// Estimate SNR (Minyan Zhong)
//std::cout<< "flag stats 1" <<std::endl; // find the maximum location of none-oversampled correlation
//cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchZoomIn, i_corrBatchZoomInValid, offsetInit, stream); // 41 x 41, if halfsearchrange=20
//cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream);
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, r_maxval, stream);
//std::cout<< "flag stats 2" <<std::endl; offsetInit->outputToFile("offsetInit1", stream);
//cuArraysSumCorr(r_corrBatchZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
//std::cout<< "flag stats 3" <<std::endl; // Estimation of statistics
//cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream); // Author: Minyan Zhong
// Extraction of correlation surface around the peak
cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchRawZoomIn, i_corrBatchZoomInValid, offsetInit, stream);
// cudaDeviceSynchronize();
// debug: output the intermediate results
r_maxval->outputToFile("r_maxval",stream);
r_corrBatchRaw->outputToFile("r_corrBatchRaw",stream);
r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawZoomIn",stream);
i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid",stream);
// Summation of correlation and data point values
cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
// SNR
cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream);
// Variance
// cuEstimateVariance(r_corrBatchRaw, offsetInit, r_maxval, r_covValue, stream);
// Using the approximate estimation to adjust slave image (half search window size becomes only 4 pixels)
//offsetInit->debuginfo(stream); //offsetInit->debuginfo(stream);
// determine the starting pixel to extract slave images around the max location // determine the starting pixel to extract slave images around the max location
cuDetermineSlaveExtractOffset(offsetInit, cuDetermineSlaveExtractOffset(offsetInit,
param->halfSearchRangeDownRaw, // old range param->halfSearchRangeDownRaw, // old range
param->halfSearchRangeAcrossRaw, param->halfSearchRangeAcrossRaw,
param->halfZoomWindowSizeRaw, // new range param->halfZoomWindowSizeRaw, // new range
param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw,
stream); stream);
@ -63,58 +79,67 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
masterBatchOverSampler->execute(c_masterBatchRaw, c_masterBatchOverSampled, param->derampMethod); masterBatchOverSampler->execute(c_masterBatchRaw, c_masterBatchOverSampled, param->derampMethod);
cuArraysAbs(c_masterBatchOverSampled, r_masterBatchOverSampled, stream); cuArraysAbs(c_masterBatchOverSampled, r_masterBatchOverSampled, stream);
cuArraysSubtractMean(r_masterBatchOverSampled, stream); cuArraysSubtractMean(r_masterBatchOverSampled, stream);
// extract slave and oversample // extract slave and oversample
cuArraysCopyExtract(c_slaveBatchRaw, c_slaveBatchZoomIn, offsetInit, stream); cuArraysCopyExtract(c_slaveBatchRaw, c_slaveBatchZoomIn, offsetInit, stream);
slaveBatchOverSampler->execute(c_slaveBatchZoomIn, c_slaveBatchOverSampled, param->derampMethod); slaveBatchOverSampler->execute(c_slaveBatchZoomIn, c_slaveBatchOverSampled, param->derampMethod);
cuArraysAbs(c_slaveBatchOverSampled, r_slaveBatchOverSampled, stream); cuArraysAbs(c_slaveBatchOverSampled, r_slaveBatchOverSampled, stream);
// correlate oversampled images // correlate oversampled images
if(param->algorithm == 0) { if(param->algorithm == 0) {
cuCorrFreqDomain_OverSampled->execute(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn); cuCorrFreqDomain_OverSampled->execute(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn);
} }
else { else {
cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream); cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
} }
cuCorrNormalize(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream); cuCorrNormalize(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
//std::cout << "debug correlation oversample\n"; //std::cout << "debug correlation oversample\n";
//std::cout << r_masterBatchOverSampled->height << " " << r_masterBatchOverSampled->width << "\n"; //std::cout << r_masterBatchOverSampled->height << " " << r_masterBatchOverSampled->width << "\n";
//std::cout << r_slaveBatchOverSampled->height << " " << r_slaveBatchOverSampled->width << "\n"; //std::cout << r_slaveBatchOverSampled->height << " " << r_slaveBatchOverSampled->width << "\n";
//std::cout << r_corrBatchZoomIn->height << " " << r_corrBatchZoomIn->width << "\n"; //std::cout << r_corrBatchZoomIn->height << " " << r_corrBatchZoomIn->width << "\n";
// oversample the correlation surface // oversample the correlation surface
cuArraysCopyExtract(r_corrBatchZoomIn, r_corrBatchZoomInAdjust, make_int2(0,0), stream); cuArraysCopyExtract(r_corrBatchZoomIn, r_corrBatchZoomInAdjust, make_int2(0,0), stream);
//std::cout << "debug oversampling " << r_corrBatchZoomInAdjust << " " << r_corrBatchZoomInOverSampled << "\n"; //std::cout << "debug oversampling " << r_corrBatchZoomInAdjust << " " << r_corrBatchZoomInOverSampled << "\n";
if(param->oversamplingMethod) { if(param->oversamplingMethod) {
corrSincOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled); corrSincOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
} }
else { else {
corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled); corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
} }
//find the max again //find the max again
cuArraysMaxloc2D(r_corrBatchZoomInOverSampled, offsetZoomIn, corrMaxValue, stream); cuArraysMaxloc2D(r_corrBatchZoomInOverSampled, offsetZoomIn, corrMaxValue, stream);
// determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel) // determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel)
cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal, cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal,
param->oversamplingFactor, param->rawDataOversamplingFactor, param->oversamplingFactor, param->rawDataOversamplingFactor,
param->halfSearchRangeDownRaw, param->halfSearchRangeAcrossRaw, param->halfSearchRangeDownRaw, param->halfSearchRangeAcrossRaw,
param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw,
stream); stream);
//offsetInit->debuginfo(stream); //offsetInit->debuginfo(stream);
//offsetZoomIn->debuginfo(stream); //offsetZoomIn->debuginfo(stream);
//offsetFinal->debuginfo(stream); //offsetFinal->debuginfo(stream);
// Do insertion.
// Offsetfields.
cuArraysCopyInsert(offsetFinal, offsetImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); cuArraysCopyInsert(offsetFinal, offsetImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// Minyan Zhong // Debugging matrix.
//cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); cuArraysCopyInsert(r_corrBatchSum, floatImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
//cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream); cuArraysCopyInsert(i_corrBatchValidCount, intImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// Old: save max correlation coefficients.
//cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// New: save SNR
cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// Variance.
cuArraysCopyInsert(r_covValue, covImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
} }
void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_) void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
@ -122,14 +147,14 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
idxChunkDown = idxDown_; idxChunkDown = idxDown_;
idxChunkAcross = idxAcross_; idxChunkAcross = idxAcross_;
idxChunk = idxChunkAcross + idxChunkDown*param->numberChunkAcross; idxChunk = idxChunkAcross + idxChunkDown*param->numberChunkAcross;
if(idxChunkDown == param->numberChunkDown -1) { if(idxChunkDown == param->numberChunkDown -1) {
nWindowsDown = param->numberWindowDown - param->numberWindowDownInChunk*(param->numberChunkDown -1); nWindowsDown = param->numberWindowDown - param->numberWindowDownInChunk*(param->numberChunkDown -1);
} }
else { else {
nWindowsDown = param->numberWindowDownInChunk; nWindowsDown = param->numberWindowDownInChunk;
} }
if(idxChunkAcross == param->numberChunkAcross -1) { if(idxChunkAcross == param->numberChunkAcross -1) {
nWindowsAcross = param->numberWindowAcross - param->numberWindowAcrossInChunk*(param->numberChunkAcross -1); nWindowsAcross = param->numberWindowAcross - param->numberWindowAcrossInChunk*(param->numberChunkAcross -1);
} }
@ -137,20 +162,20 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
nWindowsAcross = param->numberWindowAcrossInChunk; nWindowsAcross = param->numberWindowAcrossInChunk;
} }
//std::cout << "DEBUG setIndex" << idxChunk << " " << nWindowsDown << " " << nWindowsAcross << "\n"; //std::cout << "DEBUG setIndex" << idxChunk << " " << nWindowsDown << " " << nWindowsAcross << "\n";
} }
/// obtain the starting pixels for each chip /// obtain the starting pixels for each chip
/// @param[in] oStartPixel /// @param[in] oStartPixel
/// ///
void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff) void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff)
{ {
for(int i=0; i<param->numberWindowDownInChunk; ++i) { for(int i=0; i<param->numberWindowDownInChunk; ++i) {
int iDown = i; int iDown = i;
if(i>=nWindowsDown) iDown = nWindowsDown-1; if(i>=nWindowsDown) iDown = nWindowsDown-1;
for(int j=0; j<param->numberWindowAcrossInChunk; ++j){ for(int j=0; j<param->numberWindowAcrossInChunk; ++j){
int iAcross = j; int iAcross = j;
if(j>=nWindowsAcross) iAcross = nWindowsAcross-1; if(j>=nWindowsAcross) iAcross = nWindowsAcross-1;
int idxInChunk = iDown*param->numberWindowAcrossInChunk+iAcross; int idxInChunk = iDown*param->numberWindowAcrossInChunk+iAcross;
int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross
+ idxChunkAcross*param->numberWindowAcrossInChunk+iAcross; + idxChunkAcross*param->numberWindowAcrossInChunk+iAcross;
@ -158,108 +183,179 @@ void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel,
//fprintf(stderr, "relative offset %d %d %d %d\n", i, j, rStartPixel[idxInChunk], diff); //fprintf(stderr, "relative offset %d %d %d %d\n", i, j, rStartPixel[idxInChunk], diff);
} }
} }
} }
void cuAmpcorChunk::loadMasterChunk() void cuAmpcorChunk::loadMasterChunk()
{ {
//load a chunk from mmap to gpu
int startD = param->masterChunkStartPixelDown[idxChunk]; // we first load the whole chunk of image from cpu to a gpu buffer c(r)_masterChunkRaw
int startA = param->masterChunkStartPixelAcross[idxChunk]; // then copy to a batch of windows with (nImages, height, width) (leading dimension on the right)
int height = param->masterChunkHeight[idxChunk];
int width = param->masterChunkWidth[idxChunk]; // get the chunk size to be loaded to gpu
masterImage->loadToDevice(c_masterChunkRaw->devData, startD, startA, height, width, stream); int startD = param->masterChunkStartPixelDown[idxChunk]; //start pixel down (along height)
std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n"; int startA = param->masterChunkStartPixelAcross[idxChunk]; // start pixel across (along width)
//copy the chunk to a batch of images format (nImages, height, width) int height = param->masterChunkHeight[idxChunk]; // number of pixels along height
//use cpu for some simple math int width = param->masterChunkWidth[idxChunk]; // number of pixels along width
//use cpu to compute the starting positions for each window
getRelativeOffset(ChunkOffsetDown->hostData, param->masterStartPixelDown, param->masterChunkStartPixelDown[idxChunk]); getRelativeOffset(ChunkOffsetDown->hostData, param->masterStartPixelDown, param->masterChunkStartPixelDown[idxChunk]);
// copy the positions to gpu
ChunkOffsetDown->copyToDevice(stream); ChunkOffsetDown->copyToDevice(stream);
// same for the across direction
getRelativeOffset(ChunkOffsetAcross->hostData, param->masterStartPixelAcross, param->masterChunkStartPixelAcross[idxChunk]); getRelativeOffset(ChunkOffsetAcross->hostData, param->masterStartPixelAcross, param->masterChunkStartPixelAcross[idxChunk]);
ChunkOffsetAcross->copyToDevice(stream); ChunkOffsetAcross->copyToDevice(stream);
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
if(param->derampMethod == 0) { // check whether the image is complex (e.g., SLC) or real( e.g. TIFF)
cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk], if(masterImage->isComplex())
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); {
// allocate a gpu buffer to load data from cpu/file
// try allocate/deallocate the buffer on the fly to save gpu memory 07/09/19
c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
c_masterChunkRaw->allocate();
// load the data from cpu
masterImage->loadToDevice((void *)c_masterChunkRaw->devData, startD, startA, height, width, stream);
//std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n";
//copy the chunk to a batch format (nImages, height, width)
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
if(param->derampMethod == 0) {
cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
else {
cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
// deallocate the gpu buffer
delete c_masterChunkRaw;
} }
// if the image is real
else { else {
cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk], r_masterChunkRaw = new cuArrays<float> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); r_masterChunkRaw->allocate();
// load the data from cpu
masterImage->loadToDevice((void *)r_masterChunkRaw->devData, startD, startA, height, width, stream);
// copy the chunk (real) to a batch format (complex)
cuArraysCopyToBatchWithOffsetR2C(r_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
// deallocate the gpu buffer
delete r_masterChunkRaw;
} }
} }
void cuAmpcorChunk::loadSlaveChunk() void cuAmpcorChunk::loadSlaveChunk()
{ {
//load a chunk from mmap to gpu
slaveImage->loadToDevice(c_slaveChunkRaw->devData,
param->slaveChunkStartPixelDown[idxChunk],
param->slaveChunkStartPixelAcross[idxChunk],
param->slaveChunkHeight[idxChunk],
param->slaveChunkWidth[idxChunk],
stream);
//copy to a batch format (nImages, height, width) //copy to a batch format (nImages, height, width)
getRelativeOffset(ChunkOffsetDown->hostData, param->slaveStartPixelDown, param->slaveChunkStartPixelDown[idxChunk]); getRelativeOffset(ChunkOffsetDown->hostData, param->slaveStartPixelDown, param->slaveChunkStartPixelDown[idxChunk]);
ChunkOffsetDown->copyToDevice(stream); ChunkOffsetDown->copyToDevice(stream);
getRelativeOffset(ChunkOffsetAcross->hostData, param->slaveStartPixelAcross, param->slaveChunkStartPixelAcross[idxChunk]); getRelativeOffset(ChunkOffsetAcross->hostData, param->slaveStartPixelAcross, param->slaveChunkStartPixelAcross[idxChunk]);
ChunkOffsetAcross->copyToDevice(stream); ChunkOffsetAcross->copyToDevice(stream);
if(param->derampMethod == 0) {
cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk], if(slaveImage->isComplex())
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
else
{ {
cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk], c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream); c_slaveChunkRaw->allocate();
}
//load a chunk from mmap to gpu
slaveImage->loadToDevice(c_slaveChunkRaw->devData,
param->slaveChunkStartPixelDown[idxChunk],
param->slaveChunkStartPixelAcross[idxChunk],
param->slaveChunkHeight[idxChunk],
param->slaveChunkWidth[idxChunk],
stream);
if(param->derampMethod == 0) {
cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
else {
cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
delete c_slaveChunkRaw;
}
else { //real image
//allocate the gpu buffer
r_slaveChunkRaw = new cuArrays<float> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
r_slaveChunkRaw->allocate();
//load a chunk from mmap to gpu
slaveImage->loadToDevice(r_slaveChunkRaw->devData,
param->slaveChunkStartPixelDown[idxChunk],
param->slaveChunkStartPixelAcross[idxChunk],
param->slaveChunkHeight[idxChunk],
param->slaveChunkWidth[idxChunk],
stream);
// convert to the batch format
cuArraysCopyToBatchWithOffsetR2C(r_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
delete r_slaveChunkRaw;
}
} }
cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_, cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_,
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cudaStream_t stream_) cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_)
{ {
param = param_; param = param_;
masterImage = master_; masterImage = master_;
slaveImage = slave_; slaveImage = slave_;
offsetImage = offsetImage_; offsetImage = offsetImage_;
snrImage = snrImage_; snrImage = snrImage_;
covImage = covImage_;
intImage1 = intImage1_;
floatImage1 = floatImage1_;
stream = stream_; stream = stream_;
std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n"; // std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n";
c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth); // try allocate/deallocate on the fly to save gpu memory 07/09/19
c_masterChunkRaw->allocate(); // c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
// c_masterChunkRaw->allocate();
c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
c_slaveChunkRaw->allocate(); // c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
// c_slaveChunkRaw->allocate();
ChunkOffsetDown = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); ChunkOffsetDown = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
ChunkOffsetDown->allocate(); ChunkOffsetDown->allocate();
ChunkOffsetDown->allocateHost(); ChunkOffsetDown->allocateHost();
ChunkOffsetAcross = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); ChunkOffsetAcross = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
ChunkOffsetAcross->allocate(); ChunkOffsetAcross->allocate();
ChunkOffsetAcross->allocateHost(); ChunkOffsetAcross->allocateHost();
c_masterBatchRaw = new cuArrays<float2> ( c_masterBatchRaw = new cuArrays<float2> (
param->windowSizeHeightRaw, param->windowSizeWidthRaw, param->windowSizeHeightRaw, param->windowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_masterBatchRaw->allocate(); c_masterBatchRaw->allocate();
c_slaveBatchRaw = new cuArrays<float2> ( c_slaveBatchRaw = new cuArrays<float2> (
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw, param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_slaveBatchRaw->allocate(); c_slaveBatchRaw->allocate();
r_masterBatchRaw = new cuArrays<float> ( r_masterBatchRaw = new cuArrays<float> (
param->windowSizeHeightRaw, param->windowSizeWidthRaw, param->windowSizeHeightRaw, param->windowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_masterBatchRaw->allocate(); r_masterBatchRaw->allocate();
r_slaveBatchRaw = new cuArrays<float> ( r_slaveBatchRaw = new cuArrays<float> (
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw, param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_slaveBatchRaw->allocate(); r_slaveBatchRaw->allocate();
c_slaveBatchZoomIn = new cuArrays<float2> ( c_slaveBatchZoomIn = new cuArrays<float2> (
param->searchWindowSizeHeightRawZoomIn, param->searchWindowSizeWidthRawZoomIn, param->searchWindowSizeHeightRawZoomIn, param->searchWindowSizeWidthRawZoomIn,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_slaveBatchZoomIn->allocate(); c_slaveBatchZoomIn->allocate();
c_masterBatchOverSampled = new cuArrays<float2> ( c_masterBatchOverSampled = new cuArrays<float2> (
param->windowSizeHeight, param->windowSizeWidth, param->windowSizeHeight, param->windowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
@ -269,7 +365,7 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
param->searchWindowSizeHeight, param->searchWindowSizeWidth, param->searchWindowSizeHeight, param->searchWindowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_slaveBatchOverSampled->allocate(); c_slaveBatchOverSampled->allocate();
r_masterBatchOverSampled = new cuArrays<float> ( r_masterBatchOverSampled = new cuArrays<float> (
param->windowSizeHeight, param->windowSizeWidth, param->windowSizeHeight, param->windowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
@ -279,66 +375,114 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
param->searchWindowSizeHeight, param->searchWindowSizeWidth, param->searchWindowSizeHeight, param->searchWindowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_slaveBatchOverSampled->allocate(); r_slaveBatchOverSampled->allocate();
masterBatchOverSampler = new cuOverSamplerC2C( masterBatchOverSampler = new cuOverSamplerC2C(
c_masterBatchRaw->height, c_masterBatchRaw->width, //orignal size c_masterBatchRaw->height, c_masterBatchRaw->width, //orignal size
c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size
c_masterBatchRaw->count, stream); c_masterBatchRaw->count, stream);
slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width, slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width,
c_slaveBatchOverSampled->height, c_slaveBatchOverSampled->width, c_slaveBatchRaw->count, stream); c_slaveBatchOverSampled->height, c_slaveBatchOverSampled->width, c_slaveBatchRaw->count, stream);
r_corrBatchRaw = new cuArrays<float> ( r_corrBatchRaw = new cuArrays<float> (
param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1, param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1,
param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1, param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1,
param->numberWindowDownInChunk, param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk); param->numberWindowAcrossInChunk);
r_corrBatchRaw->allocate(); r_corrBatchRaw->allocate();
r_corrBatchZoomIn = new cuArrays<float> ( r_corrBatchZoomIn = new cuArrays<float> (
param->searchWindowSizeHeight - param->windowSizeHeight+1, param->searchWindowSizeHeight - param->windowSizeHeight+1,
param->searchWindowSizeWidth - param->windowSizeWidth+1, param->searchWindowSizeWidth - param->windowSizeWidth+1,
param->numberWindowDownInChunk, param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk); param->numberWindowAcrossInChunk);
r_corrBatchZoomIn->allocate(); r_corrBatchZoomIn->allocate();
r_corrBatchZoomInAdjust = new cuArrays<float> ( r_corrBatchZoomInAdjust = new cuArrays<float> (
param->searchWindowSizeHeight - param->windowSizeHeight, param->searchWindowSizeHeight - param->windowSizeHeight,
param->searchWindowSizeWidth - param->windowSizeWidth, param->searchWindowSizeWidth - param->windowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk); param->numberWindowAcrossInChunk);
r_corrBatchZoomInAdjust->allocate(); r_corrBatchZoomInAdjust->allocate();
r_corrBatchZoomInOverSampled = new cuArrays<float> ( r_corrBatchZoomInOverSampled = new cuArrays<float> (
param->zoomWindowSize * param->oversamplingFactor,
param->zoomWindowSize * param->oversamplingFactor, param->zoomWindowSize * param->oversamplingFactor,
param->numberWindowDownInChunk, param->zoomWindowSize * param->oversamplingFactor,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk); param->numberWindowAcrossInChunk);
r_corrBatchZoomInOverSampled->allocate(); r_corrBatchZoomInOverSampled->allocate();
offsetInit = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); offsetInit = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
offsetInit->allocate(); offsetInit->allocate();
offsetZoomIn = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); offsetZoomIn = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
offsetZoomIn->allocate(); offsetZoomIn->allocate();
offsetFinal = new cuArrays<float2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); offsetFinal = new cuArrays<float2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
offsetFinal->allocate(); offsetFinal->allocate();
corrMaxValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk); corrMaxValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
corrMaxValue->allocate(); corrMaxValue->allocate();
// new arrays due to snr estimation
std::cout<< "corrRawZoomInHeight: " << param->corrRawZoomInHeight << "\n";
std::cout<< "corrRawZoomInWidth: " << param->corrRawZoomInWidth << "\n";
r_corrBatchRawZoomIn = new cuArrays<float> (
param->corrRawZoomInHeight,
param->corrRawZoomInWidth,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchRawZoomIn->allocate();
i_corrBatchZoomInValid = new cuArrays<int> (
param->corrRawZoomInHeight,
param->corrRawZoomInWidth,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
i_corrBatchZoomInValid->allocate();
r_corrBatchSum = new cuArrays<float> (
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchSum->allocate();
i_corrBatchValidCount = new cuArrays<int> (
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
i_corrBatchValidCount->allocate();
i_maxloc = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
i_maxloc->allocate();
r_maxval = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_maxval->allocate();
r_snrValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_snrValue->allocate();
r_covValue = new cuArrays<float3> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_covValue->allocate();
// end of new arrays
if(param->oversamplingMethod) { if(param->oversamplingMethod) {
corrSincOverSampler = new cuSincOverSamplerR2R(param->zoomWindowSize, param->oversamplingFactor, stream); corrSincOverSampler = new cuSincOverSamplerR2R(param->zoomWindowSize, param->oversamplingFactor, stream);
} }
else { else {
corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize, corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize,
(param->zoomWindowSize)*param->oversamplingFactor, (param->zoomWindowSize)*param->oversamplingFactor,
(param->zoomWindowSize)*param->oversamplingFactor, (param->zoomWindowSize)*param->oversamplingFactor,
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk, param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
stream); stream);
} }
if(param->algorithm == 0) { if(param->algorithm == 0) {
cuCorrFreqDomain = new cuFreqCorrelator( cuCorrFreqDomain = new cuFreqCorrelator(
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw, param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
@ -347,10 +491,10 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
cuCorrFreqDomain_OverSampled = new cuFreqCorrelator( cuCorrFreqDomain_OverSampled = new cuFreqCorrelator(
param->searchWindowSizeHeight, param->searchWindowSizeWidth, param->searchWindowSizeHeight, param->searchWindowSizeWidth,
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk, param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
stream); stream);
} }
debugmsg("all objects in chunk are created ...\n"); debugmsg("all objects in chunk are created ...\n");

View File

@ -1,4 +1,4 @@
/* /*
* cuAmpcorChunk.h * cuAmpcorChunk.h
* Purpose: a group of chips processed at the same time * Purpose: a group of chips processed at the same time
*/ */
@ -6,7 +6,7 @@
#ifndef __CUAMPCORCHUNK_H #ifndef __CUAMPCORCHUNK_H
#define __CUAMPCORCHUNK_H #define __CUAMPCORCHUNK_H
#include "SlcImage.h" #include "GDALImage.h"
#include "cuArrays.h" #include "cuArrays.h"
#include "cuAmpcorParameter.h" #include "cuAmpcorParameter.h"
#include "cuOverSampler.h" #include "cuOverSampler.h"
@ -22,64 +22,81 @@ private:
int nWindowsAcross; int nWindowsAcross;
int devId; int devId;
cudaStream_t stream; cudaStream_t stream;
SlcImage *masterImage; GDALImage *masterImage;
SlcImage *slaveImage; GDALImage *slaveImage;
cuAmpcorParameter *param; cuAmpcorParameter *param;
cuArrays<float2> *offsetImage; cuArrays<float2> *offsetImage;
cuArrays<float> *snrImage; cuArrays<float> *snrImage;
cuArrays<float3> *covImage;
cuArrays<float2> * c_masterChunkRaw, * c_slaveChunkRaw;
// added for test
cuArrays<int> *intImage1;
cuArrays<float> *floatImage1;
// gpu buffer
cuArrays<float2> * c_masterChunkRaw, * c_slaveChunkRaw;
cuArrays<float> * r_masterChunkRaw, * r_slaveChunkRaw;
// gpu windows raw data
cuArrays<float2> * c_masterBatchRaw, * c_slaveBatchRaw, * c_slaveBatchZoomIn; cuArrays<float2> * c_masterBatchRaw, * c_slaveBatchRaw, * c_slaveBatchZoomIn;
cuArrays<float> * r_masterBatchRaw, * r_slaveBatchRaw; cuArrays<float> * r_masterBatchRaw, * r_slaveBatchRaw;
cuArrays<float2> * c_masterBatchOverSampled, * c_slaveBatchOverSampled;
// gpu windows oversampled data
cuArrays<float2> * c_masterBatchOverSampled, * c_slaveBatchOverSampled;
cuArrays<float> * r_masterBatchOverSampled, * r_slaveBatchOverSampled; cuArrays<float> * r_masterBatchOverSampled, * r_slaveBatchOverSampled;
cuArrays<float> * r_corrBatchRaw, * r_corrBatchZoomIn, * r_corrBatchZoomInOverSampled, * r_corrBatchZoomInAdjust; cuArrays<float> * r_corrBatchRaw, * r_corrBatchZoomIn, * r_corrBatchZoomInOverSampled, * r_corrBatchZoomInAdjust;
cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross; cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross;
cuOverSamplerC2C *masterBatchOverSampler, *slaveBatchOverSampler; cuOverSamplerC2C *masterBatchOverSampler, *slaveBatchOverSampler;
cuOverSamplerR2R *corrOverSampler; cuOverSamplerR2R *corrOverSampler;
cuSincOverSamplerR2R *corrSincOverSampler; cuSincOverSamplerR2R *corrSincOverSampler;
//for frequency domain //for frequency domain
cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled; cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled;
cuArrays<int2> *offsetInit; cuArrays<int2> *offsetInit;
cuArrays<int2> *offsetZoomIn; cuArrays<int2> *offsetZoomIn;
cuArrays<float2> *offsetFinal; cuArrays<float2> *offsetFinal;
cuArrays<float> *corrMaxValue;
//SNR estimation
cuArrays<float> *r_corrBatchRawZoomIn;
cuArrays<float> *r_corrBatchSum;
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
cuArrays<float> *r_snrValue;
//corr statistics
cuArrays<int2> *i_maxloc; cuArrays<int2> *i_maxloc;
cuArrays<float> *r_maxval; cuArrays<float> *r_maxval;
cuArrays<float> *r_corrBatchSum; // Varince estimation.
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount; cuArrays<float3> *r_covValue;
cuArrays<float> *corrMaxValue;
cuArrays<float> *r_snrValue;
public: public:
cuAmpcorChunk() {} cuAmpcorChunk() {}
//cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_); //cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_);
void setIndex(int idxDown_, int idxAcross_); void setIndex(int idxDown_, int idxAcross_);
cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_, cuArrays<float2> *offsetImage_,
cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_);
cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_, cuArrays<float2> *offsetImage_,
cuArrays<float> *snrImage_, cudaStream_t stream_);
void loadMasterChunk(); void loadMasterChunk();
void loadSlaveChunk(); void loadSlaveChunk();
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff); void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
~cuAmpcorChunk(); ~cuAmpcorChunk();
void run(int, int); void run(int, int);
}; };
#endif #endif

View File

@ -1,113 +1,142 @@
// Implementation of cuAmpcorController // Implementation of cuAmpcorController
#include "cuAmpcorController.h" #include "cuAmpcorController.h"
#include "SlcImage.h" #include "GDALImage.h"
#include "cuArrays.h" #include "cuArrays.h"
#include "cudaUtil.h" #include "cudaUtil.h"
#include "cuAmpcorChunk.h" #include "cuAmpcorChunk.h"
#include "cuAmpcorUtil.h" #include "cuAmpcorUtil.h"
#include <iostream> #include <iostream>
/// Default constructor: creates the parameter object owned by this controller.
cuAmpcorController::cuAmpcorController()
{
    // released again in the destructor
    param = new cuAmpcorParameter();
}
/// Destructor: releases the parameter object created by the constructor.
cuAmpcorController::~cuAmpcorController()
{
    delete param;
}
void cuAmpcorController::runAmpcor() { void cuAmpcorController::runAmpcor() {
// set the gpu id
param->deviceID = gpuDeviceInit(param->deviceID); param->deviceID = gpuDeviceInit(param->deviceID);
SlcImage *masterImage; // initialize the gdal driver
SlcImage *slaveImage; GDALAllRegister();
// master and slave images; use band=1 as default
// TODO: selecting band
GDALImage *masterImage = new GDALImage(param->masterImageName, 1, param->mmapSizeInGB);
GDALImage *slaveImage = new GDALImage(param->slaveImageName, 1, param->mmapSizeInGB);
cuArrays<float2> *offsetImage, *offsetImageRun; cuArrays<float2> *offsetImage, *offsetImageRun;
cuArrays<float> *snrImage, *snrImageRun; cuArrays<float> *snrImage, *snrImageRun;
cuArrays<float3> *covImage, *covImageRun;
// cuArrays<float> *floatImage; // For debugging.
// cuArrays<int> *intImage; cuArrays<int> *intImage1;
cuArrays<float> *floatImage1;
int nWindowsDownRun = param->numberChunkDown * param->numberWindowDownInChunk;
int nWindowsAcrossRun = param->numberChunkAcross * param->numberWindowAcrossInChunk;
masterImage = new SlcImage(param->masterImageName, param->masterImageHeight, param->masterImageWidth, param->mmapSizeInGB);
slaveImage = new SlcImage(param->slaveImageName, param->slaveImageHeight, param->slaveImageWidth, param->mmapSizeInGB);
int nWindowsDownRun = param->numberChunkDown*param->numberWindowDownInChunk;
int nWindowsAcrossRun = param->numberChunkAcross*param->numberWindowAcrossInChunk;
std::cout << "Debug " << nWindowsDownRun << " " << param->numberWindowDown << "\n"; std::cout << "Debug " << nWindowsDownRun << " " << param->numberWindowDown << "\n";
offsetImageRun = new cuArrays<float2>(nWindowsDownRun, nWindowsAcrossRun); offsetImageRun = new cuArrays<float2>(nWindowsDownRun, nWindowsAcrossRun);
snrImageRun = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
offsetImageRun->allocate(); offsetImageRun->allocate();
snrImageRun = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
snrImageRun->allocate(); snrImageRun->allocate();
covImageRun = new cuArrays<float3>(nWindowsDownRun, nWindowsAcrossRun);
covImageRun->allocate();
// intImage 1 and floatImage 1 are added for debugging issues
intImage1 = new cuArrays<int>(nWindowsDownRun, nWindowsAcrossRun);
intImage1->allocate();
floatImage1 = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
floatImage1->allocate();
// Offsetfields.
offsetImage = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross); offsetImage = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
snrImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
offsetImage->allocate(); offsetImage->allocate();
// SNR.
snrImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
snrImage->allocate(); snrImage->allocate();
// Minyan Zhong // Variance.
// floatImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross); covImage = new cuArrays<float3>(param->numberWindowDown, param->numberWindowAcross);
// intImage = new cuArrays<int>(param->numberWindowDown, param->numberWindowAcross); covImage->allocate();
// floatImage->allocate();
// intImage->allocate();
//
cudaStream_t streams[param->nStreams]; cudaStream_t streams[param->nStreams];
cuAmpcorChunk *chunk[param->nStreams]; cuAmpcorChunk *chunk[param->nStreams];
for(int ist=0; ist<param->nStreams; ist++) for(int ist=0; ist<param->nStreams; ist++)
{ {
cudaStreamCreate(&streams[ist]); cudaStreamCreate(&streams[ist]);
chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, streams[ist]); chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, covImageRun, intImage1, floatImage1, streams[ist]);
} }
int nChunksDown = param->numberChunkDown; int nChunksDown = param->numberChunkDown;
int nChunksAcross = param->numberChunkAcross; int nChunksAcross = param->numberChunkAcross;
std::cout << "Total number of windows (azimuth x range): " <<param->numberWindowDown << " x " << param->numberWindowAcross << std::endl; std::cout << "Total number of windows (azimuth x range): " <<param->numberWindowDown << " x " << param->numberWindowAcross << std::endl;
std::cout << "to be processed in the number of chunks: " <<nChunksDown << " x " << nChunksAcross << std::endl; std::cout << "to be processed in the number of chunks: " <<nChunksDown << " x " << nChunksAcross << std::endl;
for(int i = 60; i<nChunksDown; i++) for(int i = 0; i<nChunksDown; i++)
{ {
std::cout << "Processing chunk (" << i <<", x" << ")" << std::endl; std::cout << "Processing chunk (" << i <<", x" << ")" << std::endl;
for(int j=0; j<nChunksAcross; j+=param->nStreams) for(int j=0; j<nChunksAcross; j+=param->nStreams)
{ {
//std::cout << "Processing chunk(" << i <<", " << j <<")" << std::endl; //std::cout << "Processing chunk(" << i <<", " << j <<")" << std::endl;
for(int ist = 0; ist<param->nStreams; ist++) for(int ist = 0; ist<param->nStreams; ist++)
{ {
if(j+ist < nChunksAcross) { if(j+ist < nChunksAcross) {
chunk[ist]->run(i, j+ist); chunk[ist]->run(i, j+ist);
} }
} }
} }
} }
cudaDeviceSynchronize(); cudaDeviceSynchronize();
// Do extraction.
cuArraysCopyExtract(offsetImageRun, offsetImage, make_int2(0,0), streams[0]); cuArraysCopyExtract(offsetImageRun, offsetImage, make_int2(0,0), streams[0]);
cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]); cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]);
cuArraysCopyExtract(covImageRun, covImage, make_int2(0,0), streams[0]);
offsetImage->outputToFile(param->offsetImageName, streams[0]); offsetImage->outputToFile(param->offsetImageName, streams[0]);
snrImage->outputToFile(param->snrImageName, streams[0]); snrImage->outputToFile(param->snrImageName, streams[0]);
covImage->outputToFile(param->covImageName, streams[0]);
// Minyan Zhong // Output debugging arrays.
// floatImage->allocate(); intImage1->outputToFile("intImage1", streams[0]);
// intImage->allocate(); floatImage1->outputToFile("floatImage1", streams[0]);
//
outputGrossOffsets(); outputGrossOffsets();
// Delete arrays.
delete offsetImage; delete offsetImage;
delete snrImage; delete snrImage;
delete covImage;
delete intImage1;
delete floatImage1;
delete offsetImageRun; delete offsetImageRun;
delete snrImageRun; delete snrImageRun;
delete covImageRun;
for (int ist=0; ist<param->nStreams; ist++) for (int ist=0; ist<param->nStreams; ist++)
delete chunk[ist]; delete chunk[ist];
delete masterImage; delete masterImage;
delete slaveImage; delete slaveImage;
}
}
void cuAmpcorController::outputGrossOffsets() void cuAmpcorController::outputGrossOffsets()
{ {
cuArrays<float2> *grossOffsets = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross); cuArrays<float2> *grossOffsets = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
grossOffsets->allocateHost(); grossOffsets->allocateHost();
for(int i=0; i< param->numberWindows; i++) for(int i=0; i< param->numberWindows; i++)
grossOffsets->hostData[i] = make_float2(param->grossOffsetDown[i], param->grossOffsetAcross[i]); grossOffsets->hostData[i] = make_float2(param->grossOffsetDown[i], param->grossOffsetAcross[i]);
grossOffsets->outputHostToFile(param->grossOffsetImageName); grossOffsets->outputHostToFile(param->grossOffsetImageName);
@ -176,7 +205,7 @@ void cuAmpcorController::setGrossOffsets(int *in, int size) {
param->grossOffsets = (int *)malloc(size*sizeof(int)); param->grossOffsets = (int *)malloc(size*sizeof(int));
mempcpy(param->grossOffsets, in, size*sizeof(int)); mempcpy(param->grossOffsets, in, size*sizeof(int));
fprintf(stderr, "copy grossOffsets %d\n", size); fprintf(stderr, "copy grossOffsets %d\n", size);
} }
/// Set the name of the offset output file (stored in param->offsetImageName).
void cuAmpcorController::setOffsetImageName(std::string s)
{
    param->offsetImageName = s;
}
/// Set the name of the SNR output file (stored in param->snrImageName).
void cuAmpcorController::setSNRImageName(std::string s)
{
    param->snrImageName = s;
}
//void cuAmpcorController::setMargin(int n) { param->margin = n; } //void cuAmpcorController::setMargin(int n) { param->margin = n; }

View File

@ -1,6 +1,6 @@
/** /**
* cuAmpcorParameter.cu * cuAmpcorParameter.cu
* Input parameters for ampcor * Input parameters for ampcor
*/ */
#include "cuAmpcorParameter.h" #include "cuAmpcorParameter.h"
@ -11,17 +11,19 @@
#endif #endif
/// ///
/// Constructor for cuAmpcorParameter class /// Constructor for cuAmpcorParameter class
/// also sets the default/initial values of various parameters /// also sets the default/initial values of various parameters
/// ///
cuAmpcorParameter::cuAmpcorParameter() cuAmpcorParameter::cuAmpcorParameter()
{ {
algorithm = 0; //0 freq; 1 time // default settings
deviceID = 0; // will be changed if they are set by python scripts
nStreams = 1; algorithm = 0; //0 freq; 1 time
deviceID = 0;
nStreams = 1;
derampMethod = 1; derampMethod = 1;
windowSizeWidthRaw = 64; windowSizeWidthRaw = 64;
windowSizeHeightRaw = 64; windowSizeHeightRaw = 64;
halfSearchRangeDownRaw = 20; halfSearchRangeDownRaw = 20;
@ -31,9 +33,9 @@ cuAmpcorParameter::cuAmpcorParameter()
skipSampleDownRaw = 64; skipSampleDownRaw = 64;
rawDataOversamplingFactor = 2; rawDataOversamplingFactor = 2;
zoomWindowSize = 8; zoomWindowSize = 8;
oversamplingFactor = 16; oversamplingFactor = 16;
oversamplingMethod = 0; oversamplingMethod = 0;
masterImageName = "master.slc"; masterImageName = "master.slc";
masterImageWidth = 1000; masterImageWidth = 1000;
masterImageHeight = 1000; masterImageHeight = 1000;
@ -43,50 +45,58 @@ cuAmpcorParameter::cuAmpcorParameter()
offsetImageName = "DenseOffset.off"; offsetImageName = "DenseOffset.off";
grossOffsetImageName = "GrossOffset.off"; grossOffsetImageName = "GrossOffset.off";
snrImageName = "snr.snr"; snrImageName = "snr.snr";
covImageName = "cov.cov";
numberWindowDown = 1; numberWindowDown = 1;
numberWindowAcross = 1; numberWindowAcross = 1;
numberWindowDownInChunk = 1; numberWindowDownInChunk = 1;
numberWindowAcrossInChunk = 1 ; numberWindowAcrossInChunk = 1 ;
masterStartPixelDown0 = 0; masterStartPixelDown0 = 0;
masterStartPixelAcross0 = 0; masterStartPixelAcross0 = 0;
corrRawZoomInHeight = 17; // 8*2+1
corrRawZoomInWidth = 17;
useMmap = 1; // use mmap
mmapSizeInGB = 1;
} }
/** /**
* To determine other process parameters after reading essential parameters from python * To determine other process parameters after reading essential parameters from python
*/ */
void cuAmpcorParameter::setupParameters() void cuAmpcorParameter::setupParameters()
{ {
zoomWindowSize *= rawDataOversamplingFactor; //8 * 2 zoomWindowSize *= rawDataOversamplingFactor; //8 * 2
halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4 halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4
windowSizeWidth = windowSizeWidthRaw*rawDataOversamplingFactor; // windowSizeWidth = windowSizeWidthRaw*rawDataOversamplingFactor; //
windowSizeHeight = windowSizeHeightRaw*rawDataOversamplingFactor; windowSizeHeight = windowSizeHeightRaw*rawDataOversamplingFactor;
searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw; searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw;
searchWindowSizeHeightRaw = windowSizeHeightRaw + 2*halfSearchRangeAcrossRaw; searchWindowSizeHeightRaw = windowSizeHeightRaw + 2*halfSearchRangeAcrossRaw;
searchWindowSizeWidthRawZoomIn = windowSizeWidthRaw + 2*halfZoomWindowSizeRaw; searchWindowSizeWidthRawZoomIn = windowSizeWidthRaw + 2*halfZoomWindowSizeRaw;
searchWindowSizeHeightRawZoomIn = windowSizeHeightRaw + 2*halfZoomWindowSizeRaw; searchWindowSizeHeightRawZoomIn = windowSizeHeightRaw + 2*halfZoomWindowSizeRaw;
searchWindowSizeWidth = searchWindowSizeWidthRawZoomIn*rawDataOversamplingFactor; searchWindowSizeWidth = searchWindowSizeWidthRawZoomIn*rawDataOversamplingFactor;
searchWindowSizeHeight = searchWindowSizeHeightRawZoomIn*rawDataOversamplingFactor; searchWindowSizeHeight = searchWindowSizeHeightRawZoomIn*rawDataOversamplingFactor;
numberWindows = numberWindowDown*numberWindowAcross; numberWindows = numberWindowDown*numberWindowAcross;
if(numberWindows <=0) { if(numberWindows <=0) {
fprintf(stderr, "Incorrect number of windows! (%d, %d)\n", numberWindowDown, numberWindowAcross); fprintf(stderr, "Incorrect number of windows! (%d, %d)\n", numberWindowDown, numberWindowAcross);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
// modified 02/12/2018 to include one more chunk // modified 02/12/2018 to include one more chunk
// e.g. numberWindowDownInChunk=102, numberWindowDown=10, results in numberChunkDown=11 // e.g. numberWindowDownInChunk=102, numberWindowDown=10, results in numberChunkDown=11
// the last chunk will include 2 windows, numberWindowDownInChunkRun = 2. // the last chunk will include 2 windows, numberWindowDownInChunkRun = 2.
numberChunkDown = IDIVUP(numberWindowDown, numberWindowDownInChunk); numberChunkDown = IDIVUP(numberWindowDown, numberWindowDownInChunk);
numberChunkAcross = IDIVUP(numberWindowAcross, numberWindowAcrossInChunk); numberChunkAcross = IDIVUP(numberWindowAcross, numberWindowAcrossInChunk);
numberChunks = numberChunkDown*numberChunkAcross; numberChunks = numberChunkDown*numberChunkAcross;
allocateArrays(); allocateArrays();
} }
@ -99,7 +109,7 @@ void cuAmpcorParameter::allocateArrays()
masterStartPixelAcross = (int *)malloc(arraySize); masterStartPixelAcross = (int *)malloc(arraySize);
slaveStartPixelDown = (int *)malloc(arraySize); slaveStartPixelDown = (int *)malloc(arraySize);
slaveStartPixelAcross = (int *)malloc(arraySize); slaveStartPixelAcross = (int *)malloc(arraySize);
int arraySizeChunk = numberChunks*sizeof(int); int arraySizeChunk = numberChunks*sizeof(int);
masterChunkStartPixelDown = (int *)malloc(arraySizeChunk); masterChunkStartPixelDown = (int *)malloc(arraySizeChunk);
masterChunkStartPixelAcross = (int *)malloc(arraySizeChunk); masterChunkStartPixelAcross = (int *)malloc(arraySizeChunk);
@ -130,18 +140,18 @@ void cuAmpcorParameter::deallocateArrays()
} }
/// Set starting pixels for master and slave windows from arrays /// Set starting pixels for master and slave windows from arrays
/// set also gross offsets between master and slave windows /// set also gross offsets between master and slave windows
/// ///
void cuAmpcorParameter::setStartPixels(int *mStartD, int *mStartA, int *gOffsetD, int *gOffsetA) void cuAmpcorParameter::setStartPixels(int *mStartD, int *mStartA, int *gOffsetD, int *gOffsetA)
{ {
for(int i=0; i<numberWindows; i++) for(int i=0; i<numberWindows; i++)
{ {
masterStartPixelDown[i] = mStartD[i]; masterStartPixelDown[i] = mStartD[i];
grossOffsetDown[i] = gOffsetD[i]; grossOffsetDown[i] = gOffsetD[i];
slaveStartPixelDown[i] = masterStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw; slaveStartPixelDown[i] = masterStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
masterStartPixelAcross[i] = mStartA[i]; masterStartPixelAcross[i] = mStartA[i];
grossOffsetAcross[i] = gOffsetA[i]; grossOffsetAcross[i] = gOffsetA[i];
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw; slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
} }
setChunkStartPixels(); setChunkStartPixels();
@ -160,7 +170,7 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int *gOffsetD,
masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw; masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
grossOffsetAcross[i] = gOffsetA[i]; grossOffsetAcross[i] = gOffsetA[i];
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw; slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
} }
} }
setChunkStartPixels(); setChunkStartPixels();
} }
@ -179,60 +189,60 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int gOffsetD, i
masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw; masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
grossOffsetAcross[i] = gOffsetA; grossOffsetAcross[i] = gOffsetA;
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw; slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
} }
} }
setChunkStartPixels(); setChunkStartPixels();
} }
void cuAmpcorParameter::setChunkStartPixels() void cuAmpcorParameter::setChunkStartPixels()
{ {
maxMasterChunkHeight = 0; maxMasterChunkHeight = 0;
maxMasterChunkWidth = 0; maxMasterChunkWidth = 0;
maxSlaveChunkHeight = 0; maxSlaveChunkHeight = 0;
maxSlaveChunkWidth = 0; maxSlaveChunkWidth = 0;
for(int ichunk=0; ichunk <numberChunkDown; ichunk++) for(int ichunk=0; ichunk <numberChunkDown; ichunk++)
{ {
for (int jchunk =0; jchunk<numberChunkAcross; jchunk++) for (int jchunk =0; jchunk<numberChunkAcross; jchunk++)
{ {
int idxChunk = ichunk*numberChunkAcross+jchunk; int idxChunk = ichunk*numberChunkAcross+jchunk;
int mChunkSD = masterImageHeight; int mChunkSD = masterImageHeight;
int mChunkSA = masterImageWidth; int mChunkSA = masterImageWidth;
int mChunkED = 0; int mChunkED = 0;
int mChunkEA = 0; int mChunkEA = 0;
int sChunkSD = slaveImageHeight; int sChunkSD = slaveImageHeight;
int sChunkSA = slaveImageWidth; int sChunkSA = slaveImageWidth;
int sChunkED = 0; int sChunkED = 0;
int sChunkEA = 0; int sChunkEA = 0;
// modified 02/12/2018 // modified 02/12/2018
int numberWindowDownInChunkRun = numberWindowDownInChunk; int numberWindowDownInChunkRun = numberWindowDownInChunk;
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk; int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
// modify the number of windows in last chunk // modify the number of windows in last chunk
if(ichunk == numberChunkDown -1) if(ichunk == numberChunkDown -1)
numberWindowDownInChunkRun = numberWindowDown - numberWindowDownInChunk*(numberChunkDown -1); numberWindowDownInChunkRun = numberWindowDown - numberWindowDownInChunk*(numberChunkDown -1);
if(jchunk == numberChunkAcross -1) if(jchunk == numberChunkAcross -1)
numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1); numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1);
for(int i=0; i<numberWindowDownInChunkRun; i++) for(int i=0; i<numberWindowDownInChunkRun; i++)
{ {
for(int j=0; j<numberWindowAcrossInChunkRun; j++) for(int j=0; j<numberWindowAcrossInChunkRun; j++)
{ {
int idxWindow = (ichunk*numberWindowDownInChunk+i)*numberWindowAcross + (jchunk*numberWindowAcrossInChunk+j); int idxWindow = (ichunk*numberWindowDownInChunk+i)*numberWindowAcross + (jchunk*numberWindowAcrossInChunk+j);
int vpixel = masterStartPixelDown[idxWindow]; int vpixel = masterStartPixelDown[idxWindow];
if(mChunkSD > vpixel) mChunkSD = vpixel; if(mChunkSD > vpixel) mChunkSD = vpixel;
if(mChunkED < vpixel) mChunkED = vpixel; if(mChunkED < vpixel) mChunkED = vpixel;
vpixel = masterStartPixelAcross[idxWindow]; vpixel = masterStartPixelAcross[idxWindow];
if(mChunkSA > vpixel) mChunkSA = vpixel; if(mChunkSA > vpixel) mChunkSA = vpixel;
if(mChunkEA < vpixel) mChunkEA = vpixel; if(mChunkEA < vpixel) mChunkEA = vpixel;
vpixel = slaveStartPixelDown[idxWindow]; vpixel = slaveStartPixelDown[idxWindow];
if(sChunkSD > vpixel) sChunkSD = vpixel; if(sChunkSD > vpixel) sChunkSD = vpixel;
if(sChunkED < vpixel) sChunkED = vpixel; if(sChunkED < vpixel) sChunkED = vpixel;
vpixel = slaveStartPixelAcross[idxWindow]; vpixel = slaveStartPixelAcross[idxWindow];
if(sChunkSA > vpixel) sChunkSA = vpixel; if(sChunkSA > vpixel) sChunkSA = vpixel;
if(sChunkEA < vpixel) sChunkEA = vpixel; if(sChunkEA < vpixel) sChunkEA = vpixel;
} }
} }
@ -261,58 +271,58 @@ void cuAmpcorParameter::checkPixelInImageRange()
for(int col = 0; col < numberWindowAcross; col++) for(int col = 0; col < numberWindowAcross; col++)
{ {
int i = row*numberWindowAcross + col; int i = row*numberWindowAcross + col;
if(masterStartPixelDown[i] <0) if(masterStartPixelDown[i] <0)
{ {
fprintf(stderr, "Master Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, masterStartPixelDown[i]); fprintf(stderr, "Master Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, masterStartPixelDown[i]);
exit(EXIT_FAILURE); //or raise range error exit(EXIT_FAILURE); //or raise range error
} }
if(masterStartPixelAcross[i] <0) if(masterStartPixelAcross[i] <0)
{ {
fprintf(stderr, "Master Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, masterStartPixelAcross[i]); fprintf(stderr, "Master Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, masterStartPixelAcross[i]);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
endPixel = masterStartPixelDown[i] + windowSizeHeightRaw; endPixel = masterStartPixelDown[i] + windowSizeHeightRaw;
if(endPixel >= masterImageHeight) if(endPixel >= masterImageHeight)
{ {
fprintf(stderr, "Master Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel); fprintf(stderr, "Master Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw; endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw;
if(endPixel >= masterImageWidth) if(endPixel >= masterImageWidth)
{ {
fprintf(stderr, "Master Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel); fprintf(stderr, "Master Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
//slave //slave
if(slaveStartPixelDown[i] <0) if(slaveStartPixelDown[i] <0)
{ {
fprintf(stderr, "Slave Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, slaveStartPixelDown[i]); fprintf(stderr, "Slave Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, slaveStartPixelDown[i]);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if(slaveStartPixelAcross[i] <0) if(slaveStartPixelAcross[i] <0)
{ {
fprintf(stderr, "Slave Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, slaveStartPixelAcross[i]); fprintf(stderr, "Slave Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, slaveStartPixelAcross[i]);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw; endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw;
if(endPixel >= slaveImageHeight) if(endPixel >= slaveImageHeight)
{ {
fprintf(stderr, "Slave Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel); fprintf(stderr, "Slave Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw; endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw;
if(endPixel >= slaveImageWidth) if(endPixel >= slaveImageWidth)
{ {
fprintf(stderr, "Slave Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel); fprintf(stderr, "Slave Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
} }
} }
} }
cuAmpcorParameter::~cuAmpcorParameter() cuAmpcorParameter::~cuAmpcorParameter()
{ {
deallocateArrays(); deallocateArrays();
} }

View File

@ -1,7 +1,7 @@
/** /**
* cuAmpcorParameter.h * cuAmpcorParameter.h
* Header file for Ampcor Parameter Class * Header file for Ampcor Parameter Class
* *
* Author: Lijun Zhu @ Seismo Lab, Caltech * Author: Lijun Zhu @ Seismo Lab, Caltech
* March 2017 * March 2017
*/ */
@ -12,13 +12,13 @@
#include <string> #include <string>
/// Class container for all parameters /// Class container for all parameters
/// ///
/// @note /// @note
/// The dimension/direction names used are: /// The dimension/direction names used are:
/// The inner-most dimension: x, row, height, down, azimuth, along the track. /// The inner-most dimension: x, row, height, down, azimuth, along the track.
/// The outer-most dimension: y, column, width, across, range, along the sight. /// The outer-most dimension: y, column, width, across, range, along the sight.
/// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j] /// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j]
/// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH] /// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH]
/// @note /// @note
/// Common procedures to use cuAmpcorParameter /// Common procedures to use cuAmpcorParameter
@ -27,72 +27,74 @@
/// 3. Call setupParameters() to determine related parameters and allocate starting pixels for each window: param->setupParameters() /// 3. Call setupParameters() to determine related parameters and allocate starting pixels for each window: param->setupParameters()
/// 4. Provide/set Master window starting pixel(s), and gross offset(s): param->setStartPixels(masterStartDown, masterStartAcross, grossOffsetDown, grossOffsetAcross) /// 4. Provide/set Master window starting pixel(s), and gross offset(s): param->setStartPixels(masterStartDown, masterStartAcross, grossOffsetDown, grossOffsetAcross)
/// 4a. Optionally, check the range of windows is within the SLC image range: param->checkPixelInImageRange() /// 4a. Optionally, check the range of windows is within the SLC image range: param->checkPixelInImageRange()
/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used /// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used
class cuAmpcorParameter{ class cuAmpcorParameter{
public: public:
int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain
int deviceID; /// Targeted GPU device ID: use -1 to auto select int deviceID; /// Targeted GPU device ID: use -1 to auto select
int nStreams; /// Number of streams to asynchonize data transfers and compute kernels int nStreams; /// Number of streams to asynchonize data transfers and compute kernels
int derampMethod; /// Method for deramping 0=None, 1=average, 2=phase gradient int derampMethod; /// Method for deramping 0=None, 1=average, 2=phase gradient
// chip or window size for raw data // chip or window size for raw data
int windowSizeHeightRaw; /// Template window height (original size) int windowSizeHeightRaw; /// Template window height (original size)
int windowSizeWidthRaw; /// Template window width (original size) int windowSizeWidthRaw; /// Template window width (original size)
int searchWindowSizeHeightRaw; /// Search window height (original size) int searchWindowSizeHeightRaw; /// Search window height (original size)
int searchWindowSizeWidthRaw; /// Search window width (orignal size) int searchWindowSizeWidthRaw; /// Search window width (orignal size)
int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2 int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
int halfSearchRangeAcrossRaw; ///(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2 int halfSearchRangeAcrossRaw; ///(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
// search range is (-halfSearchRangeRaw, halfSearchRangeRaw) // search range is (-halfSearchRangeRaw, halfSearchRangeRaw)
int searchWindowSizeHeightRawZoomIn; int searchWindowSizeHeightRawZoomIn;
int searchWindowSizeWidthRawZoomIn; int searchWindowSizeWidthRawZoomIn;
int corrRawZoomInHeight; // window to estimate snr
int corrRawZoomInWidth;
// chip or window size after oversampling // chip or window size after oversampling
int rawDataOversamplingFactor; /// Raw data overampling factor (from original size to oversampled size) int rawDataOversamplingFactor; /// Raw data overampling factor (from original size to oversampled size)
int windowSizeHeight; /// Template window length (oversampled size) int windowSizeHeight; /// Template window length (oversampled size)
int windowSizeWidth; /// Template window width (original size) int windowSizeWidth; /// Template window width (original size)
int searchWindowSizeHeight; /// Search window height (oversampled size) int searchWindowSizeHeight; /// Search window height (oversampled size)
int searchWindowSizeWidth; /// Search window width (oversampled size) int searchWindowSizeWidth; /// Search window width (oversampled size)
// strides between chips/windows // strides between chips/windows
int skipSampleDownRaw; /// Skip size between neighboring windows in Down direction (original size) int skipSampleDownRaw; /// Skip size between neighboring windows in Down direction (original size)
int skipSampleAcrossRaw; /// Skip size between neighboring windows in across direction (original size) int skipSampleAcrossRaw; /// Skip size between neighboring windows in across direction (original size)
//int skipSampleDown; /// Skip size between neighboring windows in Down direction (oversampled size) //int skipSampleDown; /// Skip size between neighboring windows in Down direction (oversampled size)
//int skipSampleAcross; /// Skip size between neighboring windows in Across direction (oversampled size) //int skipSampleAcross; /// Skip size between neighboring windows in Across direction (oversampled size)
// Zoom in region near location of max correlation // Zoom in region near location of max correlation
int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions) int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions)
int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor
int oversamplingFactor; /// Oversampling factor for interpolating correlation surface int oversamplingFactor; /// Oversampling factor for interpolating correlation surface
int oversamplingMethod; /// 0 = fft (default) 1 = sinc int oversamplingMethod; /// 0 = fft (default) 1 = sinc
float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data
//master image //master image
std::string masterImageName; /// master SLC image name std::string masterImageName; /// master SLC image name
int imageDataType1; /// master image data type, 2=cfloat=complex=float2 1=float int imageDataType1; /// master image data type, 2=cfloat=complex=float2 1=float
int masterImageHeight; /// master image height int masterImageHeight; /// master image height
int masterImageWidth; /// master image width int masterImageWidth; /// master image width
//slave image //slave image
std::string slaveImageName; /// slave SLC image name std::string slaveImageName; /// slave SLC image name
int imageDataType2; /// slave image data type, 2=cfloat=complex=float2 1=float int imageDataType2; /// slave image data type, 2=cfloat=complex=float2 1=float
int slaveImageHeight; /// slave image height int slaveImageHeight; /// slave image height
int slaveImageWidth; /// slave image width int slaveImageWidth; /// slave image width
// total number of chips/windows // total number of chips/windows
int numberWindowDown; /// number of total windows (down) int numberWindowDown; /// number of total windows (down)
int numberWindowAcross; /// number of total windows (across) int numberWindowAcross; /// number of total windows (across)
int numberWindows; /// numberWindowDown*numberWindowAcross int numberWindows; /// numberWindowDown*numberWindowAcross
// number of chips/windows in a batch/chunk // number of chips/windows in a batch/chunk
int numberWindowDownInChunk; /// number of windows processed in a chunk (down) int numberWindowDownInChunk; /// number of windows processed in a chunk (down)
int numberWindowAcrossInChunk; /// number of windows processed in a chunk (across) int numberWindowAcrossInChunk; /// number of windows processed in a chunk (across)
@ -100,20 +102,21 @@ public:
int numberChunkDown; /// number of chunks (down) int numberChunkDown; /// number of chunks (down)
int numberChunkAcross; /// number of chunks (across) int numberChunkAcross; /// number of chunks (across)
int numberChunks; int numberChunks;
int mmapSizeInGB; int useMmap; /// whether to use mmap 0=not 1=yes (default = 0)
int mmapSizeInGB; /// size for mmap buffer(useMmap=1) or a cpu memory buffer (useMmap=0)
int masterStartPixelDown0; int masterStartPixelDown0;
int masterStartPixelAcross0; int masterStartPixelAcross0;
int *masterStartPixelDown; /// master starting pixels for each window (down) int *masterStartPixelDown; /// master starting pixels for each window (down)
int *masterStartPixelAcross;/// master starting pixels for each window (across) int *masterStartPixelAcross;/// master starting pixels for each window (across)
int *slaveStartPixelDown; /// slave starting pixels for each window (down) int *slaveStartPixelDown; /// slave starting pixels for each window (down)
int *slaveStartPixelAcross; /// slave starting pixels for each window (across) int *slaveStartPixelAcross; /// slave starting pixels for each window (across)
int grossOffsetDown0; int grossOffsetDown0;
int grossOffsetAcross0; int grossOffsetAcross0;
int *grossOffsetDown; /// Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel int *grossOffsetDown; /// Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel
int *grossOffsetAcross; /// Gross offsets between master and slave windows (across) int *grossOffsetAcross; /// Gross offsets between master and slave windows (across)
int *masterChunkStartPixelDown; int *masterChunkStartPixelDown;
int *masterChunkStartPixelAcross; int *masterChunkStartPixelAcross;
int *slaveChunkStartPixelDown; int *slaveChunkStartPixelDown;
@ -124,18 +127,19 @@ public:
int *slaveChunkWidth; int *slaveChunkWidth;
int maxMasterChunkHeight, maxMasterChunkWidth; int maxMasterChunkHeight, maxMasterChunkWidth;
int maxSlaveChunkHeight, maxSlaveChunkWidth; int maxSlaveChunkHeight, maxSlaveChunkWidth;
std::string grossOffsetImageName; std::string grossOffsetImageName;
std::string offsetImageName; /// Output Offset fields filename std::string offsetImageName; /// Output Offset fields filename
std::string snrImageName; /// Output SNR filename std::string snrImageName; /// Output SNR filename
std::string covImageName;
cuAmpcorParameter(); /// Class constructor and default parameters setter cuAmpcorParameter(); /// Class constructor and default parameters setter
~cuAmpcorParameter(); /// Class descontructor ~cuAmpcorParameter(); /// Class descontructor
void allocateArrays(); /// Allocate various arrays after the number of Windows is given void allocateArrays(); /// Allocate various arrays after the number of Windows is given
void deallocateArrays(); /// Deallocate arrays on exit void deallocateArrays(); /// Deallocate arrays on exit
/// Three methods to set master/slave starting pixels and gross offsets from input master start pixel(s) and gross offset(s) /// Three methods to set master/slave starting pixels and gross offsets from input master start pixel(s) and gross offset(s)
/// 1 (int *, int *, int *, int *): varying master start pixels and gross offsets /// 1 (int *, int *, int *, int *): varying master start pixels and gross offsets
/// 2 (int, int, int *, int *): fixed master start pixel (first window) and varying gross offsets /// 2 (int, int, int *, int *): fixed master start pixel (first window) and varying gross offsets
@ -144,7 +148,7 @@ public:
void setStartPixels(int, int, int*, int*); void setStartPixels(int, int, int*, int*);
void setStartPixels(int, int, int, int); void setStartPixels(int, int, int, int);
void setChunkStartPixels(); void setChunkStartPixels();
void checkPixelInImageRange(); /// check whether void checkPixelInImageRange(); /// check whether
void setupParameters(); /// Process other parameters after Python Input void setupParameters(); /// Process other parameters after Python Input
}; };

View File

@ -1,10 +1,10 @@
/* /*
* cuAmpcorUtil.h * cuAmpcorUtil.h
* header file to include the various routines for ampcor * header file to include the various routines for ampcor
* serves as an index * serves as an index
*/ */
#ifndef __CUAMPCORUTIL_H #ifndef __CUAMPCORUTIL_H
#define __CUMAPCORUTIL_H #define __CUMAPCORUTIL_H
@ -18,20 +18,27 @@
//in cuArraysCopy.cu: various utitlies for copy images file in gpu memory //in cuArraysCopy.cu: various utitlies for copy images file in gpu memory
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream); void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream);
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2, void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream); const int *offsetH, const int* offsetW, cudaStream_t stream);
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2, void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream);
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream); const int *offsetH, const int* offsetW, cudaStream_t stream);
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream); void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream);
// same routine name overloaded for different data type
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int2 offset, cudaStream_t stream); void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream); void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, int offsetX, int offersetY, cudaStream_t stream); void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream); void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream); void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream); void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
@ -46,8 +53,8 @@ void cuDerampMethod2(cuArrays<float2> *images, cudaStream_t stream);
void cpuDerampMethod3(cuArrays<float2> *images, cudaStream_t stream); void cpuDerampMethod3(cuArrays<float2> *images, cudaStream_t stream);
//in cuArraysPadding.cu: various utilities for oversampling padding //in cuArraysPadding.cu: various utilities for oversampling padding
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream); void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream); void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream); void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream); void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream); void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
@ -57,21 +64,21 @@ void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream);
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream); void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
//in cuOffset.cu: utitilies for determining the max locaiton of cross correlations or the offset //in cuOffset.cu: utitilies for determining the max locaiton of cross correlations or the offset
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cudaStream_t stream); void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cudaStream_t stream);
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream); void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream);
void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn, cuArrays<float2> *offsetFinal, void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn, cuArrays<float2> *offsetFinal,
int OverSampleRatioZoomin, int OverSampleRatioRaw, int OverSampleRatioZoomin, int OverSampleRatioRaw,
int xHalfRangeInit, int yHalfRangeInit, int xHalfRangeZoomIn, int yHalfRangeZoomIn, int xHalfRangeInit, int yHalfRangeInit, int xHalfRangeZoomIn, int yHalfRangeZoomIn,
cudaStream_t stream); cudaStream_t stream);
void cuDetermineInterpZone(cuArrays<int2> *maxloc, cuArrays<int2> *zoomInOffset, cuArrays<float> *corrOrig, cuArrays<float> *corrZoomIn, cudaStream_t stream); void cuDetermineInterpZone(cuArrays<int2> *maxloc, cuArrays<int2> *zoomInOffset, cuArrays<float> *corrOrig, cuArrays<float> *corrZoomIn, cudaStream_t stream);
void cuDetermineSlaveExtractOffset(cuArrays<int2> *maxLoc, int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream); void cuDetermineSlaveExtractOffset(cuArrays<int2> *maxLoc, int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream);
//in cuCorrTimeDomain.cu: cross correlation in time domain //in cuCorrTimeDomain.cu: cross correlation in time domain
void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream); void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
//in cuCorrFrequency.cu: cross correlation in freq domain, also include fft correlatior class //in cuCorrFrequency.cu: cross correlation in freq domain, also include fft correlatior class
void cuArraysElementMultiply(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream); void cuArraysElementMultiply(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2> *image2, float coef, cudaStream_t stream); void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2> *image2, float coef, cudaStream_t stream);
@ -80,7 +87,11 @@ void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2>
void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int> *imagesValid, cuArrays<int2> *maxloc, cudaStream_t stream); void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int> *imagesValid, cuArrays<int2> *maxloc, cudaStream_t stream);
// implemented in cuCorrNormalization.cu // implemented in cuCorrNormalization.cu
void cuArraysSumCorr(cuArrays<float> *images, cuArrays<int> *imagesValid, cuArrays<float> *imagesSum, cuArrays<int> *imagesValidCount, cudaStream_t stream); void cuArraysSumCorr(cuArrays<float> *images, cuArrays<int> *imagesValid, cuArrays<float> *imagesSum, cuArrays<int> *imagesValidCount, cudaStream_t stream);
// implemented in cuEstimateStats.cu // implemented in cuEstimateStats.cu
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream); void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream);
#endif // implemented in cuEstimateStats.cu
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
#endif

View File

@ -154,8 +154,21 @@
file.write((char *)data, size*count*sizeof(float2)); file.write((char *)data, size*count*sizeof(float2));
file.close(); file.close();
} }
template<>
void cuArrays<float3>::outputToFile(std::string fn, cudaStream_t stream)
{
float *data;
data = (float *)malloc(size*count*sizeof(float3));
checkCudaErrors(cudaMemcpyAsync(data, devData, size*count*sizeof(float3), cudaMemcpyDeviceToHost, stream));
std::ofstream file;
file.open(fn.c_str(), std::ios_base::binary);
file.write((char *)data, size*count*sizeof(float3));
file.close();
}
template class cuArrays<float>; template class cuArrays<float>;
template class cuArrays<float2>; template class cuArrays<float2>;
template class cuArrays<float3>;
template class cuArrays<int2>; template class cuArrays<int2>;
template class cuArrays<int>; template class cuArrays<int>;

View File

@ -4,7 +4,7 @@
* Lijun Zhu @ Seismo Lab, Caltech * Lijun Zhu @ Seismo Lab, Caltech
* v1.0 Jan 2017 * v1.0 Jan 2017
*/ */
#include "cuArrays.h" #include "cuArrays.h"
#include "cudaUtil.h" #include "cudaUtil.h"
#include "cudaError.h" #include "cudaError.h"
@ -16,14 +16,14 @@ inline __device__ float cuAbs(float2 a)
return sqrtf(a.x*a.x+a.y*a.y); return sqrtf(a.x*a.x+a.y*a.y);
}*/ }*/
//copy a chunk into a series of chips // copy a chunk into a batch of chips for a given stride
__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY,
float2 *imageOut, const int outNX, const int outNY, float2 *imageOut, const int outNX, const int outNY,
const int nImagesX, const int nImagesY, const int nImagesX, const int nImagesY,
const int strideX, const int strideY) const int strideX, const int strideY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return; if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy; int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -33,8 +33,7 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
} }
//tested void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
int strideH, int strideW, cudaStream_t stream) int strideH, int strideW, cudaStream_t stream)
{ {
const int nthreads = NTHREADS2D; const int nthreads = NTHREADS2D;
@ -48,12 +47,14 @@ void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
getLastCudaError("cuArraysCopyToBatch_kernel"); getLastCudaError("cuArraysCopyToBatch_kernel");
} }
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages, // copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int *offsetX, const int *offsetY) const int *offsetX, const int *offsetY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage>=nImages || outx >= outNX || outy >= outNY) return; if(idxImage>=nImages || outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy; int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -61,11 +62,8 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
} }
/// @param[in] image1 input image in a large chunk // lda1 (inNY) is the leading dimension of image1, usually, its width
/// @param[in] lda1 width of image 1 void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
/// @param[out] image2 output image with a batch of small windows
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream) const int *offsetH, const int* offsetW, cudaStream_t stream)
{ {
const int nthreads = 16; const int nthreads = 16;
@ -79,12 +77,13 @@ void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuA
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel"); getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
} }
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY, // copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes)
float2 *imageOut, const int outNX, const int outNY, const int nImages, __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int *offsetX, const int *offsetY) const int *offsetX, const int *offsetY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage>=nImages || outx >= outNX || outy >= outNY) return; if(idxImage>=nImages || outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy; int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -92,7 +91,7 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0); imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
} }
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2, void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream) const int *offsetH, const int* offsetW, cudaStream_t stream)
{ {
const int nthreads = 16; const int nthreads = 16;
@ -106,14 +105,42 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1,
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel"); getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
} }
// Gather a batch of chips out of a real-valued chunk, one chip per batch entry, each
// starting at its own (offsetX[i], offsetY[i]) position in the chunk. Every sample is
// written as a complex value whose real part is the input and whose imaginary part is 0.
//   imageIn   real-valued source chunk; inNY is used as its row pitch
//   imageOut  complex output, laid out as nImages consecutive outNX*outNY chips
//   offsetX/offsetY  per-chip starting row/column inside imageIn
// One thread handles one output sample; blockIdx.z selects the chip.
// The source index is not range-checked here, so the offsets must keep each chip inside imageIn.
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
    float2 *imageOut, const int outNX, const int outNY, const int nImages,
    const int *offsetX, const int *offsetY)
{
    const int chip = blockIdx.z;
    const int row = threadIdx.x + blockDim.x*blockIdx.x;
    const int col = threadIdx.y + blockDim.y*blockIdx.y;
    // only threads that land on a valid sample of a valid chip do any work
    if(chip < nImages && row < outNX && col < outNY)
    {
        const int dst = chip*outNX*outNY + row*outNY + col;
        const int src = (offsetX[chip]+row)*inNY + offsetY[chip] + col;
        imageOut[dst] = make_float2(imageIn[src], 0.0f);
    }
}
// Host-side launcher for cuArraysCopyToBatchWithOffsetR2C_kernel.
//   image1   real-valued source chunk
//   lda1     forwarded to the kernel as inNY (row pitch of image1)
//   image2   complex destination batch; its height/width/count size the launch grid
//   offsetH/offsetW  per-chip start offsets, forwarded as the kernel's offsetX/offsetY
//                    (dereferenced on the device -- presumably device pointers, confirm with callers)
//   stream   CUDA stream the kernel is queued on
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
    const int *offsetH, const int* offsetW, cudaStream_t stream)
{
    const int tile = 16;
    dim3 threads(tile, tile, 1);
    // x covers chip rows (height), y covers chip columns (width), z enumerates the chips
    dim3 blocks(IDIVUP(image2->height, tile), IDIVUP(image2->width, tile), image2->count);
    cuArraysCopyToBatchWithOffsetR2C_kernel<<<blocks, threads, 0, stream>>>(
        image1->devData, lda1,
        image2->devData, image2->height, image2->width, image2->count,
        offsetH, offsetW);
    getLastCudaError("cuArraysCopyToBatchWithOffsetR2C_kernel");
}
//copy a chunk into a series of chips //copy a chunk into a series of chips
__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, float *imageOut, const int outNX, const int outNY,
const int nImagesX, const int nImagesY, const int nImagesX, const int nImagesY,
const int strideX, const int strideY, const float factor) const int strideX, const int strideY, const float factor)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return; if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy; int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -121,17 +148,17 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co
int idxImageY = idxImage%nImagesY; int idxImageY = idxImage%nImagesY;
int idxIn = (idxImageX*strideX+outx)*inNY + idxImageY*strideY+outy; int idxIn = (idxImageX*strideX+outx)*inNY + idxImageY*strideY+outy;
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor; imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
//printf( "%d\n", idxOut); //printf( "%d\n", idxOut);
} }
//tested //tested
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
int strideH, int strideW, cudaStream_t stream) int strideH, int strideW, cudaStream_t stream)
{ {
const int nthreads = 16; const int nthreads = 16;
dim3 blockSize(nthreads, nthreads, 1); dim3 blockSize(nthreads, nthreads, 1);
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count); dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
float factor = 1.0f/image1->size; //the FFT factor float factor = 1.0f/image1->size; //the FFT factor
cuArraysCopyC2R_kernel<<<gridSize,blockSize, 0 , stream>>> ( cuArraysCopyC2R_kernel<<<gridSize,blockSize, 0 , stream>>> (
image1->devData, image1->height, image1->width, image1->devData, image1->height, image1->width,
image2->devData, image2->height, image2->width, image2->devData, image2->height, image2->width,
@ -141,22 +168,22 @@ void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
} }
__global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, const int nImages, float *imageOut, const int outNX, const int outNY, const int nImages,
const int2 *offsets) const int2 *offsets)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y; int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
} }
} }
/* copy a tile of images to another image, with starting pixels offsets /* copy a tile of images to another image, with starting pixels offsets
* param[in] imageIn inut images * param[in] imageIn inut images
* param[out] imageOut output images of dimension nImages*outNX*outNY * param[out] imageOut output images of dimension nImages*outNX*outNY
*/ */
@ -166,24 +193,24 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
const int nthreads = 16; const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads,1); dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtractVaryingOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width, cuArraysCopyExtractVaryingOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData); imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData);
getLastCudaError("cuArraysCopyExtract error"); getLastCudaError("cuArraysCopyExtract error");
} }
__global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, const int inNX, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages, float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int2 *offsets) const int2 *offsets)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y; int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
} }
} }
@ -194,7 +221,7 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
const int nthreads = 16; const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads,1); dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtractVaryingOffset_C2C<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width, cuArraysCopyExtractVaryingOffset_C2C<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData); imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData);
getLastCudaError("cuArraysCopyExtractC2C error"); getLastCudaError("cuArraysCopyExtractC2C error");
@ -202,26 +229,29 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
// correlation surface extraction (Minyan Zhong) // correlation surface extraction (Minyan Zhong)
__global__ void cuArraysCopyExtractVaryingOffsetCorr(const float *imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyExtractVaryingOffsetCorr(const float *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages, float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages,
const int2 *maxloc) const int2 *maxloc)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x; // One thread per out point. Find the coordinates within the current image.
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
// Find the corresponding input.
int inx = outx + maxloc[idxImage].x - outNX/2; int inx = outx + maxloc[idxImage].x - outNX/2;
int iny = outy + maxloc[idxImage].y - outNY/2; int iny = outy + maxloc[idxImage].y - outNY/2;
if (outx < outNX && outy < outNY) if (outx < outNX && outy < outNY)
{ {
// Find the location in full array.
int idxOut = ( blockIdx.z * outNX + outx ) * outNY + outy; int idxOut = ( blockIdx.z * outNX + outx ) * outNY + outy;
int idxIn = ( blockIdx.z * inNX + inx ) * inNY + iny; int idxIn = ( blockIdx.z * inNX + inx ) * inNY + iny;
if (inx>=0 && iny>=0 && inx<inNX && iny<inNY) { if (inx>=0 && iny>=0 && inx<inNX && iny<inNY) {
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
imageValid[idxOut] = 1; imageValid[idxOut] = 1;
} }
@ -255,21 +285,21 @@ void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesO
__global__ void cuArraysCopyExtractFixedOffset(const float *imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyExtractFixedOffset(const float *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, const int nImages, float *imageOut, const int outNX, const int outNY, const int nImages,
const int offsetX, const int offsetY) const int offsetX, const int offsetY)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
} }
} }
/* copy a tile of images to another image, with starting pixels offsets /* copy a tile of images to another image, with starting pixels offsets
* param[in] imageIn inut images * param[in] imageIn inut images
* param[out] imageOut output images of dimension nImages*outNX*outNY * param[out] imageOut output images of dimension nImages*outNX*outNY
*/ */
@ -279,23 +309,24 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
const int nthreads = 16; const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads,1); dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtractFixedOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width, cuArraysCopyExtractFixedOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y); imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtract error"); getLastCudaError("cuArraysCopyExtract error");
} }
//
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages, float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int offsetX, const int offsetY) const int offsetX, const int offsetY)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
} }
} }
@ -311,27 +342,64 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
//imagesIn->debuginfo(stream); //imagesIn->debuginfo(stream);
//imagesOut->debuginfo(stream); //imagesOut->debuginfo(stream);
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>> cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
(imagesIn->devData, imagesIn->height, imagesIn->width, (imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y); imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtractC2C error"); getLastCudaError("cuArraysCopyExtractC2C error");
} }
//
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY, // float3
float *imageOut, const int outNX, const int outNY, const int nImages, __global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
float3 *imageOut, const int outNX, const int outNY, const int nImages,
const int offsetX, const int offsetY) const int offsetX, const int offsetY)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy; int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY; int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
imageOut[idxOut] = imageIn[idxIn];
}
}
// Extract a window from every float3 image of a batch, using one fixed starting offset.
//   imagesIn   source batch
//   imagesOut  destination batch; its height/width/count size the launch grid
//   offset     (x, y) start of the window inside each source image
//   stream     CUDA stream the kernel is queued on
// Dispatches to the float3 overload of cuArraysCopyExtract_C2C_FixedOffset.
// No check is made here that the window fits inside the source images.
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream)
{
    const int tile = NTHREADS2D;
    dim3 threads(tile, tile, 1);
    // one z-slice of the grid per image in the output batch
    dim3 blocks(IDIVUP(imagesOut->height, tile), IDIVUP(imagesOut->width, tile), imagesOut->count);
    cuArraysCopyExtract_C2C_FixedOffset<<<blocks, threads, 0, stream>>>(
        imagesIn->devData, imagesIn->height, imagesIn->width,
        imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count,
        offset.x, offset.y);
    getLastCudaError("cuArraysCopyExtractFloat3 error");
}
//
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, const int nImages,
const int offsetX, const int offsetY)
{
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY)
{
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
imageOut[idxOut] = imageIn[idxIn].x; imageOut[idxOut] = imageIn[idxIn].x;
} }
} }
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream) void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream)
{ {
//assert(imagesIn->height >= imagesOut && inNY >= outNY); //assert(imagesIn->height >= imagesOut && inNY >= outNY);
@ -339,16 +407,16 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut,
dim3 threadsperblock(nthreads, nthreads,1); dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count); dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtract_C2R_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>> cuArraysCopyExtract_C2R_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
(imagesIn->devData, imagesIn->height, imagesIn->width, (imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y); imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtractC2C error"); getLastCudaError("cuArraysCopyExtractC2C error");
} }
//
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
float2* imageOut, const int outNY, const int offsetX, const int offsetY) float2* imageOut, const int outNY, const int offsetX, const int offsetY)
{ {
int inx = threadIdx.x + blockDim.x*blockIdx.x; int inx = threadIdx.x + blockDim.x*blockIdx.x;
int iny = threadIdx.y + blockDim.y*blockIdx.y; int iny = threadIdx.y + blockDim.y*blockIdx.y;
if(inx < inNX && iny < inNY) { if(inx < inNX && iny < inNY) {
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY); int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
@ -363,16 +431,40 @@ void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, i
const int nthreads = 16; const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads); dim3 threadsperblock(nthreads, nthreads);
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads)); dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
imageOut->devData, imageOut->width, offsetX, offsetY);
getLastCudaError("cuArraysCopyInsert error");
}
//
// float3
// Paste a float3 image into a larger float3 image at position (offsetX, offsetY).
//   imageIn   source image of inNX x inNY samples
//   imageOut  destination image; outNY is used as its row pitch
// One thread copies one source sample. The destination index is not range-checked.
__global__ void cuArraysCopyInsert_kernel(const float3* imageIn, const int inNX, const int inNY,
    float3* imageOut, const int outNY, const int offsetX, const int offsetY)
{
    const int row = threadIdx.x + blockDim.x*blockIdx.x;
    const int col = threadIdx.y + blockDim.y*blockIdx.y;
    // threads outside the source image have nothing to copy
    if(row >= inNX || col >= inNY) return;

    const float3 sample = imageIn[IDX2R(row, col, inNY)];
    imageOut[IDX2R(row+offsetX, col+offsetY, outNY)] = make_float3(sample.x, sample.y, sample.z);
}
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
{
const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads);
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
imageOut->devData, imageOut->width, offsetX, offsetY); imageOut->devData, imageOut->width, offsetX, offsetY);
getLastCudaError("cuArraysCopyInsert error"); getLastCudaError("cuArraysCopyInsert error");
} }
//
__global__ void cuArraysCopyInsert_kernel(const float* imageIn, const int inNX, const int inNY, __global__ void cuArraysCopyInsert_kernel(const float* imageIn, const int inNX, const int inNY,
float* imageOut, const int outNY, const int offsetX, const int offsetY) float* imageOut, const int outNY, const int offsetX, const int offsetY)
{ {
int inx = threadIdx.x + blockDim.x*blockIdx.x; int inx = threadIdx.x + blockDim.x*blockIdx.x;
int iny = threadIdx.y + blockDim.y*blockIdx.y; int iny = threadIdx.y + blockDim.y*blockIdx.y;
if(inx < inNX && iny < inNY) { if(inx < inNX && iny < inNY) {
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY); int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
@ -387,18 +479,44 @@ void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int
const int nthreads = 16; const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads); dim3 threadsperblock(nthreads, nthreads);
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads)); dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
imageOut->devData, imageOut->width, offsetX, offsetY); imageOut->devData, imageOut->width, offsetX, offsetY);
getLastCudaError("cuArraysCopyInsert Float error"); getLastCudaError("cuArraysCopyInsert Float error");
} }
//
// Paste an int image into a larger int image at position (offsetX, offsetY).
//   imageIn   source image of inNX x inNY samples
//   imageOut  destination image; outNY is used as its row pitch
// One thread copies one source sample. The destination index is not range-checked.
__global__ void cuArraysCopyInsert_kernel(const int* imageIn, const int inNX, const int inNY,
    int* imageOut, const int outNY, const int offsetX, const int offsetY)
{
    const int row = threadIdx.x + blockDim.x*blockIdx.x;
    const int col = threadIdx.y + blockDim.y*blockIdx.y;
    // threads outside the source image have nothing to copy
    if(row >= inNX || col >= inNY) return;

    const int src = IDX2R(row, col, inNY);
    const int dst = IDX2R(row+offsetX, col+offsetY, outNY);
    imageOut[dst] = imageIn[src];
}
// Host-side launcher: insert an int image into a larger int image at (offsetX, offsetY).
//   imageIn   source image; its height/width size the launch grid
//   imageOut  destination image; its width is passed to the kernel as the row pitch
//   stream    CUDA stream the kernel is queued on
// The grid is 2D, so only a single image is processed per call.
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
{
    const int tile = 16;
    dim3 threads(tile, tile);
    dim3 blocks(IDIVUP(imageIn->height, tile), IDIVUP(imageIn->width, tile));
    cuArraysCopyInsert_kernel<<<blocks, threads, 0, stream>>>(
        imageIn->devData, imageIn->height, imageIn->width,
        imageOut->devData, imageOut->width, offsetX, offsetY);
    getLastCudaError("cuArraysCopyInsert Integer error");
}
//
__global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int inNY, int sizeIn, __global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
float *imageOut, int outNX, int outNY, int sizeOut, int nImages) float *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
@ -409,27 +527,27 @@ __global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int i
} }
else else
{ imageOut[idxOut] = 0.0f; } { imageOut[idxOut] = 0.0f; }
} }
} }
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream) void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
{ {
const int nthreads = 16; const int nthreads = 16;
int nImages = imageIn->count; int nImages = imageIn->count;
dim3 blockSize(nthreads, nthreads,1); dim3 blockSize(nthreads, nthreads,1);
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages); dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
cuArraysCopyInversePadded_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size, cuArraysCopyInversePadded_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages); imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
getLastCudaError("cuArraysCopyInversePadded error"); getLastCudaError("cuArraysCopyInversePadded error");
} }
__global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY, int sizeIn, __global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
float *imageOut, int outNX, int outNY, int sizeOut, int nImages) float *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
@ -440,26 +558,26 @@ __global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY
} }
else else
{ imageOut[idxOut] = 0.0f; } { imageOut[idxOut] = 0.0f; }
} }
} }
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream) void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
{ {
const int nthreads = 16; const int nthreads = 16;
int nImages = imageIn->count; int nImages = imageIn->count;
dim3 blockSize(nthreads, nthreads,1); dim3 blockSize(nthreads, nthreads,1);
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages); dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
cuArraysCopyPadded_R2R_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size, cuArraysCopyPadded_R2R_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages); imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
getLastCudaError("cuArraysCopyPaddedR2R error"); getLastCudaError("cuArraysCopyPaddedR2R error");
} }
__global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inNY, int sizeIn, __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inNY, int sizeIn,
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages) float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
@ -468,31 +586,31 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN
int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn; int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn;
imageOut[idxOut] = imageIn[idxIn]; imageOut[idxOut] = imageIn[idxIn];
} }
else{ else{
imageOut[idxOut] = make_float2(0.0f, 0.0f); imageOut[idxOut] = make_float2(0.0f, 0.0f);
} }
} }
} }
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream) void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
{ {
const int nthreads = NTHREADS2D; const int nthreads = NTHREADS2D;
int nImages = imageIn->count; int nImages = imageIn->count;
dim3 blockSize(nthreads, nthreads,1); dim3 blockSize(nthreads, nthreads,1);
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages); dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>> cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size, (imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages); imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
getLastCudaError("cuArraysCopyInversePadded error"); getLastCudaError("cuArraysCopyInversePadded error");
} }
__global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY, int sizeIn, __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages) float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{ {
int outx = threadIdx.x + blockDim.x*blockIdx.x; int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y; int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY) if(outx < outNX && outy < outNY)
{ {
int idxImage = blockIdx.z; int idxImage = blockIdx.z;
@ -501,42 +619,42 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY
int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn; int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn;
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f); imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
} }
else{ else{
imageOut[idxOut] = make_float2(0.0f, 0.0f); imageOut[idxOut] = make_float2(0.0f, 0.0f);
} }
} }
} }
/**
 * Real-to-complex overload: enqueue cuArraysCopyPadded_R2C_kernel so that each
 * real input sample becomes the real part of a complex output sample (the
 * kernel stores make_float2(value, 0) for copied pixels and (0, 0) otherwise).
 *
 * @param imageIn   real-valued source batch
 * @param imageOut  complex destination batch; its height/width define the launch extent
 * @param stream    CUDA stream on which the kernel is enqueued
 */
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
{
    const int tile = NTHREADS2D;
    const int batch = imageIn->count;

    // x covers the output height, y the output width, z the images of the batch.
    const int tilesX = IDIVUP(imageOut->height, tile);
    const int tilesY = IDIVUP(imageOut->width, tile);
    dim3 threads(tile, tile, 1);
    dim3 blocks(tilesX, tilesY, batch);

    cuArraysCopyPadded_R2C_kernel<<<blocks, threads, 0, stream>>>(
        imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
        imageOut->devData, imageOut->height, imageOut->width, imageOut->size,
        batch);
    getLastCudaError("cuArraysCopyPadded error");
}
// Fill kernel: each thread writes `value` into at most one element of `image`.
// Threads whose flat index falls at or beyond `size` do nothing.
__global__ void cuArraysSetConstant_kernel(float *image, int size, float value)
{
    const int pos = threadIdx.x + blockDim.x*blockIdx.x;
    if (pos >= size) return;
    image[pos] = value;
}
/**
 * Set elements of a float array to a constant by launching
 * cuArraysSetConstant_kernel on the given stream.
 *
 * @param imageIn  array to fill (devData, size and getSize() are read)
 * @param value    constant written by the kernel
 * @param stream   CUDA stream on which the kernel is enqueued
 */
void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t stream)
{
    const int nthreads = 256;
    // The 1-D grid is sized from getSize() ...
    int size = imageIn->getSize();

    // ... while the kernel's upper bound is imageIn->size.
    // NOTE(review): if getSize() and size differ (e.g. total vs. per-image
    // element count), only the first imageIn->size elements are written --
    // confirm against the cuArrays definition.
    cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
        (imageIn->devData, imageIn->size, value);

    // Report failures under this function's own name; the previous label
    // ("cuArraysCopyPadded error") was copied from another routine.
    getLastCudaError("cuArraysSetConstant error");
}

View File

@ -195,7 +195,6 @@ __device__ float2 partialSums(const float v, volatile float* shmem, const int st
return make_float2(Sum, Sum2); return make_float2(Sum, Sum2);
} }
__forceinline__ __device__ int __mul(const int a, const int b) { return a*b; }
template<const int Nthreads2> template<const int Nthreads2>
__global__ void cuCorrNormalize_kernel( __global__ void cuCorrNormalize_kernel(
@ -232,7 +231,7 @@ __global__ void cuCorrNormalize_kernel(
templateSum += templateD[i]; templateSum += templateD[i];
} }
templateSum = sumReduceBlock<Nthreads>(templateSum, shmem); templateSum = sumReduceBlock<Nthreads>(templateSum, shmem);
__syncthreads();
float templateSum2 = 0.0f; float templateSum2 = 0.0f;
for (int i = tid; i < templateSize; i += Nthreads) for (int i = tid; i < templateSize; i += Nthreads)
@ -241,11 +240,12 @@ __global__ void cuCorrNormalize_kernel(
templateSum2 += t*t; templateSum2 += t*t;
} }
templateSum2 = sumReduceBlock<Nthreads>(templateSum2, shmem); templateSum2 = sumReduceBlock<Nthreads>(templateSum2, shmem);
__syncthreads();
//if(tid ==0) printf("template sum %d %g %g \n", imageIdx, templateSum, templateSum2); //if(tid ==0) printf("template sum %d %g %g \n", imageIdx, templateSum, templateSum2);
/*********/ /*********/
shmem[tid] = shmem[tid + Nthreads] = 0.0f; shmem[tid] = shmem[tid + Nthreads] = shmem[tid + 2*Nthreads] = 0.0f;
__syncthreads(); __syncthreads();
float imageSum = 0.0f; float imageSum = 0.0f;
@ -281,7 +281,7 @@ __global__ void cuCorrNormalize_kernel(
if (tid < resultNY) if (tid < resultNY)
{ {
const int ix = iaddr/imageNY; const int ix = iaddr/imageNY;
const int addr = __mul(ix-templateNX, resultNY); const int addr = (ix-templateNX)*resultNY;
//printf("test norm %d %d %d %d %f\n", tid, ix, addr, addr+tid, resultD[addr + tid]); //printf("test norm %d %d %d %d %f\n", tid, ix, addr, addr+tid, resultD[addr + tid]);

View File

@ -25,7 +25,7 @@ __global__ void cudaKernel_estimateSnr(const float* corrSum, const int* corrVali
float mean = (corrSum[idx] - maxval[idx] * maxval[idx]) / (corrValidCount[idx] - 1); float mean = (corrSum[idx] - maxval[idx] * maxval[idx]) / (corrValidCount[idx] - 1);
snrValue[idx] = maxval[idx] / mean; snrValue[idx] = maxval[idx] * maxval[idx] / mean;
} }
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream) void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream)
@ -55,7 +55,7 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
//for (int i=0; i<size; i++){ //for (int i=0; i<size; i++){
// std::cout<<corrSum->hostData[i]<<std::endl; // std::cout<<corrSum->hostData[i]<<std::endl;
// std::cout<<corrValidCount->hostData[i]<<std::endl; // std::cout<<corrValidCount->hostData[i]<<std::endl;
@ -68,3 +68,80 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
getLastCudaError("cuda kernel estimate stats error\n"); getLastCudaError("cuda kernel estimate stats error\n");
} }
// Per-surface estimate of (cov_xx, cov_yy, cov_xy) from the 3x3 neighbourhood
// of the correlation peak.
//
//   corrBatchRaw : batch of correlation surfaces, NX*NY floats each, element
//                  (x, y) of surface k stored at k*NX*NY + x*NY + y
//   NX, NY       : extent of one surface along x and y
//   maxloc       : peak location (x, y) of every surface
//   maxval       : peak value of every surface
//   covValue     : output, one float3 per surface; (99, 99, 99) is written
//                  when the estimate cannot be formed
//   size         : number of surfaces in the batch
//
// The template argument is supplied by the launcher but is not referenced in
// the body.
template <const int BLOCKSIZE> // number of threads per block.
__global__ void cudaKernel_estimateVar(const float* corrBatchRaw, const int NX, const int NY, const int2* maxloc, const float* maxval, float3* covValue, const int size)
{
    // One thread per correlation surface; surplus threads exit immediately.
    const int surface = threadIdx.x + blockDim.x*blockIdx.x;
    if (surface >= size) return;

    const int row = maxloc[surface].x;
    const int col = maxloc[surface].y;
    const float peak = maxval[surface];

    // The 3x3 stencil must lie entirely inside the surface; a peak on the
    // margin gets the flag value instead.
    const bool stencilFits = (row - 1 >= 0) && (col - 1 >= 0) && (row + 1 < NX) && (col + 1 < NY);
    if (!stencilFits) {
        covValue[surface] = make_float3(99.0, 99.0, 99.0);
        return;
    }

    // Flat indices of the stencil's centre column in the three rows involved.
    const int base  = NX * NY * surface;
    const int above = base + (row - 1) * NY + col;
    const int here  = base + (row    ) * NY + col;
    const int below = base + (row + 1) * NY + col;

    // cXY: sample at row offset X-1 and column offset Y-1 from the peak.
    const float c00 = corrBatchRaw[above - 1];
    const float c01 = corrBatchRaw[above    ];
    const float c02 = corrBatchRaw[above + 1];
    const float c10 = corrBatchRaw[here  - 1];
    const float c11 = corrBatchRaw[here     ];
    const float c12 = corrBatchRaw[here  + 1];
    const float c20 = corrBatchRaw[below - 1];
    const float c21 = corrBatchRaw[below    ];
    const float c22 = corrBatchRaw[below + 1];

    // Negated, scaled second differences of the surface at the peak.
    float dxx = - ( c21 + c01 - 2*c11 ) * 0.5;
    float dyy = - ( c12 + c10 - 2*c11 ) * 0.5;
    float dxy = - ( c22 + c00 - c20 - c02 ) *0.25;

    // Scale by the number of samples in one surface.
    const int winSize = NX*NY;
    dxx = dxx * winSize;
    dyy = dyy * winSize;
    dxy = dxy * winSize;

    // Noise terms derived from how far the peak falls short of 1 (clamped at 0).
    const float shortfall = fmaxf(1 - peak, 0.0);
    const float shortfallSq = shortfall*shortfall;
    const float n2 = shortfall * 2;
    const float n4 = shortfallSq * 0.5 * winSize;

    // dxy^2 - dxx*dyy, i.e. minus the determinant of [dxx dxy; dxy dyy];
    // its square is the common denominator below.
    const float negDet = dxy * dxy - dxx * dyy;
    const float negDetSq = negDet*negDet;

    // A near-zero denominator would blow up the result: flag it instead.
    if (fabsf(negDet) < 1e-2) {
        covValue[surface] = make_float3(99.0, 99.0, 99.0);
        return;
    }

    const float cov_xx = (- n2 * negDet * dyy + n4 * ( dyy*dyy + dxy*dxy) ) / negDetSq;
    const float cov_yy = (- n2 * negDet * dxx + n4 * ( dxx*dxx + dxy*dxy) ) / negDetSq;
    const float cov_xy = ( n2 * negDet * dxy - n4 * ( dxx + dyy ) * dxy ) / negDetSq;
    covValue[surface] = make_float3(cov_xx, cov_yy, cov_xy);
}
/**
 * Host launcher for cudaKernel_estimateVar.
 *
 * Uses a one-dimensional launch with one thread per correlation surface in
 * the batch; the surface extent passed to the kernel is
 * (corrBatchRaw->height, corrBatchRaw->width).
 *
 * @param corrBatchRaw  batch of correlation surfaces
 * @param maxloc        peak location of each surface
 * @param maxval        peak value of each surface
 * @param covValue      output, one float3 per surface
 * @param stream        CUDA stream on which the kernel is enqueued
 */
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream)
{
    const int nSurfaces = corrBatchRaw->count;
    // Enough NTHREADS-wide blocks to give every surface its own thread.
    const int nBlocks = IDIVUP(nSurfaces, NTHREADS);

    cudaKernel_estimateVar<NTHREADS><<<nBlocks, NTHREADS, 0, stream>>>(
        corrBatchRaw->devData, corrBatchRaw->height, corrBatchRaw->width,
        maxloc->devData, maxval->devData, covValue->devData, nSurfaces);
    getLastCudaError("cudaKernel_estimateVar error\n");
}

View File

@ -7,20 +7,21 @@
from distutils.core import setup from distutils.core import setup
from distutils.extension import Extension from distutils.extension import Extension
from Cython.Build import cythonize from Cython.Build import cythonize
import os
os.environ["CC"] = "g++" import numpy
setup( name = 'PyCuAmpcor', setup( name = 'PyCuAmpcor',
ext_modules = cythonize(Extension( ext_modules = cythonize(Extension(
"PyCuAmpcor", "PyCuAmpcor",
sources=['PyCuAmpcor.pyx'], sources=['PyCuAmpcor.pyx'],
include_dirs=['/usr/local/cuda/include'], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS include_dirs=['/usr/local/cuda/include', numpy.get_include()], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS
extra_compile_args=['-fPIC','-fpermissive'], extra_compile_args=['-fPIC','-fpermissive'],
extra_objects=['SlcImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o', extra_objects=['GDALImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o',
'cuCorrNormalization.o','cuCorrTimeDomain.o','cuArraysCopy.o', 'cuCorrNormalization.o','cuCorrTimeDomain.o','cuArraysCopy.o',
'cuArrays.o','cuArraysPadding.o','cuOffset.o','cuOverSampler.o', 'cuArrays.o','cuArraysPadding.o','cuOffset.o','cuOverSampler.o',
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o'], 'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o','cuEstimateStats.o'],
extra_link_args=['-L/usr/local/cuda/lib64','-lcuda','-lcudart','-lcufft','-lcublas'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES extra_link_args=['-L/usr/local/cuda/lib64',
'-L/usr/lib64/nvidia',
'-lcuda','-lcudart','-lcufft','-lcublas','-lgdal'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
language='c++' language='c++'
))) )))

View File

@ -78,3 +78,6 @@ SConscript(rfi)
SConscript('PyCuAmpcor/SConscript') SConscript('PyCuAmpcor/SConscript')
SConscript('splitSpectrum/SConscript') SConscript('splitSpectrum/SConscript')
SConscript('alos2proc/SConscript') SConscript('alos2proc/SConscript')
if os.path.exists('geo_autoRIFT'):
SConscript('geo_autoRIFT/SConscript')

View File

@ -139,7 +139,7 @@ def main(iargs=None):
direction = np.sign(np.dot( np.cross(targxyz-mxyz, sxyz-mxyz), mvel)) direction = np.sign(np.dot( np.cross(targxyz-mxyz, sxyz-mxyz), mvel))
Bperp[ii,jj] = direction*perp Bperp[ii,jj] = direction*perp
Bperp.tofile(fid) Bperp.tofile(fid)
fid.close() fid.close()
####Write XML ####Write XML

View File

@ -33,6 +33,8 @@ def cmdLineParse(iargs = None):
def run(imageSlc1, imageSlc2, resampName, azLooks, rgLooks): def run(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
objSlc1 = isceobj.createSlcImage() objSlc1 = isceobj.createSlcImage()
#right now imageSlc1 and 2 are just text files, need to open them as image
IU.copyAttributes(imageSlc1, objSlc1) IU.copyAttributes(imageSlc1, objSlc1)
objSlc1.setAccessMode('read') objSlc1.setAccessMode('read')
objSlc1.createImage() objSlc1.createImage()
@ -81,7 +83,6 @@ def run(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
def main(iargs=None): def main(iargs=None):
inps = cmdLineParse(iargs) inps = cmdLineParse(iargs)
img1 = isceobj.createImage() img1 = isceobj.createImage()
@ -96,9 +97,8 @@ def main(iargs=None):
run(img1, img2, inps.prefix, inps.azlooks, inps.rglooks) run(img1, img2, inps.prefix, inps.azlooks, inps.rglooks)
if __name__ == '__main__': if __name__ == '__main__':
main()
''' '''
Main driver. Main driver.
''' '''

View File

@ -52,7 +52,7 @@ def generate(env):
# default flags for the NVCC compiler # default flags for the NVCC compiler
env['STATICNVCCFLAGS'] = '' env['STATICNVCCFLAGS'] = ''
env['SHAREDNVCCFLAGS'] = '' env['SHAREDNVCCFLAGS'] = ''
env['ENABLESHAREDNVCCFLAG'] = '-arch=sm_35 -shared -Xcompiler -fPIC' env['ENABLESHAREDNVCCFLAG'] = '-shared -Xcompiler -fPIC'
# default NVCC commands # default NVCC commands
env['STATICNVCCCMD'] = '$NVCC -o $TARGET -c $NVCCFLAGS $STATICNVCCFLAGS $SOURCES' env['STATICNVCCCMD'] = '$NVCC -o $TARGET -c $NVCCFLAGS $STATICNVCCFLAGS $SOURCES'
@ -153,7 +153,7 @@ def generate(env):
#env.Append(LIBPATH=[cudaSDKPath + '/lib', cudaSDKPath + '/common/lib' + cudaSDKSubLibDir, cudaToolkitPath + '/lib']) #env.Append(LIBPATH=[cudaSDKPath + '/lib', cudaSDKPath + '/common/lib' + cudaSDKSubLibDir, cudaToolkitPath + '/lib'])
env.Append(CUDACPPPATH=[cudaToolkitPath + '/include']) env.Append(CUDACPPPATH=[cudaToolkitPath + '/include'])
env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64']) env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64', '/lib64'])
env.Append(CUDALIBS=['cudart']) env.Append(CUDALIBS=['cudart'])
def exists(env): def exists(env):

View File

@ -12,7 +12,7 @@
from __future__ import print_function from __future__ import print_function
import sys import sys
import os import os
import urllib2 import urllib
import getopt import getopt
import re import re
import shutil import shutil
@ -57,7 +57,7 @@ def print2log(msg, withtime=True, cmd=False):
if withtime: if withtime:
now = datetime.datetime.today() now = datetime.datetime.today()
msg = "%s >> %s" % (now.isoformat(), msg) msg = "%s >> %s" % (now.isoformat(), msg)
LOGFILE.write(msg + '\n') LOGFILE.write((msg + '\n').encode('utf-8'))
LOGFILE.flush() LOGFILE.flush()
os.fsync(LOGFILE) os.fsync(LOGFILE)
@ -157,9 +157,9 @@ def downloadfile(url, fname, repeat=1):
counter = 0 counter = 0
while counter < repeat: while counter < repeat:
try: try:
response = urllib2.urlopen(url) response = urllib.request.urlopen(url)
break break
except urllib2.URLError, e: except urllib.request.URLError as e:
counter += 1 counter += 1
if hasattr(e, 'reason'): if hasattr(e, 'reason'):
print2log("Failed to reach server. Reason: %s" % e.reason) print2log("Failed to reach server. Reason: %s" % e.reason)
@ -851,7 +851,7 @@ class ISCEDeps(object):
f = open(self.config, 'rb') f = open(self.config, 'rb')
lines = f.readlines() lines = f.readlines()
for line in lines: for line in lines:
m = re.match("([^#].*?)=([^#]+?)$", line.strip()) m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8'))
if m: if m:
var = m.group(1).strip() var = m.group(1).strip()
val = m.group(2).strip() val = m.group(2).strip()
@ -867,7 +867,7 @@ def readSetupConfig(setup_config):
f = open(setup_config, 'rb') f = open(setup_config, 'rb')
lines = f.readlines() lines = f.readlines()
for line in lines: for line in lines:
m = re.match("([^#].*?)=([^#]+?)$", line.strip()) m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8'))
if m: if m:
var = m.group(1).strip() var = m.group(1).strip()
val = m.group(2).strip().replace('"', '') val = m.group(2).strip().replace('"', '')
@ -885,7 +885,7 @@ def checkArgs(args):
""" """
try: try:
opts, args = getopt.getopt(args, "h", ["help", "prefix=", "ping=", "config=", "uname=", "download=", "unpack=", "install=", "gcc=", "gpp=", "verbose"]) opts, args = getopt.getopt(args, "h", ["help", "prefix=", "ping=", "config=", "uname=", "download=", "unpack=", "install=", "gcc=", "gpp=", "verbose"])
except getopt.GetoptError, err: except getopt.GetoptError as err:
print2log("ProgError: %s" % str(err)) print2log("ProgError: %s" % str(err))
usage() usage()
sys.exit(2) sys.exit(2)