Merge branch 'master' of https://github.com/isce-framework/isce2 into UAVSAR

LT1AB
Eric J. Fielding 2019-12-03 16:41:41 -08:00
commit 36e7012e66
49 changed files with 2239 additions and 742 deletions

View File

@ -23,7 +23,7 @@ jobs:
pwd
mkdir config build install
. /opt/conda/bin/activate root
conda install --yes cython gdal h5py libgdal pytest numpy fftw scipy basemap scons opencv hdf4 hdf5 netcdf4 libgcc libstdcxx-ng cmake
conda install --yes cython gdal h5py libgdal pytest numpy fftw scipy basemap scons opencv hdf4 hdf5 netcdf4 libgcc libstdcxx-ng cmake astropy
yum install -y uuid-devel x11-devel motif-devel jq gcc-gfortran
ln -s /opt/conda/bin/cython /opt/conda/bin/cython3
cd /opt/conda/lib

View File

@ -623,7 +623,23 @@ between three files as follows:
<property name="OUTPUT">20061231</property>
</component>
```
### rtcApp.xml
The inputs are Sentinel-1 GRD zip files.
```xml
<rtcApp>
<constant name="dir">/Users/data/sentinel1 </constant>
<component name="rtcApp">
<property name="posting">20</property>
<property name="sensor name">sentinel1</property>
<component name="master">
<property name="safe">$dir$/rtcApp/data/S1A_IW_GRDH_1SDV_20181221T225104_20181221T225129_025130_02C664_B46C.zip</property>
<property name="orbit directory">$dir$/orbits</property>
<property name="output directory">$dir$/rtcApp/output</property>
<property name="polarization">[VV, VH]</property>
</component>
</component>
</rtcApp>
```
-----
## Component Configurability

View File

@ -293,6 +293,7 @@ def main(args, files):
#######Determine number of input and output bands
bandList = []
iMath['equations'] = []
for ii,expr in enumerate(args.equation.split(';')):
#####Now parse the equation to get the file names used
@ -319,7 +320,11 @@ def main(args, files):
######Create input memmaps
for ii,infile in enumerate(fileList):
fstr, files = parseInputFile(infile, files)
if type(files) == list:
fstr, files = parseInputFile(infile, files)
else:
fstr = getattr(files, infile)
logger.debug('Input string for File %d: %s: %s'%(ii, infile, fstr))
if len(fstr.split(';')) > 1:
@ -341,8 +346,9 @@ def main(args, files):
if bbox is not None:
iMath['bboxes'].append(bbox)
if len(files):
raise IOError('Unused input variables set:\n'+ ' '.join(files))
if type(files) == list:
if len(files):
raise IOError('Unused input variables set:\n'+ ' '.join(files))
#######Some debugging
logger.debug('List of available bands: ' + str(iMath['inBands'].keys()))

View File

@ -155,7 +155,7 @@ NUMBER_RANGE_LOOKS = Application.Parameter('numberRangeLooks',
)
POSTING = Application.Parameter('posting',
public_name='azimuth looks',
public_name='posting',
default = 20.0,
type = float,
mandatory = False,
@ -363,6 +363,7 @@ class GRDSAR(Application):
self.verifyDEM = RtcProc.createVerifyDEM(self)
self.multilook = RtcProc.createLooks(self)
self.runTopo = RtcProc.createTopo(self)
self.runNormalize = RtcProc.createNormalize(self)
# self.runGeocode = RtcProc.createGeocode(self)
return None
@ -392,6 +393,9 @@ class GRDSAR(Application):
##Run topo for each bursts
self.step('topo', func=self.runTopo)
##Run normalize to get gamma0
self.step('normalize', func=self.runNormalize)
# Geocode
# self.step('geocode', func=self.runGeocode,
# args=(self.geocode_list, self.do_unwrap, self.geocode_bbox))
@ -416,6 +420,9 @@ class GRDSAR(Application):
##Run topo for each burst
self.runTopo()
##Run normalize to get gamma0
self.runNormalize()
###Compute covariance
# self.runEstimateCovariance()

View File

@ -242,14 +242,20 @@ FILTER_STRENGTH = Application.Parameter('filterStrength',
mandatory=False,
doc='')
DO_RUBBERSHEETING = Application.Parameter('doRubbersheeting',
public_name='do rubbersheeting',
############################################## Modified by V.Brancato 10.07.2019
DO_RUBBERSHEETINGAZIMUTH = Application.Parameter('doRubbersheetingAzimuth',
public_name='do rubbersheetingAzimuth',
default=False,
type=bool,
mandatory=False,
doc='')
DO_RUBBERSHEETINGRANGE = Application.Parameter('doRubbersheetingRange',
public_name='do rubbersheetingRange',
default=False,
type=bool,
mandatory=False,
doc='')
#################################################################################
RUBBERSHEET_SNR_THRESHOLD = Application.Parameter('rubberSheetSNRThreshold',
public_name='rubber sheet SNR Threshold',
default = 5.0,
@ -533,7 +539,8 @@ class _RoiBase(Application, FrameMixin):
GEOCODE_BOX,
REGION_OF_INTEREST,
HEIGHT_RANGE,
DO_RUBBERSHEETING,
DO_RUBBERSHEETINGRANGE, #Modified by V. Brancato 10.07.2019
DO_RUBBERSHEETINGAZIMUTH, #Modified by V. Brancato 10.07.2019
RUBBERSHEET_SNR_THRESHOLD,
RUBBERSHEET_FILTER_SIZE,
DO_DENSEOFFSETS,
@ -724,7 +731,8 @@ class _RoiBase(Application, FrameMixin):
self.runResampleSlc = StripmapProc.createResampleSlc(self)
self.runRefineSlaveTiming = StripmapProc.createRefineSlaveTiming(self)
self.runDenseOffsets = StripmapProc.createDenseOffsets(self)
self.runRubbersheet = StripmapProc.createRubbersheet(self)
self.runRubbersheetRange = StripmapProc.createRubbersheetRange(self) #Modified by V. Brancato 10.07.2019
self.runRubbersheetAzimuth =StripmapProc.createRubbersheetAzimuth(self) #Modified by V. Brancato 10.07.2019
self.runResampleSubbandSlc = StripmapProc.createResampleSubbandSlc(self)
self.runInterferogram = StripmapProc.createInterferogram(self)
self.runFilter = StripmapProc.createFilter(self)
@ -774,8 +782,11 @@ class _RoiBase(Application, FrameMixin):
args=('refined',))
self.step('dense_offsets', func=self.runDenseOffsets)
self.step('rubber_sheet', func=self.runRubbersheet)
######################################################################## Modified by V. Brancato 10.07.2019
self.step('rubber_sheet_range', func=self.runRubbersheetRange)
self.step('rubber_sheet_azimuth',func=self.runRubbersheetAzimuth)
#########################################################################
self.step('fine_resample', func=self.runResampleSlc,
args=('fine',))
@ -852,10 +863,14 @@ class _RoiBase(Application, FrameMixin):
# run dense offsets
self.runDenseOffsets()
# adding the azimuth offsets computed from cross correlation to geometry offsets
self.runRubbersheet()
############ Modified by V. Brancato 10.07.2019
# adding the azimuth offsets computed from cross correlation to geometry offsets
self.runRubbersheetAzimuth()
# adding the range offsets computed from cross correlation to geometry offsets
self.runRubbersheetRange()
####################################################################################
# resampling using rubbersheeted offsets
# which include geometry + constant range + constant azimuth
# + dense azimuth offsets

View File

@ -1061,7 +1061,7 @@ class Orbit(Component):
###This wont break the old interface but could cause
###issues at midnight crossing
if reference is None:
reference = self.minTime()
reference = self.minTime
refEpoch = reference.replace(hour=0, minute=0, second=0, microsecond=0)

View File

@ -46,5 +46,6 @@ createPreprocessor = _factory("runPreprocessor")
createVerifyDEM = _factory("runVerifyDEM")
createLooks = _factory("runLooks")
createTopo = _factory("runTopo")
createNormalize = _factory("runNormalize")
#createGeocode = _factory("runGeocode")

View File

@ -69,7 +69,7 @@ INC_FILENAME = Component.Parameter(
GAMMA0_FILENAME = Component.Parameter(
'gamma0FileName',
public_name='Gamma0 backscatter file',
default = 'gamma0.rdr',
default = 'gamma0.img',
type = str,
mandatory = False,
doc = 'Unmasked gamma0 backscatter file')

View File

@ -1,4 +1,4 @@
#
#!/usr/bin/env python3
# Author: Piyush Agram
# Copyright 2016
#
@ -6,19 +6,23 @@
import logging
import isceobj
import mroipac
from .runTopo import filenameWithLooks
from .runLooks import takeLooks
import os
import itertools
import numpy as np
from isceobj.Util.decorators import use_api
from applications import imageMath
logger = logging.getLogger('isce.grdsar.looks')
class Dummy:
pass
def runNormalize(self):
'''
Make sure that a DEM is available for processing the given data.
'''
refPol = self._grd.polarizations[0]
master = self._grd.loadProduct( os.path.join(self._grd.outputFolder, 'beta_{0}.xml'.format(refPol)))
@ -26,17 +30,31 @@ def runNormalize(self):
azlooks, rglooks = self._grd.getLooks( self.posting, master.groundRangePixelSize, master.azimuthPixelSize, self.numberAzimuthLooks, self.numberRangeLooks)
if (azlooks == 1) and (rglooks == 1):
return
slantRange = False
for pol in self._grd.polarizations:
inname = os.path.join( self._grd.outputFolder, 'beta_{0}.img'.format(pol) )
takeLooks(inname, azlooks, rglooks)
if (azlooks == 1) and (rglooks == 1):
inname = os.path.join( self._grd.outputFolder, 'beta_{0}.img'.format(pol))
else:
inname = os.path.join( self._grd.outputFolder, filenameWithLooks('beta_{0}.img'.format(pol), azlooks, rglooks))
if not slantRange:
inname = master.slantRangeImage.filename
takeLooks(inname, azlooks, rglooks)
slantRange = True
basefolder, output = os.path.split(self._grd.outputFolder)
incname = os.path.join(basefolder, self._grd.geometryFolder, self._grd.incFileName)
outname = os.path.join(self._grd.outputFolder, filenameWithLooks('gamma_{0}'.format(pol)+'.img', azlooks, rglooks))
maskname = os.path.join(basefolder, self._grd.geometryFolder, self._grd.slMaskFileName)
args = imageMath.createNamespace()
args.equation = 'a*cos(b_0*PI/180.)/cos(b_1*PI/180.) * (c==0)'
args.dtype = np.float32
args.scheme = 'BIL'
args.out = outname
#args.debug = True
files = Dummy()
files.a = inname
files.b = incname
files.c = maskname
imageMath.main(args, files)
return

View File

@ -261,10 +261,10 @@ class Sentinel1(Component):
self.validateUserInputs()
if self.xml.startswith('/vsizip'): #Read from zip file
if '.zip' in self.xml:
try:
parts = self.xml.split(os.path.sep)
zipname = os.path.join(*(parts[2:-3]))
zipname = os.path.join('/',*(parts[:-3]))
fname = os.path.join(*(parts[-3:]))
with zipfile.ZipFile(zipname, 'r') as zf:
@ -283,23 +283,22 @@ class Sentinel1(Component):
self.populateMetadata()
self.populateBbox()
####Tru and locate an orbit file
####Try and locate an orbit file
if self.orbitFile is None:
if self.orbitDir is not None:
self.orbitFile = self.findOrbitFile()
print('Found this orbitfile: %s' %self.orbitFile)
####Read in the orbits
if self.orbitFile:
if '_POEORB_' in self.orbitFile:
orb = self.extractPreciseOrbit()
else:
elif '_RESORB_' in self.orbitFile:
orb = self.extractOrbit()
self.product.orbit.setOrbitSource('Header')
for sv in orb:
self.product.orbit.addStateVector(sv)
self.populateIPFVersion()
self.extractBetaLUT()
self.extractNoiseLUT()
@ -423,10 +422,11 @@ class Sentinel1(Component):
nsp = "{http://www.esa.int/safe/sentinel-1.0}"
if self.manifest.startswith('/vsizip'):
if '.zip' in self.manifest:
import zipfile
parts = self.manifest.split(os.path.sep)
zipname = os.path.join(*(parts[2:-2]))
zipname = os.path.join('/',*(parts[:-2]))
fname = os.path.join(*(parts[-2:]))
try:
@ -462,38 +462,40 @@ class Sentinel1(Component):
datefmt = "%Y%m%dT%H%M%S"
types = ['POEORB', 'RESORB']
filelist = []
match = []
timeStamp = self.product.sensingMid
timeStamp = self.product.sensingStart+(self.product.sensingStop - self.product.sensingStart)/2.
for orbType in types:
files = glob.glob( os.path.join(self.orbitDir, 'S1A_OPER_AUX_' + orbType + '_OPOD*'))
filelist.extend(files)
###List all orbit files
for result in files:
fields = result.split('_')
taft = datetime.datetime.strptime(fields[-1][0:15], datefmt)
tbef = datetime.datetime.strptime(fields[-2][1:16], datefmt)
#####Get all files that span the acquisition
if (tbef <= timeStamp) and (taft >= timeStamp):
tmid = tbef + 0.5 * (taft - tbef)
match.append((result, abs((timeStamp-tmid).total_seconds())))
#####Return the file with the image is aligned best to the middle of the file
if len(match) != 0:
bestmatch = min(match, key = lambda x: x[1])
return bestmatch[0]
for result in filelist:
fields = result.split('_')
taft = datetime.datetime.strptime(fields[-1][0:15], datefmt)
tbef = datetime.datetime.strptime(fields[-2][1:16], datefmt)
print(taft, tbef)
#####Get all files that span the acquisition
if (tbef <= timeStamp) and (taft >= timeStamp):
tmid = tbef + 0.5 * (taft - tbef)
match.append((result, abs((timeStamp-tmid).total_seconds())))
#####Return the file with the image is aligned best to the middle of the file
if len(match) != 0:
bestmatch = min(match, key = lambda x: x[1])
return bestmatch[0]
if len(match) == 0:
raise Exception('No suitable orbit file found. If you want to process anyway - unset the orbitdir parameter')
if len(match) == 0:
raise Exception('No suitable orbit file found. If you want to process anyway - unset the orbitdir parameter')
def extractOrbit(self):
'''
Extract orbit information from xml node.
'''
node = self._xml_root.find('generalAnnotation/orbitList')
print('Extracting orbit from annotation XML file')
frameOrbit = Orbit()
frameOrbit.configure()
@ -516,13 +518,7 @@ class Sentinel1(Component):
vec.setVelocity(vel)
frameOrbit.addStateVector(vec)
orbExt = OrbitExtender(planet=Planet(pname='Earth'))
orbExt.configure()
newOrb = orbExt.extendOrbit(frameOrbit)
return newOrb
return frameOrbit
def extractPreciseOrbit(self):
'''
@ -534,11 +530,10 @@ class Sentinel1(Component):
print("IOError: %s" % strerr)
return
_xml_root = ElementTree(file=fp).getroot()
_xml_root = ElementTree.ElementTree(file=fp).getroot()
node = _xml_root.find('Data_Block/List_of_OSVs')
print('Extracting orbit from Orbit File: ', self.orbitFile)
orb = Orbit()
orb.configure()
@ -582,10 +577,10 @@ class Sentinel1(Component):
if self.calibrationXml is None:
raise Exception('No calibration file provided')
if self.calibrationXml.startswith('/vsizip'):
if '.zip' in self.calibrationXml:
import zipfile
parts = self.calibrationXml.split(os.path.sep)
zipname = os.path.join(*(parts[2:-4]))
zipname = os.path.join('/',*(parts[:-4]))
fname = os.path.join(*(parts[-4:]))
try:
@ -723,7 +718,7 @@ class Sentinel1(Component):
print('Extracting normalized image ....')
src = gdal.Open(self.tiff.strip(), gdal.GA_ReadOnly)
src = gdal.Open('/vsizip//'+self.tiff.strip(), gdal.GA_ReadOnly)
band = src.GetRasterBand(1)
if self.product.numberOfSamples != src.RasterXSize:

View File

@ -21,6 +21,7 @@ c get alos position and times
integer*1 indata(32768)
integer statb(13),stat
integer numdata,rowPos,colPos,eof
integer*4 unpackBytes
c read the leader file descriptor record
!!!!!!!!!!!!!!!!!!
@ -106,12 +107,9 @@ c read in the raw data file line by line
do i=1,nlines
! jng ierr=ioread(ichandata,indata,len)
call getLineSequential(rawAccessor,indata,eof)
iyear=iand(indata(40),255)*256*256*256+iand(indata(39),255)*256*256+
$ iand(indata(38),255)*256+iand(indata(37),255)
idoy=iand(indata(44),255)*256*256*256+iand(indata(43),255)*256*256+
$ iand(indata(42),255)*256+iand(indata(41),255)
ims=iand(indata(48),255)*256*256*256+iand(indata(47),255)*256*256+
$ iand(indata(46),255)*256+iand(indata(45),255)
iyear = unpackBytes(indata(40), indata(39), indata(38), indata(37))
idoy = unpackBytes(indata(44), indata(43), indata(42), indata(41))
ims = unpackBytes(indata(48), indata(47), indata(46), indata(45))
ddate(2) = ims*1000.0 !we save days in the year and microsec in the day
ddate(1) = 1.*idoy
call setLineSequential(auxAccessor,ddate)
@ -144,3 +142,9 @@ c print *,val
return
end
c unpackBytes: combine four byte values into one 32-bit integer.
c Only the low 8 bits of each argument are used (iand with 255).
c i1 supplies the most significant byte (weight 256**3), i2 the next
c (256**2), i3 the next (256) and i4 the least significant byte.
c NOTE(review): the dummy arguments are declared integer*4, while the
c call sites visible in this file pass elements of indata, which is
c declared integer*1. The 255 mask discards everything but the low
c byte; presumably this relies on little-endian storage - confirm.
integer*4 function unpackBytes(i1, i2, i3, i4)
integer*4 i1, i2, i3, i4
unpackBytes = iand(i1, 255)*256*256*256 + iand(i2, 255)*256*256 +
$ iand(i3, 255)*256 + iand(i4, 255)
end function

View File

@ -112,7 +112,8 @@ createResampleSlc = _factory("runResampleSlc")
createResampleSubbandSlc = _factory("runResampleSubbandSlc")
createRefineSlaveTiming = _factory("runRefineSlaveTiming")
createDenseOffsets = _factory("runDenseOffsets")
createRubbersheet = _factory("runRubbersheet")
createRubbersheetAzimuth = _factory("runRubbersheetAzimuth") # Modified by V. Brancato (10.07.2019)
createRubbersheetRange = _factory("runRubbersheetRange") # Modified by V. Brancato (10.07.2019)
createInterferogram = _factory("runInterferogram")
createCoherence = _factory("runCoherence")
createFilter = _factory("runFilter")

View File

@ -49,7 +49,7 @@ listFiles = ['StripmapProc.py', 'runPreprocessor.py', 'runSplitSpectrum.py',
'Factories.py' , 'runDenseOffsets.py', 'runResampleSlc.py' , 'runUnwrapGrass.py',
'__init__.py' , 'runDispersive.py' , 'runResampleSubbandSlc.py', 'runUnwrapIcu.py',
'runFilter.py' , 'runROI.py' , 'runUnwrapSnaphu.py', 'runCrop.py',
'runGeo2rdr.py', 'runRubbersheet.py', '__StripmapProc.py' , 'runInterferogram.py',
'runGeo2rdr.py', 'runRubbersheetRange.py', 'runRubbersheetAzimuth.py', '__StripmapProc.py' , 'runInterferogram.py',
'runVerifyDEM.py', 'runGeocode.py', 'Sensor.py'
]

View File

@ -325,14 +325,21 @@ AZIMUTH_OFFSET_FILENAME = Component.Parameter('azimuthOffsetFilename',
doc='')
# Modified by V. Brancato 10.07.2019
AZIMUTH_RUBBERSHEET_FILENAME = Component.Parameter('azimuthRubbersheetFilename',
public_name='azimuth Rubbersheet Image Name',
default = 'azimuth_sheet.off',
type=str,
mandatory=False,
doc='')
RANGE_RUBBERSHEET_FILENAME = Component.Parameter('rangeRubbersheetFilename',
public_name='range Rubbersheet Image Name',
default = 'range_sheet.off',
type=str,
mandatory=False,
doc='')
# End of modification
MISREG_FILENAME = Component.Parameter('misregFilename',
public_name='misreg file name',
default='misreg',
@ -346,14 +353,21 @@ DENSE_OFFSET_FILENAME = Component.Parameter('denseOffsetFilename',
type=str,
mandatory=False,
doc='file name of dense offsets computed from cross correlating two SLC images')
# Modified by V. Brancato 10.07.2019
FILT_AZIMUTH_OFFSET_FILENAME = Component.Parameter('filtAzimuthOffsetFilename',
public_name='filtered azimuth offset filename',
default='filtAzimuth.off',
type=str,
mandatory=False,
doc='Filtered azimuth dense offsets')
FILT_RANGE_OFFSET_FILENAME = Component.Parameter('filtRangeOffsetFilename',
public_name='filtered range offset filename',
default='filtRange.off',
type=str,
mandatory=False,
doc='Filtered range dense offsets')
# End of modification
DISPERSIVE_FILENAME = Component.Parameter('dispersiveFilename',
public_name = 'dispersive phase filename',
default='dispersive.bil',
@ -470,8 +484,10 @@ class StripmapProc(Component, FrameMixin):
LOS_FILENAME,
RANGE_OFFSET_FILENAME,
AZIMUTH_OFFSET_FILENAME,
AZIMUTH_RUBBERSHEET_FILENAME,
FILT_AZIMUTH_OFFSET_FILENAME,
AZIMUTH_RUBBERSHEET_FILENAME, # Added by V. Brancato 10.07.2019
RANGE_RUBBERSHEET_FILENAME, # Added by V. Brancato 10.07.2019
FILT_AZIMUTH_OFFSET_FILENAME, # Added by V. Brancato 10.07.2019
FILT_RANGE_OFFSET_FILENAME, # Added by V. Brancato 10.07.2019
DENSE_OFFSET_FILENAME,
MISREG_FILENAME,
DISPERSIVE_FILENAME,

View File

@ -1,14 +1,73 @@
#
# Author: Heresh Fattahi, 2017
#
# Modified by V. Brancato (10.2019)
# (Included flattening when rubbersheeting in range is turned on)
import isceobj
import logging
from components.stdproc.stdproc import crossmul
from iscesys.ImageUtil.ImageUtil import ImageUtil as IU
import os
import gdal
import numpy as np
logger = logging.getLogger('isce.insar.runInterferogram')
# Added by V. Brancato 10.09.2019
def write_xml(fileName,width,length,bands,dataType,scheme):
    """
    Describe an existing raster with an ISCE image object and render its
    header and VRT.

    Parameters
    ----------
    fileName : path of the raster the metadata refers to.
    width, length : raster dimensions passed to setWidth / setLength.
    bands : value assigned to the image ``bands`` attribute.
    dataType : value assigned to the image ``dataType`` attribute.
    scheme : value assigned to the image ``scheme`` attribute.

    Returns
    -------
    None
    """
    image = isceobj.createImage()

    # Geometry and location of the raster
    image.setFilename(fileName)
    image.setWidth(width)
    image.setLength(length)
    image.setAccessMode('READ')

    # Layout attributes, assigned in the same order as before
    for attribute, value in (('bands', bands),
                             ('dataType', dataType),
                             ('scheme', scheme)):
        setattr(image, attribute, value)

    # presumably these write the .xml and .vrt sidecar files - confirm
    image.renderHdr()
    image.renderVRT()
    return None
def compute_FlatEarth(self,ifgFilename,width,length,radarWavelength):
    """
    Multiply the full-resolution interferogram in place by a phase ramp
    derived from the range offsets.

    Every line of ``ifgFilename + '.full'`` (complex64, ``length`` x ``width``)
    is multiplied by ``exp(-1j * 4*pi*rangePixelSize/radarWavelength * offset)``,
    where ``offset`` is the matching line of the range offset file.

    Parameters
    ----------
    self : application object; ``self._insar`` provides ``loadProduct`` and
        ``slaveSlcCropProduct``, ``self.insar`` provides ``offsetsDirname``
        and ``rangeOffsetFilename``.
    ifgFilename : interferogram path without the ``.full`` suffix.
    width, length : dimensions of both the interferogram and the offsets.
    radarWavelength : wavelength used to convert offsets to phase.

    Notes
    -----
    The offsets are read from ``rangeOffsetFilename`` as float64. If that file
    does not exist, zero offsets are used and the interferogram is unchanged.
    """
    # Range pixel size comes from the cropped slave SLC product
    info = self._insar.loadProduct(self._insar.slaveSlcCropProduct)
    rangePixelSize = info.getInstrument().getRangePixelSize()

    # Offset (in range pixels) to phase (radians) conversion factor
    fact = 4 * np.pi * rangePixelSize / radarWavelength
    cJ = np.complex64(-1j)

    # Open the range offsets
    rngOff = os.path.join(self.insar.offsetsDirname, self.insar.rangeOffsetFilename)
    print(rngOff)
    if os.path.exists(rngOff):
        rng2 = np.memmap(rngOff, dtype=np.float64, mode='r', shape=(length,width))
    else:
        print('No range offsets provided')
        rng2 = np.zeros((length,width))

    # Open the interferogram for in-place update
    intf = np.memmap(ifgFilename+'.full',dtype=np.complex64,mode='r+',shape=(length,width))

    # Work line by line so that only one line of phase is held in memory
    for ll in range(length):
        intf[ll,:] *= np.exp(cJ*fact*rng2[ll,:])

    # Make sure the modified samples reach the file before the map is released
    intf.flush()
    del rng2
    del intf
    return
def multilook(infile, outname=None, alks=5, rlks=15):
'''
Take looks.
@ -66,8 +125,9 @@ def computeCoherence(slc1name, slc2name, corname, virtual=True):
slc2.finalizeImage()
return
def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
# Modified by V. Brancato on 10.09.2019 (added self)
# Modified by V. Brancato on 11.13.2019 (added radar wavelength for low and high band flattening)
def generateIgram(self,imageSlc1, imageSlc2, resampName, azLooks, rgLooks,radarWavelength):
objSlc1 = isceobj.createSlcImage()
IU.copyAttributes(imageSlc1, objSlc1)
objSlc1.setAccessMode('read')
@ -79,8 +139,13 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
objSlc2.createImage()
slcWidth = imageSlc1.getWidth()
intWidth = int(slcWidth / rgLooks)
if not self.doRubbersheetingRange:
intWidth = int(slcWidth/rgLooks) # Modified by V. Brancato intWidth = int(slcWidth / rgLooks)
else:
intWidth = int(slcWidth)
lines = min(imageSlc1.getLength(), imageSlc2.getLength())
if '.flat' in resampName:
@ -93,7 +158,7 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
resampInt = resampName
objInt = isceobj.createIntImage()
objInt.setFilename(resampInt)
objInt.setFilename(resampInt+'.full')
objInt.setWidth(intWidth)
imageInt = isceobj.createIntImage()
IU.copyAttributes(objInt, imageInt)
@ -101,28 +166,48 @@ def generateIgram(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
objInt.createImage()
objAmp = isceobj.createAmpImage()
objAmp.setFilename(resampAmp)
objAmp.setFilename(resampAmp+'.full')
objAmp.setWidth(intWidth)
imageAmp = isceobj.createAmpImage()
IU.copyAttributes(objAmp, imageAmp)
objAmp.setAccessMode('write')
objAmp.createImage()
if not self.doRubbersheetingRange:
print('Rubbersheeting in range is off, interferogram is already flattened')
objCrossmul = crossmul.createcrossmul()
objCrossmul.width = slcWidth
objCrossmul.length = lines
objCrossmul.LooksDown = azLooks
objCrossmul.LooksAcross = rgLooks
objCrossmul = crossmul.createcrossmul()
objCrossmul.width = slcWidth
objCrossmul.length = lines
objCrossmul.LooksDown = azLooks
objCrossmul.LooksAcross = rgLooks
objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp)
objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp)
else:
# Modified by V. Brancato 10.09.2019 (added option to add Range Rubber sheet Flat-earth back)
print('Rubbersheeting in range is on, removing flat-Earth phase')
objCrossmul = crossmul.createcrossmul()
objCrossmul.width = slcWidth
objCrossmul.length = lines
objCrossmul.LooksDown = 1
objCrossmul.LooksAcross = 1
objCrossmul.crossmul(objSlc1, objSlc2, objInt, objAmp)
# Remove Flat-Earth component
compute_FlatEarth(self,resampInt,intWidth,lines,radarWavelength)
# Perform Multilook
multilook(resampInt+'.full', outname=resampInt, alks=azLooks, rlks=rgLooks) #takeLooks(objAmp,azLooks,rgLooks)
multilook(resampAmp+'.full', outname=resampAmp, alks=azLooks, rlks=rgLooks) #takeLooks(objInt,azLooks,rgLooks)
#os.system('rm ' + resampInt+'.full* ' + resampAmp + '.full* ')
# End of modification
for obj in [objInt, objAmp, objSlc1, objSlc2]:
obj.finalizeImage()
return imageInt, imageAmp
def subBandIgram(self, masterSlc, slaveSlc, subBandDir):
def subBandIgram(self, masterSlc, slaveSlc, subBandDir,radarWavelength):
img1 = isceobj.createImage()
img1.load(masterSlc + '.xml')
@ -142,7 +227,7 @@ def subBandIgram(self, masterSlc, slaveSlc, subBandDir):
interferogramName = os.path.join(ifgDir , self.insar.ifgFilename)
generateIgram(img1, img2, interferogramName, azLooks, rgLooks)
generateIgram(self,img1, img2, interferogramName, azLooks, rgLooks,radarWavelength)
return interferogramName
@ -175,9 +260,9 @@ def runSubBandInterferograms(self):
slaveHighBandSlc = os.path.join(coregDir , os.path.basename(slaveSlc))
##########
interferogramName = subBandIgram(self, masterLowBandSlc, slaveLowBandSlc, self.insar.lowBandSlcDirname)
interferogramName = subBandIgram(self, masterLowBandSlc, slaveLowBandSlc, self.insar.lowBandSlcDirname,self.insar.lowBandRadarWavelength)
interferogramName = subBandIgram(self, masterHighBandSlc, slaveHighBandSlc, self.insar.highBandSlcDirname)
interferogramName = subBandIgram(self, masterHighBandSlc, slaveHighBandSlc, self.insar.highBandSlcDirname,self.insar.highBandRadarWavelength)
def runFullBandInterferogram(self):
logger.info("Generating interferogram")
@ -185,7 +270,7 @@ def runFullBandInterferogram(self):
masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct)
masterSlc = masterFrame.getImage().filename
if self.doRubbersheeting:
if (self.doRubbersheetingRange | self.doRubbersheetingAzimuth):
slaveSlc = os.path.join(self._insar.coregDirname, self._insar.fineCoregFilename)
else:
slaveSlc = os.path.join(self._insar.coregDirname, self._insar.refinedCoregFilename)
@ -211,8 +296,11 @@ def runFullBandInterferogram(self):
os.makedirs(ifgDir)
interferogramName = os.path.join(ifgDir , self.insar.ifgFilename)
generateIgram(img1, img2, interferogramName, azLooks, rgLooks)
info = self._insar.loadProduct(self._insar.slaveSlcCropProduct)
radarWavelength = info.getInstrument().getRadarWavelength()
generateIgram(self,img1, img2, interferogramName, azLooks, rgLooks,radarWavelength)
###Compute coherence
@ -221,7 +309,7 @@ def runFullBandInterferogram(self):
multilook(cohname+'.full', outname=cohname, alks=azLooks, rlks=rgLooks)
###Multilook relevant geometry products
##Multilook relevant geometry products
for fname in [self.insar.latFilename, self.insar.lonFilename, self.insar.losFilename]:
inname = os.path.join(self.insar.geometryDirname, fname)
multilook(inname + '.full', outname= inname, alks=azLooks, rlks=rgLooks)

View File

@ -23,7 +23,7 @@ def runResampleSlc(self, kind='coarse'):
raise Exception('Unknown operation type {0} in runResampleSlc'.format(kind))
if kind == 'fine':
if not self.doRubbersheeting:
if not (self.doRubbersheetingRange | self.doRubbersheetingAzimuth): # Modified by V. Brancato 10.10.2019
print('Rubber sheeting not requested, skipping resampling ....')
return
@ -68,12 +68,25 @@ def runResampleSlc(self, kind='coarse'):
#Since the app is based on geometry module we expect pixel-by-pixel offset
#field
offsetsDir = self.insar.offsetsDirname
rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
# Modified by V. Brancato 10.10.2019
#rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
if kind in ['coarse', 'refined']:
azname = os.path.join(offsetsDir, self.insar.azimuthOffsetFilename)
rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
else:
azname = os.path.join(offsetsDir, self.insar.azimuthRubbersheetFilename)
if self.doRubbersheetingRange:
print('Rubbersheeting in range is turned on, taking the cross-correlation offsets')
print('Setting Flattening to False')
rgname = os.path.join(offsetsDir, self.insar.rangeRubbersheetFilename)
flatten=False
else:
print('Rubbersheeting in range is turned off, taking range geometric offsets')
rgname = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
flatten=True
rngImg = isceobj.createImage()
rngImg.load(rgname + '.xml')
rngImg.setAccessMode('READ')
@ -85,8 +98,8 @@ def runResampleSlc(self, kind='coarse'):
width = rngImg.getWidth()
length = rngImg.getLength()
flatten = True
# Modified by V. Brancato 10.10.2019
#flatten = True
rObj.flatten = flatten
rObj.outputWidth = width
rObj.outputLines = length

View File

@ -14,7 +14,8 @@ import shelve
logger = logging.getLogger('isce.insar.runResampleSubbandSlc')
def resampleSlc(masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir,
# Modified by V. Brancato 10.14.2019 added "self" as input parameter of resampleSLC
def resampleSlc(self,masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir,
azoffname, rgoffname, azpoly = None, rgpoly = None, misreg=False):
logger.info("Resampling slave SLC")
@ -56,8 +57,17 @@ def resampleSlc(masterFrame, slaveFrame, imageSlc2, radarWavelength, coregDir,
width = rngImg.getWidth()
length = rngImg.getLength()
flatten = True
# Modified by V. Brancato on 10.14.2019 (if Rubbersheeting in range is turned on, flatten the interferogram during cross-correlation)
if not self.doRubbersheetingRange:
print('Rubber sheeting in range is turned off, flattening the interferogram during resampling')
flatten = True
print(flatten)
else:
print('Rubber sheeting in range is turned on, flattening the interferogram during interferogram formation')
flatten=False
print(flatten)
# end of Modification
rObj.flatten = flatten
rObj.outputWidth = width
rObj.outputLines = length
@ -105,15 +115,25 @@ def runResampleSubbandSlc(self, misreg=False):
masterFrame = self._insar.loadProduct( self._insar.masterSlcCropProduct)
slaveFrame = self._insar.loadProduct( self._insar.slaveSlcCropProduct)
if self.doRubbersheeting:
print('Using rubber sheeted offsets for resampling sub-bands')
# Modified by V. Brancato 10.14.2019
if self.doRubbersheetingAzimuth:
print('Using rubber in azimuth sheeted offsets for resampling sub-bands')
azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthRubbersheetFilename)
else:
print('Using refined offsets for resampling sub-bands')
azoffname = os.path.join( self.insar.offsetsDirname, self.insar.azimuthOffsetFilename)
rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename)
if self.doRubbersheetingRange:
print('Using rubber in range sheeted offsets for resampling sub-bands')
rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeRubbersheetFilename)
else:
print('Using refined offsets for resampling sub-bands')
rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename)
# ****************** End of Modification
# rgoffname = os.path.join( self.insar.offsetsDirname, self.insar.rangeOffsetFilename)
azpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_az.xml')
rgpoly = self.insar.loadProduct( os.path.join(self.insar.misregDirname, self.insar.misregFilename) + '_rg.xml')
@ -124,7 +144,7 @@ def runResampleSubbandSlc(self, misreg=False):
wvlL = self.insar.lowBandRadarWavelength
coregDir = os.path.join(self.insar.coregDirname, self.insar.lowBandSlcDirname)
lowbandCoregFilename = resampleSlc(masterFrame, slaveFrame, imageSlc2, wvlL, coregDir,
lowbandCoregFilename = resampleSlc(self,masterFrame, slaveFrame, imageSlc2, wvlL, coregDir,
azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly,misreg=False)
imageSlc2 = os.path.join(self.insar.splitSpectrumDirname, self.insar.highBandSlcDirname,
@ -132,7 +152,7 @@ def runResampleSubbandSlc(self, misreg=False):
wvlH = self.insar.highBandRadarWavelength
coregDir = os.path.join(self.insar.coregDirname, self.insar.highBandSlcDirname)
highbandCoregFilename = resampleSlc(masterFrame, slaveFrame, imageSlc2, wvlH, coregDir,
highbandCoregFilename = resampleSlc(self,masterFrame, slaveFrame, imageSlc2, wvlH, coregDir,
azoffname, rgoffname, azpoly=azpoly, rgpoly=rgpoly, misreg=False)
self.insar.lowBandSlc2 = lowbandCoregFilename

View File

@ -168,6 +168,7 @@ def runRubbersheet(self):
# filtAzOffsetFile to it.
resampleOffset(filtAzOffsetFile, geometryAzimuthOffset, sheetOffset)
print("I'm here")
return None

View File

@ -0,0 +1,276 @@
#
# Author: Heresh Fattahi
# Copyright 2017
#
# Modified by V. Brancato
# Included offset filtering with no SNR
#
import isce
import isceobj
from osgeo import gdal
from scipy import ndimage
from astropy.convolution import convolve
import numpy as np
import os
def mask_filterNoSNR(denseOffsetFile,filterSize,outName):
    """
    Mask and filter the azimuth dense offsets without using the SNR layer.

    Band 1 (azimuth) and band 2 (range) are read from
    ``denseOffsetFile + '.vrt'``.  Outliers are flagged by off_masking,
    the flagged azimuth samples are filled by fill_with_smoothed, the result
    is median filtered with ``filterSize`` and written to ``outName`` together
    with an ISCE header.
    """
    # Load both offset bands; releasing the handle closes the dataset
    dataset = gdal.Open(denseOffsetFile+'.vrt',gdal.GA_ReadOnly)
    azimuth = dataset.GetRasterBand(1).ReadAsArray()
    slant = dataset.GetRasterBand(2).ReadAsArray()
    dataset = None

    # Samples below -9999 are treated as missing values from ampcor and zeroed
    slant[slant < -9999] = 0
    azimuth[azimuth < -9999] = 0

    # Range in the real part, azimuth in the imaginary part
    combined = slant + 1j*azimuth

    # Flag samples that deviate from the local median or are exactly zero
    outliers = off_masking(combined,filterSize,thre=3)
    rg_masked = np.ma.array(combined.real,mask=outliers)
    az_masked = np.ma.array(combined.imag,mask=outliers)

    # Drop references that are no longer needed
    del outliers
    del combined

    # Also mask residual spots whose 3x3 median (masked samples counted as 0) is zero
    rg_empty = ndimage.median_filter(rg_masked.filled(fill_value=0),3) == 0
    az_empty = ndimage.median_filter(az_masked.filled(fill_value=0),3) == 0
    az_masked.mask = az_masked.mask | rg_empty | az_empty

    # Turn masked azimuth samples into NaN and fill them iteratively
    samples = az_masked.data
    samples[az_masked.mask] = np.nan
    filled = fill_with_smoothed(samples,filterSize)

    # Final smoothing of the azimuth offset map
    filled = ndimage.median_filter(filled,filterSize)
    _, width = filled.shape

    # Dump the raw array, then describe it with an ISCE header
    print ('writing masked and filtered offsets to: ', outName)
    filled.tofile(outName)

    header = isceobj.createImage()
    header.setFilename(outName)
    header.setWidth(width)
    header.setAccessMode('READ')
    header.bands = 1
    header.dataType = 'FLOAT'
    header.scheme = 'BIP'
    header.renderHdr()
    return
def off_masking(off,filterSize,thre=2):
    """
    Build the boolean mask of offsets that should be discarded.

    ``off`` is a complex array whose real and imaginary parts hold the two
    offset components.  A sample is masked when either component differs from
    its ``filterSize`` median-filtered value by more than ``thre``, or when
    either component is exactly zero.
    """
    real_part = off.real
    imag_part = off.imag

    # Deviation of every sample from its local median
    real_dev = np.abs(real_part - ndimage.median_filter(real_part, filterSize))
    imag_dev = np.abs(imag_part - ndimage.median_filter(imag_part, filterSize))

    deviates = (real_dev > thre) | (imag_dev > thre)
    is_zero = (imag_part == 0) | (real_part == 0)
    return deviates | is_zero
def fill(data, invalid=None):
    """
    Fill invalid cells of 'data' with the value of the nearest valid cell.

    Input:
        data:    numpy array of any dimension
        invalid: boolean array with the shape of 'data'; cells where it is
                 True are replaced.  Defaults to np.isnan(data) when None.

    Output:
        A new array in which every invalid cell carries the value of its
        nearest valid neighbour.
    """
    bad = np.isnan(data) if invalid is None else invalid

    # For each cell, the index of the closest cell where 'bad' is False
    nearest = ndimage.distance_transform_edt(
        bad, return_distances=False, return_indices=True)
    return data[tuple(nearest)]
def mask_filter(denseOffsetFile, snrFile, band, snrThreshold, filterSize, outName):
    """
    Mask the dense offsets with an SNR threshold, fill and median filter them.

    Band ``band`` of ``denseOffsetFile + '.vrt'`` is read together with band 1
    of ``snrFile + '.vrt'``.  Offsets whose SNR is below ``snrThreshold`` are
    replaced by the nearest valid value (see fill), the result is median
    filtered with ``filterSize`` and written to ``outName`` with an ISCE header.
    """
    def read_band(path, index):
        # Open the VRT, pull one band into memory, release the dataset
        handle = gdal.Open(path + '.vrt', gdal.GA_ReadOnly)
        values = handle.GetRasterBand(index).ReadAsArray()
        handle = None
        return values

    offsets = read_band(denseOffsetFile, band)
    snr = read_band(snrFile, 1)

    # Low-SNR samples become NaN so that fill() treats them as invalid
    print ('masking the dense offsets with SNR threshold: ', snrThreshold)
    offsets[snr<snrThreshold] = np.nan
    offsets = fill(offsets)

    # Smooth the filled offsets
    print ('Filtering with median filter with size : ', filterSize)
    offsets = ndimage.median_filter(offsets, size=filterSize)
    _, width = offsets.shape

    # Dump the raw array, then describe it with an ISCE header
    print ('writing masked and filtered offsets to: ', outName)
    offsets.tofile(outName)

    header = isceobj.createImage()
    header.setFilename(outName)
    header.setWidth(width)
    header.setAccessMode('READ')
    header.bands = 1
    header.dataType = 'FLOAT'
    header.scheme = 'BIP'
    header.renderHdr()

    return None
def fill_with_smoothed(off,filterSize):
    """
    Iteratively replace NaN samples of ``off`` with box-filtered values.

    On every pass the array is convolved with a normalized
    ``filterSize`` x ``filterSize`` box kernel (NaNs interpolated by the
    convolution) and the NaN samples are replaced by the smoothed values.
    Samples where the smoothed result is exactly zero are set back to NaN so
    that they are revisited on the next pass.

    The loop stops as soon as no NaN is left, or after 100 passes.  The
    original condition ``cnt2!=0 & loop<100`` was parsed as
    ``cnt2 != (0 & loop) < 100`` and therefore never enforced the cap, so an
    array that cannot be filled looped forever.

    Returns a filled copy; ``off`` itself is not modified.
    """
    maxPasses = 100
    off_2filt = np.copy(off)
    kernel = np.ones((filterSize,filterSize),np.float32)/(filterSize*filterSize)

    for _ in range(maxPasses):
        idx2 = np.isnan(off_2filt)
        cnt2 = np.count_nonzero(idx2)
        print(cnt2)
        if cnt2 == 0:
            break

        off_filt = convolve(off_2filt,kernel,boundary='extend',nan_treatment='interpolate')
        off_2filt[idx2] = off_filt[idx2]
        # A smoothed value of exactly zero is treated as "still empty"
        off_2filt[off_filt == 0] = np.nan
        off_filt = None
    else:
        # Cap reached with gaps remaining; report it instead of spinning
        print('fill_with_smoothed: stopped after', maxPasses,
              'passes; remaining NaN samples:',
              np.count_nonzero(np.isnan(off_2filt)))

    return off_2filt
def resampleOffset(maskedFiltOffset, geometryOffset, outName):
    '''
    Oversample the filtered offsets to the geometry grid and add both.

    ``maskedFiltOffset + '.vrt'`` is resampled with gdal_translate to the
    width/length read from ``geometryOffset + '.xml'`` and stored as
    ``maskedFiltOffset + '.resampled'``.  The resampled offsets are then added
    line by line to the geometry offsets and written to ``outName``; ISCE
    headers are rendered for both output files.

    Raises subprocess.CalledProcessError if gdal_translate fails.
    '''
    from imageMath import IML
    import logging
    import subprocess

    resampledOffset = maskedFiltOffset + ".resampled"

    inimg = isceobj.createImage()
    inimg.load(geometryOffset + '.xml')
    length = inimg.getLength()
    width = inimg.getWidth()

    ###Currently making the assumption that top left of dense offsets and interferograms are the same.
    ###This is not true for now. We need to update DenseOffsets to have the ability to have same top left
    ###as the input images. Once that is implemented, the math here should all be consistent.
    ###However, this is not too far off since the skip for doing dense offsets is generally large.
    ###The offset is not too large to worry about right now. If the skip is decreased, this could be an issue.

    print('oversampling the filtered and masked offsets to the width and length:', width, ' ', length )
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through untouched, and a failing
    # gdal_translate is reported here rather than surfacing later as a
    # missing/short file.
    cmd = ['gdal_translate', '-of', 'ENVI', '-ot', 'Float64',
           '-outsize', str(width), str(length),
           maskedFiltOffset + '.vrt', resampledOffset]
    print(' '.join(cmd))
    subprocess.check_call(cmd)

    img = isceobj.createImage()
    img.setFilename(resampledOffset)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    ###Adding the geometry offset and oversampled offset
    geomoff = IML.mmapFromISCE(geometryOffset, logging)
    osoff = IML.mmapFromISCE(resampledOffset, logging)

    # Raw binary output: open in binary mode and close even if a line fails
    with open(outName, 'wb') as fid:
        for ll in range(length):
            val = geomoff.bands[0][ll,:] + osoff.bands[0][ll,:]
            val.tofile(fid)

    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    return None
def runRubbersheetAzimuth(self):
    """
    Build the azimuth rubber-sheet offsets from the dense offsets.

    Does nothing unless ``self.doRubbersheetingAzimuth`` is set.  The dense
    offsets are masked and filtered (SNR-based mask when range rubber sheeting
    is off, data-based mask when it is on), then oversampled and added to the
    geometry azimuth offsets by resampleOffset.
    """
    if not self.doRubbersheetingAzimuth:
        print('Rubber sheeting in azimuth not requested ... skipping')
        return

    # Dense offsets computed from cross-correlation, and the matching SNR file
    denseBase = os.path.join(self.insar.denseOffsetsDirname , self.insar.denseOffsetFilename)
    snrFile = denseBase + "_snr.bil"
    denseOffsetFile = denseBase + ".bil"

    # The azimuth offsets are stored in the first band
    azimuthBand = 1

    snrThreshold = self.rubberSheetSNRThreshold
    filterSize = self.rubberSheetFilterSize
    filtAzOffsetFile = os.path.join(self.insar.denseOffsetsDirname, self._insar.filtAzimuthOffsetFilename)

    # Mask and median filter the dense offsets
    if self.doRubbersheetingRange:
        print('Rubber sheeting in range is on, filtering the offsets with data-based mask')
        mask_filterNoSNR(denseOffsetFile, filterSize, filtAzOffsetFile)
    else:
        print('Rubber sheeting in range is off, filtering the offsets with a SNR-based mask')
        mask_filter(denseOffsetFile, snrFile, azimuthBand, snrThreshold, filterSize, filtAzOffsetFile)

    # Azimuth offsets computed from geometry, and the rubber-sheet output
    offsetsDir = self.insar.offsetsDirname
    geometryAzimuthOffset = os.path.join(offsetsDir, self.insar.azimuthOffsetFilename)
    sheetOffset = os.path.join(offsetsDir, self.insar.azimuthRubbersheetFilename)

    # Oversample the filtered offsets to the geometry grid and add them
    # to the geometry azimuth offsets.
    resampleOffset(filtAzOffsetFile, geometryAzimuthOffset, sheetOffset)

    return None

View File

@ -0,0 +1,279 @@
#
# Author: Heresh Fattahi
# Copyright 2017
#
# Modified by V. Brancato (10.12.2019)
# Including offset filtering with no SNR masking
#
import isce
import isceobj
from osgeo import gdal
from scipy import ndimage
import numpy as np
import os
from astropy.convolution import convolve
def mask_filterNoSNR(denseOffsetFile,filterSize,outName):
    """
    Mask and filter the range dense offsets without using the SNR layer.

    Band 1 (azimuth) and band 2 (range) are read from
    ``denseOffsetFile + '.vrt'``.  Outliers are flagged by off_masking,
    the flagged range samples are filled by fill_with_smoothed, the result
    is median filtered with ``filterSize`` and written to ``outName`` together
    with an ISCE header.
    """
    # Read the two offset bands; dropping the handle closes the dataset
    src = gdal.Open(denseOffsetFile+'.vrt',gdal.GA_ReadOnly)
    az_offsets = src.GetRasterBand(1).ReadAsArray()
    rg_offsets = src.GetRasterBand(2).ReadAsArray()
    src = None

    # Values below -9999 are the no-data values reported by ampcor; zero them
    rg_offsets[rg_offsets < -9999] = 0
    az_offsets[az_offsets < -9999] = 0

    # Range goes to the real part, azimuth to the imaginary part
    packed = rg_offsets + 1j*az_offsets

    # Flag samples far from the local median or exactly zero
    flagged = off_masking(packed,filterSize,thre=3)
    rg_masked = np.ma.array(packed.real,mask=flagged)
    az_masked = np.ma.array(packed.imag,mask=flagged)

    # Drop references that are no longer needed
    del flagged
    del packed

    # Also mask residual spots whose 3x3 median (masked samples counted as 0) is zero
    rg_empty = ndimage.median_filter(rg_masked.filled(fill_value=0),3) == 0
    az_empty = ndimage.median_filter(az_masked.filled(fill_value=0),3) == 0
    rg_masked.mask = rg_masked.mask | rg_empty | az_empty

    # Turn masked range samples into NaN and fill them iteratively
    samples = rg_masked.data
    samples[rg_masked.mask] = np.nan
    smoothed = fill_with_smoothed(samples,filterSize)

    # Final smoothing of the range offset map
    smoothed = ndimage.median_filter(smoothed,filterSize)
    _, width = smoothed.shape

    # Dump the raw array, then describe it with an ISCE header
    print ('writing masked and filtered offsets to: ', outName)
    smoothed.tofile(outName)

    hdr = isceobj.createImage()
    hdr.setFilename(outName)
    hdr.setWidth(width)
    hdr.setAccessMode('READ')
    hdr.bands = 1
    hdr.dataType = 'FLOAT'
    hdr.scheme = 'BIP'
    hdr.renderHdr()
    return
def off_masking(off,filterSize,thre=2):
    """
    Return the boolean mask of offsets to discard.

    ``off`` is complex: each of its real and imaginary parts holds one offset
    component.  A sample is masked when either part is more than ``thre`` away
    from its ``filterSize`` median-filtered value, or when either part is
    exactly zero.
    """
    re = off.real
    im = off.imag

    # Local medians of both components
    re_median = ndimage.median_filter(re, filterSize)
    im_median = ndimage.median_filter(im, filterSize)

    far_from_median = (np.abs(re - re_median) > thre) | (np.abs(im - im_median) > thre)
    exactly_zero = (im == 0) | (re == 0)
    return far_from_median | exactly_zero
def fill(data, invalid=None):
    """
    Replace invalid cells of 'data' by the value of the nearest valid cell.

    Input:
        data:    numpy array of any dimension
        invalid: boolean array of the same shape as 'data', True where the
                 value has to be replaced; np.isnan(data) is used when None

    Output:
        The filled array.
    """
    if invalid is None:
        invalid = np.isnan(data)

    # Indices of the closest cell where 'invalid' is False, one array per axis
    closest = ndimage.distance_transform_edt(invalid,
                                             return_distances=False,
                                             return_indices=True)
    lookup = tuple(closest)
    return data[lookup]
def fill_with_smoothed(off,filterSize):
    """
    Iteratively replace NaN samples of ``off`` with box-filtered values.

    On every pass the array is convolved with a normalized
    ``filterSize`` x ``filterSize`` box kernel (NaNs interpolated by the
    convolution) and the NaN samples are replaced by the smoothed values.
    Samples where the smoothed result is exactly zero are set back to NaN so
    that they are revisited on the next pass.

    The loop stops as soon as no NaN is left, or after 100 passes.  The
    original condition ``cnt2 !=0 & loop<100`` was parsed as
    ``cnt2 != (0 & loop) < 100`` and therefore never enforced the cap, so an
    array that cannot be filled looped forever.

    Returns a filled copy; ``off`` itself is not modified.
    """
    maxPasses = 100
    off_2filt = np.copy(off)
    kernel = np.ones((filterSize,filterSize),np.float32)/(filterSize*filterSize)

    for _ in range(maxPasses):
        idx2 = np.isnan(off_2filt)
        cnt2 = np.count_nonzero(idx2)
        print(cnt2)
        if cnt2 == 0:
            break

        off_filt = convolve(off_2filt,kernel,boundary='extend',nan_treatment='interpolate')
        off_2filt[idx2] = off_filt[idx2]
        # A smoothed value of exactly zero is treated as "still empty"
        off_2filt[off_filt == 0] = np.nan
        off_filt = None
    else:
        # Cap reached with gaps remaining; report it instead of spinning
        print('fill_with_smoothed: stopped after', maxPasses,
              'passes; remaining NaN samples:',
              np.count_nonzero(np.isnan(off_2filt)))

    return off_2filt
def mask_filter(denseOffsetFile, snrFile, band, snrThreshold, filterSize, outName):
    """
    SNR-based masking followed by filling and median filtering of the offsets.

    Reads band ``band`` of ``denseOffsetFile + '.vrt'`` and band 1 of
    ``snrFile + '.vrt'``.  Offsets with SNR below ``snrThreshold`` are replaced
    by the nearest valid value (see fill); the result is median filtered with
    ``filterSize`` and written to ``outName`` with an ISCE header.
    """
    # Offsets of the requested band
    offsetSource = gdal.Open(denseOffsetFile + '.vrt', gdal.GA_ReadOnly)
    values = offsetSource.GetRasterBand(band).ReadAsArray()
    offsetSource = None

    # Matching signal-to-noise ratio
    snrSource = gdal.Open(snrFile + '.vrt', gdal.GA_ReadOnly)
    snr = snrSource.GetRasterBand(1).ReadAsArray()
    snrSource = None

    # Low-SNR samples become NaN so that fill() treats them as invalid
    print ('masking the dense offsets with SNR threshold: ', snrThreshold)
    lowSnr = snr<snrThreshold
    values[lowSnr] = np.nan
    values = fill(values)

    # Smooth the filled offsets
    print ('Filtering with median filter with size : ', filterSize)
    values = ndimage.median_filter(values, size=filterSize)
    _, width = values.shape

    # Dump the raw array, then describe it with an ISCE header
    print ('writing masked and filtered offsets to: ', outName)
    values.tofile(outName)

    hdr = isceobj.createImage()
    hdr.setFilename(outName)
    hdr.setWidth(width)
    hdr.setAccessMode('READ')
    hdr.bands = 1
    hdr.dataType = 'FLOAT'
    hdr.scheme = 'BIP'
    hdr.renderHdr()

    return None
def resampleOffset(maskedFiltOffset, geometryOffset, outName):
    '''
    Oversample the filtered offsets to the geometry grid and add both.

    ``maskedFiltOffset + '.vrt'`` is resampled with gdal_translate to the
    width/length read from ``geometryOffset + '.xml'`` and stored as
    ``maskedFiltOffset + '.resampled'``.  The resampled offsets are then added
    line by line to the geometry offsets and written to ``outName``; ISCE
    headers are rendered for both output files.

    Raises subprocess.CalledProcessError if gdal_translate fails.
    '''
    from imageMath import IML
    import logging
    import subprocess

    resampledOffset = maskedFiltOffset + ".resampled"

    inimg = isceobj.createImage()
    inimg.load(geometryOffset + '.xml')
    length = inimg.getLength()
    width = inimg.getWidth()

    ###Currently making the assumption that top left of dense offsets and interferograms are the same.
    ###This is not true for now. We need to update DenseOffsets to have the ability to have same top left
    ###as the input images. Once that is implemented, the math here should all be consistent.
    ###However, this is not too far off since the skip for doing dense offsets is generally large.
    ###The offset is not too large to worry about right now. If the skip is decreased, this could be an issue.

    print('oversampling the filtered and masked offsets to the width and length:', width, ' ', length )
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through untouched, and a failing
    # gdal_translate is reported here rather than surfacing later as a
    # missing/short file.
    cmd = ['gdal_translate', '-of', 'ENVI', '-ot', 'Float64',
           '-outsize', str(width), str(length),
           maskedFiltOffset + '.vrt', resampledOffset]
    print(' '.join(cmd))
    subprocess.check_call(cmd)

    img = isceobj.createImage()
    img.setFilename(resampledOffset)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    ###Adding the geometry offset and oversampled offset
    geomoff = IML.mmapFromISCE(geometryOffset, logging)
    osoff = IML.mmapFromISCE(resampledOffset, logging)

    # Raw binary output: open in binary mode and close even if a line fails
    with open(outName, 'wb') as fid:
        for ll in range(length):
            val = geomoff.bands[0][ll,:] + osoff.bands[0][ll,:]
            val.tofile(fid)

    img = isceobj.createImage()
    img.setFilename(outName)
    img.setWidth(width)
    img.setLength(length)
    img.setAccessMode('READ')
    img.bands = 1
    img.dataType = 'DOUBLE'
    img.scheme = 'BIP'
    img.renderHdr()

    return None
def runRubbersheetRange(self):
    """
    Build the range rubber-sheet offsets from the dense offsets.

    Does nothing unless ``self.doRubbersheetingRange`` is set.  The dense
    offsets are masked with the data-based mask (mask_filterNoSNR), then
    oversampled and added to the geometry range offsets by resampleOffset.
    """
    if not self.doRubbersheetingRange:
        # The message previously said "azimuth", which misreported the step skipped
        print('Rubber sheeting in range not requested ... skipping')
        return

    # denseOffset file name computed from cross-correlation
    denseOffsetFile = os.path.join(self.insar.denseOffsetsDirname , self.insar.denseOffsetFilename)
    denseOffsetFile = denseOffsetFile + ".bil"

    filterSize = self.rubberSheetFilterSize
    filtRgOffsetFile = os.path.join(self.insar.denseOffsetsDirname, self._insar.filtRangeOffsetFilename)

    # masking and median filtering the dense offsets
    # NOTE(review): the original re-tested `not self.doRubbersheetingRange`
    # here to select the SNR-based mask_filter on band 2 (range), but that
    # branch could never run because the same test returns early above.
    # Only the reachable data-based path is kept; confirm whether a different
    # flag was intended for choosing the SNR-based mask.
    print('Rubber sheeting in range is on, applying a data-based offsets-masking')
    mask_filterNoSNR(denseOffsetFile,filterSize,filtRgOffsetFile)

    # range offsets computed from geometry
    offsetsDir = self.insar.offsetsDirname
    geometryRangeOffset = os.path.join(offsetsDir, self.insar.rangeOffsetFilename)
    RgsheetOffset = os.path.join(offsetsDir, self.insar.rangeRubbersheetFilename)

    # oversampling the filtRgOffsetFile to the same size of geometryRangeOffset
    # and then update the geometryRangeOffset by adding the oversampled
    # filtRgOffsetFile to it.
    resampleOffset(filtRgOffsetFile, geometryRangeOffset, RgsheetOffset)

    return None

View File

@ -54,7 +54,7 @@ class snaphu(Component):
self.azimuthLooks = obj.insar.topo.numberAzimuthLooks
azres = obj.insar.masterFrame.platform.antennaLength/2.0
azfact = obj.insar.topo.numberAzimuthLooks *azres / obj.insar.topo.azimuthSpacing
azfact = azres / obj.insar.topo.azimuthSpacing
rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope
rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW))

View File

@ -54,7 +54,7 @@ class snaphu_mcf(Component):
self.azimuthLooks = obj.insar.topo.numberAzimuthLooks
azres = obj.insar.masterFrame.platform.antennaLength/2.0
azfact = obj.insar.topo.numberAzimuthLooks *azres / obj.insar.topo.azimuthSpacing
azfact = azres / obj.insar.topo.azimuthSpacing
rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope
rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW))

View File

@ -2,19 +2,19 @@
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Copyright 2010 California Institute of Technology. ALL RIGHTS RESERVED.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# United States Government Sponsorship acknowledged. This software is subject to
# U.S. export control laws and regulations and has been classified as 'EAR99 NLR'
# (No [Export] License Required except when exporting to an embargoed country,
@ -49,7 +49,7 @@ if envGPUampcor['GPU_ACC_ENABLED']:
build_base += "-ccbin " + envGPUampcor['NVCC_CCBIN'] + " "
else:
print('Assuming default system compiler for nvcc.')
build_base += "-arch=sm_35 -shared -Xcompiler -fPIC -O3 "
build_base += "-shared -Xcompiler -fPIC -O3 "
build_cmd = build_base + "-dc -m64 -o $TARGET -c $SOURCE"
built_path = os.path.join(build, 'gpu-ampcor.o')
linked_path = os.path.join(build, 'gpu-ampcor-linked.o')

View File

@ -1,2 +1,2 @@
nvcc -arch=sm_35 -Xcompiler -fPIC -o gpu-topo.o -c Topo.cu
nvcc -Xcompiler -fPIC -o gpu-topo.o -c Topo.cu
cp -f gpu-topo.o ..

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
import os
@ -28,7 +28,7 @@ if envPyCuAmpcor['GPU_ACC_ENABLED']:
if not os.path.exists(initFile):
with open(initFile, 'w') as fout:
fout.write("#!/usr/bin/env python")
fout.write("#!/usr/bin/env python3")
listFiles = [initFile]
envPyCuAmpcor.Install(install, listFiles)

View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
#
# Test program to run ampcor with GPU
# For two GeoTiff images
#
import argparse
import numpy as np
from PyCuAmpcor import PyCuAmpcor
def main():
    '''
    Run GPU ampcor on two GeoTiff images with manually set parameters.
    '''
    objOffset = PyCuAmpcor() # create the processor

    objOffset.algorithm = 0 # cross-correlation method 0=freq 1=time
    objOffset.deviceID = 0 # GPU device id to be used
    objOffset.nStreams = 2 # cudaStreams; multiple streams to overlap data transfer with gpu calculations
    objOffset.masterImageName = "master.tif"
    objOffset.masterImageHeight = 16480 # RasterYSize
    objOffset.masterImageWidth = 17000 # RasterXSize
    objOffset.slaveImageName = "slave.tif"
    objOffset.slaveImageHeight = 16480
    objOffset.slaveImageWidth = 17000
    objOffset.windowSizeWidth = 64 # template window size
    objOffset.windowSizeHeight = 64
    objOffset.halfSearchRangeDown = 20 # search range
    objOffset.halfSearchRangeAcross = 20
    objOffset.derampMethod = 1 # deramping for complex signal, set to 1 for real images

    objOffset.skipSampleDown = 128 # strides between windows
    objOffset.skipSampleAcross = 64
    # gpu processes several windows in one batch/Chunk
    # total windows in Chunk = numberWindowDownInChunk*numberWindowAcrossInChunk
    # the max number of windows depending on gpu memory and type
    objOffset.numberWindowDownInChunk = 1
    objOffset.numberWindowAcrossInChunk = 10
    objOffset.corrSurfaceOverSamplingFactor = 8 # oversampling factor for correlation surface
    objOffset.corrSurfaceZoomInWindow = 16 # area in correlation surface to be oversampled
    objOffset.corrSufaceOverSamplingMethod = 1 # fft or sinc oversampler
    objOffset.useMmap = 1 # default using memory map as buffer, if having troubles, set to 0
    objOffset.mmapSize = 1 # mmap or buffer size used for transferring data from file to gpu, in GB

    objOffset.numberWindowDown = 40 # number of windows to be processed
    objOffset.numberWindowAcross = 100
    # if to process the whole image; some math needs to be done
    # margin = 0 # margins to be neglected
    #objOffset.numberWindowDown = (objOffset.slaveImageHeight - 2*margin - 2*objOffset.halfSearchRangeDown - objOffset.windowSizeHeight) // objOffset.skipSampleDown
    #objOffset.numberWindowAcross = (objOffset.slaveImageWidth - 2*margin - 2*objOffset.halfSearchRangeAcross - objOffset.windowSizeWidth) // objOffset.skipSampleAcross

    objOffset.setupParams()
    objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown # starting pixel offset
    # The across start pixel follows the across search range.  It was copied
    # from halfSearchRangeDown, which only worked because both are 20 here.
    objOffset.masterStartPixelAcrossStatic = objOffset.halfSearchRangeAcross
    objOffset.setConstantGrossOffset(0, 0) # gross offset between master and slave images
    objOffset.checkPixelInImageRange() # check the window positions before running
    objOffset.runAmpcor()
if __name__ == '__main__':
    # Script entry point: the guard had no body, so main() was never called
    main()

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python3
#
#
# test_cuAmpcor.py
# Test program to run ampcor with GPU
#
#
#
import argparse
import numpy as np
#from PyCuAmpcor import PyCuAmpcor
from isce.components.contrib.PyCuAmpcor import PyCuAmpcor
from PyCuAmpcor import PyCuAmpcor
def main():
'''
@ -20,10 +20,10 @@ def main():
objOffset.algorithm = 0
objOffset.deviceID = 0 # -1:let system find the best GPU
objOffset.nStreams = 2 #cudaStreams
objOffset.masterImageName = "master.slc"
objOffset.masterImageName = "20131213.slc.vrt"
objOffset.masterImageHeight = 43008
objOffset.masterImageWidth = 24320
objOffset.slaveImageName = "slave.slc"
objOffset.slaveImageName = "20131221.slc.vrt"
objOffset.slaveImageHeight = 43008
objOffset.slaveImageWidth = 24320
objOffset.windowSizeWidth = 64
@ -38,8 +38,9 @@ def main():
objOffset.numberWindowDownInChunk = 10
objOffset.numberWindowAcrossInChunk = 10
objOffset.corrSurfaceOverSamplingFactor = 8
objOffset.corrSurfaceZoomInWindow = 16
objOffset.corrSufaceOverSamplingMethod = 1
objOffset.corrSurfaceZoomInWindow = 16
objOffset.corrSufaceOverSamplingMethod = 1
objOffset.useMmap = 1
objOffset.mmapSize = 8
objOffset.setupParams()
@ -48,8 +49,8 @@ def main():
objOffset.setConstantGrossOffset(642, -30)
objOffset.checkPixelInImageRange()
objOffset.runAmpcor()
if __name__ == '__main__':
main()

View File

@ -1,27 +1,27 @@
#!/usr/bin/env python3
#
#
from PyCuAmpcor import PyCuAmpcor
import numpy as np
def main():
def main():
'''
Set parameters manually and run ampcor
'''
objOffset = PyCuAmpcor()
#step 1 set constant parameters
objOffset.masterImageName = "master.slc"
objOffset.masterImageName = "master.slc.vrt"
objOffset.masterImageHeight = 128
objOffset.masterImageWidth = 128
objOffset.slaveImageName = "slave.slc"
objOffset.slaveImageName = "slave.slc.vrt"
objOffset.masterImageHeight = 128
objOffset.masterImageWidth = 128
objOffset.masterImageWidth = 128
objOffset.skipSampleDown = 2
objOffset.skipSampleAcross = 2
objOffset.windowSizeHeight = 16
objOffset.windowSizeWidth = 16
objOffset.halfSearchRangeDown = 20
objOffset.halfSearchRangeDown = 20
objOffset.halfSearchRangeAcross = 20
objOffset.numberWindowDown = 2
objOffset.numberWindowAcross = 2
@ -29,19 +29,19 @@ def main():
objOffset.numberWindowAcrossInChunk = 2
# 2 set other dependent parameters and allocate aray parameters
objOffset.setupParams()
#3 set gross offsets: constant or varying
objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown
objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown
objOffset.masterStartPixelAcrossStatic = objOffset.halfSearchRangeAcross
vD = np.random.randint(0, 10, size =objOffset.numberWindows, dtype=np.int32)
vD = np.random.randint(0, 10, size =objOffset.numberWindows, dtype=np.int32)
vA = np.random.randint(0, 1, size = objOffset.numberWindows, dtype=np.int32)
objOffset.setVaryingGrossOffset(vD, vA)
objOffset.checkPixelInImageRange()
#4 run ampcor
objOffset.runAmpcor()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,154 @@
#include "GDALImage.h"
#include <iostream>
#include <stdexcept>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <errno.h>
#include <cublas_v2.h>
#include "cudaError.h"
/**
 * \brief Constructor
 *
 * Opens the raster with GDAL, selects one band and prepares the host-side
 * source used by loadToDevice(): either a GDAL virtual-memory mapping of the
 * band (useMmap != 0) or a page-locked host buffer (useMmap == 0).
 *
 * @param filename a std::string with the raster image file name
 * @param band band number passed to GetRasterBand()
 * @param cacheSizeInGB cache/buffer size setting; 0 keeps the GDAL default in mmap mode
 * @param useMmap non-zero to use the GDAL virtual memory mapping, 0 to use a host buffer
 * @throws std::runtime_error if the file, the band or the host memory cannot be set up
 */
GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap)
    : _useMmap(useMmap)
{
    // Release whatever has been acquired so far and raise. The destructor does
    // not run when a constructor throws, so cleanup has to happen here.
    // (A bare `throw;` without an active exception calls std::terminate.)
    auto fail = [this](const std::string &message) {
        if (_poBandVirtualMem) {
            CPLVirtualMemFree(_poBandVirtualMem);
            _poBandVirtualMem = NULL;
        }
        if (_poDataset) {
            GDALClose(static_cast<GDALDatasetH>(_poDataset));
            _poDataset = NULL;
        }
        throw std::runtime_error(message);
    };

    // open the file as dataset; GDAL reports its own error message on failure
    _poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly );
    if(!_poDataset)
        fail("GDALImage: cannot open " + filename);

    // check the band info
    int count = _poDataset->GetRasterCount();
    if(band > count)
    {
        std::cout << "The desired band " << band << " is greater than " << count << " bands available" << std::endl;
        fail("GDALImage: band " + std::to_string(band) + " not available in " + filename);
    }

    // get the desired band
    _poBand = _poDataset->GetRasterBand(band);
    if(!_poBand)
        fail("GDALImage: cannot access band " + std::to_string(band) + " of " + filename);

    // get the width(x), and height(y)
    _width = _poBand->GetXSize();
    _height = _poBand->GetYSize();

    _dataType = _poBand->GetRasterDataType();
    // determine the image type
    _isComplex = GDALDataTypeIsComplex(_dataType);
    // determine the pixel size in bytes; GDALGetDataTypeSize() reports bits
    _pixelSize = GDALGetDataTypeSize(_dataType) / 8;

    // NOTE(review): this evaluates to cacheSizeInGB * 1 MiB, not GiB, although
    // the parameter name says GB -- confirm the intended unit before changing it.
    _bufferSize = 1024*1024*cacheSizeInGB;

    // checking whether using memory map
    if(_useMmap) {

        char **papszOptions = NULL;
        // if cacheSizeInGB = 0, use default
        // else set the option
        if(cacheSizeInGB > 0)
            papszOptions = CSLSetNameValue( papszOptions,
                "CACHE_SIZE",
                std::to_string(_bufferSize).c_str());

        // space between two lines
        GIntBig pnLineSpace;
        // set up the virtual mem buffer; GDAL writes the pixel spacing (bytes)
        // back into _pixelSize
        _poBandVirtualMem = GDALGetVirtualMemAuto(
            static_cast<GDALRasterBandH>(_poBand),
            GF_Read,
            &_pixelSize,
            &pnLineSpace,
            papszOptions);

        // the option list is owned by the caller
        CSLDestroy(papszOptions);

        // check it
        if(!_poBandVirtualMem)
            fail("GDALImage: cannot create a virtual memory mapping for " + filename);

        // get the starting pointer
        _memPtr = CPLVirtualMemGetAddr(_poBandVirtualMem);
    }
    else { // use a page-locked host buffer
        checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
    }

    // make sure memPtr is not Null
    if (!_memPtr)
        fail("GDALImage: no host memory available for " + filename);

    // all done
}
/// load a tile of data h_tile x w_tile from CPU (mmap) to GPU
/// @param dArray pointer for array in device memory
/// @param h_offset Down/Height offset
/// @param w_offset Across/Width offset
/// @param h_tile Down/Height tile size
/// @param w_tile Across/Width tile size
/// @param stream CUDA stream for copying
void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream)
{
size_t tileStartOffset = (h_offset*_width + w_offset)*_pixelSize;
char * startPtr = (char *)_memPtr ;
startPtr += tileStartOffset;
// @note
// We assume down/across directions as rows/cols. Therefore, SLC mmap and device array are both row major.
// cuBlas assumes both source and target arrays are column major.
// To use cublasSetMatrix, we need to switch w_tile/h_tile for rows/cols
// checkCudaErrors(cublasSetMatrixAsync(w_tile, h_tile, sizeof(float2), startPtr, width, dArray, w_tile, stream));
if (_useMmap)
checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile*_pixelSize, startPtr, _width*_pixelSize,
w_tile*_pixelSize, h_tile, cudaMemcpyHostToDevice,stream));
else {
// get the total tile size in bytes
size_t tileSize = h_tile*w_tile*_pixelSize;
// if the size is bigger than existing buffer, reallocate
if (tileSize > _bufferSize) {
// maybe we need to make it to fit the pagesize
_bufferSize = tileSize;
checkCudaErrors(cudaFree(_memPtr));
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
}
// copy from file to buffer
CPLErr err = _poBand->RasterIO(GF_Read, //eRWFlag
w_offset, h_offset, //nXOff, nYOff
w_tile, h_tile, // nXSize, nYSize
_memPtr, // pData
w_tile*h_tile, 1, // nBufXSize, nBufYSize
_dataType, //eBufType
0, 0, //nPixelSpace, nLineSpace in pData
NULL //psExtraArg extra resampling callback
);
if(err != CE_None)
throw;
// copy from buffer to gpu
checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream));
}
}
/// Release the host-side source (virtual memory mapping or page-locked buffer)
/// and close the GDAL dataset.
GDALImage::~GDALImage()
{
    if (_useMmap) {
        // free the virtual memory mapping of the band
        if (_poBandVirtualMem)
            CPLVirtualMemFree(_poBandVirtualMem);
    }
    else if (_memPtr) {
        // buffer mode: the page-locked buffer was never released before.
        // The return code is deliberately not checked: a destructor must not throw.
        cudaFreeHost(_memPtr);
    }
    _memPtr = NULL;
    _poBandVirtualMem = NULL;

    // free the GDAL Dataset, close the file
    if (_poDataset)
        GDALClose(static_cast<GDALDatasetH>(_poDataset));
    _poDataset = NULL;
}
// end of file

View File

@ -0,0 +1,79 @@
// -*- c++ -*-
/**
* \brief Class for a raster image described by a GDAL VRT
*
* only complex (pixelOffset=8) or real(pixelOffset=4) images are supported, such as SLC and single-precision TIFF
*/
#ifndef __GDALIMAGE_H
#define __GDALIMAGE_H
#include <cublas_v2.h>
#include <string>
#include <gdal/gdal_priv.h>
#include <gdal/cpl_conv.h>
/// Host-side view of one raster band opened through GDAL, with a helper to
/// copy tiles of it to device memory.
class GDALImage{

public:
    using size_t = std::size_t;

private:
    // NOTE(review): nothing in the visible implementation assigns _fileSize;
    // it is zero-initialised so getFileSize() no longer returns an
    // indeterminate value.
    size_t _fileSize = 0;
    int _height = 0;
    int _width = 0;

    // buffer pointer (mapped band or page-locked host buffer)
    void * _memPtr = NULL;

    int _pixelSize = 0; //in bytes
    int _isComplex = 0;

    size_t _bufferSize = 0;
    int _useMmap = 1;

    GDALDataType _dataType = GDT_Unknown;
    CPLVirtualMem * _poBandVirtualMem = NULL;
    GDALDataset * _poDataset = NULL;
    GDALRasterBand * _poBand = NULL;

    // NOTE(review): the class owns raw handles but is still copyable; copying
    // an instance would release them twice -- consider deleting copy operations.

public:
    GDALImage() = delete;
    GDALImage(std::string fn, int band=1, int cacheSizeInGB=0, int useMmap=1);

    /// pointer to the start of the host-side data
    void * getmemPtr()
    {
        return(_memPtr);
    }

    size_t getFileSize()
    {
        return (_fileSize);
    }

    /// raster height (number of lines)
    size_t getHeight() {
        return (_height);
    }

    /// raster width (number of pixels per line)
    size_t getWidth()
    {
        return (_width);
    }

    /// pixel size in bytes
    int getPixelSize()
    {
        return _pixelSize;
    }

    /// true when the band data type is complex
    bool isComplex()
    {
        return _isComplex;
    }

    void loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream);
    ~GDALImage();

};
#endif //__GDALIMAGE_H

View File

@ -3,23 +3,24 @@ PROJECT = CUAMPCOR
LDFLAGS = -lcuda -lcudart -lcufft -lcublas
CXXFLAGS = -std=c++11 -fpermissive -fPIC -shared
NVCCFLAGS = -ccbin g++ -m64 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_60,code=sm_60 \
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
-ftz=false -prec-div=true -prec-sqrt=true
CXX=g++
NVCC=nvcc
DEPS = cudaUtil.h cudaError.h cuArrays.h SlcImage.h cuAmpcorParameter.h
OBJS = SlcImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h
OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
cuSincOverSampler.o cuDeramp.o cuOffset.o \
cuCorrNormalization.o cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \
cuAmpcorChunk.o cuAmpcorController.o cuEstimateStats.o
all: cuampcor
all: pyampcor
SlcImage.o: SlcImage.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ SlcImage.cu
GDALImage.o: GDALImage.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ GDALImage.cu
cuArrays.o: cuArrays.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuArrays.cu
@ -45,8 +46,8 @@ cuOffset.o: cuOffset.cu $(DEPS)
cuCorrNormalization.o: cuCorrNormalization.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalization.cu
cuAmpcorParameter.o: cuAmpcorParameter.cu
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu
cuAmpcorParameter.o: cuAmpcorParameter.cu
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu
cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrTimeDomain.cu
@ -54,8 +55,8 @@ cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS)
cuCorrFrequency.o: cuCorrFrequency.cu $(DEPS) cuCorrFrequency.h
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrFrequency.cu
cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu
cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS)
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu
cuAmpcorController.o: cuAmpcorController.cu
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorController.cu
@ -64,8 +65,8 @@ cuEstimateStats.o: cuEstimateStats.cu
$(NVCC) $(NVCCFLAGS) -c -o $@ cuEstimateStats.cu
cuampcor: $(OBJS)
pyampcor: $(OBJS)
rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace
clean:
rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat
rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat

View File

@ -1,6 +1,6 @@
#
#
# PYX file to control Python module interface to underlying CUDA-Ampcor code
#
#
from libcpp.string cimport string
import numpy as np
cimport numpy as np
@ -9,13 +9,13 @@ cimport numpy as np
cdef extern from "cudaUtil.h":
int gpuDeviceInit(int)
void gpuDeviceList()
int gpuGetMaxGflopsDeviceId()
int gpuGetMaxGflopsDeviceId()
def listGPU():
gpuDeviceList()
def findGPU():
return gpuGetMaxGflopsDeviceId()
return gpuGetMaxGflopsDeviceId()
def setGPU(int id):
return gpuDeviceInit(id)
@ -24,90 +24,92 @@ def setGPU(int id):
cdef extern from "cuAmpcorParameter.h":
cdef cppclass cuAmpcorParameter:
cuAmpcorParameter() except +
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
int deviceID ## Targeted GPU device ID: use -1 to auto select
int nStreams ## Number of streams used to run data transfers and compute kernels asynchronously
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
int deviceID ## Targeted GPU device ID: use -1 to auto select
int nStreams ## Number of streams used to run data transfers and compute kernels asynchronously
int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient
## chip or window size for raw data
int windowSizeHeightRaw ## Template window height (original size)
int windowSizeWidthRaw ## Template window width (original size)
int searchWindowSizeHeightRaw ## Search window height (original size)
int windowSizeWidthRaw ## Template window width (original size)
int searchWindowSizeHeightRaw ## Search window height (original size)
int searchWindowSizeWidthRaw ## Search window width (original size)
int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
int halfSearchRangeAcrossRaw ##(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
## chip or window size after oversampling
int rawDataOversamplingFactor ## Raw data oversampling factor (from original size to oversampled size)
## strides between chips/windows
## strides between chips/windows
int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size)
int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size)
## Zoom in region near location of max correlation
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
int oversamplingFactor ## Oversampling factor for interpolating correlation surface
int oversamplingMethod
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
int oversamplingMethod
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
##master image
string masterImageName ## master SLC image name
int imageDataType1 ## master image data type, 2=cfloat=complex=float2 1=float
int masterImageHeight ## master image height
int masterImageHeight ## master image height
int masterImageWidth ## master image width
##slave image
string slaveImageName ## slave SLC image name
int imageDataType2 ## slave image data type, 2=cfloat=complex=float2 1=float
int slaveImageHeight ## slave image height
int slaveImageHeight ## slave image height
int slaveImageWidth ## slave image width
int mmapSizeInGB ## mmap buffer size in unit of Gigabytes
int useMmap ## whether to use mmap
int mmapSizeInGB ## mmap buffer size in units of gigabytes (if mmap is not used, the buffer size)
## total number of chips/windows
int numberWindowDown ## number of total windows (down)
int numberWindowAcross ## number of total windows (across)
int numberWindows ## numberWindowDown*numberWindowAcross
## number of chips/windows in a batch/chunk
int numberWindowDownInChunk ## number of windows processed in a chunk (down)
int numberWindowAcrossInChunk ## number of windows processed in a chunk (across)
int numberWindowsInChunk ## numberWindowDownInChunk*numberWindowAcrossInChunk
int numberChunkDown ## number of chunks (down)
int numberChunkAcross ## number of chunks (across)
int numberChunks
int numberChunks
int *masterStartPixelDown ## master starting pixels for each window (down)
int *masterStartPixelDown ## master starting pixels for each window (down)
int *masterStartPixelAcross ## master starting pixels for each window (across)
int *slaveStartPixelDown ## slave starting pixels for each window (down)
int *slaveStartPixelAcross ## slave starting pixels for each window (across)
int *slaveStartPixelDown ## slave starting pixels for each window (down)
int *slaveStartPixelAcross ## slave starting pixels for each window (across)
int *grossOffsetDown ## Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel
int *grossOffsetAcross ## Gross offsets between master and slave windows (across)
int *grossOffsetAcross ## Gross offsets between master and slave windows (across)
int grossOffsetDown0 ## constant gross offset (down)
int grossOffsetAcross0 ## constant gross offset (across)
int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset
int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset
int masterStartPixelAcross0 ## the first pixel of master image (across)
int *masterChunkStartPixelDown ## array of starting pixels for all master chunks (down)
int *masterChunkStartPixelAcross ## array of starting pixels for all master chunks (across)
int *slaveChunkStartPixelDown ## array of starting pixels for all slave chunks (down)
int *slaveChunkStartPixelAcross ## array of starting pixels for all slave chunks (across)
int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU
int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU
int *masterChunkWidth ## array of width of all master chunks
int *slaveChunkHeight ## array of heights of all slave chunks
int *slaveChunkWidth ## array of width of all slave chunks
int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU
int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU
int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU
int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU
int maxSlaveChunkHeight
int maxSlaveChunkWidth
string grossOffsetImageName
string offsetImageName ## Output Offset fields filename
string grossOffsetImageName
string offsetImageName ## Output Offset fields filename
string snrImageName ## Output SNR filename
void setStartPixels(int*, int*, int*, int*)
void setStartPixels(int, int, int*, int*)
void setStartPixels(int, int, int, int)
void checkPixelInImageRange() ## check whether each window is within the image range
string covImageName ## Output COV filename
void setStartPixels(int*, int*, int*, int*)
void setStartPixels(int, int, int*, int*)
void setStartPixels(int, int, int, int)
void checkPixelInImageRange() ## check whether each window is within the image range
void setupParameters() ## Process other parameters after Python input
cdef extern from "cuAmpcorController.h":
@ -115,34 +117,40 @@ cdef extern from "cuAmpcorController.h":
cuAmpcorController() except +
cuAmpcorParameter *param
void runAmpcor()
cdef class PyCuAmpcor(object):
'''
Python interface for cuda Ampcor
Python interface for cuda Ampcor
'''
cdef cuAmpcorController c_cuAmpcor
def __cinit__(self):
return
return
@property
def algorithm(self):
return self.c_cuAmpcor.param.algorithm
return self.c_cuAmpcor.param.algorithm
@algorithm.setter
def algorithm(self, int a):
self.c_cuAmpcor.param.algorithm = a
@property
def deviceID(self):
return self.c_cuAmpcor.param.deviceID
return self.c_cuAmpcor.param.deviceID
@deviceID.setter
def deviceID(self, int a):
self.c_cuAmpcor.param.deviceID = a
@property
def nStreams(self):
return self.c_cuAmpcor.param.nStreams
return self.c_cuAmpcor.param.nStreams
@nStreams.setter
def nStreams(self, int a):
self.c_cuAmpcor.param.nStreams = a
@property
@property
def useMmap(self):
return self.c_cuAmpcor.param.useMmap
@useMmap.setter
def useMmap(self, int a):
self.c_cuAmpcor.param.useMmap = a
@property
def mmapSize(self):
return self.c_cuAmpcor.param.mmapSizeInGB
@mmapSize.setter
@ -150,19 +158,19 @@ cdef class PyCuAmpcor(object):
self.c_cuAmpcor.param.mmapSizeInGB = a
@property
def derampMethod(self):
return self.c_cuAmpcor.param.derampMethod
return self.c_cuAmpcor.param.derampMethod
@derampMethod.setter
def derampMethod(self, int a):
self.c_cuAmpcor.param.derampMethod = a
@property
def windowSizeHeight(self):
return self.c_cuAmpcor.param.windowSizeHeightRaw
return self.c_cuAmpcor.param.windowSizeHeightRaw
@windowSizeHeight.setter
def windowSizeHeight(self, int a):
self.c_cuAmpcor.param.windowSizeHeightRaw = a
@property
def windowSizeWidth(self):
return self.c_cuAmpcor.param.windowSizeWidthRaw
return self.c_cuAmpcor.param.windowSizeWidthRaw
@windowSizeWidth.setter
def windowSizeWidth(self, int a):
self.c_cuAmpcor.param.windowSizeWidthRaw = a
@ -200,7 +208,7 @@ cdef class PyCuAmpcor(object):
@skipSampleAcross.setter
def skipSampleAcross(self, int a):
self.c_cuAmpcor.param.skipSampleAcrossRaw = a
@property
def rawDataOversamplingFactor(self):
"""anti-aliasing oversampling factor"""
@ -229,7 +237,7 @@ cdef class PyCuAmpcor(object):
@corrSufaceOverSamplingMethod.setter
def corrSufaceOverSamplingMethod(self, int a):
self.c_cuAmpcor.param.oversamplingMethod = a
@property
@property
def masterImageName(self):
return self.c_cuAmpcor.param.masterImageName
@masterImageName.setter
@ -241,12 +249,12 @@ cdef class PyCuAmpcor(object):
@slaveImageName.setter
def slaveImageName(self, str a):
self.c_cuAmpcor.param.slaveImageName = <string> a.encode()
@property
@property
def masterImageName(self):
return self.c_cuAmpcor.param.masterImageName
@masterImageName.setter
def masterImageName(self, str a):
self.c_cuAmpcor.param.masterImageName = <string> a.encode()
self.c_cuAmpcor.param.masterImageName = <string> a.encode()
@property
def masterImageHeight(self):
return self.c_cuAmpcor.param.masterImageHeight
@ -258,7 +266,7 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.masterImageWidth
@masterImageWidth.setter
def masterImageWidth(self, int a):
self.c_cuAmpcor.param.masterImageWidth=a
self.c_cuAmpcor.param.masterImageWidth=a
@property
def slaveImageHeight(self):
return self.c_cuAmpcor.param.slaveImageHeight
@ -270,8 +278,8 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.slaveImageWidth
@slaveImageWidth.setter
def slaveImageWidth(self, int a):
self.c_cuAmpcor.param.slaveImageWidth=a
self.c_cuAmpcor.param.slaveImageWidth=a
@property
def numberWindowDown(self):
return self.c_cuAmpcor.param.numberWindowDown
@ -283,11 +291,11 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.numberWindowAcross
@numberWindowAcross.setter
def numberWindowAcross(self, int a):
self.c_cuAmpcor.param.numberWindowAcross = a
self.c_cuAmpcor.param.numberWindowAcross = a
@property
def numberWindows(self):
return self.c_cuAmpcor.param.numberWindows
@property
def numberWindowDownInChunk(self):
return self.c_cuAmpcor.param.numberWindowDownInChunk
@ -299,7 +307,7 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.numberWindowAcrossInChunk
@numberWindowAcrossInChunk.setter
def numberWindowAcrossInChunk(self, int a):
self.c_cuAmpcor.param.numberWindowAcrossInChunk = a
self.c_cuAmpcor.param.numberWindowAcrossInChunk = a
@property
def numberChunkDown(self):
return self.c_cuAmpcor.param.numberChunkDown
@ -309,9 +317,9 @@ cdef class PyCuAmpcor(object):
@property
def numberChunks(self):
return self.c_cuAmpcor.param.numberChunks
## gross offsets
## gross offsets
@property
def grossOffsetImageName(self):
return self.c_cuAmpcor.param.grossOffsetImageName
@ -324,13 +332,21 @@ cdef class PyCuAmpcor(object):
@offsetImageName.setter
def offsetImageName(self, str a):
self.c_cuAmpcor.param.offsetImageName = <string> a.encode()
@property
def snrImageName(self):
return self.c_cuAmpcor.param.snrImageName
@snrImageName.setter
def snrImageName(self, str a):
self.c_cuAmpcor.param.snrImageName = <string> a.encode()
@property
def covImageName(self):
return self.c_cuAmpcor.param.covImageName
@covImageName.setter
def covImageName(self, str a):
self.c_cuAmpcor.param.covImageName = <string> a.encode()
@property
def masterStartPixelDownStatic(self):
return self.c_cuAmpcor.param.masterStartPixelDown0
@ -342,20 +358,20 @@ cdef class PyCuAmpcor(object):
return self.c_cuAmpcor.param.masterStartPixelAcross0
@masterStartPixelAcrossStatic.setter
def masterStartPixelAcrossStatic(self, int a):
self.c_cuAmpcor.param.masterStartPixelAcross0 = a
self.c_cuAmpcor.param.masterStartPixelAcross0 = a
@property
def grossOffsetDownStatic(self):
return self.c_cuAmpcor.param.grossOffsetDown0
@grossOffsetDownStatic.setter
def grossOffsetDownStatic(self, int a):
self.c_cuAmpcor.param.grossOffsetDown0 =a
self.c_cuAmpcor.param.grossOffsetDown0 =a
@property
def grossOffsetAcrossStatic(self):
return self.c_cuAmpcor.param.grossOffsetAcross0
@grossOffsetAcrossStatic.setter
def grossOffsetAcrossStatic(self, int a):
self.c_cuAmpcor.param.grossOffsetAcross0 =a
self.c_cuAmpcor.param.grossOffsetAcross0 =a
@property
def grossOffsetDownDynamic(self):
cdef int *c_data
@ -366,12 +382,12 @@ cdef class PyCuAmpcor(object):
return p_data
@grossOffsetDownDynamic.setter
def grossOffsetDownDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa):
cdef int *c_data
cdef int *c_data
cdef int *p_data
c_data = self.c_cuAmpcor.param.grossOffsetDown
p_data = <int *> pa.data
for i in range (self.numberWindows):
c_data[i] = p_data[i]
c_data[i] = p_data[i]
@property
def grossOffsetAcrossDynamic(self):
cdef int *c_data
@ -382,23 +398,23 @@ cdef class PyCuAmpcor(object):
return p_data
@grossOffsetAcrossDynamic.setter
def grossOffsetAcrossDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa):
cdef int *c_data
cdef int *c_data
cdef int *p_data
c_data = self.c_cuAmpcor.param.grossOffsetAcross
p_data = <int *> pa.data
for i in range (self.numberWindows):
c_data[i] = p_data[i]
c_data[i] = p_data[i]
return
def setConstantGrossOffset(self, int goDown, int goAcross):
"""
"""
constant gross offsets
param goDown gross offset in azimuth direction
param goAcross gross offset in range direction
"""
self.c_cuAmpcor.param.setStartPixels(<int>self.masterStartPixelDownStatic, <int>self.masterStartPixelAcrossStatic, goDown, goAcross)
def setVaryingGrossOffset(self, np.ndarray[np.int32_t,ndim=1,mode="c"] vD, np.ndarray[np.int32_t,ndim=1,mode="c"] vA):
"""
varying gross offsets for each window
@ -411,21 +427,21 @@ cdef class PyCuAmpcor(object):
def checkPixelInImageRange(self):
""" check whether each window is with image range """
self.c_cuAmpcor.param.checkPixelInImageRange()
def setupParams(self):
"""
set up constant parameters and allocate array parameters (offsets)
should be called after number of windows is set and before setting varying gross offsets
"""
self.c_cuAmpcor.param.setupParameters()
self.c_cuAmpcor.param.setupParameters()
def runAmpcor(self):
""" main procedure to run ampcor """
self.c_cuAmpcor.runAmpcor()

View File

@ -6,7 +6,7 @@ package = envPyCuAmpcor['PACKAGE']
project = envPyCuAmpcor['PROJECT']
build = envPyCuAmpcor['PRJ_LIB_DIR']
install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project
listFiles = ['SlcImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu',
listFiles = ['GDALImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu',
'cuArraysPadding.cu', 'cuOverSampler.cu',
'cuSincOverSampler.cu', 'cuDeramp.cu',
'cuOffset.cu', 'cuCorrNormalization.cu',

View File

@ -2,58 +2,74 @@
#include "cuAmpcorUtil.h"
/**
* Run ampcor process for a batch of images (a chunk)
* Run ampcor process for a batch of images (a chunk)
* @param[in] idxDown_ index of the chunk along Down/Azimuth direction
* @param[in] idxAcross_ index of the chunk along Across/Range direction
*/
*/
void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
{
// set chunk index
setIndex(idxDown_, idxAcross_);
// load master image chunk
loadMasterChunk();
// load master image chunk
loadMasterChunk();
//std::cout << "load master chunk ok\n";
cuArraysAbs(c_masterBatchRaw, r_masterBatchRaw, stream);
cuArraysSubtractMean(r_masterBatchRaw, stream);
// load slave image chunk
loadSlaveChunk();
cuArraysAbs(c_slaveBatchRaw, r_slaveBatchRaw, stream);
//std::cout << "load slave chunk ok\n";
//cross correlation for non-oversampled data
if(param->algorithm == 0) {
cuCorrFreqDomain->execute(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw);
}
else {
cuCorrTimeDomain(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream); //time domain cross correlation
}
}
cuCorrNormalize(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream);
//find the maximum location of non-oversampled correlation
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream);
// Estimate SNR (Minyan Zhong)
//std::cout<< "flag stats 1" <<std::endl;
//cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchZoomIn, i_corrBatchZoomInValid, offsetInit, stream);
// find the maximum location of non-oversampled correlation
// 41 x 41, if halfsearchrange=20
//cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream);
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, r_maxval, stream);
//std::cout<< "flag stats 2" <<std::endl;
//cuArraysSumCorr(r_corrBatchZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
offsetInit->outputToFile("offsetInit1", stream);
//std::cout<< "flag stats 3" <<std::endl;
//cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream);
// Estimation of statistics
// Author: Minyan Zhong
// Extraction of correlation surface around the peak
cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchRawZoomIn, i_corrBatchZoomInValid, offsetInit, stream);
//
cudaDeviceSynchronize();
// debug: output the intermediate results
r_maxval->outputToFile("r_maxval",stream);
r_corrBatchRaw->outputToFile("r_corrBatchRaw",stream);
r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawZoomIn",stream);
i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid",stream);
// Summation of correlation and data point values
cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
// SNR
cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream);
// Variance
// cuEstimateVariance(r_corrBatchRaw, offsetInit, r_maxval, r_covValue, stream);
// Using the approximate estimation to adjust slave image (half search window size becomes only 4 pixels)
//offsetInit->debuginfo(stream);
// determine the starting pixel to extract slave images around the max location
cuDetermineSlaveExtractOffset(offsetInit,
cuDetermineSlaveExtractOffset(offsetInit,
param->halfSearchRangeDownRaw, // old range
param->halfSearchRangeAcrossRaw,
param->halfSearchRangeAcrossRaw,
param->halfZoomWindowSizeRaw, // new range
param->halfZoomWindowSizeRaw,
stream);
@ -63,58 +79,67 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
masterBatchOverSampler->execute(c_masterBatchRaw, c_masterBatchOverSampled, param->derampMethod);
cuArraysAbs(c_masterBatchOverSampled, r_masterBatchOverSampled, stream);
cuArraysSubtractMean(r_masterBatchOverSampled, stream);
// extract slave and oversample
cuArraysCopyExtract(c_slaveBatchRaw, c_slaveBatchZoomIn, offsetInit, stream);
slaveBatchOverSampler->execute(c_slaveBatchZoomIn, c_slaveBatchOverSampled, param->derampMethod);
cuArraysAbs(c_slaveBatchOverSampled, r_slaveBatchOverSampled, stream);
// correlate oversampled images
if(param->algorithm == 0) {
cuCorrFreqDomain_OverSampled->execute(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn);
}
else {
cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
}
cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
}
cuCorrNormalize(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
//std::cout << "debug correlation oversample\n";
//std::cout << r_masterBatchOverSampled->height << " " << r_masterBatchOverSampled->width << "\n";
//std::cout << r_slaveBatchOverSampled->height << " " << r_slaveBatchOverSampled->width << "\n";
//std::cout << r_corrBatchZoomIn->height << " " << r_corrBatchZoomIn->width << "\n";
// oversample the correlation surface
// oversample the correlation surface
cuArraysCopyExtract(r_corrBatchZoomIn, r_corrBatchZoomInAdjust, make_int2(0,0), stream);
//std::cout << "debug oversampling " << r_corrBatchZoomInAdjust << " " << r_corrBatchZoomInOverSampled << "\n";
if(param->oversamplingMethod) {
corrSincOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
}
else {
corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
}
//find the max again
cuArraysMaxloc2D(r_corrBatchZoomInOverSampled, offsetZoomIn, corrMaxValue, stream);
// determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel)
cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal,
param->oversamplingFactor, param->rawDataOversamplingFactor,
// determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel)
cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal,
param->oversamplingFactor, param->rawDataOversamplingFactor,
param->halfSearchRangeDownRaw, param->halfSearchRangeAcrossRaw,
param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw,
stream);
//offsetInit->debuginfo(stream);
//offsetZoomIn->debuginfo(stream);
//offsetFinal->debuginfo(stream);
//offsetFinal->debuginfo(stream);
// Do insertion.
// Offsetfields.
cuArraysCopyInsert(offsetFinal, offsetImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// Minyan Zhong
//cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
//cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// Debugging matrix.
cuArraysCopyInsert(r_corrBatchSum, floatImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
cuArraysCopyInsert(i_corrBatchValidCount, intImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// Old: save max correlation coefficients.
//cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// New: save SNR
cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
// Variance.
cuArraysCopyInsert(r_covValue, covImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
}
void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
@ -122,14 +147,14 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
idxChunkDown = idxDown_;
idxChunkAcross = idxAcross_;
idxChunk = idxChunkAcross + idxChunkDown*param->numberChunkAcross;
if(idxChunkDown == param->numberChunkDown -1) {
nWindowsDown = param->numberWindowDown - param->numberWindowDownInChunk*(param->numberChunkDown -1);
}
else {
nWindowsDown = param->numberWindowDownInChunk;
}
if(idxChunkAcross == param->numberChunkAcross -1) {
nWindowsAcross = param->numberWindowAcross - param->numberWindowAcrossInChunk*(param->numberChunkAcross -1);
}
@ -137,20 +162,20 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
nWindowsAcross = param->numberWindowAcrossInChunk;
}
//std::cout << "DEBUG setIndex" << idxChunk << " " << nWindowsDown << " " << nWindowsAcross << "\n";
}
/// obtain the starting pixels for each chip
/// @param[in] oStartPixel
/// @param[in] oStartPixel
///
void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff)
{
for(int i=0; i<param->numberWindowDownInChunk; ++i) {
int iDown = i;
if(i>=nWindowsDown) iDown = nWindowsDown-1;
if(i>=nWindowsDown) iDown = nWindowsDown-1;
for(int j=0; j<param->numberWindowAcrossInChunk; ++j){
int iAcross = j;
if(j>=nWindowsAcross) iAcross = nWindowsAcross-1;
if(j>=nWindowsAcross) iAcross = nWindowsAcross-1;
int idxInChunk = iDown*param->numberWindowAcrossInChunk+iAcross;
int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross
+ idxChunkAcross*param->numberWindowAcrossInChunk+iAcross;
@ -158,108 +183,179 @@ void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel,
//fprintf(stderr, "relative offset %d %d %d %d\n", i, j, rStartPixel[idxInChunk], diff);
}
}
}
}
void cuAmpcorChunk::loadMasterChunk()
{
//load a chunk from mmap to gpu
int startD = param->masterChunkStartPixelDown[idxChunk];
int startA = param->masterChunkStartPixelAcross[idxChunk];
int height = param->masterChunkHeight[idxChunk];
int width = param->masterChunkWidth[idxChunk];
masterImage->loadToDevice(c_masterChunkRaw->devData, startD, startA, height, width, stream);
std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n";
//copy the chunk to a batch of images format (nImages, height, width)
//use cpu for some simple math
// we first load the whole chunk of image from cpu to a gpu buffer c(r)_masterChunkRaw
// then copy to a batch of windows with (nImages, height, width) (leading dimension on the right)
// get the chunk size to be loaded to gpu
int startD = param->masterChunkStartPixelDown[idxChunk]; //start pixel down (along height)
int startA = param->masterChunkStartPixelAcross[idxChunk]; // start pixel across (along width)
int height = param->masterChunkHeight[idxChunk]; // number of pixels along height
int width = param->masterChunkWidth[idxChunk]; // number of pixels along width
//use cpu to compute the starting positions for each window
getRelativeOffset(ChunkOffsetDown->hostData, param->masterStartPixelDown, param->masterChunkStartPixelDown[idxChunk]);
// copy the positions to gpu
ChunkOffsetDown->copyToDevice(stream);
// same for the across direction
getRelativeOffset(ChunkOffsetAcross->hostData, param->masterStartPixelAcross, param->masterChunkStartPixelAcross[idxChunk]);
ChunkOffsetAcross->copyToDevice(stream);
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
if(param->derampMethod == 0) {
cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
// check whether the image is complex (e.g., SLC) or real( e.g. TIFF)
if(masterImage->isComplex())
{
// allocate a gpu buffer to load data from cpu/file
// try allocate/deallocate the buffer on the fly to save gpu memory 07/09/19
c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
c_masterChunkRaw->allocate();
// load the data from cpu
masterImage->loadToDevice((void *)c_masterChunkRaw->devData, startD, startA, height, width, stream);
//std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n";
//copy the chunk to a batch format (nImages, height, width)
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
if(param->derampMethod == 0) {
cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
else {
cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
// deallocate the gpu buffer
delete c_masterChunkRaw;
}
// if the image is real
else {
cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
r_masterChunkRaw = new cuArrays<float> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
r_masterChunkRaw->allocate();
// load the data from cpu
masterImage->loadToDevice((void *)r_masterChunkRaw->devData, startD, startA, height, width, stream);
// copy the chunk (real) to a batch format (complex)
cuArraysCopyToBatchWithOffsetR2C(r_masterChunkRaw, param->masterChunkWidth[idxChunk],
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
// deallocate the gpu buffer
delete r_masterChunkRaw;
}
}
void cuAmpcorChunk::loadSlaveChunk()
{
//load a chunk from mmap to gpu
slaveImage->loadToDevice(c_slaveChunkRaw->devData,
param->slaveChunkStartPixelDown[idxChunk],
param->slaveChunkStartPixelAcross[idxChunk],
param->slaveChunkHeight[idxChunk],
param->slaveChunkWidth[idxChunk],
stream);
//copy to a batch format (nImages, height, width)
getRelativeOffset(ChunkOffsetDown->hostData, param->slaveStartPixelDown, param->slaveChunkStartPixelDown[idxChunk]);
ChunkOffsetDown->copyToDevice(stream);
getRelativeOffset(ChunkOffsetAcross->hostData, param->slaveStartPixelAcross, param->slaveChunkStartPixelAcross[idxChunk]);
ChunkOffsetAcross->copyToDevice(stream);
if(param->derampMethod == 0) {
cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
else
if(slaveImage->isComplex())
{
cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
c_slaveChunkRaw->allocate();
//load a chunk from mmap to gpu
slaveImage->loadToDevice(c_slaveChunkRaw->devData,
param->slaveChunkStartPixelDown[idxChunk],
param->slaveChunkStartPixelAcross[idxChunk],
param->slaveChunkHeight[idxChunk],
param->slaveChunkWidth[idxChunk],
stream);
if(param->derampMethod == 0) {
cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
else {
cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
}
delete c_slaveChunkRaw;
}
else { //real image
//allocate the gpu buffer
r_slaveChunkRaw = new cuArrays<float> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
r_slaveChunkRaw->allocate();
//load a chunk from mmap to gpu
slaveImage->loadToDevice(r_slaveChunkRaw->devData,
param->slaveChunkStartPixelDown[idxChunk],
param->slaveChunkStartPixelAcross[idxChunk],
param->slaveChunkHeight[idxChunk],
param->slaveChunkWidth[idxChunk],
stream);
// convert to the batch format
cuArraysCopyToBatchWithOffsetR2C(r_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
delete r_slaveChunkRaw;
}
}
cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_,
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cudaStream_t stream_)
cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_,
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_)
{
param = param_;
masterImage = master_;
slaveImage = slave_;
slaveImage = slave_;
offsetImage = offsetImage_;
snrImage = snrImage_;
covImage = covImage_;
intImage1 = intImage1_;
floatImage1 = floatImage1_;
stream = stream_;
std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n";
c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
c_masterChunkRaw->allocate();
c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
c_slaveChunkRaw->allocate();
// std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n";
// try allocate/deallocate on the fly to save gpu memory 07/09/19
// c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
// c_masterChunkRaw->allocate();
// c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
// c_slaveChunkRaw->allocate();
ChunkOffsetDown = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
ChunkOffsetDown->allocate();
ChunkOffsetDown->allocateHost();
ChunkOffsetAcross = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
ChunkOffsetAcross->allocate();
ChunkOffsetAcross->allocateHost();
c_masterBatchRaw = new cuArrays<float2> (
param->windowSizeHeightRaw, param->windowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_masterBatchRaw->allocate();
c_slaveBatchRaw = new cuArrays<float2> (
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_slaveBatchRaw->allocate();
r_masterBatchRaw = new cuArrays<float> (
param->windowSizeHeightRaw, param->windowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_masterBatchRaw->allocate();
r_slaveBatchRaw = new cuArrays<float> (
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_slaveBatchRaw->allocate();
c_slaveBatchZoomIn = new cuArrays<float2> (
param->searchWindowSizeHeightRawZoomIn, param->searchWindowSizeWidthRawZoomIn,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_slaveBatchZoomIn->allocate();
c_masterBatchOverSampled = new cuArrays<float2> (
param->windowSizeHeight, param->windowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
@ -269,7 +365,7 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
c_slaveBatchOverSampled->allocate();
r_masterBatchOverSampled = new cuArrays<float> (
param->windowSizeHeight, param->windowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
@ -279,66 +375,114 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_slaveBatchOverSampled->allocate();
masterBatchOverSampler = new cuOverSamplerC2C(
c_masterBatchRaw->height, c_masterBatchRaw->width, //orignal size
c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size
c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size
c_masterBatchRaw->count, stream);
slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width,
slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width,
c_slaveBatchOverSampled->height, c_slaveBatchOverSampled->width, c_slaveBatchRaw->count, stream);
r_corrBatchRaw = new cuArrays<float> (
param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1,
param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1,
param->numberWindowDownInChunk,
param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1,
param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchRaw->allocate();
r_corrBatchZoomIn = new cuArrays<float> (
param->searchWindowSizeHeight - param->windowSizeHeight+1,
param->searchWindowSizeWidth - param->windowSizeWidth+1,
param->numberWindowDownInChunk,
param->searchWindowSizeHeight - param->windowSizeHeight+1,
param->searchWindowSizeWidth - param->windowSizeWidth+1,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchZoomIn->allocate();
r_corrBatchZoomInAdjust = new cuArrays<float> (
param->searchWindowSizeHeight - param->windowSizeHeight,
param->searchWindowSizeWidth - param->windowSizeWidth,
param->numberWindowDownInChunk,
param->searchWindowSizeHeight - param->windowSizeHeight,
param->searchWindowSizeWidth - param->windowSizeWidth,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchZoomInAdjust->allocate();
r_corrBatchZoomInOverSampled = new cuArrays<float> (
param->zoomWindowSize * param->oversamplingFactor,
param->zoomWindowSize * param->oversamplingFactor,
param->numberWindowDownInChunk,
param->zoomWindowSize * param->oversamplingFactor,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchZoomInOverSampled->allocate();
offsetInit = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
offsetInit->allocate();
offsetZoomIn = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
offsetZoomIn->allocate();
offsetFinal = new cuArrays<float2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
offsetFinal->allocate();
corrMaxValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
corrMaxValue->allocate();
// new arrays due to snr estimation
std::cout<< "corrRawZoomInHeight: " << param->corrRawZoomInHeight << "\n";
std::cout<< "corrRawZoomInWidth: " << param->corrRawZoomInWidth << "\n";
r_corrBatchRawZoomIn = new cuArrays<float> (
param->corrRawZoomInHeight,
param->corrRawZoomInWidth,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchRawZoomIn->allocate();
i_corrBatchZoomInValid = new cuArrays<int> (
param->corrRawZoomInHeight,
param->corrRawZoomInWidth,
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
i_corrBatchZoomInValid->allocate();
r_corrBatchSum = new cuArrays<float> (
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
r_corrBatchSum->allocate();
i_corrBatchValidCount = new cuArrays<int> (
param->numberWindowDownInChunk,
param->numberWindowAcrossInChunk);
i_corrBatchValidCount->allocate();
i_maxloc = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
i_maxloc->allocate();
r_maxval = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_maxval->allocate();
r_snrValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_snrValue->allocate();
r_covValue = new cuArrays<float3> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
r_covValue->allocate();
// end of new arrays
if(param->oversamplingMethod) {
corrSincOverSampler = new cuSincOverSamplerR2R(param->zoomWindowSize, param->oversamplingFactor, stream);
}
else {
else {
corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize,
(param->zoomWindowSize)*param->oversamplingFactor,
(param->zoomWindowSize)*param->oversamplingFactor,
(param->zoomWindowSize)*param->oversamplingFactor,
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
stream);
}
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
stream);
}
if(param->algorithm == 0) {
cuCorrFreqDomain = new cuFreqCorrelator(
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
@ -347,10 +491,10 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
cuCorrFreqDomain_OverSampled = new cuFreqCorrelator(
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
stream);
stream);
}
debugmsg("all objects in chunk are created ...\n");

View File

@ -1,4 +1,4 @@
/*
/*
* cuAmpcorChunk.h
* Purpose: a group of chips processed at the same time
*/
@ -6,7 +6,7 @@
#ifndef __CUAMPCORCHUNK_H
#define __CUAMPCORCHUNK_H
#include "SlcImage.h"
#include "GDALImage.h"
#include "cuArrays.h"
#include "cuAmpcorParameter.h"
#include "cuOverSampler.h"
@ -22,64 +22,81 @@ private:
int nWindowsAcross;
int devId;
cudaStream_t stream;
SlcImage *masterImage;
SlcImage *slaveImage;
cudaStream_t stream;
GDALImage *masterImage;
GDALImage *slaveImage;
cuAmpcorParameter *param;
cuArrays<float2> *offsetImage;
cuArrays<float> *snrImage;
cuArrays<float2> * c_masterChunkRaw, * c_slaveChunkRaw;
cuArrays<float3> *covImage;
// added for test
cuArrays<int> *intImage1;
cuArrays<float> *floatImage1;
// gpu buffer
cuArrays<float2> * c_masterChunkRaw, * c_slaveChunkRaw;
cuArrays<float> * r_masterChunkRaw, * r_slaveChunkRaw;
// gpu windows raw data
cuArrays<float2> * c_masterBatchRaw, * c_slaveBatchRaw, * c_slaveBatchZoomIn;
cuArrays<float> * r_masterBatchRaw, * r_slaveBatchRaw;
cuArrays<float2> * c_masterBatchOverSampled, * c_slaveBatchOverSampled;
// gpu windows oversampled data
cuArrays<float2> * c_masterBatchOverSampled, * c_slaveBatchOverSampled;
cuArrays<float> * r_masterBatchOverSampled, * r_slaveBatchOverSampled;
cuArrays<float> * r_corrBatchRaw, * r_corrBatchZoomIn, * r_corrBatchZoomInOverSampled, * r_corrBatchZoomInAdjust;
cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross;
cuOverSamplerC2C *masterBatchOverSampler, *slaveBatchOverSampler;
cuOverSamplerR2R *corrOverSampler;
cuSincOverSamplerR2R *corrSincOverSampler;
cuSincOverSamplerR2R *corrSincOverSampler;
//for frequency domain
cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled;
cuArrays<int2> *offsetInit;
cuArrays<int2> *offsetZoomIn;
cuArrays<float2> *offsetFinal;
cuArrays<float> *corrMaxValue;
//SNR estimation
cuArrays<float> *r_corrBatchRawZoomIn;
cuArrays<float> *r_corrBatchSum;
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
cuArrays<float> *r_snrValue;
//corr statistics
cuArrays<int2> *i_maxloc;
cuArrays<float> *r_maxval;
cuArrays<float> *r_corrBatchSum;
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
cuArrays<float> *corrMaxValue;
cuArrays<float> *r_snrValue;
// Variance estimation.
cuArrays<float3> *r_covValue;
public:
cuAmpcorChunk() {}
//cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_);
void setIndex(int idxDown_, int idxAcross_);
cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_, cuArrays<float2> *offsetImage_,
cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_);
cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_, cuArrays<float2> *offsetImage_,
cuArrays<float> *snrImage_, cudaStream_t stream_);
void loadMasterChunk();
void loadSlaveChunk();
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
~cuAmpcorChunk();
void run(int, int);
~cuAmpcorChunk();
void run(int, int);
};
#endif
#endif

View File

@ -1,113 +1,142 @@
// Implementation of cuAmpcorController
#include "cuAmpcorController.h"
#include "SlcImage.h"
#include "GDALImage.h"
#include "cuArrays.h"
#include "cudaUtil.h"
#include "cuAmpcorChunk.h"
#include "cuAmpcorUtil.h"
#include <iostream>
/// Constructor: creates the parameter container owned by this controller.
cuAmpcorController::cuAmpcorController()
{
    param = new cuAmpcorParameter();
}

/// Destructor: releases the parameter container created by the constructor.
cuAmpcorController::~cuAmpcorController()
{
    delete param;
}
void cuAmpcorController::runAmpcor() {
void cuAmpcorController::runAmpcor() {
// set the gpu id
param->deviceID = gpuDeviceInit(param->deviceID);
SlcImage *masterImage;
SlcImage *slaveImage;
// initialize the gdal driver
GDALAllRegister();
// master and slave images; use band=1 as default
// TODO: selecting band
GDALImage *masterImage = new GDALImage(param->masterImageName, 1, param->mmapSizeInGB);
GDALImage *slaveImage = new GDALImage(param->slaveImageName, 1, param->mmapSizeInGB);
cuArrays<float2> *offsetImage, *offsetImageRun;
cuArrays<float> *snrImage, *snrImageRun;
// cuArrays<float> *floatImage;
// cuArrays<int> *intImage;
cuArrays<float3> *covImage, *covImageRun;
// For debugging.
cuArrays<int> *intImage1;
cuArrays<float> *floatImage1;
int nWindowsDownRun = param->numberChunkDown * param->numberWindowDownInChunk;
int nWindowsAcrossRun = param->numberChunkAcross * param->numberWindowAcrossInChunk;
masterImage = new SlcImage(param->masterImageName, param->masterImageHeight, param->masterImageWidth, param->mmapSizeInGB);
slaveImage = new SlcImage(param->slaveImageName, param->slaveImageHeight, param->slaveImageWidth, param->mmapSizeInGB);
int nWindowsDownRun = param->numberChunkDown*param->numberWindowDownInChunk;
int nWindowsAcrossRun = param->numberChunkAcross*param->numberWindowAcrossInChunk;
std::cout << "Debug " << nWindowsDownRun << " " << param->numberWindowDown << "\n";
offsetImageRun = new cuArrays<float2>(nWindowsDownRun, nWindowsAcrossRun);
snrImageRun = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
offsetImageRun->allocate();
snrImageRun = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
snrImageRun->allocate();
covImageRun = new cuArrays<float3>(nWindowsDownRun, nWindowsAcrossRun);
covImageRun->allocate();
// intImage 1 and floatImage 1 are added for debugging issues
intImage1 = new cuArrays<int>(nWindowsDownRun, nWindowsAcrossRun);
intImage1->allocate();
floatImage1 = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
floatImage1->allocate();
// Offsetfields.
offsetImage = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
snrImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
offsetImage->allocate();
// SNR.
snrImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
snrImage->allocate();
// Minyan Zhong
// floatImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
// intImage = new cuArrays<int>(param->numberWindowDown, param->numberWindowAcross);
// Variance.
covImage = new cuArrays<float3>(param->numberWindowDown, param->numberWindowAcross);
covImage->allocate();
// floatImage->allocate();
// intImage->allocate();
//
cudaStream_t streams[param->nStreams];
cuAmpcorChunk *chunk[param->nStreams];
for(int ist=0; ist<param->nStreams; ist++)
for(int ist=0; ist<param->nStreams; ist++)
{
cudaStreamCreate(&streams[ist]);
chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, streams[ist]);
chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, covImageRun, intImage1, floatImage1, streams[ist]);
}
int nChunksDown = param->numberChunkDown;
int nChunksAcross = param->numberChunkAcross;
int nChunksAcross = param->numberChunkAcross;
std::cout << "Total number of windows (azimuth x range): " <<param->numberWindowDown << " x " << param->numberWindowAcross << std::endl;
std::cout << "to be processed in the number of chunks: " <<nChunksDown << " x " << nChunksAcross << std::endl;
for(int i = 60; i<nChunksDown; i++)
for(int i = 0; i<nChunksDown; i++)
{
std::cout << "Processing chunk (" << i <<", x" << ")" << std::endl;
std::cout << "Processing chunk (" << i <<", x" << ")" << std::endl;
for(int j=0; j<nChunksAcross; j+=param->nStreams)
{
//std::cout << "Processing chunk(" << i <<", " << j <<")" << std::endl;
for(int ist = 0; ist<param->nStreams; ist++)
{
{
if(j+ist < nChunksAcross) {
chunk[ist]->run(i, j+ist);
}
}
}
}
}
cudaDeviceSynchronize();
// Do extraction.
cuArraysCopyExtract(offsetImageRun, offsetImage, make_int2(0,0), streams[0]);
cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]);
cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]);
cuArraysCopyExtract(covImageRun, covImage, make_int2(0,0), streams[0]);
offsetImage->outputToFile(param->offsetImageName, streams[0]);
snrImage->outputToFile(param->snrImageName, streams[0]);
covImage->outputToFile(param->covImageName, streams[0]);
// Minyan Zhong
// floatImage->allocate();
// intImage->allocate();
//
// Output debugging arrays.
intImage1->outputToFile("intImage1", streams[0]);
floatImage1->outputToFile("floatImage1", streams[0]);
outputGrossOffsets();
// Delete arrays.
delete offsetImage;
delete snrImage;
delete covImage;
delete intImage1;
delete floatImage1;
delete offsetImageRun;
delete snrImageRun;
delete covImageRun;
for (int ist=0; ist<param->nStreams; ist++)
delete chunk[ist];
delete masterImage;
delete slaveImage;
}
delete slaveImage;
}
void cuAmpcorController::outputGrossOffsets()
{
cuArrays<float2> *grossOffsets = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
grossOffsets->allocateHost();
for(int i=0; i< param->numberWindows; i++)
grossOffsets->hostData[i] = make_float2(param->grossOffsetDown[i], param->grossOffsetAcross[i]);
grossOffsets->outputHostToFile(param->grossOffsetImageName);
@ -176,7 +205,7 @@ void cuAmpcorController::setGrossOffsets(int *in, int size) {
param->grossOffsets = (int *)malloc(size*sizeof(int));
mempcpy(param->grossOffsets, in, size*sizeof(int));
fprintf(stderr, "copy grossOffsets %d\n", size);
}
}
/// Set the name of the file the offset image is written to.
void cuAmpcorController::setOffsetImageName(std::string s)
{
    param->offsetImageName = s;
}
/// Set the name of the file the SNR image is written to.
void cuAmpcorController::setSNRImageName(std::string s)
{
    param->snrImageName = s;
}
//void cuAmpcorController::setMargin(int n) { param->margin = n; }

View File

@ -1,6 +1,6 @@
/**
* cuAmpcorParameter.cu
* Input parameters for ampcor
* Input parameters for ampcor
*/
#include "cuAmpcorParameter.h"
@ -11,17 +11,19 @@
#endif
///
/// Constructor for cuAmpcorParameter class
/// Constructor for cuAmpcorParameter class
/// also sets the default/initial values of various parameters
///
cuAmpcorParameter::cuAmpcorParameter()
{
algorithm = 0; //0 freq; 1 time
deviceID = 0;
nStreams = 1;
// default settings
// will be changed if they are set by python scripts
algorithm = 0; //0 freq; 1 time
deviceID = 0;
nStreams = 1;
derampMethod = 1;
windowSizeWidthRaw = 64;
windowSizeHeightRaw = 64;
halfSearchRangeDownRaw = 20;
@ -31,9 +33,9 @@ cuAmpcorParameter::cuAmpcorParameter()
skipSampleDownRaw = 64;
rawDataOversamplingFactor = 2;
zoomWindowSize = 8;
oversamplingFactor = 16;
oversamplingMethod = 0;
oversamplingFactor = 16;
oversamplingMethod = 0;
masterImageName = "master.slc";
masterImageWidth = 1000;
masterImageHeight = 1000;
@ -43,50 +45,58 @@ cuAmpcorParameter::cuAmpcorParameter()
offsetImageName = "DenseOffset.off";
grossOffsetImageName = "GrossOffset.off";
snrImageName = "snr.snr";
covImageName = "cov.cov";
numberWindowDown = 1;
numberWindowAcross = 1;
numberWindowAcross = 1;
numberWindowDownInChunk = 1;
numberWindowAcrossInChunk = 1 ;
numberWindowAcrossInChunk = 1 ;
masterStartPixelDown0 = 0;
masterStartPixelAcross0 = 0;
corrRawZoomInHeight = 17; // 8*2+1
corrRawZoomInWidth = 17;
useMmap = 1; // use mmap
mmapSizeInGB = 1;
}
/**
* To determine other process parameters after reading essential parameters from python
*/
* To determine other process parameters after reading essential parameters from python
*/
void cuAmpcorParameter::setupParameters()
{
{
zoomWindowSize *= rawDataOversamplingFactor; //8 * 2
halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4
halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4
windowSizeWidth = windowSizeWidthRaw*rawDataOversamplingFactor; //
windowSizeHeight = windowSizeHeightRaw*rawDataOversamplingFactor;
searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw;
searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw;
searchWindowSizeHeightRaw = windowSizeHeightRaw + 2*halfSearchRangeAcrossRaw;
searchWindowSizeWidthRawZoomIn = windowSizeWidthRaw + 2*halfZoomWindowSizeRaw;
searchWindowSizeHeightRawZoomIn = windowSizeHeightRaw + 2*halfZoomWindowSizeRaw;
searchWindowSizeWidth = searchWindowSizeWidthRawZoomIn*rawDataOversamplingFactor;
searchWindowSizeHeight = searchWindowSizeHeightRawZoomIn*rawDataOversamplingFactor;
numberWindows = numberWindowDown*numberWindowAcross;
if(numberWindows <=0) {
fprintf(stderr, "Incorrect number of windows! (%d, %d)\n", numberWindowDown, numberWindowAcross);
exit(EXIT_FAILURE);
}
}
// modified 02/12/2018 to include one more chunk
// e.g. numberWindowDownInChunk=102, numberWindowDown=10, results in numberChunkDown=11
// the last chunk will include 2 windows, numberWindowDownInChunkRun = 2.
// the last chunk will include 2 windows, numberWindowDownInChunkRun = 2.
numberChunkDown = IDIVUP(numberWindowDown, numberWindowDownInChunk);
numberChunkAcross = IDIVUP(numberWindowAcross, numberWindowAcrossInChunk);
numberChunks = numberChunkDown*numberChunkAcross;
allocateArrays();
allocateArrays();
}
@ -99,7 +109,7 @@ void cuAmpcorParameter::allocateArrays()
masterStartPixelAcross = (int *)malloc(arraySize);
slaveStartPixelDown = (int *)malloc(arraySize);
slaveStartPixelAcross = (int *)malloc(arraySize);
int arraySizeChunk = numberChunks*sizeof(int);
masterChunkStartPixelDown = (int *)malloc(arraySizeChunk);
masterChunkStartPixelAcross = (int *)malloc(arraySizeChunk);
@ -130,18 +140,18 @@ void cuAmpcorParameter::deallocateArrays()
}
/// Set starting pixels for master and slave windows from arrays
/// Set starting pixels for master and slave windows from arrays
/// set also gross offsets between master and slave windows
///
///
void cuAmpcorParameter::setStartPixels(int *mStartD, int *mStartA, int *gOffsetD, int *gOffsetA)
{
for(int i=0; i<numberWindows; i++)
{
masterStartPixelDown[i] = mStartD[i];
grossOffsetDown[i] = gOffsetD[i];
grossOffsetDown[i] = gOffsetD[i];
slaveStartPixelDown[i] = masterStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
masterStartPixelAcross[i] = mStartA[i];
grossOffsetAcross[i] = gOffsetA[i];
grossOffsetAcross[i] = gOffsetA[i];
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
}
setChunkStartPixels();
@ -160,7 +170,7 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int *gOffsetD,
masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
grossOffsetAcross[i] = gOffsetA[i];
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
}
}
}
setChunkStartPixels();
}
@ -179,60 +189,60 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int gOffsetD, i
masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
grossOffsetAcross[i] = gOffsetA;
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
}
}
}
setChunkStartPixels();
}
void cuAmpcorParameter::setChunkStartPixels()
{
maxMasterChunkHeight = 0;
maxMasterChunkWidth = 0;
maxSlaveChunkHeight = 0;
maxSlaveChunkWidth = 0;
for(int ichunk=0; ichunk <numberChunkDown; ichunk++)
{
for (int jchunk =0; jchunk<numberChunkAcross; jchunk++)
{
int idxChunk = ichunk*numberChunkAcross+jchunk;
int mChunkSD = masterImageHeight;
int mChunkSA = masterImageWidth;
int mChunkSD = masterImageHeight;
int mChunkSA = masterImageWidth;
int mChunkED = 0;
int mChunkEA = 0;
int sChunkSD = slaveImageHeight;
int sChunkSA = slaveImageWidth;
int sChunkED = 0;
int sChunkEA = 0;
// modified 02/12/2018
int numberWindowDownInChunkRun = numberWindowDownInChunk;
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
// modify the number of windows in last chunk
if(ichunk == numberChunkDown -1)
int numberWindowDownInChunkRun = numberWindowDownInChunk;
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
// modify the number of windows in last chunk
if(ichunk == numberChunkDown -1)
numberWindowDownInChunkRun = numberWindowDown - numberWindowDownInChunk*(numberChunkDown -1);
if(jchunk == numberChunkAcross -1)
if(jchunk == numberChunkAcross -1)
numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1);
for(int i=0; i<numberWindowDownInChunkRun; i++)
for(int i=0; i<numberWindowDownInChunkRun; i++)
{
for(int j=0; j<numberWindowAcrossInChunkRun; j++)
{
{
int idxWindow = (ichunk*numberWindowDownInChunk+i)*numberWindowAcross + (jchunk*numberWindowAcrossInChunk+j);
int vpixel = masterStartPixelDown[idxWindow];
if(mChunkSD > vpixel) mChunkSD = vpixel;
if(mChunkSD > vpixel) mChunkSD = vpixel;
if(mChunkED < vpixel) mChunkED = vpixel;
vpixel = masterStartPixelAcross[idxWindow];
if(mChunkSA > vpixel) mChunkSA = vpixel;
if(mChunkSA > vpixel) mChunkSA = vpixel;
if(mChunkEA < vpixel) mChunkEA = vpixel;
vpixel = slaveStartPixelDown[idxWindow];
if(sChunkSD > vpixel) sChunkSD = vpixel;
if(sChunkSD > vpixel) sChunkSD = vpixel;
if(sChunkED < vpixel) sChunkED = vpixel;
vpixel = slaveStartPixelAcross[idxWindow];
if(sChunkSA > vpixel) sChunkSA = vpixel;
if(sChunkSA > vpixel) sChunkSA = vpixel;
if(sChunkEA < vpixel) sChunkEA = vpixel;
}
}
@ -261,58 +271,58 @@ void cuAmpcorParameter::checkPixelInImageRange()
for(int col = 0; col < numberWindowAcross; col++)
{
int i = row*numberWindowAcross + col;
if(masterStartPixelDown[i] <0)
if(masterStartPixelDown[i] <0)
{
fprintf(stderr, "Master Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, masterStartPixelDown[i]);
exit(EXIT_FAILURE); //or raise range error
}
if(masterStartPixelAcross[i] <0)
}
if(masterStartPixelAcross[i] <0)
{
fprintf(stderr, "Master Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, masterStartPixelAcross[i]);
exit(EXIT_FAILURE);
}
endPixel = masterStartPixelDown[i] + windowSizeHeightRaw;
if(endPixel >= masterImageHeight)
}
endPixel = masterStartPixelDown[i] + windowSizeHeightRaw;
if(endPixel >= masterImageHeight)
{
fprintf(stderr, "Master Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE);
}
endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw;
if(endPixel >= masterImageWidth)
}
endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw;
if(endPixel >= masterImageWidth)
{
fprintf(stderr, "Master Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE);
}
}
//slave
if(slaveStartPixelDown[i] <0)
if(slaveStartPixelDown[i] <0)
{
fprintf(stderr, "Slave Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, slaveStartPixelDown[i]);
exit(EXIT_FAILURE);
}
if(slaveStartPixelAcross[i] <0)
exit(EXIT_FAILURE);
}
if(slaveStartPixelAcross[i] <0)
{
fprintf(stderr, "Slave Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, slaveStartPixelAcross[i]);
exit(EXIT_FAILURE);
}
endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw;
if(endPixel >= slaveImageHeight)
}
endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw;
if(endPixel >= slaveImageHeight)
{
fprintf(stderr, "Slave Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE);
}
endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw;
if(endPixel >= slaveImageWidth)
}
endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw;
if(endPixel >= slaveImageWidth)
{
fprintf(stderr, "Slave Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
exit(EXIT_FAILURE);
}
}
}
}
}
}
/// Destructor: hands cleanup of the parameter arrays to deallocateArrays().
cuAmpcorParameter::~cuAmpcorParameter() { deallocateArrays(); }

View File

@ -1,7 +1,7 @@
/**
* cuAmpcorParameter.h
* Header file for Ampcor Parameter Class
*
*
* Author: Lijun Zhu @ Seismo Lab, Caltech
* March 2017
*/
@ -12,13 +12,13 @@
#include <string>
/// Class container for all parameters
///
///
/// @note
/// The dimension/direction names used are:
/// The dimension/direction names used are:
/// The inner-most dimension: x, row, height, down, azimuth, along the track.
/// The outer-most dimension: y, column, width, across, range, along the sight.
/// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j]
/// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH]
/// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j]
/// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH]
/// @note
/// Common procedures to use cuAmpcorParameter
@ -27,72 +27,74 @@
/// 3. Call setupParameters() to determine related parameters and allocate starting pixels for each window: param->setupParameters()
/// 4. Provide/set Master window starting pixel(s), and gross offset(s): param->setStartPixels(masterStartDown, masterStartAcross, grossOffsetDown, grossOffsetAcross)
/// 4a. Optionally, check the range of windows is within the SLC image range: param->checkPixelInImageRange()
/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used
/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used
class cuAmpcorParameter{
public:
int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain
int deviceID; /// Targeted GPU device ID: use -1 to auto select
int nStreams; /// Number of streams to asynchonize data transfers and compute kernels
int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain
int deviceID; /// Targeted GPU device ID: use -1 to auto select
int nStreams; /// Number of streams to asynchonize data transfers and compute kernels
int derampMethod; /// Method for deramping 0=None, 1=average, 2=phase gradient
// chip or window size for raw data
int windowSizeHeightRaw; /// Template window height (original size)
int windowSizeWidthRaw; /// Template window width (original size)
int searchWindowSizeHeightRaw; /// Search window height (original size)
int windowSizeWidthRaw; /// Template window width (original size)
int searchWindowSizeHeightRaw; /// Search window height (original size)
int searchWindowSizeWidthRaw; /// Search window width (original size)
int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
int halfSearchRangeAcrossRaw; ///(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
// search range is (-halfSearchRangeRaw, halfSearchRangeRaw)
int searchWindowSizeHeightRawZoomIn;
int searchWindowSizeWidthRawZoomIn;
int corrRawZoomInHeight; // window to estimate snr
int corrRawZoomInWidth;
// chip or window size after oversampling
int rawDataOversamplingFactor; /// Raw data oversampling factor (from original size to oversampled size)
int windowSizeHeight; /// Template window height (oversampled size)
int windowSizeWidth; /// Template window width (oversampled size)
int searchWindowSizeHeight; /// Search window height (oversampled size)
int searchWindowSizeWidth; /// Search window width (oversampled size)
// strides between chips/windows
int searchWindowSizeHeight; /// Search window height (oversampled size)
int searchWindowSizeWidth; /// Search window width (oversampled size)
// strides between chips/windows
int skipSampleDownRaw; /// Skip size between neighboring windows in Down direction (original size)
int skipSampleAcrossRaw; /// Skip size between neighboring windows in across direction (original size)
//int skipSampleDown; /// Skip size between neighboring windows in Down direction (oversampled size)
//int skipSampleAcross; /// Skip size between neighboring windows in Across direction (oversampled size)
// Zoom in region near location of max correlation
int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions)
int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor
int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions)
int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor
int oversamplingFactor; /// Oversampling factor for interpolating correlation surface
int oversamplingMethod; /// 0 = fft (default) 1 = sinc
float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data
int oversamplingMethod; /// 0 = fft (default) 1 = sinc
float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data
//master image
std::string masterImageName; /// master SLC image name
int imageDataType1; /// master image data type, 2=cfloat=complex=float2 1=float
int masterImageHeight; /// master image height
int masterImageHeight; /// master image height
int masterImageWidth; /// master image width
//slave image
std::string slaveImageName; /// slave SLC image name
int imageDataType2; /// slave image data type, 2=cfloat=complex=float2 1=float
int slaveImageHeight; /// slave image height
int slaveImageHeight; /// slave image height
int slaveImageWidth; /// slave image width
// total number of chips/windows
int numberWindowDown; /// number of total windows (down)
int numberWindowAcross; /// number of total windows (across)
int numberWindows; /// numberWindowDown*numberWindowAcross
// number of chips/windows in a batch/chunk
int numberWindowDownInChunk; /// number of windows processed in a chunk (down)
int numberWindowAcrossInChunk; /// number of windows processed in a chunk (across)
@ -100,20 +102,21 @@ public:
int numberChunkDown; /// number of chunks (down)
int numberChunkAcross; /// number of chunks (across)
int numberChunks;
int mmapSizeInGB;
int useMmap; /// whether to use mmap 0=not 1=yes (default = 0)
int mmapSizeInGB; /// size for mmap buffer(useMmap=1) or a cpu memory buffer (useMmap=0)
int masterStartPixelDown0;
int masterStartPixelAcross0;
int *masterStartPixelDown; /// master starting pixels for each window (down)
int *masterStartPixelDown; /// master starting pixels for each window (down)
int *masterStartPixelAcross;/// master starting pixels for each window (across)
int *slaveStartPixelDown; /// slave starting pixels for each window (down)
int *slaveStartPixelAcross; /// slave starting pixels for each window (across)
int *slaveStartPixelDown; /// slave starting pixels for each window (down)
int *slaveStartPixelAcross; /// slave starting pixels for each window (across)
int grossOffsetDown0;
int grossOffsetAcross0;
int *grossOffsetDown; /// Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel
int *grossOffsetAcross; /// Gross offsets between master and slave windows (across)
int *grossOffsetAcross; /// Gross offsets between master and slave windows (across)
int *masterChunkStartPixelDown;
int *masterChunkStartPixelAcross;
int *slaveChunkStartPixelDown;
@ -124,18 +127,19 @@ public:
int *slaveChunkWidth;
int maxMasterChunkHeight, maxMasterChunkWidth;
int maxSlaveChunkHeight, maxSlaveChunkWidth;
std::string grossOffsetImageName;
std::string offsetImageName; /// Output Offset fields filename
std::string grossOffsetImageName;
std::string offsetImageName; /// Output Offset fields filename
std::string snrImageName; /// Output SNR filename
std::string covImageName;
cuAmpcorParameter(); /// Class constructor and default parameters setter
~cuAmpcorParameter(); /// Class descontructor
~cuAmpcorParameter(); /// Class descontructor
void allocateArrays(); /// Allocate various arrays after the number of Windows is given
void deallocateArrays(); /// Deallocate arrays on exit
/// Three methods to set master/slave starting pixels and gross offsets from input master start pixel(s) and gross offset(s)
/// 1 (int *, int *, int *, int *): varying master start pixels and gross offsets
/// 2 (int, int, int *, int *): fixed master start pixel (first window) and varying gross offsets
@ -144,7 +148,7 @@ public:
void setStartPixels(int, int, int*, int*);
void setStartPixels(int, int, int, int);
void setChunkStartPixels();
void checkPixelInImageRange(); /// check whether
void checkPixelInImageRange(); /// check whether
void setupParameters(); /// Process other parameters after Python Input
};

View File

@ -1,10 +1,10 @@
/*
/*
* cuAmpcorUtil.h
* header file to include the various routines for ampcor
* serves as an index
* serves as an index
*/
// Include guard: the macro that is tested and the macro that is defined must
// be the same name, otherwise the guard has no effect on repeated inclusion.
#ifndef __CUAMPCORUTIL_H
#define __CUAMPCORUTIL_H
@ -18,20 +18,27 @@
//in cuArraysCopy.cu: various utilities for copying image data in gpu memory
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream);
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream);
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream);
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream);
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream);
// same routine name overloaded for different data type
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream);
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream);
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
@ -46,8 +53,8 @@ void cuDerampMethod2(cuArrays<float2> *images, cudaStream_t stream);
void cpuDerampMethod3(cuArrays<float2> *images, cudaStream_t stream);
//in cuArraysPadding.cu: various utilities for oversampling padding
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
@ -57,21 +64,21 @@ void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream);
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
//in cuOffset.cu: utilities for determining the max location of cross correlations or the offset
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cudaStream_t stream);
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream);
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cudaStream_t stream);
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream);
void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn, cuArrays<float2> *offsetFinal,
int OverSampleRatioZoomin, int OverSampleRatioRaw,
int xHalfRangeInit, int yHalfRangeInit, int xHalfRangeZoomIn, int yHalfRangeZoomIn,
cudaStream_t stream);
void cuDetermineInterpZone(cuArrays<int2> *maxloc, cuArrays<int2> *zoomInOffset, cuArrays<float> *corrOrig, cuArrays<float> *corrZoomIn, cudaStream_t stream);
void cuDetermineSlaveExtractOffset(cuArrays<int2> *maxLoc, int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream);
//in cuCorrTimeDomain.cu: cross correlation in time domain
void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
//in cuCorrFrequency.cu: cross correlation in freq domain, also includes the fft correlator class
void cuArraysElementMultiply(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysElementMultiply(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2> *image2, float coef, cudaStream_t stream);
@ -80,7 +87,11 @@ void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2>
void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int> *imagesValid, cuArrays<int2> *maxloc, cudaStream_t stream);
// implemented in cuCorrNormalization.cu
void cuArraysSumCorr(cuArrays<float> *images, cuArrays<int> *imagesValid, cuArrays<float> *imagesSum, cuArrays<int> *imagesValidCount, cudaStream_t stream);
// implemented in cuEstimateStats.cu
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream);
#endif
// implemented in cuEstimateStats.cu
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
#endif

View File

@ -154,8 +154,21 @@
file.write((char *)data, size*count*sizeof(float2));
file.close();
}
// Write the device buffer of a float3 array to a raw binary file.
//   fn     - path of the output file (opened in binary mode)
//   stream - CUDA stream on which the device-to-host copy is issued
// size*count elements of float3 are copied from devData and written as-is.
template<>
void cuArrays<float3>::outputToFile(std::string fn, cudaStream_t stream)
{
    // compute the byte count in size_t so size*count is not evaluated in int
    const size_t nbytes = (size_t)size*count*sizeof(float3);
    float *data = (float *)malloc(nbytes);
    checkCudaErrors(cudaMemcpyAsync(data, devData, nbytes, cudaMemcpyDeviceToHost, stream));
    // make sure the copy queued on `stream` has finished before the host reads the buffer
    checkCudaErrors(cudaStreamSynchronize(stream));
    std::ofstream file;
    file.open(fn.c_str(), std::ios_base::binary);
    file.write((char *)data, nbytes);
    file.close();
    // release the temporary host buffer (it was leaked before)
    free(data);
}
// Explicit instantiations of the cuArrays class template, one per element type
// listed below (float, float2, float3, int2, int).
template class cuArrays<float>;
template class cuArrays<float2>;
template class cuArrays<float3>;
template class cuArrays<int2>;
template class cuArrays<int>;

View File

@ -4,7 +4,7 @@
* Lijun Zhu @ Seismo Lab, Caltech
* v1.0 Jan 2017
*/
#include "cuArrays.h"
#include "cudaUtil.h"
#include "cudaError.h"
@ -16,14 +16,14 @@ inline __device__ float cuAbs(float2 a)
return sqrtf(a.x*a.x+a.y*a.y);
}*/
//copy a chunk into a series of chips
__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY,
float2 *imageOut, const int outNX, const int outNY,
const int nImagesX, const int nImagesY,
// copy a chunk into a batch of chips for a given stride
__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY,
float2 *imageOut, const int outNX, const int outNY,
const int nImagesX, const int nImagesY,
const int strideX, const int strideY)
{
int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -33,8 +33,7 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX
imageOut[idxOut] = imageIn[idxIn];
}
//tested
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
int strideH, int strideW, cudaStream_t stream)
{
const int nthreads = NTHREADS2D;
@ -48,12 +47,14 @@ void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
getLastCudaError("cuArraysCopyToBatch_kernel");
}
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int *offsetX, const int *offsetY)
{
int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage>=nImages || outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -61,11 +62,8 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons
imageOut[idxOut] = imageIn[idxIn];
}
/// @param[in] image1 input image in a large chunk
/// @param[in] lda1 width of image 1
/// @param[out] image2 output image with a batch of small windows
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
// lda1 (inNY) is the leading dimension of image1, usually, its width
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream)
{
const int nthreads = 16;
@ -79,12 +77,13 @@ void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuA
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
}
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes)
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int *offsetX, const int *offsetY)
{
int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage>=nImages || outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -92,7 +91,7 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
}
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream)
{
const int nthreads = 16;
@ -106,14 +105,42 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1,
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
}
// Kernel: cut a batch of chips out of a real-valued chunk and store them as
// complex values (real part = input sample, imaginary part = 0).
//   imageIn          - input chunk, indexed as row*inNY + column
//   inNY             - leading dimension (row length) of the input chunk
//   imageOut         - output batch, nImages chips of outNX x outNY, stored contiguously
//   offsetX, offsetY - per-chip starting row / column inside the input chunk
// One thread handles one output pixel; blockIdx.z selects the chip.
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
    float2 *imageOut, const int outNX, const int outNY, const int nImages,
    const int *offsetX, const int *offsetY)
{
    const int chip = blockIdx.z;
    const int row = blockDim.x*blockIdx.x + threadIdx.x;
    const int col = blockDim.y*blockIdx.y + threadIdx.y;
    // only threads that fall inside an existing chip do any work
    if (chip < nImages && row < outNX && col < outNY)
    {
        const int dst = chip*outNX*outNY + row*outNY + col;
        const int src = (offsetX[chip]+row)*inNY + offsetY[chip] + col;
        imageOut[dst] = make_float2(imageIn[src], 0.0f);
    }
}
// Host wrapper: launch cuArraysCopyToBatchWithOffsetR2C_kernel on `stream`.
//   image1           - real-valued input chunk
//   lda1             - leading dimension of image1, forwarded to the kernel as inNY
//   image2           - complex output batch; its height/width/count size the launch grid
//   offsetH, offsetW - per-chip starting offsets, passed through to the kernel unchanged
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
    const int *offsetH, const int* offsetW, cudaStream_t stream)
{
    const int tile = 16;
    // x covers the chip height, y the chip width, z one block layer per chip
    const dim3 block(tile, tile, 1);
    const dim3 grid(IDIVUP(image2->height,tile), IDIVUP(image2->width,tile), image2->count);
    cuArraysCopyToBatchWithOffsetR2C_kernel<<<grid, block, 0, stream>>>(
        image1->devData, lda1,
        image2->devData, image2->height, image2->width, image2->count,
        offsetH, offsetW);
    getLastCudaError("cuArraysCopyToBatchWithOffsetR2C_kernel");
}
//copy a chunk into a series of chips
__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY,
const int nImagesX, const int nImagesY,
__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY,
const int nImagesX, const int nImagesY,
const int strideX, const int strideY, const float factor)
{
int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return;
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
@ -121,17 +148,17 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co
int idxImageY = idxImage%nImagesY;
int idxIn = (idxImageX*strideX+outx)*inNY + idxImageY*strideY+outy;
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
//printf( "%d\n", idxOut);
//printf( "%d\n", idxOut);
}
//tested
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
int strideH, int strideW, cudaStream_t stream)
{
const int nthreads = 16;
dim3 blockSize(nthreads, nthreads, 1);
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
float factor = 1.0f/image1->size; //the FFT factor
float factor = 1.0f/image1->size; //the FFT factor
cuArraysCopyC2R_kernel<<<gridSize,blockSize, 0 , stream>>> (
image1->devData, image1->height, image1->width,
image2->devData, image2->height, image2->width,
@ -141,22 +168,22 @@ void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
}
// Kernel: extract an outNX x outNY window from each image of a batch, with a
// different starting offset for every image.
//   imageIn  - input batch, images of inNX x inNY stored contiguously
//   imageOut - output batch, images of outNX x outNY stored contiguously
//   nImages  - not consulted here; the image index is taken from blockIdx.z
//   offsets  - per-image (x, y) starting position of the window in the input image
// The input index is not range-checked in this kernel.
__global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int inNX, const int inNY,
    float *imageOut, const int outNX, const int outNY, const int nImages,
    const int2 *offsets)
{
    int outx = threadIdx.x + blockDim.x*blockIdx.x;
    int outy = threadIdx.y + blockDim.y*blockIdx.y;
    if(outx < outNX && outy < outNY)
    {
        int idxImage = blockIdx.z;
        int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
        int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
        imageOut[idxOut] = imageIn[idxIn];
    }
}
/* copy a tile of images to another image, with starting pixels offsets
/* copy a tile of images to another image, with starting pixels offsets
* param[in] imageIn input images
* param[out] imageOut output images of dimension nImages*outNX*outNY
*/
@ -166,24 +193,24 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtractVaryingOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
cuArraysCopyExtractVaryingOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData);
getLastCudaError("cuArraysCopyExtract error");
}
// Kernel: complex-to-complex variant of the varying-offset window extraction.
//   imageIn  - input batch of float2 images, inNX x inNY each, stored contiguously
//   imageOut - output batch of float2 images, outNX x outNY each
//   nImages  - not consulted here; the image index is taken from blockIdx.z
//   offsets  - per-image (x, y) starting position of the window in the input image
// The input index is not range-checked in this kernel.
__global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, const int inNX, const int inNY,
    float2 *imageOut, const int outNX, const int outNY, const int nImages,
    const int2 *offsets)
{
    int outx = threadIdx.x + blockDim.x*blockIdx.x;
    int outy = threadIdx.y + blockDim.y*blockIdx.y;
    if(outx < outNX && outy < outNY)
    {
        int idxImage = blockIdx.z;
        int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
        int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
        imageOut[idxOut] = imageIn[idxIn];
    }
}
@ -194,7 +221,7 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtractVaryingOffset_C2C<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
cuArraysCopyExtractVaryingOffset_C2C<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData);
getLastCudaError("cuArraysCopyExtractC2C error");
@ -202,26 +229,29 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
// correlation surface extraction (Minyan Zhong)
__global__ void cuArraysCopyExtractVaryingOffsetCorr(const float *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages,
float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages,
const int2 *maxloc)
{
int idxImage = blockIdx.z;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
// One thread per out point. Find the coordinates within the current image.
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
// Find the correponding input.
int inx = outx + maxloc[idxImage].x - outNX/2;
int iny = outy + maxloc[idxImage].y - outNY/2;
if (outx < outNX && outy < outNY)
if (outx < outNX && outy < outNY)
{
// Find the location in full array.
int idxOut = ( blockIdx.z * outNX + outx ) * outNY + outy;
int idxIn = ( blockIdx.z * inNX + inx ) * inNY + iny;
if (inx>=0 && iny>=0 && inx<inNX && iny<inNY) {
imageOut[idxOut] = imageIn[idxIn];
imageValid[idxOut] = 1;
}
@ -255,21 +285,21 @@ void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesO
// Kernel: extract an outNX x outNY window from each image of a batch, using
// the same starting offset (offsetX, offsetY) for every image.
//   imageIn  - input batch, images of inNX x inNY stored contiguously
//   imageOut - output batch, images of outNX x outNY stored contiguously
//   nImages  - not consulted here; the image index is taken from blockIdx.z
// The input index is not range-checked in this kernel.
__global__ void cuArraysCopyExtractFixedOffset(const float *imageIn, const int inNX, const int inNY,
    float *imageOut, const int outNX, const int outNY, const int nImages,
    const int offsetX, const int offsetY)
{
    int outx = threadIdx.x + blockDim.x*blockIdx.x;
    int outy = threadIdx.y + blockDim.y*blockIdx.y;
    if(outx < outNX && outy < outNY)
    {
        int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
        int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
        imageOut[idxOut] = imageIn[idxIn];
    }
}
/* copy a tile of images to another image, with starting pixels offsets
/* copy a tile of images to another image, with starting pixels offsets
* param[in] imageIn input images
* param[out] imageOut output images of dimension nImages*outNX*outNY
*/
@ -279,23 +309,24 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtractFixedOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
cuArraysCopyExtractFixedOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtract error");
}
//
// Kernel: fixed-offset window extraction for float2 (complex) images.
//   imageIn  - input batch of float2 images, inNX x inNY each, stored contiguously
//   imageOut - output batch of float2 images, outNX x outNY each
//   nImages  - not consulted here; the image index is taken from blockIdx.z
//   offsetX, offsetY - starting position of the window, shared by all images
// The input index is not range-checked in this kernel.
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
    float2 *imageOut, const int outNX, const int outNY, const int nImages,
    const int offsetX, const int offsetY)
{
    int outx = threadIdx.x + blockDim.x*blockIdx.x;
    int outy = threadIdx.y + blockDim.y*blockIdx.y;
    if(outx < outNX && outy < outNY)
    {
        int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
        int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
        imageOut[idxOut] = imageIn[idxIn];
    }
}
@ -311,27 +342,64 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
//imagesIn->debuginfo(stream);
//imagesOut->debuginfo(stream);
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
(imagesIn->devData, imagesIn->height, imagesIn->width,
(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtractC2C error");
}
//
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, const int nImages,
// float3
// Kernel: fixed-offset window extraction for float3 images (overload of the
// float2 kernel of the same name).
//   imageIn  - input batch of float3 images, inNX x inNY each, stored contiguously
//   imageOut - output batch of float3 images, outNX x outNY each
//   nImages  - not consulted here; the image index is taken from blockIdx.z
//   offsetX, offsetY - starting position of the window, shared by all images
// The input index is not range-checked in this kernel.
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
    float3 *imageOut, const int outNX, const int outNY, const int nImages,
    const int offsetX, const int offsetY)
{
    int outx = threadIdx.x + blockDim.x*blockIdx.x;
    int outy = threadIdx.y + blockDim.y*blockIdx.y;
    if(outx < outNX && outy < outNY)
    {
        int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
        int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
        imageOut[idxOut] = imageIn[idxIn];
    }
}
// Host wrapper: extract a window from every float3 image of a batch, using one
// common starting offset, by launching the float3 overload of
// cuArraysCopyExtract_C2C_FixedOffset on `stream`.
//   imagesIn  - input batch
//   imagesOut - output batch; its height/width/count size the launch grid
//   offset    - (x, y) starting position of the window inside each input image
// No size or range checks are performed here.
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream)
{
    const int tile = NTHREADS2D;
    // x covers the output height, y the output width, z one block layer per image
    const dim3 block(tile, tile, 1);
    const dim3 grid(IDIVUP(imagesOut->height,tile), IDIVUP(imagesOut->width,tile), imagesOut->count);
    cuArraysCopyExtract_C2C_FixedOffset<<<grid, block, 0, stream>>>(
        imagesIn->devData, imagesIn->height, imagesIn->width,
        imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count,
        offset.x, offset.y);
    getLastCudaError("cuArraysCopyExtractFloat3 error");
}
//
// Kernel: fixed-offset window extraction from complex to real images; only the
// .x component of each input sample is kept.
//   imageIn  - input batch of float2 images, inNX x inNY each, stored contiguously
//   imageOut - output batch of float images, outNX x outNY each
//   nImages  - not consulted here; the image index is taken from blockIdx.z
//   offsetX, offsetY - starting position of the window, shared by all images
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
    float *imageOut, const int outNX, const int outNY, const int nImages,
    const int offsetX, const int offsetY)
{
    const int row = blockDim.x*blockIdx.x + threadIdx.x;
    const int col = blockDim.y*blockIdx.y + threadIdx.y;
    // threads outside the output window have nothing to do
    if (row >= outNX || col >= outNY) return;

    const int src = (blockIdx.z*inNX + row + offsetX)*inNY + col + offsetY;
    const int dst = (blockIdx.z * outNX + row)*outNY + col;
    imageOut[dst] = imageIn[src].x;
}
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream)
{
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
@ -339,16 +407,16 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut,
dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
cuArraysCopyExtract_C2R_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
(imagesIn->devData, imagesIn->height, imagesIn->width,
(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtractC2C error");
}
//
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
float2* imageOut, const int outNY, const int offsetX, const int offsetY)
{
int inx = threadIdx.x + blockDim.x*blockIdx.x;
int inx = threadIdx.x + blockDim.x*blockIdx.x;
int iny = threadIdx.y + blockDim.y*blockIdx.y;
if(inx < inNX && iny < inNY) {
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
@ -363,16 +431,40 @@ void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, i
const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads);
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
imageOut->devData, imageOut->width, offsetX, offsetY);
getLastCudaError("cuArraysCopyInsert error");
}
//
// float3
// Kernel: copy a float3 image of inNX x inNY into a larger image at position
// (offsetX, offsetY).
//   imageIn  - source image, row length inNY
//   imageOut - destination image, row length outNY
// One thread copies one source pixel; indices are formed with IDX2R.
__global__ void cuArraysCopyInsert_kernel(const float3* imageIn, const int inNX, const int inNY,
    float3* imageOut, const int outNY, const int offsetX, const int offsetY)
{
    const int row = blockDim.x*blockIdx.x + threadIdx.x;
    const int col = blockDim.y*blockIdx.y + threadIdx.y;
    // threads outside the source image have nothing to do
    if (row >= inNX || col >= inNY) return;

    const int src = IDX2R(row, col, inNY);
    const int dst = IDX2R(row+offsetX, col+offsetY, outNY);
    imageOut[dst] = make_float3(imageIn[src].x, imageIn[src].y, imageIn[src].z);
}
// Host wrapper: insert a float3 image into a larger float3 image at
// (offsetX, offsetY) by launching the float3 cuArraysCopyInsert_kernel on `stream`.
//   imageIn  - source image; its height/width size the launch grid
//   imageOut - destination image; its width is passed as the output row length
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
{
    const int tile = 16;
    const dim3 block(tile, tile);
    const dim3 grid(IDIVUP(imageIn->height,tile), IDIVUP(imageIn->width,tile));
    cuArraysCopyInsert_kernel<<<grid, block, 0, stream>>>(
        imageIn->devData, imageIn->height, imageIn->width,
        imageOut->devData, imageOut->width, offsetX, offsetY);
    getLastCudaError("cuArraysCopyInsert error");
}
//
__global__ void cuArraysCopyInsert_kernel(const float* imageIn, const int inNX, const int inNY,
float* imageOut, const int outNY, const int offsetX, const int offsetY)
{
int inx = threadIdx.x + blockDim.x*blockIdx.x;
int inx = threadIdx.x + blockDim.x*blockIdx.x;
int iny = threadIdx.y + blockDim.y*blockIdx.y;
if(inx < inNX && iny < inNY) {
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
@ -387,18 +479,44 @@ void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int
const int nthreads = 16;
dim3 threadsperblock(nthreads, nthreads);
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
imageOut->devData, imageOut->width, offsetX, offsetY);
getLastCudaError("cuArraysCopyInsert Float error");
}
//
// Kernel: copy an int image of inNX x inNY into a larger image at position
// (offsetX, offsetY).
//   imageIn  - source image, row length inNY
//   imageOut - destination image, row length outNY
// One thread copies one source pixel; indices are formed with IDX2R.
__global__ void cuArraysCopyInsert_kernel(const int* imageIn, const int inNX, const int inNY,
    int* imageOut, const int outNY, const int offsetX, const int offsetY)
{
    const int row = blockDim.x*blockIdx.x + threadIdx.x;
    const int col = blockDim.y*blockIdx.y + threadIdx.y;
    // threads outside the source image have nothing to do
    if (row >= inNX || col >= inNY) return;

    const int src = IDX2R(row, col, inNY);
    const int dst = IDX2R(row+offsetX, col+offsetY, outNY);
    imageOut[dst] = imageIn[src];
}
// Host wrapper: insert an int image into a larger int image at
// (offsetX, offsetY) by launching the int cuArraysCopyInsert_kernel on `stream`.
//   imageIn  - source image; its height/width size the launch grid
//   imageOut - destination image; its width is passed as the output row length
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
{
    const int tile = 16;
    const dim3 block(tile, tile);
    const dim3 grid(IDIVUP(imageIn->height,tile), IDIVUP(imageIn->width,tile));
    cuArraysCopyInsert_kernel<<<grid, block, 0, stream>>>(
        imageIn->devData, imageIn->height, imageIn->width,
        imageOut->devData, imageOut->width, offsetX, offsetY);
    getLastCudaError("cuArraysCopyInsert Integer error");
}
//
__global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
float *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY)
{
int idxImage = blockIdx.z;
@ -409,27 +527,27 @@ __global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int i
}
else
{ imageOut[idxOut] = 0.0f; }
}
}
}
// Host launcher for cuArraysCopyInversePadded_kernel.
// Launches 16x16 thread blocks tiling imageOut's height x width, with one grid
// z-slice per image (imageIn->count), on `stream`. The launch is checked with
// getLastCudaError.
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
{
    const int nthreads = 16;
    // Declared once: a second declaration in the same scope is a redefinition error.
    int nImages = imageIn->count;
    dim3 blockSize(nthreads, nthreads,1);
    dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
    cuArraysCopyInversePadded_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
        imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
    getLastCudaError("cuArraysCopyInversePadded error");
}
__global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
float *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY)
{
int idxImage = blockIdx.z;
@ -440,26 +558,26 @@ __global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY
}
else
{ imageOut[idxOut] = 0.0f; }
}
}
}
// Host launcher for cuArraysCopyPadded_R2R_kernel (float input, float output).
// Launches 16x16 thread blocks tiling imageOut's height x width, with one grid
// z-slice per image (imageIn->count), on `stream`. The launch is checked with
// getLastCudaError.
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
{
    const int nthreads = 16;
    // Declared once: a second declaration in the same scope is a redefinition error.
    int nImages = imageIn->count;
    dim3 blockSize(nthreads, nthreads,1);
    dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
    cuArraysCopyPadded_R2R_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
        imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
    getLastCudaError("cuArraysCopyPaddedR2R error");
}
__global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inNY, int sizeIn,
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY)
{
int idxImage = blockIdx.z;
@ -468,31 +586,31 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN
int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn;
imageOut[idxOut] = imageIn[idxIn];
}
else{
imageOut[idxOut] = make_float2(0.0f, 0.0f);
else{
imageOut[idxOut] = make_float2(0.0f, 0.0f);
}
}
}
}
// Host launcher for cuArraysCopyPadded_C2C_kernel (float2 input, float2 output).
// Launches NTHREADS2D x NTHREADS2D thread blocks tiling imageOut's height x
// width, with one grid z-slice per image (imageIn->count), on `stream`.
// The launch is checked with getLastCudaError.
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
{
    const int nthreads = NTHREADS2D;
    // Declared once: a second declaration in the same scope is a redefinition error.
    int nImages = imageIn->count;
    dim3 blockSize(nthreads, nthreads,1);
    dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
    cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
        (imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
        imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
    // The label names this launcher; the previous text was copied from
    // cuArraysCopyInversePadded and pointed at the wrong function.
    getLastCudaError("cuArraysCopyPaddedC2C error");
}
__global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
{
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outx = threadIdx.x + blockDim.x*blockIdx.x;
int outy = threadIdx.y + blockDim.y*blockIdx.y;
if(outx < outNX && outy < outNY)
{
int idxImage = blockIdx.z;
@ -501,42 +619,42 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY
int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn;
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
}
else{
imageOut[idxOut] = make_float2(0.0f, 0.0f);
else{
imageOut[idxOut] = make_float2(0.0f, 0.0f);
}
}
}
}
// Host launcher for cuArraysCopyPadded_R2C_kernel (float input, float2 output).
// Launches NTHREADS2D x NTHREADS2D thread blocks tiling imageOut's height x
// width, with one grid z-slice per image (imageIn->count), on `stream`.
// The launch is checked with getLastCudaError.
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
{
    const int nthreads = NTHREADS2D;
    // Declared once: a second declaration in the same scope is a redefinition error.
    int nImages = imageIn->count;
    dim3 blockSize(nthreads, nthreads,1);
    dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
    cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
        (imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
        imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
    getLastCudaError("cuArraysCopyPadded error");
}
// Fill kernel: each thread writes `value` into one element of `image`.
// Threads whose linear index is >= size do nothing.
__global__ void cuArraysSetConstant_kernel(float *image, int size, float value)
{
    // Declared once: a second declaration in the same scope is a redefinition error.
    int idx = threadIdx.x + blockDim.x*blockIdx.x;
    if(idx < size)
    {
        image[idx] = value;
    }
}
// Host launcher for cuArraysSetConstant_kernel: fills imageIn's device buffer
// with `value` using 256-thread 1-D blocks on `stream`.
// NOTE(review): the grid is sized from imageIn->getSize() but the kernel bound
// passed is imageIn->size; if these differ (e.g. per-image vs. whole-batch
// size) only the first imageIn->size elements are written — confirm intent.
void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t stream)
{
    const int nthreads = 256;
    // Declared once: a second declaration in the same scope is a redefinition error.
    int size = imageIn->getSize();
    cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
        (imageIn->devData, imageIn->size, value);
    // The label names this launcher; the previous text was copied from
    // cuArraysCopyPadded and pointed at the wrong function.
    getLastCudaError("cuArraysSetConstant error");
}

View File

@ -195,7 +195,6 @@ __device__ float2 partialSums(const float v, volatile float* shmem, const int st
return make_float2(Sum, Sum2);
}
__forceinline__ __device__ int __mul(const int a, const int b) { return a*b; }
template<const int Nthreads2>
__global__ void cuCorrNormalize_kernel(
@ -232,7 +231,7 @@ __global__ void cuCorrNormalize_kernel(
templateSum += templateD[i];
}
templateSum = sumReduceBlock<Nthreads>(templateSum, shmem);
__syncthreads();
float templateSum2 = 0.0f;
for (int i = tid; i < templateSize; i += Nthreads)
@ -241,11 +240,12 @@ __global__ void cuCorrNormalize_kernel(
templateSum2 += t*t;
}
templateSum2 = sumReduceBlock<Nthreads>(templateSum2, shmem);
__syncthreads();
//if(tid ==0) printf("template sum %d %g %g \n", imageIdx, templateSum, templateSum2);
/*********/
shmem[tid] = shmem[tid + Nthreads] = 0.0f;
shmem[tid] = shmem[tid + Nthreads] = shmem[tid + 2*Nthreads] = 0.0f;
__syncthreads();
float imageSum = 0.0f;
@ -281,7 +281,7 @@ __global__ void cuCorrNormalize_kernel(
if (tid < resultNY)
{
const int ix = iaddr/imageNY;
const int addr = __mul(ix-templateNX, resultNY);
const int addr = (ix-templateNX)*resultNY;
//printf("test norm %d %d %d %d %f\n", tid, ix, addr, addr+tid, resultD[addr + tid]);

View File

@ -25,7 +25,7 @@ __global__ void cudaKernel_estimateSnr(const float* corrSum, const int* corrVali
float mean = (corrSum[idx] - maxval[idx] * maxval[idx]) / (corrValidCount[idx] - 1);
snrValue[idx] = maxval[idx] / mean;
snrValue[idx] = maxval[idx] * maxval[idx] / mean;
}
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream)
@ -55,7 +55,7 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
//for (int i=0; i<size; i++){
// std::cout<<corrSum->hostData[i]<<std::endl;
// std::cout<<corrValidCount->hostData[i]<<std::endl;
@ -68,3 +68,80 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
getLastCudaError("cuda kernel estimate stats error\n");
}
// Per-surface covariance estimate; one thread handles one correlation surface.
//   corrBatchRaw : batch of surfaces, surface i starting at NX*NY*i, element
//                  (x, y) at x*NY + y
//   maxloc/maxval: peak location and peak value for each surface
//   covValue     : output (cov_xx, cov_yy, cov_xy) for each surface
//   size         : number of surfaces
// Writes the sentinel (99, 99, 99) when the peak has no full 3x3 neighbourhood
// or when |dxy^2 - dxx*dyy| < 1e-2.
template <const int BLOCKSIZE> // number of threads per block.
__global__ void cudaKernel_estimateVar(const float* corrBatchRaw, const int NX, const int NY, const int2* maxloc, const float* maxval, float3* covValue, const int size)
{
    // One surface per thread; surplus threads exit immediately.
    const int idxImage = threadIdx.x + blockDim.x*blockIdx.x;
    if (idxImage >= size) return;

    const int px = maxloc[idxImage].x;
    const int py = maxloc[idxImage].y;
    const float peak = maxval[idxImage];

    // A peak on the border has no complete 3x3 neighbourhood.
    if (px-1 < 0 || py-1 < 0 || px+1 >= NX || py+1 >= NY) {
        covValue[idxImage] = make_float3(99.0, 99.0, 99.0);
        return;
    }

    // Linear index of the peak; the eight neighbours are addressed relative to it.
    const int center = NX * NY * idxImage + px * NY + py;
    const float c00 = corrBatchRaw[center - NY - 1];
    const float c01 = corrBatchRaw[center - NY    ];
    const float c02 = corrBatchRaw[center - NY + 1];
    const float c10 = corrBatchRaw[center      - 1];
    const float c11 = corrBatchRaw[center         ];
    const float c12 = corrBatchRaw[center      + 1];
    const float c20 = corrBatchRaw[center + NY - 1];
    const float c21 = corrBatchRaw[center + NY    ];
    const float c22 = corrBatchRaw[center + NY + 1];

    // Negated second differences around the peak.
    float dxx = - ( c21 + c01 - 2*c11 ) * 0.5;
    float dyy = - ( c12 + c10 - 2*c11 ) * 0.5;
    float dxy = - ( c22 + c00 - c20 - c02 ) * 0.25;

    // Terms derived from (1 - peak), clamped at zero.
    float n2 = fmaxf(1 - peak, 0.0);
    float n4 = n2*n2;

    // Scale the differences by the number of samples in one surface.
    const int winSize = NX*NY;
    dxx = dxx * winSize;
    dyy = dyy * winSize;
    dxy = dxy * winSize;

    n2 = n2 * 2;
    n4 = n4 * 0.5 * winSize;

    const float u = dxy * dxy - dxx * dyy;
    const float u2 = u*u;

    // u2 is the divisor below; a near-zero u gets the sentinel instead.
    if (fabsf(u) < 1e-2) {
        covValue[idxImage] = make_float3(99.0, 99.0, 99.0);
        return;
    }

    const float cov_xx = (- n2 * u * dyy + n4 * ( dyy*dyy + dxy*dxy) ) / u2;
    const float cov_yy = (- n2 * u * dxx + n4 * ( dxx*dxx + dxy*dxy) ) / u2;
    const float cov_xy = ( n2 * u * dxy - n4 * ( dxx + dyy ) * dxy ) / u2;
    covValue[idxImage] = make_float3(cov_xx, cov_yy, cov_xy);
}
// Host launcher for cudaKernel_estimateVar.
// Uses a 1-D launch with NTHREADS threads per block and enough blocks to cover
// corrBatchRaw->count surfaces, on `stream`. The surface's height and width
// are passed as the kernel's NX and NY.
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream)
{
    const int nSurfaces = corrBatchRaw->count;
    cudaKernel_estimateVar<NTHREADS><<< IDIVUP(nSurfaces, NTHREADS), NTHREADS, 0, stream>>>(
        corrBatchRaw->devData, corrBatchRaw->height, corrBatchRaw->width,
        maxloc->devData, maxval->devData, covValue->devData, nSurfaces);
    getLastCudaError("cudaKernel_estimateVar error\n");
}

View File

@ -7,20 +7,21 @@
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
import os
os.environ["CC"] = "g++"
import numpy
setup( name = 'PyCuAmpcor',
ext_modules = cythonize(Extension(
"PyCuAmpcor",
sources=['PyCuAmpcor.pyx'],
include_dirs=['/usr/local/cuda/include'], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS
include_dirs=['/usr/local/cuda/include', numpy.get_include()], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS
extra_compile_args=['-fPIC','-fpermissive'],
extra_objects=['SlcImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o',
extra_objects=['GDALImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o',
'cuCorrNormalization.o','cuCorrTimeDomain.o','cuArraysCopy.o',
'cuArrays.o','cuArraysPadding.o','cuOffset.o','cuOverSampler.o',
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o'],
extra_link_args=['-L/usr/local/cuda/lib64','-lcuda','-lcudart','-lcufft','-lcublas'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o','cuEstimateStats.o'],
extra_link_args=['-L/usr/local/cuda/lib64',
'-L/usr/lib64/nvidia',
'-lcuda','-lcudart','-lcufft','-lcublas','-lgdal'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
language='c++'
)))

View File

@ -78,3 +78,6 @@ SConscript(rfi)
SConscript('PyCuAmpcor/SConscript')
SConscript('splitSpectrum/SConscript')
SConscript('alos2proc/SConscript')
if os.path.exists('geo_autoRIFT'):
SConscript('geo_autoRIFT/SConscript')

View File

@ -139,7 +139,7 @@ def main(iargs=None):
direction = np.sign(np.dot( np.cross(targxyz-mxyz, sxyz-mxyz), mvel))
Bperp[ii,jj] = direction*perp
Bperp.tofile(fid)
Bperp.tofile(fid)
fid.close()
####Write XML

View File

@ -33,6 +33,8 @@ def cmdLineParse(iargs = None):
def run(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
objSlc1 = isceobj.createSlcImage()
#right now imageSlc1 and 2 are just text files, need to open them as image
IU.copyAttributes(imageSlc1, objSlc1)
objSlc1.setAccessMode('read')
objSlc1.createImage()
@ -81,7 +83,6 @@ def run(imageSlc1, imageSlc2, resampName, azLooks, rgLooks):
def main(iargs=None):
inps = cmdLineParse(iargs)
img1 = isceobj.createImage()
@ -96,9 +97,8 @@ def main(iargs=None):
run(img1, img2, inps.prefix, inps.azlooks, inps.rglooks)
if __name__ == '__main__':
main()
'''
Main driver.
'''

View File

@ -52,7 +52,7 @@ def generate(env):
# default flags for the NVCC compiler
env['STATICNVCCFLAGS'] = ''
env['SHAREDNVCCFLAGS'] = ''
env['ENABLESHAREDNVCCFLAG'] = '-arch=sm_35 -shared -Xcompiler -fPIC'
env['ENABLESHAREDNVCCFLAG'] = '-shared -Xcompiler -fPIC'
# default NVCC commands
env['STATICNVCCCMD'] = '$NVCC -o $TARGET -c $NVCCFLAGS $STATICNVCCFLAGS $SOURCES'
@ -153,7 +153,7 @@ def generate(env):
#env.Append(LIBPATH=[cudaSDKPath + '/lib', cudaSDKPath + '/common/lib' + cudaSDKSubLibDir, cudaToolkitPath + '/lib'])
env.Append(CUDACPPPATH=[cudaToolkitPath + '/include'])
env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64'])
env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64', '/lib64'])
env.Append(CUDALIBS=['cudart'])
def exists(env):

View File

@ -12,7 +12,7 @@
from __future__ import print_function
import sys
import os
import urllib2
import urllib
import getopt
import re
import shutil
@ -57,7 +57,7 @@ def print2log(msg, withtime=True, cmd=False):
if withtime:
now = datetime.datetime.today()
msg = "%s >> %s" % (now.isoformat(), msg)
LOGFILE.write(msg + '\n')
LOGFILE.write((msg + '\n').encode('utf-8'))
LOGFILE.flush()
os.fsync(LOGFILE)
@ -157,9 +157,9 @@ def downloadfile(url, fname, repeat=1):
counter = 0
while counter < repeat:
try:
response = urllib2.urlopen(url)
response = urllib.request.urlopen(url)
break
except urllib2.URLError, e:
except urllib.request.URLError as e:
counter += 1
if hasattr(e, 'reason'):
print2log("Failed to reach server. Reason: %s" % e.reason)
@ -851,7 +851,7 @@ class ISCEDeps(object):
f = open(self.config, 'rb')
lines = f.readlines()
for line in lines:
m = re.match("([^#].*?)=([^#]+?)$", line.strip())
m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8'))
if m:
var = m.group(1).strip()
val = m.group(2).strip()
@ -867,7 +867,7 @@ def readSetupConfig(setup_config):
f = open(setup_config, 'rb')
lines = f.readlines()
for line in lines:
m = re.match("([^#].*?)=([^#]+?)$", line.strip())
m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8'))
if m:
var = m.group(1).strip()
val = m.group(2).strip().replace('"', '')
@ -885,7 +885,7 @@ def checkArgs(args):
"""
try:
opts, args = getopt.getopt(args, "h", ["help", "prefix=", "ping=", "config=", "uname=", "download=", "unpack=", "install=", "gcc=", "gpp=", "verbose"])
except getopt.GetoptError, err:
except getopt.GetoptError as err:
print2log("ProgError: %s" % str(err))
usage()
sys.exit(2)