GPUresamp module updates (#325)

* GPUresamp module updates

  * modify the gpu resamp code to produce results consistent with cpu
  * add the gpuresamp module to runFineResamp.py in TopsProc, which can be loaded topsApp.py
  * also fine-tune the sinc interpolation in cpu code: the sinc coef center value and normalization
LT1AB
Lijun Zhu 2021-09-23 14:04:19 -07:00 committed by GitHub
parent 5e3f28311b
commit 0dcbbf67d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 633 additions and 502 deletions

View File

@ -62,7 +62,7 @@ GEOMETRY_DIRNAME = Component.Parameter('geometryDirname',
public_name='geometry directory name', public_name='geometry directory name',
default='geom_reference', default='geom_reference',
type=str, type=str,
mandatory=False, mandatory=False,
doc = 'Geometry directory') doc = 'Geometry directory')
ESD_DIRNAME = Component.Parameter('esdDirname', ESD_DIRNAME = Component.Parameter('esdDirname',
@ -456,19 +456,19 @@ class TopsProc(Component):
''' '''
Save the product to an XML file using Product Manager. Save the product to an XML file using Product Manager.
''' '''
from iscesys.Component.ProductManager import ProductManager as PM from iscesys.Component.ProductManager import ProductManager as PM
pm = PM() pm = PM()
pm.configure() pm.configure()
pm.dumpProduct(obj, xmlname) pm.dumpProduct(obj, xmlname)
return None return None
@property @property
def referenceSlcOverlapProduct(self): def referenceSlcOverlapProduct(self):
return os.path.join(self.referenceSlcProduct, self.overlapsSubDirname) return os.path.join(self.referenceSlcProduct, self.overlapsSubDirname)
@property @property
def coregOverlapProduct(self): def coregOverlapProduct(self):
@ -519,7 +519,7 @@ class TopsProc(Component):
return [x+1 for x in range(self.numberOfSwaths)] return [x+1 for x in range(self.numberOfSwaths)]
else: else:
return inlist return inlist
def getValidSwathList(self, inlist): def getValidSwathList(self, inlist):
''' '''
Used to get list of swaths left after applying all filters - e.g, region of interest. Used to get list of swaths left after applying all filters - e.g, region of interest.
@ -540,6 +540,7 @@ class TopsProc(Component):
try: try:
from zerodop.GPUtopozero.GPUtopozero import PyTopozero from zerodop.GPUtopozero.GPUtopozero import PyTopozero
from zerodop.GPUgeo2rdr.GPUgeo2rdr import PyGeo2rdr from zerodop.GPUgeo2rdr.GPUgeo2rdr import PyGeo2rdr
from zerodop.GPUresampslc.GPUresampslc import PyResampSlc
flag = True flag = True
except: except:
pass pass

View File

@ -2,6 +2,7 @@
# Author: Piyush Agram # Author: Piyush Agram
# Copyright 2016 # Copyright 2016
# #
#
import isce import isce
import isceobj import isceobj
@ -12,12 +13,15 @@ from isceobj.Util.Poly2D import Poly2D
import os import os
import copy import copy
from isceobj.Sensor.TOPS import createTOPSSwathSLCProduct from isceobj.Sensor.TOPS import createTOPSSwathSLCProduct
import logging
def resampSecondary(mas, slv, rdict, outname ): logger = logging.getLogger('isce.topsinsar.fineresamp')
def resampSecondaryCPU(reference, secondary, rdict, outname):
''' '''
Resample burst by burst. Resample burst by burst.
''' '''
azpoly = rdict['azpoly'] azpoly = rdict['azpoly']
rgpoly = rdict['rgpoly'] rgpoly = rdict['rgpoly']
azcarrpoly = rdict['carrPoly'] azcarrpoly = rdict['carrPoly']
@ -32,29 +36,28 @@ def resampSecondary(mas, slv, rdict, outname ):
aziImg.setAccessMode('READ') aziImg.setAccessMode('READ')
inimg = isceobj.createSlcImage() inimg = isceobj.createSlcImage()
inimg.load(slv.image.filename + '.xml') inimg.load(secondary.image.filename + '.xml')
inimg.setAccessMode('READ') inimg.setAccessMode('READ')
rObj = stdproc.createResamp_slc() rObj = stdproc.createResamp_slc()
rObj.slantRangePixelSpacing = slv.rangePixelSize rObj.slantRangePixelSpacing = secondary.rangePixelSize
rObj.radarWavelength = slv.radarWavelength rObj.radarWavelength = secondary.radarWavelength
rObj.azimuthCarrierPoly = azcarrpoly rObj.azimuthCarrierPoly = azcarrpoly
rObj.dopplerPoly = dpoly rObj.dopplerPoly = dpoly
rObj.azimuthOffsetsPoly = azpoly rObj.azimuthOffsetsPoly = azpoly
rObj.rangeOffsetsPoly = rgpoly rObj.rangeOffsetsPoly = rgpoly
rObj.imageIn = inimg rObj.imageIn = inimg
####Setting reference values ####Setting reference values
rObj.startingRange = slv.startingRange rObj.startingRange = secondary.startingRange
rObj.referenceSlantRangePixelSpacing = mas.rangePixelSize rObj.referenceSlantRangePixelSpacing = reference.rangePixelSize
rObj.referenceStartingRange = mas.startingRange rObj.referenceStartingRange = reference.startingRange
rObj.referenceWavelength = mas.radarWavelength rObj.referenceWavelength = reference.radarWavelength
width = mas.numberOfSamples width = reference.numberOfSamples
length = mas.numberOfLines length = reference.numberOfLines
imgOut = isceobj.createSlcImage() imgOut = isceobj.createSlcImage()
imgOut.setWidth(width) imgOut.setWidth(width)
imgOut.filename = outname imgOut.filename = outname
@ -70,24 +73,135 @@ def resampSecondary(mas, slv, rdict, outname ):
imgOut.renderHdr() imgOut.renderHdr()
return imgOut return imgOut
def convertPoly2D(poly):
'''
Convert a isceobj.Util.Poly2D {poly} into zerodop.GPUresampslc.GPUresampslc.PyPloy2d
'''
from zerodop.GPUresampslc.GPUresampslc import PyPoly2d
import itertools
def getRelativeShifts(mFrame, sFrame, minBurst, maxBurst, secondaryBurstStart): # get parameters from poly
azimuthOrder = poly.getAzimuthOrder()
rangeOrder = poly.getRangeOrder()
azimuthMean = poly.getMeanAzimuth()
rangeMean = poly.getMeanRange()
azimuthNorm = poly.getNormAzimuth()
rangeNorm = poly.getNormRange()
# create the PyPoly2d object
pPoly = PyPoly2d(azimuthOrder, rangeOrder, azimuthMean, rangeMean, azimuthNorm, rangeNorm)
# copy the coeffs, need to flatten into 1d list
pPoly.coeffs = list(itertools.chain.from_iterable(poly.getCoeffs()))
# all done
return pPoly
def resampSecondaryGPU(reference, secondary, rdict, outname):
'''
Resample burst by burst with GPU
'''
# import the GPU module
import zerodop.GPUresampslc
# get Poly2D objects from rdict and convert them into PyPoly2d objects
azpoly = convertPoly2D(rdict['azpoly'])
rgpoly = convertPoly2D(rdict['rgpoly'])
azcarrpoly = convertPoly2D(rdict['carrPoly'])
dpoly = convertPoly2D(rdict['doppPoly'])
rngImg = isceobj.createImage()
rngImg.load(rdict['rangeOff'] + '.xml')
rngImg.setCaster('read', 'FLOAT')
rngImg.createImage()
aziImg = isceobj.createImage()
aziImg.load(rdict['azimuthOff'] + '.xml')
aziImg.setCaster('read', 'FLOAT')
aziImg.createImage()
inimg = isceobj.createSlcImage()
inimg.load(secondary.image.filename + '.xml')
inimg.setAccessMode('READ')
inimg.createImage()
# create a GPU resample processor
rObj = zerodop.GPUresampslc.createResampSlc()
# set parameters
rObj.slr = secondary.rangePixelSize
rObj.wvl = secondary.radarWavelength
# set polynomials
rObj.azCarrier = azcarrpoly
rObj.dopplerPoly = dpoly
rObj.azOffsetsPoly = azpoly
rObj.rgOffsetsPoly = rgpoly
# need to create an empty rgCarrier poly
rgCarrier = Poly2D()
rgCarrier.initPoly(rangeOrder=0, azimuthOrder=0, coeffs=[[0.]])
rgCarrier = convertPoly2D(rgCarrier)
rObj.rgCarrier = rgCarrier
# input secondary image
rObj.slcInAccessor = inimg.getImagePointer()
rObj.inWidth = inimg.getWidth()
rObj.inLength = inimg.getLength()
####Setting reference values
rObj.r0 = secondary.startingRange
rObj.refr0 = reference.rangePixelSize
rObj.refslr = reference.startingRange
rObj.refwvl = reference.radarWavelength
# set output image
width = reference.numberOfSamples
length = reference.numberOfLines
imgOut = isceobj.createSlcImage()
imgOut.setWidth(width)
imgOut.filename = outname
imgOut.setAccessMode('write')
imgOut.createImage()
rObj.slcOutAccessor = imgOut.getImagePointer()
rObj.outWidth = width
rObj.outLength = length
rObj.residRgAccessor = rngImg.getImagePointer()
rObj.residAzAccessor = aziImg.getImagePointer()
# need to specify data type, only complex is currently supported
rObj.isComplex = (inimg.dataType == 'CFLOAT')
# run resampling
rObj.resamp_slc()
# finalize images
inimg.finalizeImage()
imgOut.finalizeImage()
rngImg.finalizeImage()
aziImg.finalizeImage()
imgOut.renderHdr()
return imgOut
def getRelativeShifts(referenceFrame, secondaryFrame, minBurst, maxBurst, secondaryBurstStart):
''' '''
Estimate the relative shifts between the start of the bursts. Estimate the relative shifts between the start of the bursts.
''' '''
azReferenceOff = {} azReferenceOff = {}
azSecondaryOff = {} azSecondaryOff = {}
azRelOff = {} azRelOff = {}
tm = mFrame.bursts[minBurst].sensingStart tm = referenceFrame.bursts[minBurst].sensingStart
dt = mFrame.bursts[minBurst].azimuthTimeInterval dt = referenceFrame.bursts[minBurst].azimuthTimeInterval
ts = sFrame.bursts[secondaryBurstStart].sensingStart ts = secondaryFrame.bursts[secondaryBurstStart].sensingStart
for index in range(minBurst, maxBurst): for index in range(minBurst, maxBurst):
burst = mFrame.bursts[index] burst = referenceFrame.bursts[index]
azReferenceOff[index] = int(np.round((burst.sensingStart - tm).total_seconds() / dt)) azReferenceOff[index] = int(np.round((burst.sensingStart - tm).total_seconds() / dt))
burst = sFrame.bursts[secondaryBurstStart + index - minBurst] burst = secondaryFrame.bursts[secondaryBurstStart + index - minBurst]
azSecondaryOff[secondaryBurstStart + index - minBurst] = int(np.round((burst.sensingStart - ts).total_seconds() / dt)) azSecondaryOff[secondaryBurstStart + index - minBurst] = int(np.round((burst.sensingStart - ts).total_seconds() / dt))
azRelOff[secondaryBurstStart + index - minBurst] = azSecondaryOff[secondaryBurstStart + index - minBurst] - azReferenceOff[index] azRelOff[secondaryBurstStart + index - minBurst] = azSecondaryOff[secondaryBurstStart + index - minBurst] - azReferenceOff[index]
@ -149,7 +263,7 @@ def adjustValidSampleLine(reference, secondary, minAz=0, maxAz=0, minRng=0, maxR
reference.numValidLines = secondary.numValidLines - 8 reference.numValidLines = secondary.numValidLines - 8
else: else:
reference.numValidLines = reference.numberOfLines - reference.firstValidLine reference.numValidLines = reference.numberOfLines - reference.firstValidLine
elif (minAz < 0) and (maxAz > 0): elif (minAz < 0) and (maxAz > 0):
reference.firstValidLine = secondary.firstValidLine - int(np.floor(minAz) - 4) reference.firstValidLine = secondary.firstValidLine - int(np.floor(minAz) - 4)
lastValidLine = reference.firstValidLine + secondary.numValidLines + int(np.floor(minAz) - 8) - int(np.ceil(maxAz)) lastValidLine = reference.firstValidLine + secondary.numValidLines + int(np.floor(minAz) - 8) - int(np.ceil(maxAz))
@ -162,7 +276,7 @@ def adjustValidSampleLine(reference, secondary, minAz=0, maxAz=0, minRng=0, maxR
def getValidLines(secondary, rdict, inname, misreg_az=0.0, misreg_rng=0.0): def getValidLines(secondary, rdict, inname, misreg_az=0.0, misreg_rng=0.0):
''' '''
Looks at the reference, secondary and azimuth offsets and gets the Interferogram valid lines Looks at the reference, secondary and azimuth offsets and gets the Interferogram valid lines
''' '''
dimg = isceobj.createSlcImage() dimg = isceobj.createSlcImage()
dimg.load(inname + '.xml') dimg.load(inname + '.xml')
@ -189,9 +303,18 @@ def getValidLines(secondary, rdict, inname, misreg_az=0.0, misreg_rng=0.0):
def runFineResamp(self): def runFineResamp(self):
''' '''
Create coregistered overlap secondarys. Create coregistered overlap secondary image.
''' '''
# decide whether to use CPU or GPU
hasGPU = self.useGPU and self._insar.hasGPU()
if hasGPU:
resampSecondary = resampSecondaryGPU
print('Using GPU for fineresamp')
else:
resampSecondary = resampSecondaryCPU
swathList = self._insar.getValidSwathList(self.swaths) swathList = self._insar.getValidSwathList(self.swaths)
@ -209,7 +332,6 @@ def runFineResamp(self):
outdir = os.path.join(self._insar.fineCoregDirname, 'IW{0}'.format(swath)) outdir = os.path.join(self._insar.fineCoregDirname, 'IW{0}'.format(swath))
os.makedirs(outdir, exist_ok=True) os.makedirs(outdir, exist_ok=True)
###Directory with offsets ###Directory with offsets
offdir = os.path.join(self._insar.fineOffsetsDirname, 'IW{0}'.format(swath)) offdir = os.path.join(self._insar.fineOffsetsDirname, 'IW{0}'.format(swath))
@ -222,8 +344,8 @@ def runFineResamp(self):
continue continue
relShifts = getRelativeShifts(reference, secondary, minBurst, maxBurst, secondaryBurstStart) relShifts = getRelativeShifts(reference, secondary, minBurst, maxBurst, secondaryBurstStart)
print('Shifts IW-{0}: '.format(swath), relShifts) print('Shifts IW-{0}: '.format(swath), relShifts)
####Can corporate known misregistration here ####Can corporate known misregistration here
apoly = Poly2D() apoly = Poly2D()
apoly.initPoly(rangeOrder=0,azimuthOrder=0,coeffs=[[0.]]) apoly.initPoly(rangeOrder=0,azimuthOrder=0,coeffs=[[0.]])
@ -240,10 +362,10 @@ def runFineResamp(self):
coreg.configure() coreg.configure()
for ii in range(minBurst, maxBurst): for ii in range(minBurst, maxBurst):
jj = secondaryBurstStart + ii - minBurst jj = secondaryBurstStart + ii - minBurst
masBurst = reference.bursts[ii] referenceBurst = reference.bursts[ii]
slvBurst = secondary.bursts[jj] secondaryBurst = secondary.bursts[jj]
try: try:
offset = relShifts[jj] offset = relShifts[jj]
@ -251,7 +373,7 @@ def runFineResamp(self):
raise Exception('Trying to access shift for secondary burst index {0}, which may not overlap with reference for swath {1}'.format(jj, swath)) raise Exception('Trying to access shift for secondary burst index {0}, which may not overlap with reference for swath {1}'.format(jj, swath))
outname = os.path.join(outdir, 'burst_%02d.slc'%(ii+1)) outname = os.path.join(outdir, 'burst_%02d.slc'%(ii+1))
####Setup initial polynomials ####Setup initial polynomials
### If no misregs are given, these are zero ### If no misregs are given, these are zero
### If provided, can be used for resampling without running to geo2rdr again for fast results ### If provided, can be used for resampling without running to geo2rdr again for fast results
@ -262,19 +384,19 @@ def runFineResamp(self):
###For future - should account for azimuth and range misreg here .. ignoring for now. ###For future - should account for azimuth and range misreg here .. ignoring for now.
azCarrPoly, dpoly = secondary.estimateAzimuthCarrierPolynomials(slvBurst, offset = -1.0 * offset) azCarrPoly, dpoly = secondary.estimateAzimuthCarrierPolynomials(secondaryBurst, offset = -1.0 * offset)
rdict['carrPoly'] = azCarrPoly rdict['carrPoly'] = azCarrPoly
rdict['doppPoly'] = dpoly rdict['doppPoly'] = dpoly
outimg = resampSecondary(masBurst, slvBurst, rdict, outname) outimg = resampSecondary(referenceBurst, secondaryBurst, rdict, outname)
minAz, maxAz, minRg, maxRg = getValidLines(slvBurst, rdict, outname,
minAz, maxAz, minRg, maxRg = getValidLines(secondaryBurst, rdict, outname,
misreg_az = misreg_az - offset, misreg_rng = misreg_rg) misreg_az = misreg_az - offset, misreg_rng = misreg_rg)
# copyBurst = copy.deepcopy(referenceBurst)
# copyBurst = copy.deepcopy(masBurst) copyBurst = referenceBurst.clone()
copyBurst = masBurst.clone() adjustValidSampleLine(copyBurst, secondaryBurst,
adjustValidSampleLine(copyBurst, slvBurst,
minAz=minAz, maxAz=maxAz, minAz=minAz, maxAz=maxAz,
minRng=minRg, maxRng=maxRg) minRng=minRg, maxRng=maxRg)
copyBurst.image.filename = outimg.filename copyBurst.image.filename = outimg.filename
@ -285,4 +407,3 @@ def runFineResamp(self):
coreg.numberOfBursts = len(coreg.bursts) coreg.numberOfBursts = len(coreg.bursts)
self._insar.saveProduct(coreg, os.path.join(self._insar.fineCoregDirname, 'IW{0}.xml'.format(swath))) self._insar.saveProduct(coreg, os.path.join(self._insar.fineCoregDirname, 'IW{0}.xml'.format(swath)))

View File

@ -9,28 +9,28 @@ contains
if( idx > high) idx = dble(high) if( idx > high) idx = dble(high)
end subroutine crop_indices end subroutine crop_indices
real*8 function bilinear(x,y,z) real*8 function bilinear(x,y,z)
! Bilinear interpolation, input values x,y are ! Bilinear interpolation, input values x,y are
! expected in unitless decimal index value ! expected in unitless decimal index value
real*8, intent(in) :: x,y real*8, intent(in) :: x,y
real*4, intent(in), dimension(:,:) :: z real*4, intent(in), dimension(:,:) :: z
real*8 :: x1, x2, y1, y2 real*8 :: x1, x2, y1, y2
real*8 :: q11, q12, q21, q22 real*8 :: q11, q12, q21, q22
x1 = floor(x) x1 = floor(x)
x2 = ceiling(x) x2 = ceiling(x)
y1 = ceiling(y) y1 = ceiling(y)
y2 = floor(y) y2 = floor(y)
q11 = z(int(y1),int(x1)) q11 = z(int(y1),int(x1))
q12 = z(int(y2),int(x1)) q12 = z(int(y2),int(x1))
q21 = z(int(y1),int(x2)) q21 = z(int(y1),int(x2))
q22 = z(int(y2),int(x2)) q22 = z(int(y2),int(x2))
if(y1.eq.y2.and.x1.eq.x2) then if(y1.eq.y2.and.x1.eq.x2) then
bilinear = q11 bilinear = q11
elseif(y1.eq.y2) then elseif(y1.eq.y2) then
bilinear = (x2 - x)/(x2 - x1)*q11 + (x - x1)/(x2 - x1)*q21 bilinear = (x2 - x)/(x2 - x1)*q11 + (x - x1)/(x2 - x1)*q21
elseif (x1.eq.x2) then elseif (x1.eq.x2) then
@ -109,13 +109,13 @@ contains
end if end if
end function bilinear_f end function bilinear_f
real*8 function bicubic(x,y,z) real*8 function bicubic(x,y,z)
! Bicubic interpolation, input values x,y are ! Bicubic interpolation, input values x,y are
! expected in unitless decimal index value ! expected in unitless decimal index value
! (based on Numerical Recipes Algorithm) ! (based on Numerical Recipes Algorithm)
real*8, intent(in) :: x,y real*8, intent(in) :: x,y
real*4, intent(in), dimension(:,:) :: z real*4, intent(in), dimension(:,:) :: z
integer :: x1, x2, y1, y2, i, j, k, l integer :: x1, x2, y1, y2, i, j, k, l
real*8, dimension(4) :: dzdx,dzdy,dzdxy,zz real*8, dimension(4) :: dzdx,dzdy,dzdxy,zz
real*8, dimension(4,4) :: c ! coefftable real*8, dimension(4,4) :: c ! coefftable
real*8 :: q(16),qq,wt(16,16),cl(16),t,u real*8 :: q(16),qq,wt(16,16),cl(16),t,u
@ -128,16 +128,16 @@ contains
10*0,-3,3,2*0,2,-2,2*0,-1,1,6*0,3,-3,2*0,-2,2,& 10*0,-3,3,2*0,2,-2,2*0,-1,1,6*0,3,-3,2*0,-2,2,&
5*0,1,-2,1,0,-2,4,-2,0,1,-2,1,9*0,-1,2,-1,0,1,-2,1,& 5*0,1,-2,1,0,-2,4,-2,0,1,-2,1,9*0,-1,2,-1,0,1,-2,1,&
10*0,1,-1,2*0,-1,1,6*0,-1,1,2*0,2,-2,2*0,-1,1/ 10*0,1,-1,2*0,-1,1,6*0,-1,1,2*0,2,-2,2*0,-1,1/
x1 = floor(x) x1 = floor(x)
x2 = ceiling(x) x2 = ceiling(x)
!!$ y1 = ceiling(y) !!$ y1 = ceiling(y)
!!$ y2 = floor(y) !!$ y2 = floor(y)
y1 = floor(y) y1 = floor(y)
y2 = ceiling(y) y2 = ceiling(y)
zz(1) = z(y1,x1) zz(1) = z(y1,x1)
zz(4) = z(y2,x1) zz(4) = z(y2,x1)
zz(2) = z(y1,x2) zz(2) = z(y1,x2)
@ -162,7 +162,7 @@ contains
z(y1+1,x2-1)+z(y1-1,x2-1)) z(y1+1,x2-1)+z(y1-1,x2-1))
dzdxy(3) = 0.25d0*(z(y2+1,x2+1)-z(y2-1,x2+1)-& dzdxy(3) = 0.25d0*(z(y2+1,x2+1)-z(y2-1,x2+1)-&
z(y2+1,x2-1)+z(y2-1,x2-1)) z(y2+1,x2-1)+z(y2-1,x2-1))
! compute polynomial coeff ! compute polynomial coeff
! pack values into temp array qq ! pack values into temp array qq
do i = 1,4 do i = 1,4
@ -178,8 +178,8 @@ contains
qq = qq+wt(i,k)*q(k) qq = qq+wt(i,k)*q(k)
enddo enddo
cl(i)=qq cl(i)=qq
enddo enddo
! unpack results into the coeff table ! unpack results into the coeff table
l = 0 l = 0
do i = 1,4 do i = 1,4
@ -188,7 +188,7 @@ contains
c(i,j) = cl(l) c(i,j) = cl(l)
enddo enddo
enddo enddo
! normalize desired points from 0to1 ! normalize desired points from 0to1
t = (x - x1) t = (x - x1)
u = (y - y1) u = (y - y1)
@ -200,13 +200,13 @@ contains
end function bicubic end function bicubic
complex function bicubic_cx(x,y,z) complex function bicubic_cx(x,y,z)
! Bicubic interpolation, input values x,y are ! Bicubic interpolation, input values x,y are
! expected in unitless decimal index value ! expected in unitless decimal index value
! (based on Numerical Recipes Algorithm) ! (based on Numerical Recipes Algorithm)
real*8, intent(in) :: x,y real*8, intent(in) :: x,y
complex, intent(in), dimension(:,:) :: z complex, intent(in), dimension(:,:) :: z
integer :: x1, x2, y1, y2, i, j, k, l integer :: x1, x2, y1, y2, i, j, k, l
complex, dimension(4) :: dzdx,dzdy,dzdxy,zz complex, dimension(4) :: dzdx,dzdy,dzdxy,zz
complex, dimension(4,4) :: c ! coefftable complex, dimension(4,4) :: c ! coefftable
complex :: q(16),qq,cl(16) complex :: q(16),qq,cl(16)
@ -220,16 +220,16 @@ contains
10*0,-3,3,2*0,2,-2,2*0,-1,1,6*0,3,-3,2*0,-2,2,& 10*0,-3,3,2*0,2,-2,2*0,-1,1,6*0,3,-3,2*0,-2,2,&
5*0,1,-2,1,0,-2,4,-2,0,1,-2,1,9*0,-1,2,-1,0,1,-2,1,& 5*0,1,-2,1,0,-2,4,-2,0,1,-2,1,9*0,-1,2,-1,0,1,-2,1,&
10*0,1,-1,2*0,-1,1,6*0,-1,1,2*0,2,-2,2*0,-1,1/ 10*0,1,-1,2*0,-1,1,6*0,-1,1,2*0,2,-2,2*0,-1,1/
x1 = floor(x) x1 = floor(x)
x2 = ceiling(x) x2 = ceiling(x)
!!$ y1 = ceiling(y) !!$ y1 = ceiling(y)
!!$ y2 = floor(y) !!$ y2 = floor(y)
y1 = floor(y) y1 = floor(y)
y2 = ceiling(y) y2 = ceiling(y)
zz(1) = z(y1,x1) zz(1) = z(y1,x1)
zz(4) = z(y2,x1) zz(4) = z(y2,x1)
zz(2) = z(y1,x2) zz(2) = z(y1,x2)
@ -254,7 +254,7 @@ contains
z(y1+1,x2-1)+z(y1-1,x2-1)) z(y1+1,x2-1)+z(y1-1,x2-1))
dzdxy(3) = 0.25d0*(z(y2+1,x2+1)-z(y2-1,x2+1)-& dzdxy(3) = 0.25d0*(z(y2+1,x2+1)-z(y2-1,x2+1)-&
z(y2+1,x2-1)+z(y2-1,x2-1)) z(y2+1,x2-1)+z(y2-1,x2-1))
! compute polynomial coeff ! compute polynomial coeff
! pack values into temp array qq ! pack values into temp array qq
do i = 1,4 do i = 1,4
@ -270,8 +270,8 @@ contains
qq = qq+wt(i,k)*q(k) qq = qq+wt(i,k)*q(k)
enddo enddo
cl(i)=qq cl(i)=qq
enddo enddo
! unpack results into the coeff table ! unpack results into the coeff table
l = 0 l = 0
do i = 1,4 do i = 1,4
@ -280,7 +280,7 @@ contains
c(i,j) = cl(l) c(i,j) = cl(l)
enddo enddo
enddo enddo
! normalize desired points from 0to1 ! normalize desired points from 0to1
t = (x - x1) t = (x - x1)
u = (y - y1) u = (y - y1)
@ -297,26 +297,27 @@ contains
i_weight,i_intplength,i_filtercoef,r_filter) i_weight,i_intplength,i_filtercoef,r_filter)
!!$c**************************************************************** !!$c****************************************************************
!!$c** !!$c**
!!$c** FILE NAME: sinc_coef.f !!$c** FILE NAME: sinc_coef.f
!!$c** !!$c**
!!$c** DATE WRITTEN: 10/15/97 !!$c** DATE WRITTEN: 10/15/97
!!$c** !!$c**
!!$c** PROGRAMMER: Scott Hensley !!$c** PROGRAMMER: Scott Hensley
!!$c** !!$c**
!!$c** FUNCTIONAL DESCRIPTION: The number of data values in the array !!$c** FUNCTIONAL DESCRIPTION: The number of data values in the array
!!$c** will always be the interpolation length * the decimation factor, !!$c** will always be the interpolation length * the decimation factor,
!!$c** so this is not returned separately by the function. !!$c** so this is not returned separately by the function.
!!$c** !!$c**
!!$c** ROUTINES CALLED: !!$c** ROUTINES CALLED:
!!$c** !!$c**
!!$c** NOTES: !!$c** NOTES:
!!$c** !!$c**
!!$c** UPDATE LOG: !!$c** UPDATE LOG:
!!$c** !!$c**
!!$c** Date Changed Reason Changed CR # and Version # !!$c** Date Changed Reason Changed CR # and Version #
!!$c** ------------ ---------------- ----------------- !!$c** ------------ ---------------- -----------------
!!$c** !!$c** 06/18/21 adjust r_soff to make sure coef at the center is 1.
!!$c** note that the routine doesn't work well for odd sequences
!!$c***************************************************************** !!$c*****************************************************************
use fortranUtils use fortranUtils
@ -330,18 +331,18 @@ contains
integer i_decfactor !the decimation factor integer i_decfactor !the decimation factor
real*8 r_pedestal !pedestal height real*8 r_pedestal !pedestal height
integer i_weight !0 = no weight , 1=weight integer i_weight !0 = no weight , 1=weight
!c OUTPUT VARIABLES: !c OUTPUT VARIABLES:
integer i_intplength !the interpolation length integer i_intplength !the interpolation length
integer i_filtercoef !number of coefficients integer i_filtercoef !number of coefficients
real*8 r_filter(*) !an array of data values real*8 r_filter(*) !an array of data values
!c LOCAL VARIABLES: !c LOCAL VARIABLES:
real*8 r_wgt,r_s,r_fct,r_wgthgt,r_soff,r_wa real*8 r_wgt,r_s,r_fct,r_wgthgt,r_soff,r_wa
integer i integer i
real*8 pi,j real*8 pi,j
!c COMMON BLOCKS: !c COMMON BLOCKS:
@ -354,25 +355,24 @@ contains
!c PROCESSING STEPS: !c PROCESSING STEPS:
!c number of coefficients !c number of coefficients
pi = getPi() pi = getPi()
i_intplength = nint(r_relfiltlen/r_beta) i_intplength = nint(r_relfiltlen/r_beta)
i_filtercoef = i_intplength*i_decfactor i_filtercoef = i_intplength*i_decfactor
r_wgthgt = (1.d0 - r_pedestal)/2.d0 r_wgthgt = (1.d0 - r_pedestal)/2.d0
r_soff = ((i_filtercoef - 1.d0)/2.d0) r_soff = i_filtercoef/2.d0
do i=0,i_filtercoef-1 do i=0,i_filtercoef-1
r_wa = i - r_soff r_wa = i - r_soff
r_wgt = (1.d0 - r_wgthgt) + r_wgthgt*cos((pi*r_wa)/r_soff) r_s = r_wa*r_beta/(1.0d0 * i_decfactor)
j = floor(i - r_soff)
r_s = j*r_beta/(1.0d0 * i_decfactor)
if(r_s .ne. 0.0)then if(r_s .ne. 0.0)then
r_fct = sin(pi*r_s)/(pi*r_s) r_fct = sin(pi*r_s)/(pi*r_s)
else else
r_fct = 1.0d0 r_fct = 1.0d0
endif endif
if(i_weight .eq. 1)then if(i_weight .eq. 1)then
r_wgt = (1.d0 - r_wgthgt) + r_wgthgt*cos((pi*r_wa)/r_soff)
r_filter(i+1) = r_fct*r_wgt r_filter(i+1) = r_fct*r_wgt
else else
r_filter(i+1) = r_fct r_filter(i+1) = r_fct
@ -386,7 +386,7 @@ contains
!cc------------------------------------------- !cc-------------------------------------------
complex*8 function sinc_eval(arrin,nsamp,intarr,idec,ilen,intp,frp) complex*8 function sinc_eval(arrin,nsamp,intarr,idec,ilen,intp,frp)
integer ilen,idec,intp, nsamp integer ilen,idec,intp, nsamp
real*8 frp real*8 frp
complex arrin(0:nsamp-1) complex arrin(0:nsamp-1)
@ -405,12 +405,12 @@ contains
end function sinc_eval end function sinc_eval
real*4 function sinc_eval_2d_f(arrin,intarr,idec,ilen,intpx,intpy,frpx,frpy,xlen,ylen) real*4 function sinc_eval_2d_f(arrin,intarr,idec,ilen,intpx,intpy,frpx,frpy,xlen,ylen)
integer ilen,idec,intpx,intpy,xlen,ylen,k,m,ifracx,ifracy integer ilen,idec,intpx,intpy,xlen,ylen,k,m,ifracx,ifracy
real*8 frpx,frpy real*8 frpx,frpy
real*4 arrin(0:xlen-1,0:ylen-1) real*4 arrin(0:xlen-1,0:ylen-1)
real*4 intarr(0:idec*ilen-1) real*4 intarr(0:idec*ilen-1)
! note: arrin is a zero based coming in, so intp must be a zero-based index. ! note: arrin is a zero based coming in, so intp must be a zero-based index.
sinc_eval_2d_f = 0. sinc_eval_2d_f = 0.
@ -418,63 +418,68 @@ contains
ifracx= min(max(0,int(frpx*idec)),idec-1) ifracx= min(max(0,int(frpx*idec)),idec-1)
ifracy= min(max(0,int(frpy*idec)),idec-1) ifracy= min(max(0,int(frpy*idec)),idec-1)
do k=0,ilen-1 do k=0,ilen-1
do m=0,ilen-1 do m=0,ilen-1
sinc_eval_2d_f = sinc_eval_2d_f + arrin(intpx-k,intpy-m)*& sinc_eval_2d_f = sinc_eval_2d_f + arrin(intpx-k,intpy-m)*&
intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen) intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen)
enddo enddo
enddo enddo
end if end if
end function sinc_eval_2d_f end function sinc_eval_2d_f
real*4 function sinc_eval_2d_d(arrin,intarr,idec,ilen,intpx,intpy,frpx,frpy,xlen,ylen) real*4 function sinc_eval_2d_d(arrin,intarr,idec,ilen,intpx,intpy,frpx,frpy,xlen,ylen)
integer ilen,idec,intpx,intpy,xlen,ylen,k,m,ifracx,ifracy integer ilen,idec,intpx,intpy,xlen,ylen,k,m,ifracx,ifracy
real*8 frpx,frpy real*8 frpx,frpy
real*8 arrin(0:xlen-1,0:ylen-1) real*8 arrin(0:xlen-1,0:ylen-1)
real*8 intarr(0:idec*ilen-1) real*8 intarr(0:idec*ilen-1)
! note: arrin is a zero based coming in, so intp must be a zero-based index. ! note: arrin is a zero based coming in, so intp must be a zero-based index.
sinc_eval_2d_d = 0.d0 sinc_eval_2d_d = 0.d0
if((intpx.ge.ilen-1.and.intpx.lt.xlen) .and. (intpy.ge.ilen-1.and.intpy.lt.ylen)) then if((intpx.ge.ilen-1.and.intpx.lt.xlen) .and. (intpy.ge.ilen-1.and.intpy.lt.ylen)) then
ifracx= min(max(0,int(frpx*idec)),idec-1) ifracx= min(max(0,int(frpx*idec)),idec-1)
ifracy= min(max(0,int(frpy*idec)),idec-1) ifracy= min(max(0,int(frpy*idec)),idec-1)
do k=0,ilen-1 do k=0,ilen-1
do m=0,ilen-1 do m=0,ilen-1
sinc_eval_2d_d = sinc_eval_2d_d + arrin(intpx-k,intpy-m)*& sinc_eval_2d_d = sinc_eval_2d_d + arrin(intpx-k,intpy-m)*&
intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen) intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen)
enddo enddo
enddo enddo
end if end if
end function sinc_eval_2d_d end function sinc_eval_2d_d
complex function sinc_eval_2d_cx(arrin,intarr,idec,ilen,intpx,intpy,frpx,frpy,xlen,ylen) complex function sinc_eval_2d_cx(arrin,intarr,idec,ilen,intpx,intpy,frpx,frpy,xlen,ylen)
integer ilen,idec,intpx,intpy,xlen,ylen,k,m,ifracx,ifracy integer ilen,idec,intpx,intpy,xlen,ylen,k,m,ifracx,ifracy
real*8 frpx,frpy real*8 frpx,frpy, fweight, fweightsum
complex arrin(0:xlen-1,0:ylen-1) complex arrin(0:xlen-1,0:ylen-1)
real*4 intarr(0:idec*ilen-1) real*4 intarr(0:idec*ilen-1)
! note: arrin is a zero based coming in, so intp must be a zero-based index. ! note: arrin is a zero based coming in, so intp must be a zero-based index.
sinc_eval_2d_cx = cmplx(0.,0.) sinc_eval_2d_cx = cmplx(0.,0.)
if((intpx.ge.ilen-1.and.intpx.lt.xlen) .and. (intpy.ge.ilen-1.and.intpy.lt.ylen)) then if((intpx.ge.ilen-1.and.intpx.lt.xlen) .and. (intpy.ge.ilen-1.and.intpy.lt.ylen)) then
ifracx= min(max(0,int(frpx*idec)),idec-1) ifracx= min(max(0,int(frpx*idec)),idec-1)
ifracy= min(max(0,int(frpy*idec)),idec-1) ifracy= min(max(0,int(frpy*idec)),idec-1)
! to normalize the sinc interpolator
fweightsum = 0.
do k=0,ilen-1 do k=0,ilen-1
do m=0,ilen-1 do m=0,ilen-1
sinc_eval_2d_cx = sinc_eval_2d_cx + arrin(intpx-k,intpy-m)*& fweight = intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen)
intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen) sinc_eval_2d_cx = sinc_eval_2d_cx + arrin(intpx-k,intpy-m) * fweight
fweightsum = fweightsum + fweight
enddo enddo
enddo enddo
end if sinc_eval_2d_cx = sinc_eval_2d_cx/fweightsum
end if
end function sinc_eval_2d_cx end function sinc_eval_2d_cx

View File

@ -22,19 +22,19 @@
integer int_az_off integer int_az_off
integer i_na integer i_na
integer ith, thnum, ithorig integer ith, thnum, ithorig
integer ii, jj integer ii, jj
integer chipi, chipj integer chipi, chipj
real*8 r_ro, r_ao, r_rt, r_at, r_ph, r_dop real*8 r_ro, r_ao, r_rt, r_at, r_ph, r_dop
real*4 t0, t1 real*4 t0, t1
real*8 r_azcorner,r_racorner,fracr,fraca real*8 r_azcorner,r_racorner,fracr,fraca
complex, allocatable, dimension(:,:) :: cin complex, allocatable, dimension(:,:) :: cin
complex, allocatable, dimension(:) :: cout complex, allocatable, dimension(:) :: cout
complex, allocatable, dimension(:) :: cline complex, allocatable, dimension(:) :: cline
complex, allocatable, dimension(:,:,:) :: chip complex, allocatable, dimension(:,:,:) :: chip
complex cval complex cval
real*4, allocatable, dimension(:,:) :: rin real*4, allocatable, dimension(:,:) :: rin
@ -52,9 +52,9 @@
iscomplex = 1 iscomplex = 1
t0 = secnds(0.0) t0 = secnds(0.0)
print *, ' ' print *, ' '
print *, ' << Resample one image to another image coordinates >>' print *, ' << Resample one image to another image coordinates >>'
print *, ' ' print *, ' '
print *, 'Input Image Dimensions: ' print *, 'Input Image Dimensions: '
print *, inwidth, ' pixels' print *, inwidth, ' pixels'
@ -105,7 +105,7 @@
allocate(residrg(outwidth)) allocate(residrg(outwidth))
call prepareMethods(method) call prepareMethods(method)
print *, 'Azimuth Carrier Poly' print *, 'Azimuth Carrier Poly'
call printpoly2d_f(azCarrier) call printpoly2d_f(azCarrier)
@ -118,6 +118,8 @@
print *, 'Azimuth offsets poly' print *, 'Azimuth offsets poly'
call printpoly2d_f(azOffsetsPoly) call printpoly2d_f(azOffsetsPoly)
print *, 'Doppler poly'
call printpoly2d_f(dopplerPoly)
print *, 'Reading in the image' print *, 'Reading in the image'
!c read in the reference image !c read in the reference image
@ -176,7 +178,7 @@
if(residRgAccessor .ne. 0) then if(residRgAccessor .ne. 0) then
call getLineSequential(residRgAccessor, residrg, lineNum) call getLineSequential(residRgAccessor, residrg, lineNum)
endif endif
cout=cmplx(0.,0.) cout=cmplx(0.,0.)
!!!Start of the parallel loop !!!Start of the parallel loop
@ -188,7 +190,7 @@
!$OMP shared(rgCarrier,azCarrier,outwidth,inwidth,dopplerPoly)& !$OMP shared(rgCarrier,azCarrier,outwidth,inwidth,dopplerPoly)&
!$OMP shared(REFR0, REFSLR, R0, REFWVL) !$OMP shared(REFR0, REFSLR, R0, REFWVL)
do i=1,outwidth do i=1,outwidth
!!!Get thread number !!!Get thread number
thnum = omp_get_thread_num() + 1 thnum = omp_get_thread_num() + 1
@ -199,14 +201,14 @@
r_ao = evalPoly2d_f(azOffsetsPoly,r_at,r_rt) + residaz(i) r_ao = evalPoly2d_f(azOffsetsPoly,r_at,r_rt) + residaz(i)
k=int(i+r_ro) !range offset k=floor(i+r_ro) !range offset
fracr=i+r_ro-k fracr=i+r_ro-k
if ((k .le. sinchalf) .or. (k.ge.(inwidth-sinchalf))) then if ((k .le. sinchalf) .or. (k.ge.(inwidth-sinchalf))) then
cycle cycle
endif endif
kk=int(j+r_ao) !azimuth offset kk=floor(j+r_ao) !azimuth offset
fraca=j+r_ao-kk fraca=j+r_ao-kk
if ((kk .le. sinchalf) .or. (kk.ge.(inlength-sinchalf))) then if ((kk .le. sinchalf) .or. (kk.ge.(inlength-sinchalf))) then
@ -228,7 +230,7 @@
chip(ii,jj,thnum) = cin(chipi,chipj)*cval chip(ii,jj,thnum) = cin(chipi,chipj)*cval
end do end do
end do end do
!!!Doppler to be added back !!!Doppler to be added back
r_ph = r_dop*fraca r_ph = r_dop*fraca
@ -261,7 +263,7 @@
print *, 'Real data interpolation not implemented yet.' print *, 'Real data interpolation not implemented yet.'
endif endif
@ -289,5 +291,5 @@
!Reset number of threads !Reset number of threads
call omp_set_num_threads(ithorig) call omp_set_num_threads(ithorig)
end end

View File

@ -126,7 +126,7 @@ cdef class PyPoly2d:
newpoly.c_poly2d = poly newpoly.c_poly2d = poly
newpoly.owner = False newpoly.owner = False
return newpoly return newpoly
def setCoeff(self, int a, int b, double c): def setCoeff(self, int a, int b, double c):
self.c_poly2d.setCoeff(a,b,c) self.c_poly2d.setCoeff(a,b,c)
def getCoeff(self, int a, int b): def getCoeff(self, int a, int b):
@ -163,15 +163,15 @@ cdef extern from "ResampSlc.h":
void clearPolys() void clearPolys()
void resetPolys() void resetPolys()
void resamp() void resamp()
cdef class PyResampSlc: cdef class PyResampSlc:
cdef ResampSlc *c_resamp cdef ResampSlc *c_resamp
def __cinit__(self): def __cinit__(self):
self.c_resamp = new ResampSlc() self.c_resamp = new ResampSlc()
def __dealloc__(self): #def __dealloc__(self):
del self.c_resamp # del self.c_resamp
@property @property
def slcInAccessor(self): def slcInAccessor(self):
@ -335,7 +335,7 @@ cdef class PyResampSlc:
cdef PyPoly2d poly = PyPoly2d.boundTo(self.c_resamp.releaseDoppler()) cdef PyPoly2d poly = PyPoly2d.boundTo(self.c_resamp.releaseDoppler())
poly.owner = True poly.owner = True
return poly return poly
def resamp_slc(self): def resamp_slc(self):
self.c_resamp.resamp() self.c_resamp.resamp()

View File

@ -15,14 +15,16 @@
#define SINC_HALF (SINC_LEN/2) #define SINC_HALF (SINC_LEN/2)
#define SINC_ONE (SINC_LEN+1) #define SINC_ONE (SINC_LEN+1)
#define IDX1D(i,j,w) (((i)*(w))+(j)) #define IDX1D(i,j,w) (((i)*(w))+(j))
#define modulo_f(a,b) fmod(fmod(a,b)+(b),(b)) #define modulo_f(a,b) fmod(fmod(a,b)+(b),(b))
struct InputData { struct InputData {
cuFloatComplex *imgIn; cuFloatComplex *imgIn;
cuFloatComplex *imgOut; cuFloatComplex *imgOut;
double *residAz; float *residAz;
double *residRg; float *residRg;
double *azOffPoly; double *azOffPoly;
double *rgOffPoly; double *rgOffPoly;
double *dopPoly; double *dopPoly;
@ -38,7 +40,7 @@ __constant__ int ini[8];
// GPU Helper Functions // GPU Helper Functions
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// Data usage: 8 doubles/pointers, 2 ints -- 72 bytes/call // Data usage: 8 floats/pointers, 2 ints -- 72 bytes/call
__device__ double evalPolyAt(double *polyArr, double azi, double rng) { __device__ double evalPolyAt(double *polyArr, double azi, double rng) {
// C-style eval method of Poly2d (adjusted to work with the array-format Poly2d where: // C-style eval method of Poly2d (adjusted to work with the array-format Poly2d where:
// polyArr[0] = azimuthOrder // polyArr[0] = azimuthOrder
@ -49,7 +51,7 @@ __device__ double evalPolyAt(double *polyArr, double azi, double rng) {
// polyArr[5] = rangeNorm // polyArr[5] = rangeNorm
// polyArr[6...] = coeffs (len ([0]+1)*([1]+1)) // polyArr[6...] = coeffs (len ([0]+1)*([1]+1))
// Therefore we can guarantee that polyArr has at least 7 elements, and intuitively stores its own length using the orders // Therefore we can guarantee that polyArr has at least 7 elements, and intuitively stores its own length using the orders
double val, scalex, scaley, xval, yval; double val, scalex, scaley, xval, yval;
int i, j; int i, j;
val = 0.; val = 0.;
@ -65,123 +67,120 @@ __device__ double evalPolyAt(double *polyArr, double azi, double rng) {
return val; return val;
} }
// Data usage: 4 doubles, 1 int -- 36 bytes/call __global__ void removeCarrier(struct InputData inData) {
__device__ cuFloatComplex fintp(int idx) { // remove the carriers from input slc
// Replaces storing ~4MB and copying in/out of constant storage by calculating an element of fintp on the fly // thread id, as the pixel index for the input image
int pix = blockDim.x * blockIdx.x + threadIdx.x;
double weight, filtFact, sinFact, i; // check the thread range
i = int(int(idx / SINC_LEN) + ((idx % SINC_LEN) * (SINC_SUB / 2)) - 1) - 16383.5; // ini[0] - inLength
weight = .5 + (.5 * cos((M_PI * i) / 16383.5)); // ini[1] - inWidth
sinFact = i * (.75 / (SINC_SUB / 2)); if(pix >= ini[0]*ini[1])
filtFact = (sin(M_PI * sinFact) / (M_PI * sinFact)) * weight; // No need to check if sinFact is 0 since SINC_SUB != 0 and i-16383.5 != 0 as i is an int return;
return make_cuFloatComplex(filtFact*weight, 0.);
}
// Data usage: 6 double/complex/pointers, 4 ints -- 64 bytes/call // get pixel location along azimuth/range
// Add call to fintp (36 bytes) -- 100 bytes/call int idxi = pix/ini[1];
__device__ cuFloatComplex sinc_interp(cuFloatComplex *chip, double fraca, double fracr, double dop, float *fintp) { int idxj = pix%ini[1];
// This is a modified/hardwired form of sinc_eval_2d from Interpolator. We eliminate a couple of checks that we know will pass, and
// adjusted the algorithm to account for modifications to the main kernel below. Primarily three things are of interest:
// 1. Chip is actually a pointer to the top-left of the chip location in the main image block, so chip's 'width' actually is
// ini[1], not SINC_ONE.
// 2. We account for removing doppler effects using cval and taking in dop. In the older version of the main kernel, this
// value was calculated and multiplied as the data was copied into the smaller chip. Here we calculate it on the fly using
// the same index for 'ii' as the row index of the chip being operated on (so in this case since it happens from the bottom
// up, we use SINC_LEN-i instead of i).
cuFloatComplex ret = make_cuFloatComplex(0.,0.); // the poly uses fortran 1-indexing
cuFloatComplex cval; double r_i = idxi +1;
int ifracx, ifracy, i, j; double r_j = idxj +1;
// get the phase shift due to carriers
ifracx = min(max(0,int(fraca*SINC_SUB)), SINC_SUB-1); double ph = evalPolyAt(inData.rgCarrierPoly, r_i, r_j) +
ifracy = min(max(0,int(fracr*SINC_SUB)), SINC_SUB-1); evalPolyAt(inData.azCarrierPoly, r_i, r_j);
for (i=0; i<SINC_LEN; i++) { ph = modulo_f(ph, 2.*M_PI);
cval = make_cuFloatComplex(cos((SINC_LEN-i-4.)*dop), -sin((SINC_LEN-i-4.)*dop)); // remove the phase shift from the data
for (j=0; j<SINC_LEN; j++) { cuFloatComplex cval = cuCmulf(inData.imgIn[pix], make_cuFloatComplex(cosf(ph), -sinf(ph)));
ret = cuCaddf(ret, // assign the new value
cuCmulf( inData.imgIn[pix] = cval;
cuCmulf(chip[IDX1D(SINC_LEN-i,SINC_LEN-j,ini[1])], cval), // all done
make_cuFloatComplex(fintp[IDX1D(ifracx,i,SINC_LEN)]*fintp[IDX1D(ifracy,j,SINC_LEN)], 0.)
)
);
}
}
return ret;
} }
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// GPU Main Kernel // GPU Main Kernel
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// Data Usage: 15 pointers/doubles, 5 ints, 1 bool -- 144 bytes/call (assuming 1 bool ==> 1 int) // Data Usage: 15 pointers/floats, 5 ints, 1 bool -- 144 bytes/call (assuming 1 bool ==> 1 int)
// Add call to sinc_interp (100 bytes/call) -- 244 bytes/call (for funsies let's assume ~250 bytes/call) // Add call to sinfc_interp (100 bytes/call) -- 244 bytes/call (for funsies let's assume ~250 bytes/call)
// NOTE: We ignore calls to evalPolyAt since they have less // NOTE: We ignore calls to evalPolyAt sinfce they have less
// data usage and therefore do not really matter for // data usage and therefore do not really matter for
// max data usage // max data usage
__global__ void GPUResamp(struct InputData inData) { __global__ void GPUResamp(struct InputData inData) {
// Main GPU ResampSlc kernel, slightly modified from original algorithm to save significant space // Main GPU ResampSlc kernel, slightly modified from original algorithm to save significant space
int pix = (blockDim.x * blockIdx.x) + threadIdx.x; int pix = blockDim.x * blockIdx.x + threadIdx.x;
if (pix < (ini[2] * ini[6])) { // check within outWidth*LINES_PER_TILE
//cuFloatComplex chip[SINC_ONE*SINC_ONE]; if (pix >= (ini[2] * ini[6]))
cuFloatComplex cval; return;
double ao, ro, fracr, fraca, ph, dop;
int k, kk, idxi, idxj; //chipi, chipj, ii, jj;
bool flag;
flag = false; // index along row/azimuth
idxi = (pix / ini[2]) + ini[4]; int idxi = (pix / ini[2]) + ini[4];
idxj = (pix % ini[2]); // index along width/range
int idxj = (pix % ini[2]);
ao = evalPolyAt(inData.azOffPoly, idxi+1, idxj+1); // offset
ro = evalPolyAt(inData.rgOffPoly, idxi+1, idxj+1); // note that the polys use 1-indexing in Fortran code
double ao = evalPolyAt(inData.azOffPoly, idxi+1, idxj+1) + inData.residAz[pix];
double ro = evalPolyAt(inData.rgOffPoly, idxi+1, idxj+1) + inData.residRg[pix];
k = idxi + ao; // azimuth coordinate
fraca = idxi + ao - k; int ka = floor(idxi + ao);
if ((k < SINC_HALF) || (k >= (ini[0]-SINC_HALF))) flag = true; double fraca = idxi + ao - ka;
// range coordinate
if (!flag) { int kr = floor(idxj + ro);
kk = idxj + ro; double fracr = idxj + ro - kr;
fracr = idxj + ro - kk; // check whether the pixel is out of the interpolation region
if ((kk < SINC_HALF) || (kk >= (ini[1]-SINC_HALF))) flag = true; if ((ka < SINC_HALF) || ( ka >= (ini[0]-SINC_HALF))
|| (kr < SINC_HALF) || (kr >= (ini[1]-SINC_HALF)))
{
// out of range
inData.imgOut[pix] = make_cuFloatComplex(0., 0.);
return;
}
if (!flag) { // in range, continue
dop = evalPolyAt(inData.dopPoly, idxi+1, idxj+1);
/* // evaluate the doppler phase at the secondary coordinate
for (ii=0; ii<SINC_ONE; ii++) { double dop = evalPolyAt(inData.dopPoly, idxi+1+ao, idxj+1+ro);
chipi = k - ini[3] + ii - SINC_HALF;
cval = make_cuFloatComplex(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
chip[IDX1D(ii,jj,SINC_ONE)] = cuCmulf(inData.imgIn[IDX1D(chipi,chipj,ini[1])], cval);
}
}
top = k - ini[3] - SINC_HALF; // phase corrections to be added later
left = kk - SINC_HALF; double ph = (dop * fraca) + evalPolyAt(inData.rgCarrierPoly, idxi+1+ao, idxj+1+ro) +
topLeft = IDX1D(top,left,ini[1]); evalPolyAt(inData.azCarrierPoly, idxi+1+ao, idxj+1+ro);
*/
ph = (dop * fraca) + evalPolyAt(inData.rgCarrierPoly, idxi+ao, idxj+ro) + evalPolyAt(inData.azCarrierPoly, idxi+ao, idxj+ro); // if flatten
if (ini[7] == 1)
ph = ph + ((4.*(M_PI/ind[0]))*((ind[2]-ind[3])+(idxj*(ind[4]-ind[5]))+(ro*ind[4])))
+((4.*M_PI*(ind[3]+(idxj*ind[5])))*((1./ind[1])-(1./ind[0])));
if (ini[7] == 1) ph = modulo_f(ph, 2.*M_PI);
ph = ph + ((4.*(M_PI/ind[0]))*((ind[2]-ind[3])+(idxj*(ind[4]-ind[5]))+(ro*ind[4])))+((4.*M_PI*(ind[3]+(idxj*ind[5])))*((1./ind[1])-(1./ind[0])));
ph = modulo_f(ph, 2.*M_PI); // temp variable to keep track of the interpolated value
// NOTE: This has been modified to pass the pointer to the "top left" location of what used to be the 'chip' copy of data from imgIn. This cuFloatComplex cval = make_cuFloatComplex(0.,0.);
// saves allocating 81 extra floats per pixel (since chip is SINC_ONE*SINC_ONE), and instead uses logic to determine the offsets. The // get the indices in the sinfc_coef of the fractional parts
// logic simply takes the minimum values of chipi and chipj above and adds the IDX1D index to the pointer. This means that sinc_interp int ifraca = int(fraca*SINC_SUB);
// will pass this pointer as "chip" and it will still point to the correct values (since chip is just a 2D window subset of imgIn). We int ifracr = int(fracr*SINC_SUB);
// Also have to account for 'cval' by modifying the sinc_interp function to calculate 'cval' dynamically (no extra cost computationally).
// This should actually speed up the kernel as well as significantly reduce redundant data usage. // weight for sinfc interp coefficients
cval = sinc_interp((inData.imgIn + IDX1D(k-ini[3]-SINC_HALF,kk-SINC_HALF,ini[1])), fraca, fracr, dop, inData.fintp); float weightsum = 0.;
inData.imgOut[pix] = cuCmulf(cval, make_cuFloatComplex(cos(ph), sin(ph)));
} // iterate over the interpolation zone, e.g. [-3, 4] x [-3, 4] for SINC_LEN = 8
for (int i=-SINC_HALF+1; i<=SINC_HALF; i++) {
cuFloatComplex cdop = make_cuFloatComplex(cosf(i*dop), -sinf(i*dop));
for (int j=-SINC_HALF+1; j<=SINC_HALF; j++) {
float weight = inData.fintp[IDX1D(ifraca,SINC_HALF-i,SINC_LEN)]
*inData.fintp[IDX1D(ifracr,SINC_HALF-j,SINC_LEN)];
// correct the doppler phase here
cuFloatComplex cin = cuCmulf(inData.imgIn[IDX1D(i+ka,j+kr,ini[1])], cdop);
cval = cuCaddf(cval, make_cuFloatComplex(cuCrealf(cin)*weight, cuCimagf(cin)*weight));
weightsum += weight;
} }
} }
// normalize
cval = make_cuFloatComplex(cuCrealf(cval)/weightsum, cuCimagf(cval)/weightsum);
// phase correction
cval = cuCmulf(cval, make_cuFloatComplex(cosf(ph), sinf(ph)));
// assign and return
inData.imgOut[pix] = cval;
} }
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
@ -189,7 +188,7 @@ __global__ void GPUResamp(struct InputData inData) {
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
double cpuSecond() { double cpuSecond() {
struct timeval tp; struct timeval tp;
gettimeofday(&tp,NULL); gettimeofday(&tp,NULL);
return (double(tp.tv_sec) + double(tp.tv_usec)*1.e-6); return (double(tp.tv_sec) + double(tp.tv_usec)*1.e-6);
@ -208,8 +207,10 @@ void checkKernelErrors() {
// Main CPU Function // Main CPU Function
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgOut, double *residAz, double *residRg, double *azOffPoly, double *rgOffPoly, void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgOut,
double *dopPoly, double *azCarrierPoly, double *rgCarrierPoly, float *fintp) { float *residAz, float *residRg, double *azOffPoly, double *rgOffPoly,
double *dopPoly, double *azCarrierPoly, double *rgCarrierPoly, float *fintp)
{
/* * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * *
* Input mapping - * Input mapping -
* *
@ -237,14 +238,15 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
// Create handles for device copies of inputs // Create handles for device copies of inputs
cuFloatComplex *d_imgIn, *d_imgOut; cuFloatComplex *d_imgIn, *d_imgOut;
double *d_residAz, *d_residRg, *d_azOffPoly, *d_rgOffPoly, *d_dopPoly, *d_azCarrierPoly, *d_rgCarrierPoly; float *d_residAz, *d_residRg;
double *d_azOffPoly, *d_rgOffPoly, *d_dopPoly, *d_azCarrierPoly, *d_rgCarrierPoly;
float *d_fintp; float *d_fintp;
double startRun, endRun, startKernel, endKernel; double startRun, endRun, startKernel, endKernel;
struct InputData inData; struct InputData inData;
printf("\n Initializing GPU ResampSlc\n"); printf("\n Initializing GPU ResampSlc\n");
cudaSetDevice(0); cudaSetDevice(0);
@ -267,18 +269,18 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
size_t nb_in = nInPix * sizeof(cuFloatComplex); size_t nb_in = nInPix * sizeof(cuFloatComplex);
size_t nb_out = nOutPix * sizeof(cuFloatComplex); size_t nb_out = nOutPix * sizeof(cuFloatComplex);
size_t nb_rsdAz = nResidAzPix * sizeof(double); size_t nb_rsdAz = nResidAzPix * sizeof(float);
size_t nb_rsdRg = nResidRgPix * sizeof(double); size_t nb_rsdRg = nResidRgPix * sizeof(float);
size_t nb_azOff = nAzOffPix * sizeof(double); size_t nb_azOff = nAzOffPix * sizeof(double);
size_t nb_rgOff = nRgOffPix * sizeof(double); size_t nb_rgOff = nRgOffPix * sizeof(double);
size_t nb_dop = nDopPix * sizeof(double); size_t nb_dop = nDopPix * sizeof(double);
size_t nb_azCarry = nAzCarryPix * sizeof(double); size_t nb_azCarry = nAzCarryPix * sizeof(double);
size_t nb_rgCarry = nRgCarryPix * sizeof(double); size_t nb_rgCarry = nRgCarryPix * sizeof(double);
cudaMalloc((cuFloatComplex**)&d_imgIn, nb_in); cudaMalloc((cuFloatComplex**)&d_imgIn, nb_in);
cudaMalloc((cuFloatComplex**)&d_imgOut, nb_out); cudaMalloc((cuFloatComplex**)&d_imgOut, nb_out);
if (residAz != 0) cudaMalloc((double**)&d_residAz, nb_rsdAz); if (residAz != 0) cudaMalloc((float**)&d_residAz, nb_rsdAz);
if (residRg != 0) cudaMalloc((double**)&d_residRg, nb_rsdRg); if (residRg != 0) cudaMalloc((float**)&d_residRg, nb_rsdRg);
cudaMalloc((double**)&d_azOffPoly, nb_azOff); cudaMalloc((double**)&d_azOffPoly, nb_azOff);
cudaMalloc((double**)&d_rgOffPoly, nb_rgOff); cudaMalloc((double**)&d_rgOffPoly, nb_rgOff);
cudaMalloc((double**)&d_dopPoly, nb_dop); cudaMalloc((double**)&d_dopPoly, nb_dop);
@ -309,10 +311,7 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
endKernel = cpuSecond(); endKernel = cpuSecond();
printf("Done. (%f s.)\n", (endKernel-startKernel)); printf("Done. (%f s.)\n", (endKernel-startKernel));
dim3 block(32);
dim3 grid(int((nInPix + 31) / 32));
if ((grid.x * 32) > nInPix) printf(" (DEBUG: There will be %d 'empty' threads in the last thread block).\n", ((grid.x*32)-nInPix));
printf(" Running GPU ResampSlc... "); printf(" Running GPU ResampSlc... ");
fflush(stdout); fflush(stdout);
@ -332,11 +331,18 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
inData.rgCarrierPoly = d_rgCarrierPoly; inData.rgCarrierPoly = d_rgCarrierPoly;
inData.fintp = d_fintp; inData.fintp = d_fintp;
GPUResamp <<<grid, block>>>(inData); // remove carriers from the input image
int threads = 1024;
int blocks = (nInPix + threads-1) / threads;
removeCarrier<<<blocks, threads>>>(inData);
checkKernelErrors();
// resample
blocks = (nOutPix + threads -1) / threads;
GPUResamp <<<blocks, threads>>>(inData);
checkKernelErrors(); checkKernelErrors();
endKernel = cpuSecond(); endKernel = cpuSecond();
printf("Done. (%f s.)\n", (endKernel-startKernel)); printf("Done. (%f s.)\n", (endKernel-startKernel));
printf(" Copying memory back to host... "); printf(" Copying memory back to host... ");
@ -364,6 +370,6 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
cudaFree(d_rgCarrierPoly); cudaFree(d_rgCarrierPoly);
cudaFree(d_fintp); cudaFree(d_fintp);
cudaDeviceReset(); cudaDeviceReset();
printf(" Exiting GPU ResampSlc\n\n"); printf(" Exiting GPU ResampSlc\n\n");
} }

View File

@ -6,6 +6,6 @@
#ifndef GPU_RESAMP_H #ifndef GPU_RESAMP_H
#define GPU_RESAMP_H #define GPU_RESAMP_H
void runGPUResamp(double*,int*,void*,void*,double*,double*,double*,double*,double*,double*,double*,float*); void runGPUResamp(double*,int*,void*,void*,float*,float*,double*,double*,double*,double*,double*,float*);
#endif #endif

View File

@ -15,18 +15,18 @@ struct Interpolator {
template<class U> template<class U>
static U bilinear(double,double,std::vector<std::vector<U>>&); static U bilinear(double,double,std::vector<std::vector<U>>&);
template<class U> template<class U>
static U bicubic(double,double,std::vector<std::vector<U>>&); static U bicubic(double,double,std::vector<std::vector<U>>&);
static void sinc_coef(double,double,int,double,int,int&,int&,std::vector<double>&); static void sinc_coef(double,double,int,double,int,int&,int&,std::vector<double>&);
template<class U, class V> template<class U, class V>
static U sinc_eval(std::vector<U>&,std::vector<V>&,int,int,int,double,int); static U sinc_eval(std::vector<U>&,std::vector<V>&,int,int,int,double,int);
template<class U, class V> template<class U, class V>
static U sinc_eval_2d(std::vector<std::vector<U>>&,std::vector<V>&,int,int,int,int,double,double,int,int); static U sinc_eval_2d(std::vector<std::vector<U>>&,std::vector<V>&,int,int,int,int,double,double,int,int);
static float interp_2d_spline(int,int,int,std::vector<std::vector<float>>&,double,double); static float interp_2d_spline(int,int,int,std::vector<std::vector<float>>&,double,double);
static double quadInterpolate(std::vector<double>&,std::vector<double>&,double); static double quadInterpolate(std::vector<double>&,std::vector<double>&,double);
static double akima(int,int,std::vector<std::vector<float>>&,double,double); static double akima(int,int,std::vector<std::vector<float>>&,double,double);

View File

@ -14,7 +14,7 @@ using std::vector;
struct ResampMethods { struct ResampMethods {
vector<float> fintp; vector<float> fintp;
float f_delay; float f_delay;
ResampMethods(); ResampMethods();
void prepareMethods(int); void prepareMethods(int);
complex<float> interpolate_cx(vector<vector<complex<float> > >&,int,int,double,double,int,int,int); complex<float> interpolate_cx(vector<vector<complex<float> > >&,int,int,double,double,int,int,int);

View File

@ -34,6 +34,9 @@ struct ResampSlc {
void clearPolys(); void clearPolys();
void resetPolys(); void resetPolys();
void resamp(); void resamp();
void _resamp_cpu();
void _resamp_gpu();
}; };
#endif #endif

View File

@ -24,7 +24,7 @@ using std::vector;
template<class U> template<class U>
U Interpolator::bilinear(double x, double y, vector<vector<U>> &z) { U Interpolator::bilinear(double x, double y, vector<vector<U>> &z) {
int x1 = floor(x); int x1 = floor(x);
int x2 = ceil(x); int x2 = ceil(x);
int y1 = ceil(y); int y1 = ceil(y);
@ -54,7 +54,7 @@ template float Interpolator::bilinear(double,double,vector<vector<float>>&);
template<class U> template<class U>
U Interpolator::bicubic(double x, double y, vector<vector<U>> &z) { U Interpolator::bicubic(double x, double y, vector<vector<U>> &z) {
vector<vector<float>> wt = {{1.0, 0.0,-3.0, 2.0, 0.0, 0.0, 0.0, 0.0,-3.0, 0.0, 9.0,-6.0, 2.0, 0.0,-6.0, 4.0}, vector<vector<float>> wt = {{1.0, 0.0,-3.0, 2.0, 0.0, 0.0, 0.0, 0.0,-3.0, 0.0, 9.0,-6.0, 2.0, 0.0,-6.0, 4.0},
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0,-9.0, 6.0,-2.0, 0.0, 6.0,-4.0}, {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0,-9.0, 6.0,-2.0, 0.0, 6.0,-4.0},
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0,-6.0, 0.0, 0.0,-6.0, 4.0}, {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0,-6.0, 0.0, 0.0,-6.0, 4.0},
@ -76,9 +76,9 @@ U Interpolator::bicubic(double x, double y, vector<vector<U>> &z) {
int x2 = ceil(x) - 1; int x2 = ceil(x) - 1;
int y1 = floor(y) - 1; int y1 = floor(y) - 1;
int y2 = ceil(y) - 1; int y2 = ceil(y) - 1;
vector<U> zz = {z[y1][x1], z[y1][x2], z[y2][x2], z[y2][x1]}; vector<U> zz = {z[y1][x1], z[y1][x2], z[y2][x2], z[y2][x1]};
vector<U> dzdx = {(z[y1][x1+1] - z[y1][x1-1]) / static_cast<U>(2.0), (z[y1][x2+1] - z[y1][x2-1]) / static_cast<U>(2.0), vector<U> dzdx = {(z[y1][x1+1] - z[y1][x1-1]) / static_cast<U>(2.0), (z[y1][x2+1] - z[y1][x2-1]) / static_cast<U>(2.0),
(z[y2][x2+1] - z[y2][x2-1]) / static_cast<U>(2.0), (z[y2][x1+1] - z[y2][x1-1]) / static_cast<U>(2.0)}; (z[y2][x2+1] - z[y2][x2-1]) / static_cast<U>(2.0), (z[y2][x1+1] - z[y2][x1-1]) / static_cast<U>(2.0)};
vector<U> dzdy = {(z[y1+1][x1] - z[y1-1][x1]) / static_cast<U>(2.0), (z[y1+1][x2+1] - z[y1-1][x2]) / static_cast<U>(2.0), vector<U> dzdy = {(z[y1+1][x1] - z[y1-1][x1]) / static_cast<U>(2.0), (z[y1+1][x2+1] - z[y1-1][x2]) / static_cast<U>(2.0),
(z[y2+1][x2+1] - z[y2-1][x2]) / static_cast<U>(2.0), (z[y2+1][x1+1] - z[y2-1][x1]) / static_cast<U>(2.0)}; (z[y2+1][x2+1] - z[y2-1][x2]) / static_cast<U>(2.0), (z[y2+1][x1+1] - z[y2-1][x1]) / static_cast<U>(2.0)};
@ -86,7 +86,7 @@ U Interpolator::bicubic(double x, double y, vector<vector<U>> &z) {
static_cast<U>(.25)*(z[y1+1][x2+1] - z[y1-1][x2+1] - z[y1+1][x2-1] + z[y1-1][x2-1]), static_cast<U>(.25)*(z[y1+1][x2+1] - z[y1-1][x2+1] - z[y1+1][x2-1] + z[y1-1][x2-1]),
static_cast<U>(.25)*(z[y2+1][x2+1] - z[y2-1][x2+1] - z[y2+1][x2-1] + z[y2-1][x2-1]), static_cast<U>(.25)*(z[y2+1][x2+1] - z[y2-1][x2+1] - z[y2+1][x2-1] + z[y2-1][x2-1]),
static_cast<U>(.25)*(z[y2+1][x1+1] - z[y2-1][x1+1] - z[y2+1][x1-1] + z[y2-1][x1-1])}; static_cast<U>(.25)*(z[y2+1][x1+1] - z[y2-1][x1+1] - z[y2+1][x1-1] + z[y2-1][x1-1])};
vector<U> q(16); vector<U> q(16);
for (int i=0; i<4; i++) { for (int i=0; i<4; i++) {
q[i] = zz[i]; q[i] = zz[i];
@ -101,7 +101,7 @@ U Interpolator::bicubic(double x, double y, vector<vector<U>> &z) {
c[i] += static_cast<U>(wt[i][j]) * q[j]; c[i] += static_cast<U>(wt[i][j]) * q[j];
} }
} }
U t = x - x1; U t = x - x1;
U u = y - y1; U u = y - y1;
U ret = 0.; U ret = 0.;
@ -116,17 +116,18 @@ template float Interpolator::bicubic(double,double,vector<vector<float>>&);
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
void Interpolator::sinc_coef(double beta, double relfiltlen, int decfactor, double pedestal, int weight, int &intplength, int &filtercoef, vector<double> &filter) { void Interpolator::sinc_coef(double beta, double relfiltlen, int decfactor, double pedestal, int weight,
int &intplength, int &filtercoef, vector<double> &filter) {
intplength = round(relfiltlen / beta);
intplength = rint(relfiltlen / beta);
filtercoef = intplength * decfactor; filtercoef = intplength * decfactor;
double wgthgt = 1. - (pedestal / 2.); double wgthgt = (1. - pedestal) / 2.;
double soff = (filtercoef - 1.) / 2.; double soff = filtercoef / 2.;
double wgt, s, fct; double wgt, s, fct;
for (int i=0; i<filtercoef; i++) { for (int i=0; i<filtercoef; i++) {
wgt = (1. - wgthgt) + (wgthgt * cos((M_PI * (i - soff)) / soff)); wgt = (1. - wgthgt) + wgthgt * cos(M_PI * (i - soff) / soff);
s = (floor(i - soff) * beta) / (1. * decfactor); s = (i - soff) * beta / (1. * decfactor);
if (s != 0.) fct = sin(M_PI * s) / (M_PI * s); if (s != 0.) fct = sin(M_PI * s) / (M_PI * s);
else fct = 1.; else fct = 1.;
if (weight == 1) filter[i] = fct * wgt; if (weight == 1) filter[i] = fct * wgt;
@ -138,7 +139,7 @@ void Interpolator::sinc_coef(double beta, double relfiltlen, int decfactor, doub
template<class U, class V> template<class U, class V>
U Interpolator::sinc_eval(vector<U> &arr, vector<V> &intarr, int idec, int ilen, int intp, double frp, int nsamp) { U Interpolator::sinc_eval(vector<U> &arr, vector<V> &intarr, int idec, int ilen, int intp, double frp, int nsamp) {
U ret = 0.; U ret = 0.;
if ((intp >= (ilen-1)) && (intp < nsamp)) { if ((intp >= (ilen-1)) && (intp < nsamp)) {
int ifrc = min(max(0, int(frp*idec)), idec-1); int ifrc = min(max(0, int(frp*idec)), idec-1);
@ -160,7 +161,7 @@ template float Interpolator::sinc_eval(vector<float>&,vector<float>&,int,int,int
template<class U, class V> template<class U, class V>
U Interpolator::sinc_eval_2d(vector<vector<U>> &arrin, vector<V> &intarr, int idec, int ilen, int intpx, int intpy, double frpx, double frpy, int xlen, int ylen) { U Interpolator::sinc_eval_2d(vector<vector<U>> &arrin, vector<V> &intarr, int idec, int ilen, int intpx, int intpy, double frpx, double frpy, int xlen, int ylen) {
U ret(0.); U ret(0.);
if ((intpx >= (ilen-1)) && (intpx < xlen) && (intpy >= (ilen-1)) && (intpy < ylen)) { if ((intpx >= (ilen-1)) && (intpx < xlen) && (intpy >= (ilen-1)) && (intpy < ylen)) {
int ifracx = min(max(0, int(frpx*idec)), idec-1); int ifracx = min(max(0, int(frpx*idec)), idec-1);
@ -263,7 +264,7 @@ double Interpolator::quadInterpolate(vector<double> &x, vector<double> &y, doubl
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
double Interpolator::akima(int nx, int ny, vector<vector<float>> &z, double x, double y) { double Interpolator::akima(int nx, int ny, vector<vector<float>> &z, double x, double y) {
vector<vector<double>> sx(2,vector<double>(2)), sy(2,vector<double>(2)), sxy(2,vector<double>(2)), e(2,vector<double>(2)); vector<vector<double>> sx(2,vector<double>(2)), sy(2,vector<double>(2)), sxy(2,vector<double>(2)), e(2,vector<double>(2));
vector<double> m(4); vector<double> m(4);
double wx2,wx3,wy2,wy3; double wx2,wx3,wy2,wy3;
@ -339,7 +340,7 @@ double Interpolator::akima(int nx, int ny, vector<vector<float>> &z, double x, d
poly[13] = -((3 * (z[ix-1][iy-1] - z[ix-1][iy])) + ((2 * sy[0][0]) + sy[0][1])); poly[13] = -((3 * (z[ix-1][iy-1] - z[ix-1][iy])) + ((2 * sy[0][0]) + sy[0][1]));
poly[14] = sy[0][0]; poly[14] = sy[0][0];
poly[15] = z[ix-1][iy-1]; poly[15] = z[ix-1][iy-1];
//return polyvalAkima(int(x),int(y),x,y,poly); //return polyvalAkima(int(x),int(y),x,y,poly);
m[0] = (((((poly[0] * (y - iy)) + poly[1]) * (y - iy)) + poly[2]) * (y - iy)) + poly[3]; m[0] = (((((poly[0] * (y - iy)) + poly[1]) * (y - iy)) + poly[2]) * (y - iy)) + poly[3];
m[1] = (((((poly[4] * (y - iy)) + poly[5]) * (y - iy)) + poly[6]) * (y - iy)) + poly[7]; m[1] = (((((poly[4] * (y - iy)) + poly[5]) * (y - iy)) + poly[6]) * (y - iy)) + poly[7];

View File

@ -19,7 +19,7 @@ ResampMethods::ResampMethods() {
void ResampMethods::prepareMethods(int method) { void ResampMethods::prepareMethods(int method) {
if (method == SINC_METHOD) { if (method == SINC_METHOD) {
vector<double> filter((SINC_SUB*SINC_LEN)+1); vector<double> filter(SINC_SUB*SINC_LEN);
double ssum; double ssum;
int intplength, filtercoef; int intplength, filtercoef;
Interpolator interp; Interpolator interp;
@ -28,16 +28,7 @@ void ResampMethods::prepareMethods(int method) {
interp.sinc_coef(1.,SINC_LEN,SINC_SUB,0.,1,intplength,filtercoef,filter); interp.sinc_coef(1.,SINC_LEN,SINC_SUB,0.,1,intplength,filtercoef,filter);
// Normalize filter // note also the type conversion
for (int i=0; i<SINC_SUB; i++) {
ssum = 0.;
for (int j=0; j<SINC_LEN; j++) {
ssum = ssum + filter[i+(j*SINC_SUB)];
}
for (int j=0; j<SINC_LEN; j++) {
filter[i+(j*SINC_SUB)] = filter[i+(j*SINC_SUB)] / ssum;
}
}
fintp.resize(SINC_SUB*SINC_LEN); fintp.resize(SINC_SUB*SINC_LEN);
for (int i=0; i<SINC_LEN; i++) { for (int i=0; i<SINC_LEN; i++) {
for (int j=0; j<SINC_SUB; j++) { for (int j=0; j<SINC_SUB; j++) {

View File

@ -144,8 +144,25 @@ void copyPolyToArr(Poly2d *poly, vector<double> &destArr) {
for (int i=0; i<((destArr[0]+1)*(destArr[1]+1)); i++) destArr[6+i] = poly->coeffs[i]; for (int i=0; i<((destArr[0]+1)*(destArr[1]+1)); i++) destArr[6+i] = poly->coeffs[i];
} }
void ResampSlc::resamp() {
// wrapper for calling cpu or gpu methods
void ResampSlc::resamp()
{
#ifndef GPU_ACC_ENABLED
usr_enable_gpu = false;
#endif
if (usr_enable_gpu) {
_resamp_gpu();
}
else {
_resamp_cpu();
}
}
// not checked
void ResampSlc::_resamp_cpu() {
vector<double> residAz(outWidth,0.), residRg(outWidth,0.); vector<double> residAz(outWidth,0.), residRg(outWidth,0.);
double ro, ao, ph, dop, fracr, fraca, t0, k, kk; double ro, ao, ph, dop, fracr, fraca, t0, k, kk;
@ -167,9 +184,6 @@ void ResampSlc::resamp() {
if (residAzAccessor != 0) residAzAccObj = (DataAccessor*)residAzAccessor; if (residAzAccessor != 0) residAzAccObj = (DataAccessor*)residAzAccessor;
else residAzAccObj = NULL; else residAzAccObj = NULL;
#ifndef GPU_ACC_ENABLED
usr_enable_gpu = false;
#endif
// Moving this here so we don't waste any time // Moving this here so we don't waste any time
if (!isComplex) { if (!isComplex) {
@ -220,7 +234,7 @@ void ResampSlc::resamp() {
// index from the reference input image). Then we eval the bottom 40 lines worth of pixel offsets and find the largest // index from the reference input image). Then we eval the bottom 40 lines worth of pixel offsets and find the largest
// positive row shift (the maximum row index). This gives us the number of lines to read, as well as the firstImageRow // positive row shift (the maximum row index). This gives us the number of lines to read, as well as the firstImageRow
// reference offset for the tile. Note that firstImageRow/lastImageRow are row indices relative to the reference input image. // reference offset for the tile. Note that firstImageRow/lastImageRow are row indices relative to the reference input image.
printf("Reading in image data for tile %d\n", tile); printf("Reading in image data for tile %d\n", tile);
firstImageRow = outLength - 1; // Initialize to last image row firstImageRow = outLength - 1; // Initialize to last image row
@ -232,9 +246,9 @@ void ResampSlc::resamp() {
imgLine = int(i+ao) - SINC_HALF; imgLine = int(i+ao) - SINC_HALF;
firstImageRow = min(firstImageRow, imgLine); // Set the first input image line idx to the smallest value firstImageRow = min(firstImageRow, imgLine); // Set the first input image line idx to the smallest value
} }
} }
firstImageRow = max(firstImageRow, 0); // firstImageRow now has the lowest image row called in the tile processing firstImageRow = max(firstImageRow, 0); // firstImageRow now has the lowest image row called in the tile processing
lastImageRow = 0; // Initialize to first image row lastImageRow = 0; // Initialize to first image row
for (int i=(firstTileRow+LINES_PER_TILE-40); i<(firstTileRow+LINES_PER_TILE); i++) { // Iterate over last 40 lines of tile for (int i=(firstTileRow+LINES_PER_TILE-40); i<(firstTileRow+LINES_PER_TILE); i++) { // Iterate over last 40 lines of tile
if (residAzAccessor != 0) residAzAccObj->getLine((char *)&residAz[0], i); // Read in azimuth residual if (residAzAccessor != 0) residAzAccObj->getLine((char *)&residAz[0], i); // Read in azimuth residual
@ -254,7 +268,7 @@ void ResampSlc::resamp() {
if (imgIn.size() < size_t(nRowsInBlock*inWidth)) imgIn.resize(nRowsInBlock*inWidth); if (imgIn.size() < size_t(nRowsInBlock*inWidth)) imgIn.resize(nRowsInBlock*inWidth);
for (int i=0; i<nRowsInBlock; i++) { // Read in nRowsInBlock lines of data from the input image to the image block for (int i=0; i<nRowsInBlock; i++) { // Read in nRowsInBlock lines of data from the input image to the image block
slcInAccObj->getLine((char *)&(imgIn[IDX1D(i,0,inWidth)]), firstImageRow+i); // Sets imgIn[0] == reference_image[firstImageRow] slcInAccObj->getLine((char *)&(imgIn[IDX1D(i,0,inWidth)]), firstImageRow+i); // Sets imgIn[0] == reference_image[firstImageRow]
// Remove the carriers using OpenMP acceleration // Remove the carriers using OpenMP acceleration
#pragma omp parallel for private(ph) #pragma omp parallel for private(ph)
for (int j=0; j<inWidth; j++) { for (int j=0; j<inWidth; j++) {
@ -262,133 +276,73 @@ void ResampSlc::resamp() {
imgIn[IDX1D(i,j,inWidth)] = imgIn[IDX1D(i,j,inWidth)] * complex<float>(cos(ph), -sin(ph)); // Remove the carrier imgIn[IDX1D(i,j,inWidth)] = imgIn[IDX1D(i,j,inWidth)] * complex<float>(cos(ph), -sin(ph)); // Remove the carrier
} }
} }
// Loop over lines // Loop over lines
printf("Interpolating tile %d\n", tile); printf("Interpolating tile %d\n", tile);
if (usr_enable_gpu) {
#ifdef GPU_ACC_ENABLED
vector<complex<float> > imgOutBlock(LINES_PER_TILE*outWidth);
vector<double> residAzBlock(1,0.); // Dummy length to use for GPU // Interpolation of the complex image. Note that we don't need to make very many changes to the original code in this loop
double *residAzPtr = 0; // since the i-index should numerically match the original i-index
if (residAzAccessor != 0) { for (int i=firstTileRow; i<(firstTileRow+LINES_PER_TILE); i++) {
residAzBlock.resize(LINES_PER_TILE*outWidth); // GetLineSequential is fine here, we don't need specific lines, just continue grabbing them
for (int i=0; i<LINES_PER_TILE; i++) residAzAccObj->getLineSequential((char *)&residAzBlock[i*LINES_PER_TILE]); if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
residAzPtr = &residAzBlock[0]; if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
}
vector<double> residRgBlock(1,0.);
double *residRgPtr = 0;
if (residRgAccessor != 0) {
residRgBlock.resize(LINES_PER_TILE*outWidth);
for (int i=0; i<LINES_PER_TILE; i++) residRgAccObj->getLineSequential((char *)&residRgBlock[i*LINES_PER_TILE]);
residRgPtr = &residRgBlock[0];
}
vector<double> azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6); #pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
vector<double> rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6); firstprivate(chip)
vector<double> dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6); for (int j=0; j<outWidth; j++) {
vector<double> azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6);
vector<double> rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6);
copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...] ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
copyPolyToArr(rgOffsetsPoly, rgOffPolyArr); ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
copyPolyToArr(dopplerPoly, dopPolyArr);
copyPolyToArr(azCarrier, azCarPolyArr);
copyPolyToArr(rgCarrier, rgCarPolyArr);
double gpu_inputs_d[6]; fraca = modf(i+ao, &k);
int gpu_inputs_i[8]; if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
gpu_inputs_d[0] = wvl; fracr = modf(j+ro, &kk);
gpu_inputs_d[1] = refwvl; if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
gpu_inputs_d[2] = r0;
gpu_inputs_d[3] = refr0;
gpu_inputs_d[4] = slr;
gpu_inputs_d[5] = refslr;
gpu_inputs_i[0] = inLength; dop = dopplerPoly->eval(i+1,j+1);
gpu_inputs_i[1] = inWidth;
gpu_inputs_i[2] = outWidth;
gpu_inputs_i[3] = firstImageRow;
gpu_inputs_i[4] = firstTileRow;
gpu_inputs_i[5] = nRowsInBlock;
gpu_inputs_i[6] = LINES_PER_TILE;
gpu_inputs_i[7] = int(flatten);
// 1000 lines per tile is peanuts for this algorithm to run (for test set is only 20M pixels) // Data chip without the carriers
for (int ii=0; ii<SINC_ONE; ii++) {
runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOutBlock[0], residAzPtr, residRgPtr, // Subtracting off firstImageRow removes the offset from the first row in the reference
&azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0], // image to the first row actually contained in imgIn
&(rMethods.fintp[0])); chipi = k - firstImageRow + ii - SINC_HALF;
cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (int i=0; i<LINES_PER_TILE; i++) slcOutAccObj->setLineSequential((char *)&imgOutBlock[i*outWidth]); for (int jj=0; jj<SINC_ONE; jj++) {
#endif chipj = kk + jj - SINC_HALF;
} else { // Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
// Interpolation of the complex image. Note that we don't need to make very many changes to the original code in this loop
// since the i-index should numerically match the original i-index
for (int i=firstTileRow; i<(firstTileRow+LINES_PER_TILE); i++) {
// GetLineSequential is fine here, we don't need specific lines, just continue grabbing them
if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
#pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
firstprivate(chip)
for (int j=0; j<outWidth; j++) {
ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
fraca = modf(i+ao, &k);
if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
fracr = modf(j+ro, &kk);
if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
dop = dopplerPoly->eval(i+1,j+1);
// Data chip without the carriers
for (int ii=0; ii<SINC_ONE; ii++) {
// Subtracting off firstImageRow removes the offset from the first row in the reference
// image to the first row actually contained in imgIn
chipi = k - firstImageRow + ii - SINC_HALF;
cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (int jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
// Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
}
} }
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
} }
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
} }
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
} }
} }
// And if there is a final partial tile... // And if there is a final partial tile...
if (lastLines > 0) { if (lastLines > 0) {
firstTileRow = nTiles * LINES_PER_TILE; firstTileRow = nTiles * LINES_PER_TILE;
nRowsInTile = outLength - firstTileRow + 1; // NOT EQUIVALENT TO NUMBER OF ROWS IN IMAGE BLOCK nRowsInTile = outLength - firstTileRow ; // NOT EQUIVALENT TO NUMBER OF ROWS IN IMAGE BLOCK
printf("Reading in image data for final partial tile\n"); printf("Reading in image data for final partial tile\n");
firstImageRow = outLength - 1; firstImageRow = outLength - 1;
for (int i=firstTileRow; i<min(firstTileRow+40,outLength); i++) { // Make sure if nRowsInTile < 40 to not read too many lines for (int i=firstTileRow; i<min(firstTileRow+40,outLength); i++) { // Make sure if nRowsInTile < 40 to not read too many lines
if (residAzAccessor != 0) residAzAccObj->getLine((char *)&residAz[0], i); if (residAzAccessor != 0) residAzAccObj->getLine((char *)&residAz[0], i);
@ -407,7 +361,7 @@ void ResampSlc::resamp() {
ao = azOffsetsPoly->eval(i+1, j+1) + residAz[j]; ao = azOffsetsPoly->eval(i+1, j+1) + residAz[j];
imgLine = int(i+ao) + SINC_HALF; imgLine = int(i+ao) + SINC_HALF;
lastImageRow = max(lastImageRow, imgLine); lastImageRow = max(lastImageRow, imgLine);
} }
} }
lastImageRow = min(lastImageRow, inLength-1); lastImageRow = min(lastImageRow, inLength-1);
@ -426,119 +380,166 @@ void ResampSlc::resamp() {
printf("Interpolating final partial tile\n"); printf("Interpolating final partial tile\n");
if (usr_enable_gpu) {
#ifdef GPU_ACC_ENABLED
vector<complex<float> > imgOutBlock(nRowsInTile*outWidth);
vector<double> residAzBlock(1,0.); // Dummy length to use for GPU for (int i=firstTileRow; i<(firstTileRow+nRowsInTile); i++) {
double *residAzPtr = 0;
if (residAzAccessor != 0) {
residAzBlock.resize(nRowsInTile*outWidth);
for (int i=0; i<nRowsInTile; i++) residAzAccObj->getLineSequential((char *)&residAzBlock[i*nRowsInTile]);
residAzPtr = &residAzBlock[0];
}
vector<double> residRgBlock(1,0.);
double *residRgPtr = 0;
if (residRgAccessor != 0) {
residRgBlock.resize(nRowsInTile*outWidth);
for (int i=0; i<nRowsInTile; i++) residRgAccObj->getLineSequential((char *)&residRgBlock[i*nRowsInTile]);
residRgPtr = &residRgBlock[0];
}
vector<double> azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6); if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
vector<double> rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6); if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
vector<double> dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6);
vector<double> azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6);
vector<double> rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6);
copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...] #pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
copyPolyToArr(rgOffsetsPoly, rgOffPolyArr); firstprivate(chip)
copyPolyToArr(dopplerPoly, dopPolyArr); for (int j=0; j<outWidth; j++) {
copyPolyToArr(azCarrier, azCarPolyArr);
copyPolyToArr(rgCarrier, rgCarPolyArr);
double gpu_inputs_d[6]; ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
int gpu_inputs_i[8]; ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
gpu_inputs_d[0] = wvl; fraca = modf(i+ao, &k);
gpu_inputs_d[1] = refwvl; if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
gpu_inputs_d[2] = r0;
gpu_inputs_d[3] = refr0;
gpu_inputs_d[4] = slr;
gpu_inputs_d[5] = refslr;
gpu_inputs_i[0] = inLength; fracr = modf(j+ro, &kk);
gpu_inputs_i[1] = inWidth; if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
gpu_inputs_i[2] = outWidth;
gpu_inputs_i[3] = firstImageRow;
gpu_inputs_i[4] = firstTileRow;
gpu_inputs_i[5] = nRowsInBlock;
gpu_inputs_i[6] = nRowsInTile;
gpu_inputs_i[7] = int(flatten);
// 1000 lines per tile is peanuts for this algorithm to run (for test set is only 20M pixels) dop = dopplerPoly->eval(i+1,j+1);
runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOutBlock[0], residAzPtr, residRgPtr, // Data chip without the carriers
&azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0], for (int ii=0; ii<SINC_ONE; ii++) {
&(rMethods.fintp[0])); // Subtracting off firstImageRow removes the offset from the first row in the reference
// image to the first row actually contained in imgIn
for (int i=0; i<nRowsInTile; i++) slcOutAccObj->setLineSequential((char *)&imgOutBlock[i*outWidth]); chipi = k - firstImageRow + ii - SINC_HALF;
#endif cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
} else { for (int jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
for (int i=firstTileRow; i<(firstTileRow+nRowsInTile); i++) { // Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
#pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
firstprivate(chip)
for (int j=0; j<outWidth; j++) {
ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
fraca = modf(i+ao, &k);
if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
fracr = modf(j+ro, &kk);
if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
dop = dopplerPoly->eval(i+1,j+1);
// Data chip without the carriers
for (int ii=0; ii<SINC_ONE; ii++) {
// Subtracting off firstImageRow removes the offset from the first row in the reference
// image to the first row actually contained in imgIn
chipi = k - firstImageRow + ii - SINC_HALF;
cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (int jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
// Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
}
} }
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
} }
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
} }
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
} }
} }
printf("Elapsed time: %f\n", (omp_get_wtime()-t0)); printf("Elapsed time: %f\n", (omp_get_wtime()-t0));
} }
void ResampSlc::_resamp_gpu()
{
vector<complex<float> > imgIn(inLength*inWidth);
vector<complex<float> > imgOut(outLength*outWidth);
vector<float> residAz(outLength*outWidth), residRg(outLength*outWidth);
ResampMethods rMethods;
DataAccessor *slcInAccObj = (DataAccessor*)slcInAccessor;
DataAccessor *slcOutAccObj = (DataAccessor*)slcOutAccessor;
DataAccessor *residRgAccObj, *residAzAccObj;
if (residRgAccessor != 0) residRgAccObj = (DataAccessor*)residRgAccessor;
else residRgAccObj = NULL;
if (residAzAccessor != 0) residAzAccObj = (DataAccessor*)residAzAccessor;
else residAzAccObj = NULL;
// Moving this here so we don't waste any time
if (!isComplex) {
printf("Real data interpolation not implemented yet.\n");
return;
}
double t0 = omp_get_wtime();
printf("\n << Resample one image to another image coordinates >> \n\n");
printf("Input Image Dimensions: %6d lines, %6d pixels\n\n", inLength, inWidth);
printf("Output Image Dimensions: %6d lines, %6d pixels\n\n", outLength, outWidth);
printf("Complex data interpolation\n");
rMethods.prepareMethods(SINC_METHOD);
printf("Azimuth Carrier Poly\n");
azCarrier->printPoly();
printf("Range Carrier Poly\n");
rgCarrier->printPoly();
printf("Range Offsets Poly\n");
rgOffsetsPoly->printPoly();
printf("Azimuth Offsets Poly\n");
azOffsetsPoly->printPoly();
printf("Doppler Poly\n");
dopplerPoly->printPoly();
printf("Reading in image data ... \n");
// read the whole input SLC image
for (int i=0; i<inLength; i++) {
slcInAccObj->getLineSequential((char *)&imgIn[i*inWidth]);
}
// read the residAz if providied
if (residAzAccessor != 0) {
for (int i=0; i<outLength; i++)
residAzAccObj->getLineSequential((char *)&residAz[i*outWidth]);
}
if (residRgAccessor != 0) {
for (int i=0; i<outLength; i++)
residRgAccObj->getLineSequential((char *)&residRg[i*outWidth]);
}
// set up and copy the Poly objects
vector<double> azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6);
vector<double> rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6);
vector<double> dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6);
vector<double> azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6);
vector<double> rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6);
copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...]
copyPolyToArr(rgOffsetsPoly, rgOffPolyArr);
copyPolyToArr(dopplerPoly, dopPolyArr);
copyPolyToArr(azCarrier, azCarPolyArr);
copyPolyToArr(rgCarrier, rgCarPolyArr);
double gpu_inputs_d[6];
int gpu_inputs_i[8];
gpu_inputs_d[0] = wvl;
gpu_inputs_d[1] = refwvl;
gpu_inputs_d[2] = r0;
gpu_inputs_d[3] = refr0;
gpu_inputs_d[4] = slr;
gpu_inputs_d[5] = refslr;
gpu_inputs_i[0] = inLength;
gpu_inputs_i[1] = inWidth;
gpu_inputs_i[2] = outWidth;
int firstImageRow = 0;
int firstTileRow = 0;
int nRowsInBlock = outLength;
gpu_inputs_i[3] = firstImageRow;
gpu_inputs_i[4] = firstTileRow;
gpu_inputs_i[5] = nRowsInBlock;
gpu_inputs_i[6] = outLength; //LINES_PER_TILE;
gpu_inputs_i[7] = int(flatten);
// call gpu routine
runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOut[0], &residAz[0], &residRg[0],
&azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0],
&(rMethods.fintp[0]));
// write the output file
for (int i=0; i<outLength; i++) slcOutAccObj->setLineSequential((char *)&imgOut[i*outWidth]);
printf("Elapsed time: %f\n", (omp_get_wtime()-t0));
// all done
}