GPUresamp module updates (#325)

* GPUresamp module updates

  * modify the gpu resamp code to produce results consistent with cpu
  * add the gpuresamp module to runFineResamp.py in TopsProc, which can be loaded topsApp.py
  * also fine-tune the sinc interpolation in cpu code: the sinc coef center value and normalization
LT1AB
Lijun Zhu 2021-09-23 14:04:19 -07:00 committed by GitHub
parent 5e3f28311b
commit 0dcbbf67d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 633 additions and 502 deletions

View File

@ -540,6 +540,7 @@ class TopsProc(Component):
try:
from zerodop.GPUtopozero.GPUtopozero import PyTopozero
from zerodop.GPUgeo2rdr.GPUgeo2rdr import PyGeo2rdr
from zerodop.GPUresampslc.GPUresampslc import PyResampSlc
flag = True
except:
pass

View File

@ -2,6 +2,7 @@
# Author: Piyush Agram
# Copyright 2016
#
#
import isce
import isceobj
@ -12,8 +13,11 @@ from isceobj.Util.Poly2D import Poly2D
import os
import copy
from isceobj.Sensor.TOPS import createTOPSSwathSLCProduct
import logging
def resampSecondary(mas, slv, rdict, outname ):
logger = logging.getLogger('isce.topsinsar.fineresamp')
def resampSecondaryCPU(reference, secondary, rdict, outname):
'''
Resample burst by burst.
'''
@ -32,13 +36,12 @@ def resampSecondary(mas, slv, rdict, outname ):
aziImg.setAccessMode('READ')
inimg = isceobj.createSlcImage()
inimg.load(slv.image.filename + '.xml')
inimg.load(secondary.image.filename + '.xml')
inimg.setAccessMode('READ')
rObj = stdproc.createResamp_slc()
rObj.slantRangePixelSpacing = slv.rangePixelSize
rObj.radarWavelength = slv.radarWavelength
rObj.slantRangePixelSpacing = secondary.rangePixelSize
rObj.radarWavelength = secondary.radarWavelength
rObj.azimuthCarrierPoly = azcarrpoly
rObj.dopplerPoly = dpoly
@ -47,14 +50,14 @@ def resampSecondary(mas, slv, rdict, outname ):
rObj.imageIn = inimg
####Setting reference values
rObj.startingRange = slv.startingRange
rObj.referenceSlantRangePixelSpacing = mas.rangePixelSize
rObj.referenceStartingRange = mas.startingRange
rObj.referenceWavelength = mas.radarWavelength
rObj.startingRange = secondary.startingRange
rObj.referenceSlantRangePixelSpacing = reference.rangePixelSize
rObj.referenceStartingRange = reference.startingRange
rObj.referenceWavelength = reference.radarWavelength
width = mas.numberOfSamples
length = mas.numberOfLines
width = reference.numberOfSamples
length = reference.numberOfLines
imgOut = isceobj.createSlcImage()
imgOut.setWidth(width)
imgOut.filename = outname
@ -70,8 +73,119 @@ def resampSecondary(mas, slv, rdict, outname ):
imgOut.renderHdr()
return imgOut
def convertPoly2D(poly):
'''
Convert a isceobj.Util.Poly2D {poly} into zerodop.GPUresampslc.GPUresampslc.PyPloy2d
'''
from zerodop.GPUresampslc.GPUresampslc import PyPoly2d
import itertools
def getRelativeShifts(mFrame, sFrame, minBurst, maxBurst, secondaryBurstStart):
# get parameters from poly
azimuthOrder = poly.getAzimuthOrder()
rangeOrder = poly.getRangeOrder()
azimuthMean = poly.getMeanAzimuth()
rangeMean = poly.getMeanRange()
azimuthNorm = poly.getNormAzimuth()
rangeNorm = poly.getNormRange()
# create the PyPoly2d object
pPoly = PyPoly2d(azimuthOrder, rangeOrder, azimuthMean, rangeMean, azimuthNorm, rangeNorm)
# copy the coeffs, need to flatten into 1d list
pPoly.coeffs = list(itertools.chain.from_iterable(poly.getCoeffs()))
# all done
return pPoly
def resampSecondaryGPU(reference, secondary, rdict, outname):
'''
Resample burst by burst with GPU
'''
# import the GPU module
import zerodop.GPUresampslc
# get Poly2D objects from rdict and convert them into PyPoly2d objects
azpoly = convertPoly2D(rdict['azpoly'])
rgpoly = convertPoly2D(rdict['rgpoly'])
azcarrpoly = convertPoly2D(rdict['carrPoly'])
dpoly = convertPoly2D(rdict['doppPoly'])
rngImg = isceobj.createImage()
rngImg.load(rdict['rangeOff'] + '.xml')
rngImg.setCaster('read', 'FLOAT')
rngImg.createImage()
aziImg = isceobj.createImage()
aziImg.load(rdict['azimuthOff'] + '.xml')
aziImg.setCaster('read', 'FLOAT')
aziImg.createImage()
inimg = isceobj.createSlcImage()
inimg.load(secondary.image.filename + '.xml')
inimg.setAccessMode('READ')
inimg.createImage()
# create a GPU resample processor
rObj = zerodop.GPUresampslc.createResampSlc()
# set parameters
rObj.slr = secondary.rangePixelSize
rObj.wvl = secondary.radarWavelength
# set polynomials
rObj.azCarrier = azcarrpoly
rObj.dopplerPoly = dpoly
rObj.azOffsetsPoly = azpoly
rObj.rgOffsetsPoly = rgpoly
# need to create an empty rgCarrier poly
rgCarrier = Poly2D()
rgCarrier.initPoly(rangeOrder=0, azimuthOrder=0, coeffs=[[0.]])
rgCarrier = convertPoly2D(rgCarrier)
rObj.rgCarrier = rgCarrier
# input secondary image
rObj.slcInAccessor = inimg.getImagePointer()
rObj.inWidth = inimg.getWidth()
rObj.inLength = inimg.getLength()
####Setting reference values
rObj.r0 = secondary.startingRange
rObj.refr0 = reference.rangePixelSize
rObj.refslr = reference.startingRange
rObj.refwvl = reference.radarWavelength
# set output image
width = reference.numberOfSamples
length = reference.numberOfLines
imgOut = isceobj.createSlcImage()
imgOut.setWidth(width)
imgOut.filename = outname
imgOut.setAccessMode('write')
imgOut.createImage()
rObj.slcOutAccessor = imgOut.getImagePointer()
rObj.outWidth = width
rObj.outLength = length
rObj.residRgAccessor = rngImg.getImagePointer()
rObj.residAzAccessor = aziImg.getImagePointer()
# need to specify data type, only complex is currently supported
rObj.isComplex = (inimg.dataType == 'CFLOAT')
# run resampling
rObj.resamp_slc()
# finalize images
inimg.finalizeImage()
imgOut.finalizeImage()
rngImg.finalizeImage()
aziImg.finalizeImage()
imgOut.renderHdr()
return imgOut
def getRelativeShifts(referenceFrame, secondaryFrame, minBurst, maxBurst, secondaryBurstStart):
'''
Estimate the relative shifts between the start of the bursts.
'''
@ -79,15 +193,15 @@ def getRelativeShifts(mFrame, sFrame, minBurst, maxBurst, secondaryBurstStart):
azReferenceOff = {}
azSecondaryOff = {}
azRelOff = {}
tm = mFrame.bursts[minBurst].sensingStart
dt = mFrame.bursts[minBurst].azimuthTimeInterval
ts = sFrame.bursts[secondaryBurstStart].sensingStart
tm = referenceFrame.bursts[minBurst].sensingStart
dt = referenceFrame.bursts[minBurst].azimuthTimeInterval
ts = secondaryFrame.bursts[secondaryBurstStart].sensingStart
for index in range(minBurst, maxBurst):
burst = mFrame.bursts[index]
burst = referenceFrame.bursts[index]
azReferenceOff[index] = int(np.round((burst.sensingStart - tm).total_seconds() / dt))
burst = sFrame.bursts[secondaryBurstStart + index - minBurst]
burst = secondaryFrame.bursts[secondaryBurstStart + index - minBurst]
azSecondaryOff[secondaryBurstStart + index - minBurst] = int(np.round((burst.sensingStart - ts).total_seconds() / dt))
azRelOff[secondaryBurstStart + index - minBurst] = azSecondaryOff[secondaryBurstStart + index - minBurst] - azReferenceOff[index]
@ -189,9 +303,18 @@ def getValidLines(secondary, rdict, inname, misreg_az=0.0, misreg_rng=0.0):
def runFineResamp(self):
'''
Create coregistered overlap secondarys.
Create coregistered overlap secondary image.
'''
# decide whether to use CPU or GPU
hasGPU = self.useGPU and self._insar.hasGPU()
if hasGPU:
resampSecondary = resampSecondaryGPU
print('Using GPU for fineresamp')
else:
resampSecondary = resampSecondaryCPU
swathList = self._insar.getValidSwathList(self.swaths)
@ -209,7 +332,6 @@ def runFineResamp(self):
outdir = os.path.join(self._insar.fineCoregDirname, 'IW{0}'.format(swath))
os.makedirs(outdir, exist_ok=True)
###Directory with offsets
offdir = os.path.join(self._insar.fineOffsetsDirname, 'IW{0}'.format(swath))
@ -242,8 +364,8 @@ def runFineResamp(self):
for ii in range(minBurst, maxBurst):
jj = secondaryBurstStart + ii - minBurst
masBurst = reference.bursts[ii]
slvBurst = secondary.bursts[jj]
referenceBurst = reference.bursts[ii]
secondaryBurst = secondary.bursts[jj]
try:
offset = relShifts[jj]
@ -262,19 +384,19 @@ def runFineResamp(self):
###For future - should account for azimuth and range misreg here .. ignoring for now.
azCarrPoly, dpoly = secondary.estimateAzimuthCarrierPolynomials(slvBurst, offset = -1.0 * offset)
azCarrPoly, dpoly = secondary.estimateAzimuthCarrierPolynomials(secondaryBurst, offset = -1.0 * offset)
rdict['carrPoly'] = azCarrPoly
rdict['doppPoly'] = dpoly
outimg = resampSecondary(masBurst, slvBurst, rdict, outname)
minAz, maxAz, minRg, maxRg = getValidLines(slvBurst, rdict, outname,
outimg = resampSecondary(referenceBurst, secondaryBurst, rdict, outname)
minAz, maxAz, minRg, maxRg = getValidLines(secondaryBurst, rdict, outname,
misreg_az = misreg_az - offset, misreg_rng = misreg_rg)
# copyBurst = copy.deepcopy(masBurst)
copyBurst = masBurst.clone()
adjustValidSampleLine(copyBurst, slvBurst,
# copyBurst = copy.deepcopy(referenceBurst)
copyBurst = referenceBurst.clone()
adjustValidSampleLine(copyBurst, secondaryBurst,
minAz=minAz, maxAz=maxAz,
minRng=minRg, maxRng=maxRg)
copyBurst.image.filename = outimg.filename
@ -285,4 +407,3 @@ def runFineResamp(self):
coreg.numberOfBursts = len(coreg.bursts)
self._insar.saveProduct(coreg, os.path.join(self._insar.fineCoregDirname, 'IW{0}.xml'.format(swath)))

View File

@ -316,7 +316,8 @@ contains
!!$c**
!!$c** Date Changed Reason Changed CR # and Version #
!!$c** ------------ ---------------- -----------------
!!$c**
!!$c** 06/18/21 adjust r_soff to make sure coef at the center is 1.
!!$c** note that the routine doesn't work well for odd sequences
!!$c*****************************************************************
use fortranUtils
@ -360,19 +361,18 @@ contains
i_intplength = nint(r_relfiltlen/r_beta)
i_filtercoef = i_intplength*i_decfactor
r_wgthgt = (1.d0 - r_pedestal)/2.d0
r_soff = ((i_filtercoef - 1.d0)/2.d0)
r_soff = i_filtercoef/2.d0
do i=0,i_filtercoef-1
r_wa = i - r_soff
r_wgt = (1.d0 - r_wgthgt) + r_wgthgt*cos((pi*r_wa)/r_soff)
j = floor(i - r_soff)
r_s = j*r_beta/(1.0d0 * i_decfactor)
r_s = r_wa*r_beta/(1.0d0 * i_decfactor)
if(r_s .ne. 0.0)then
r_fct = sin(pi*r_s)/(pi*r_s)
else
r_fct = 1.0d0
endif
if(i_weight .eq. 1)then
r_wgt = (1.d0 - r_wgthgt) + r_wgthgt*cos((pi*r_wa)/r_soff)
r_filter(i+1) = r_fct*r_wgt
else
r_filter(i+1) = r_fct
@ -456,7 +456,7 @@ contains
complex function sinc_eval_2d_cx(arrin,intarr,idec,ilen,intpx,intpy,frpx,frpy,xlen,ylen)
integer ilen,idec,intpx,intpy,xlen,ylen,k,m,ifracx,ifracy
real*8 frpx,frpy
real*8 frpx,frpy, fweight, fweightsum
complex arrin(0:xlen-1,0:ylen-1)
real*4 intarr(0:idec*ilen-1)
@ -468,12 +468,17 @@ contains
ifracx= min(max(0,int(frpx*idec)),idec-1)
ifracy= min(max(0,int(frpy*idec)),idec-1)
! to normalize the sinc interpolator
fweightsum = 0.
do k=0,ilen-1
do m=0,ilen-1
sinc_eval_2d_cx = sinc_eval_2d_cx + arrin(intpx-k,intpy-m)*&
intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen)
fweight = intarr(k + ifracx*ilen)*intarr(m + ifracy*ilen)
sinc_eval_2d_cx = sinc_eval_2d_cx + arrin(intpx-k,intpy-m) * fweight
fweightsum = fweightsum + fweight
enddo
enddo
sinc_eval_2d_cx = sinc_eval_2d_cx/fweightsum
end if
end function sinc_eval_2d_cx

View File

@ -118,6 +118,8 @@
print *, 'Azimuth offsets poly'
call printpoly2d_f(azOffsetsPoly)
print *, 'Doppler poly'
call printpoly2d_f(dopplerPoly)
print *, 'Reading in the image'
!c read in the reference image
@ -199,14 +201,14 @@
r_ao = evalPoly2d_f(azOffsetsPoly,r_at,r_rt) + residaz(i)
k=int(i+r_ro) !range offset
k=floor(i+r_ro) !range offset
fracr=i+r_ro-k
if ((k .le. sinchalf) .or. (k.ge.(inwidth-sinchalf))) then
cycle
endif
kk=int(j+r_ao) !azimuth offset
kk=floor(j+r_ao) !azimuth offset
fraca=j+r_ao-kk
if ((kk .le. sinchalf) .or. (kk.ge.(inlength-sinchalf))) then

View File

@ -170,8 +170,8 @@ cdef class PyResampSlc:
def __cinit__(self):
self.c_resamp = new ResampSlc()
def __dealloc__(self):
del self.c_resamp
#def __dealloc__(self):
# del self.c_resamp
@property
def slcInAccessor(self):

View File

@ -15,14 +15,16 @@
#define SINC_HALF (SINC_LEN/2)
#define SINC_ONE (SINC_LEN+1)
#define IDX1D(i,j,w) (((i)*(w))+(j))
#define modulo_f(a,b) fmod(fmod(a,b)+(b),(b))
struct InputData {
cuFloatComplex *imgIn;
cuFloatComplex *imgOut;
double *residAz;
double *residRg;
float *residAz;
float *residRg;
double *azOffPoly;
double *rgOffPoly;
double *dopPoly;
@ -38,7 +40,7 @@ __constant__ int ini[8];
// GPU Helper Functions
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// Data usage: 8 doubles/pointers, 2 ints -- 72 bytes/call
// Data usage: 8 floats/pointers, 2 ints -- 72 bytes/call
__device__ double evalPolyAt(double *polyArr, double azi, double rng) {
// C-style eval method of Poly2d (adjusted to work with the array-format Poly2d where:
// polyArr[0] = azimuthOrder
@ -65,123 +67,120 @@ __device__ double evalPolyAt(double *polyArr, double azi, double rng) {
return val;
}
// Data usage: 4 doubles, 1 int -- 36 bytes/call
__device__ cuFloatComplex fintp(int idx) {
// Replaces storing ~4MB and copying in/out of constant storage by calculating an element of fintp on the fly
__global__ void removeCarrier(struct InputData inData) {
// remove the carriers from input slc
// thread id, as the pixel index for the input image
int pix = blockDim.x * blockIdx.x + threadIdx.x;
// check the thread range
// ini[0] - inLength
// ini[1] - inWidth
if(pix >= ini[0]*ini[1])
return;
double weight, filtFact, sinFact, i;
i = int(int(idx / SINC_LEN) + ((idx % SINC_LEN) * (SINC_SUB / 2)) - 1) - 16383.5;
weight = .5 + (.5 * cos((M_PI * i) / 16383.5));
sinFact = i * (.75 / (SINC_SUB / 2));
filtFact = (sin(M_PI * sinFact) / (M_PI * sinFact)) * weight; // No need to check if sinFact is 0 since SINC_SUB != 0 and i-16383.5 != 0 as i is an int
return make_cuFloatComplex(filtFact*weight, 0.);
}
// get pixel location along azimuth/range
int idxi = pix/ini[1];
int idxj = pix%ini[1];
// Data usage: 6 double/complex/pointers, 4 ints -- 64 bytes/call
// Add call to fintp (36 bytes) -- 100 bytes/call
__device__ cuFloatComplex sinc_interp(cuFloatComplex *chip, double fraca, double fracr, double dop, float *fintp) {
// This is a modified/hardwired form of sinc_eval_2d from Interpolator. We eliminate a couple of checks that we know will pass, and
// adjusted the algorithm to account for modifications to the main kernel below. Primarily three things are of interest:
// 1. Chip is actually a pointer to the top-left of the chip location in the main image block, so chip's 'width' actually is
// ini[1], not SINC_ONE.
// 2. We account for removing doppler effects using cval and taking in dop. In the older version of the main kernel, this
// value was calculated and multiplied as the data was copied into the smaller chip. Here we calculate it on the fly using
// the same index for 'ii' as the row index of the chip being operated on (so in this case since it happens from the bottom
// up, we use SINC_LEN-i instead of i).
cuFloatComplex ret = make_cuFloatComplex(0.,0.);
cuFloatComplex cval;
int ifracx, ifracy, i, j;
ifracx = min(max(0,int(fraca*SINC_SUB)), SINC_SUB-1);
ifracy = min(max(0,int(fracr*SINC_SUB)), SINC_SUB-1);
for (i=0; i<SINC_LEN; i++) {
cval = make_cuFloatComplex(cos((SINC_LEN-i-4.)*dop), -sin((SINC_LEN-i-4.)*dop));
for (j=0; j<SINC_LEN; j++) {
ret = cuCaddf(ret,
cuCmulf(
cuCmulf(chip[IDX1D(SINC_LEN-i,SINC_LEN-j,ini[1])], cval),
make_cuFloatComplex(fintp[IDX1D(ifracx,i,SINC_LEN)]*fintp[IDX1D(ifracy,j,SINC_LEN)], 0.)
)
);
}
}
return ret;
// the poly uses fortran 1-indexing
double r_i = idxi +1;
double r_j = idxj +1;
// get the phase shift due to carriers
double ph = evalPolyAt(inData.rgCarrierPoly, r_i, r_j) +
evalPolyAt(inData.azCarrierPoly, r_i, r_j);
ph = modulo_f(ph, 2.*M_PI);
// remove the phase shift from the data
cuFloatComplex cval = cuCmulf(inData.imgIn[pix], make_cuFloatComplex(cosf(ph), -sinf(ph)));
// assign the new value
inData.imgIn[pix] = cval;
// all done
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// GPU Main Kernel
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// Data Usage: 15 pointers/doubles, 5 ints, 1 bool -- 144 bytes/call (assuming 1 bool ==> 1 int)
// Add call to sinc_interp (100 bytes/call) -- 244 bytes/call (for funsies let's assume ~250 bytes/call)
// NOTE: We ignore calls to evalPolyAt since they have less
// Data Usage: 15 pointers/floats, 5 ints, 1 bool -- 144 bytes/call (assuming 1 bool ==> 1 int)
// Add call to sinfc_interp (100 bytes/call) -- 244 bytes/call (for funsies let's assume ~250 bytes/call)
// NOTE: We ignore calls to evalPolyAt sinfce they have less
// data usage and therefore do not really matter for
// max data usage
__global__ void GPUResamp(struct InputData inData) {
// Main GPU ResampSlc kernel, slightly modified from original algorithm to save significant space
int pix = (blockDim.x * blockIdx.x) + threadIdx.x;
int pix = blockDim.x * blockIdx.x + threadIdx.x;
if (pix < (ini[2] * ini[6])) {
//cuFloatComplex chip[SINC_ONE*SINC_ONE];
cuFloatComplex cval;
double ao, ro, fracr, fraca, ph, dop;
int k, kk, idxi, idxj; //chipi, chipj, ii, jj;
bool flag;
// check within outWidth*LINES_PER_TILE
if (pix >= (ini[2] * ini[6]))
return;
flag = false;
idxi = (pix / ini[2]) + ini[4];
idxj = (pix % ini[2]);
// index along row/azimuth
int idxi = (pix / ini[2]) + ini[4];
// index along width/range
int idxj = (pix % ini[2]);
ao = evalPolyAt(inData.azOffPoly, idxi+1, idxj+1);
ro = evalPolyAt(inData.rgOffPoly, idxi+1, idxj+1);
// offset
// note that the polys use 1-indexing in Fortran code
double ao = evalPolyAt(inData.azOffPoly, idxi+1, idxj+1) + inData.residAz[pix];
double ro = evalPolyAt(inData.rgOffPoly, idxi+1, idxj+1) + inData.residRg[pix];
k = idxi + ao;
fraca = idxi + ao - k;
if ((k < SINC_HALF) || (k >= (ini[0]-SINC_HALF))) flag = true;
// azimuth coordinate
int ka = floor(idxi + ao);
double fraca = idxi + ao - ka;
// range coordinate
int kr = floor(idxj + ro);
double fracr = idxj + ro - kr;
// check whether the pixel is out of the interpolation region
if ((ka < SINC_HALF) || ( ka >= (ini[0]-SINC_HALF))
|| (kr < SINC_HALF) || (kr >= (ini[1]-SINC_HALF)))
{
// out of range
inData.imgOut[pix] = make_cuFloatComplex(0., 0.);
return;
}
if (!flag) {
kk = idxj + ro;
fracr = idxj + ro - kk;
if ((kk < SINC_HALF) || (kk >= (ini[1]-SINC_HALF))) flag = true;
// in range, continue
if (!flag) {
dop = evalPolyAt(inData.dopPoly, idxi+1, idxj+1);
// evaluate the doppler phase at the secondary coordinate
double dop = evalPolyAt(inData.dopPoly, idxi+1+ao, idxj+1+ro);
/*
for (ii=0; ii<SINC_ONE; ii++) {
chipi = k - ini[3] + ii - SINC_HALF;
cval = make_cuFloatComplex(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
chip[IDX1D(ii,jj,SINC_ONE)] = cuCmulf(inData.imgIn[IDX1D(chipi,chipj,ini[1])], cval);
}
}
// phase corrections to be added later
double ph = (dop * fraca) + evalPolyAt(inData.rgCarrierPoly, idxi+1+ao, idxj+1+ro) +
evalPolyAt(inData.azCarrierPoly, idxi+1+ao, idxj+1+ro);
top = k - ini[3] - SINC_HALF;
left = kk - SINC_HALF;
topLeft = IDX1D(top,left,ini[1]);
*/
// if flatten
if (ini[7] == 1)
ph = ph + ((4.*(M_PI/ind[0]))*((ind[2]-ind[3])+(idxj*(ind[4]-ind[5]))+(ro*ind[4])))
+((4.*M_PI*(ind[3]+(idxj*ind[5])))*((1./ind[1])-(1./ind[0])));
ph = (dop * fraca) + evalPolyAt(inData.rgCarrierPoly, idxi+ao, idxj+ro) + evalPolyAt(inData.azCarrierPoly, idxi+ao, idxj+ro);
ph = modulo_f(ph, 2.*M_PI);
if (ini[7] == 1)
ph = ph + ((4.*(M_PI/ind[0]))*((ind[2]-ind[3])+(idxj*(ind[4]-ind[5]))+(ro*ind[4])))+((4.*M_PI*(ind[3]+(idxj*ind[5])))*((1./ind[1])-(1./ind[0])));
// temp variable to keep track of the interpolated value
cuFloatComplex cval = make_cuFloatComplex(0.,0.);
// get the indices in the sinfc_coef of the fractional parts
int ifraca = int(fraca*SINC_SUB);
int ifracr = int(fracr*SINC_SUB);
ph = modulo_f(ph, 2.*M_PI);
// NOTE: This has been modified to pass the pointer to the "top left" location of what used to be the 'chip' copy of data from imgIn. This
// saves allocating 81 extra floats per pixel (since chip is SINC_ONE*SINC_ONE), and instead uses logic to determine the offsets. The
// logic simply takes the minimum values of chipi and chipj above and adds the IDX1D index to the pointer. This means that sinc_interp
// will pass this pointer as "chip" and it will still point to the correct values (since chip is just a 2D window subset of imgIn). We
// Also have to account for 'cval' by modifying the sinc_interp function to calculate 'cval' dynamically (no extra cost computationally).
// This should actually speed up the kernel as well as significantly reduce redundant data usage.
cval = sinc_interp((inData.imgIn + IDX1D(k-ini[3]-SINC_HALF,kk-SINC_HALF,ini[1])), fraca, fracr, dop, inData.fintp);
inData.imgOut[pix] = cuCmulf(cval, make_cuFloatComplex(cos(ph), sin(ph)));
}
// weight for sinfc interp coefficients
float weightsum = 0.;
// iterate over the interpolation zone, e.g. [-3, 4] x [-3, 4] for SINC_LEN = 8
for (int i=-SINC_HALF+1; i<=SINC_HALF; i++) {
cuFloatComplex cdop = make_cuFloatComplex(cosf(i*dop), -sinf(i*dop));
for (int j=-SINC_HALF+1; j<=SINC_HALF; j++) {
float weight = inData.fintp[IDX1D(ifraca,SINC_HALF-i,SINC_LEN)]
*inData.fintp[IDX1D(ifracr,SINC_HALF-j,SINC_LEN)];
// correct the doppler phase here
cuFloatComplex cin = cuCmulf(inData.imgIn[IDX1D(i+ka,j+kr,ini[1])], cdop);
cval = cuCaddf(cval, make_cuFloatComplex(cuCrealf(cin)*weight, cuCimagf(cin)*weight));
weightsum += weight;
}
}
// normalize
cval = make_cuFloatComplex(cuCrealf(cval)/weightsum, cuCimagf(cval)/weightsum);
// phase correction
cval = cuCmulf(cval, make_cuFloatComplex(cosf(ph), sinf(ph)));
// assign and return
inData.imgOut[pix] = cval;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
@ -208,8 +207,10 @@ void checkKernelErrors() {
// Main CPU Function
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgOut, double *residAz, double *residRg, double *azOffPoly, double *rgOffPoly,
double *dopPoly, double *azCarrierPoly, double *rgCarrierPoly, float *fintp) {
void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgOut,
float *residAz, float *residRg, double *azOffPoly, double *rgOffPoly,
double *dopPoly, double *azCarrierPoly, double *rgCarrierPoly, float *fintp)
{
/* * * * * * * * * * * * * * * * * * * *
* Input mapping -
*
@ -237,7 +238,8 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
// Create handles for device copies of inputs
cuFloatComplex *d_imgIn, *d_imgOut;
double *d_residAz, *d_residRg, *d_azOffPoly, *d_rgOffPoly, *d_dopPoly, *d_azCarrierPoly, *d_rgCarrierPoly;
float *d_residAz, *d_residRg;
double *d_azOffPoly, *d_rgOffPoly, *d_dopPoly, *d_azCarrierPoly, *d_rgCarrierPoly;
float *d_fintp;
double startRun, endRun, startKernel, endKernel;
@ -267,8 +269,8 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
size_t nb_in = nInPix * sizeof(cuFloatComplex);
size_t nb_out = nOutPix * sizeof(cuFloatComplex);
size_t nb_rsdAz = nResidAzPix * sizeof(double);
size_t nb_rsdRg = nResidRgPix * sizeof(double);
size_t nb_rsdAz = nResidAzPix * sizeof(float);
size_t nb_rsdRg = nResidRgPix * sizeof(float);
size_t nb_azOff = nAzOffPix * sizeof(double);
size_t nb_rgOff = nRgOffPix * sizeof(double);
size_t nb_dop = nDopPix * sizeof(double);
@ -277,8 +279,8 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
cudaMalloc((cuFloatComplex**)&d_imgIn, nb_in);
cudaMalloc((cuFloatComplex**)&d_imgOut, nb_out);
if (residAz != 0) cudaMalloc((double**)&d_residAz, nb_rsdAz);
if (residRg != 0) cudaMalloc((double**)&d_residRg, nb_rsdRg);
if (residAz != 0) cudaMalloc((float**)&d_residAz, nb_rsdAz);
if (residRg != 0) cudaMalloc((float**)&d_residRg, nb_rsdRg);
cudaMalloc((double**)&d_azOffPoly, nb_azOff);
cudaMalloc((double**)&d_rgOffPoly, nb_rgOff);
cudaMalloc((double**)&d_dopPoly, nb_dop);
@ -310,9 +312,6 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
printf("Done. (%f s.)\n", (endKernel-startKernel));
dim3 block(32);
dim3 grid(int((nInPix + 31) / 32));
if ((grid.x * 32) > nInPix) printf(" (DEBUG: There will be %d 'empty' threads in the last thread block).\n", ((grid.x*32)-nInPix));
printf(" Running GPU ResampSlc... ");
fflush(stdout);
@ -332,7 +331,14 @@ void runGPUResamp(double *h_inpts_dbl, int *h_inpts_int, void *imgIn, void *imgO
inData.rgCarrierPoly = d_rgCarrierPoly;
inData.fintp = d_fintp;
GPUResamp <<<grid, block>>>(inData);
// remove carriers from the input image
int threads = 1024;
int blocks = (nInPix + threads-1) / threads;
removeCarrier<<<blocks, threads>>>(inData);
checkKernelErrors();
// resample
blocks = (nOutPix + threads -1) / threads;
GPUResamp <<<blocks, threads>>>(inData);
checkKernelErrors();
endKernel = cpuSecond();

View File

@ -6,6 +6,6 @@
#ifndef GPU_RESAMP_H
#define GPU_RESAMP_H
void runGPUResamp(double*,int*,void*,void*,double*,double*,double*,double*,double*,double*,double*,float*);
void runGPUResamp(double*,int*,void*,void*,float*,float*,double*,double*,double*,double*,double*,float*);
#endif

View File

@ -34,6 +34,9 @@ struct ResampSlc {
void clearPolys();
void resetPolys();
void resamp();
void _resamp_cpu();
void _resamp_gpu();
};
#endif

View File

@ -116,17 +116,18 @@ template float Interpolator::bicubic(double,double,vector<vector<float>>&);
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
void Interpolator::sinc_coef(double beta, double relfiltlen, int decfactor, double pedestal, int weight, int &intplength, int &filtercoef, vector<double> &filter) {
void Interpolator::sinc_coef(double beta, double relfiltlen, int decfactor, double pedestal, int weight,
int &intplength, int &filtercoef, vector<double> &filter) {
intplength = round(relfiltlen / beta);
intplength = rint(relfiltlen / beta);
filtercoef = intplength * decfactor;
double wgthgt = 1. - (pedestal / 2.);
double soff = (filtercoef - 1.) / 2.;
double wgthgt = (1. - pedestal) / 2.;
double soff = filtercoef / 2.;
double wgt, s, fct;
for (int i=0; i<filtercoef; i++) {
wgt = (1. - wgthgt) + (wgthgt * cos((M_PI * (i - soff)) / soff));
s = (floor(i - soff) * beta) / (1. * decfactor);
wgt = (1. - wgthgt) + wgthgt * cos(M_PI * (i - soff) / soff);
s = (i - soff) * beta / (1. * decfactor);
if (s != 0.) fct = sin(M_PI * s) / (M_PI * s);
else fct = 1.;
if (weight == 1) filter[i] = fct * wgt;

View File

@ -19,7 +19,7 @@ ResampMethods::ResampMethods() {
void ResampMethods::prepareMethods(int method) {
if (method == SINC_METHOD) {
vector<double> filter((SINC_SUB*SINC_LEN)+1);
vector<double> filter(SINC_SUB*SINC_LEN);
double ssum;
int intplength, filtercoef;
Interpolator interp;
@ -28,16 +28,7 @@ void ResampMethods::prepareMethods(int method) {
interp.sinc_coef(1.,SINC_LEN,SINC_SUB,0.,1,intplength,filtercoef,filter);
// Normalize filter
for (int i=0; i<SINC_SUB; i++) {
ssum = 0.;
for (int j=0; j<SINC_LEN; j++) {
ssum = ssum + filter[i+(j*SINC_SUB)];
}
for (int j=0; j<SINC_LEN; j++) {
filter[i+(j*SINC_SUB)] = filter[i+(j*SINC_SUB)] / ssum;
}
}
// note also the type conversion
fintp.resize(SINC_SUB*SINC_LEN);
for (int i=0; i<SINC_LEN; i++) {
for (int j=0; j<SINC_SUB; j++) {

View File

@ -144,7 +144,24 @@ void copyPolyToArr(Poly2d *poly, vector<double> &destArr) {
for (int i=0; i<((destArr[0]+1)*(destArr[1]+1)); i++) destArr[6+i] = poly->coeffs[i];
}
void ResampSlc::resamp() {
// wrapper for calling cpu or gpu methods
void ResampSlc::resamp()
{
#ifndef GPU_ACC_ENABLED
usr_enable_gpu = false;
#endif
if (usr_enable_gpu) {
_resamp_gpu();
}
else {
_resamp_cpu();
}
}
// not checked
void ResampSlc::_resamp_cpu() {
vector<double> residAz(outWidth,0.), residRg(outWidth,0.);
double ro, ao, ph, dop, fracr, fraca, t0, k, kk;
@ -167,9 +184,6 @@ void ResampSlc::resamp() {
if (residAzAccessor != 0) residAzAccObj = (DataAccessor*)residAzAccessor;
else residAzAccObj = NULL;
#ifndef GPU_ACC_ENABLED
usr_enable_gpu = false;
#endif
// Moving this here so we don't waste any time
if (!isComplex) {
@ -266,118 +280,58 @@ void ResampSlc::resamp() {
// Loop over lines
printf("Interpolating tile %d\n", tile);
if (usr_enable_gpu) {
#ifdef GPU_ACC_ENABLED
vector<complex<float> > imgOutBlock(LINES_PER_TILE*outWidth);
vector<double> residAzBlock(1,0.); // Dummy length to use for GPU
double *residAzPtr = 0;
if (residAzAccessor != 0) {
residAzBlock.resize(LINES_PER_TILE*outWidth);
for (int i=0; i<LINES_PER_TILE; i++) residAzAccObj->getLineSequential((char *)&residAzBlock[i*LINES_PER_TILE]);
residAzPtr = &residAzBlock[0];
}
vector<double> residRgBlock(1,0.);
double *residRgPtr = 0;
if (residRgAccessor != 0) {
residRgBlock.resize(LINES_PER_TILE*outWidth);
for (int i=0; i<LINES_PER_TILE; i++) residRgAccObj->getLineSequential((char *)&residRgBlock[i*LINES_PER_TILE]);
residRgPtr = &residRgBlock[0];
}
// Interpolation of the complex image. Note that we don't need to make very many changes to the original code in this loop
// since the i-index should numerically match the original i-index
for (int i=firstTileRow; i<(firstTileRow+LINES_PER_TILE); i++) {
// GetLineSequential is fine here, we don't need specific lines, just continue grabbing them
if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
vector<double> azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6);
vector<double> rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6);
vector<double> dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6);
vector<double> azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6);
vector<double> rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6);
#pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
firstprivate(chip)
for (int j=0; j<outWidth; j++) {
copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...]
copyPolyToArr(rgOffsetsPoly, rgOffPolyArr);
copyPolyToArr(dopplerPoly, dopPolyArr);
copyPolyToArr(azCarrier, azCarPolyArr);
copyPolyToArr(rgCarrier, rgCarPolyArr);
ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
double gpu_inputs_d[6];
int gpu_inputs_i[8];
fraca = modf(i+ao, &k);
if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
gpu_inputs_d[0] = wvl;
gpu_inputs_d[1] = refwvl;
gpu_inputs_d[2] = r0;
gpu_inputs_d[3] = refr0;
gpu_inputs_d[4] = slr;
gpu_inputs_d[5] = refslr;
fracr = modf(j+ro, &kk);
if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
gpu_inputs_i[0] = inLength;
gpu_inputs_i[1] = inWidth;
gpu_inputs_i[2] = outWidth;
gpu_inputs_i[3] = firstImageRow;
gpu_inputs_i[4] = firstTileRow;
gpu_inputs_i[5] = nRowsInBlock;
gpu_inputs_i[6] = LINES_PER_TILE;
gpu_inputs_i[7] = int(flatten);
dop = dopplerPoly->eval(i+1,j+1);
// 1000 lines per tile is peanuts for this algorithm to run (for test set is only 20M pixels)
runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOutBlock[0], residAzPtr, residRgPtr,
&azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0],
&(rMethods.fintp[0]));
for (int i=0; i<LINES_PER_TILE; i++) slcOutAccObj->setLineSequential((char *)&imgOutBlock[i*outWidth]);
#endif
} else {
// Interpolation of the complex image. Note that we don't need to make very many changes to the original code in this loop
// since the i-index should numerically match the original i-index
for (int i=firstTileRow; i<(firstTileRow+LINES_PER_TILE); i++) {
// GetLineSequential is fine here, we don't need specific lines, just continue grabbing them
if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
#pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
firstprivate(chip)
for (int j=0; j<outWidth; j++) {
ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
fraca = modf(i+ao, &k);
if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
fracr = modf(j+ro, &kk);
if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
dop = dopplerPoly->eval(i+1,j+1);
// Data chip without the carriers
for (int ii=0; ii<SINC_ONE; ii++) {
// Subtracting off firstImageRow removes the offset from the first row in the reference
// image to the first row actually contained in imgIn
chipi = k - firstImageRow + ii - SINC_HALF;
cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (int jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
// Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
}
// Data chip without the carriers
for (int ii=0; ii<SINC_ONE; ii++) {
// Subtracting off firstImageRow removes the offset from the first row in the reference
// image to the first row actually contained in imgIn
chipi = k - firstImageRow + ii - SINC_HALF;
cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (int jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
// Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
}
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
}
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
}
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
}
}
@ -385,7 +339,7 @@ void ResampSlc::resamp() {
if (lastLines > 0) {
firstTileRow = nTiles * LINES_PER_TILE;
nRowsInTile = outLength - firstTileRow + 1; // NOT EQUIVALENT TO NUMBER OF ROWS IN IMAGE BLOCK
nRowsInTile = outLength - firstTileRow ; // NOT EQUIVALENT TO NUMBER OF ROWS IN IMAGE BLOCK
printf("Reading in image data for final partial tile\n");
@ -426,119 +380,166 @@ void ResampSlc::resamp() {
printf("Interpolating final partial tile\n");
if (usr_enable_gpu) {
#ifdef GPU_ACC_ENABLED
vector<complex<float> > imgOutBlock(nRowsInTile*outWidth);
vector<double> residAzBlock(1,0.); // Dummy length to use for GPU
double *residAzPtr = 0;
if (residAzAccessor != 0) {
residAzBlock.resize(nRowsInTile*outWidth);
for (int i=0; i<nRowsInTile; i++) residAzAccObj->getLineSequential((char *)&residAzBlock[i*nRowsInTile]);
residAzPtr = &residAzBlock[0];
}
vector<double> residRgBlock(1,0.);
double *residRgPtr = 0;
if (residRgAccessor != 0) {
residRgBlock.resize(nRowsInTile*outWidth);
for (int i=0; i<nRowsInTile; i++) residRgAccObj->getLineSequential((char *)&residRgBlock[i*nRowsInTile]);
residRgPtr = &residRgBlock[0];
}
for (int i=firstTileRow; i<(firstTileRow+nRowsInTile); i++) {
vector<double> azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6);
vector<double> rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6);
vector<double> dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6);
vector<double> azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6);
vector<double> rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6);
if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...]
copyPolyToArr(rgOffsetsPoly, rgOffPolyArr);
copyPolyToArr(dopplerPoly, dopPolyArr);
copyPolyToArr(azCarrier, azCarPolyArr);
copyPolyToArr(rgCarrier, rgCarPolyArr);
#pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
firstprivate(chip)
for (int j=0; j<outWidth; j++) {
double gpu_inputs_d[6];
int gpu_inputs_i[8];
ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
gpu_inputs_d[0] = wvl;
gpu_inputs_d[1] = refwvl;
gpu_inputs_d[2] = r0;
gpu_inputs_d[3] = refr0;
gpu_inputs_d[4] = slr;
gpu_inputs_d[5] = refslr;
fraca = modf(i+ao, &k);
if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
gpu_inputs_i[0] = inLength;
gpu_inputs_i[1] = inWidth;
gpu_inputs_i[2] = outWidth;
gpu_inputs_i[3] = firstImageRow;
gpu_inputs_i[4] = firstTileRow;
gpu_inputs_i[5] = nRowsInBlock;
gpu_inputs_i[6] = nRowsInTile;
gpu_inputs_i[7] = int(flatten);
fracr = modf(j+ro, &kk);
if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
// 1000 lines per tile is peanuts for this algorithm to run (for test set is only 20M pixels)
dop = dopplerPoly->eval(i+1,j+1);
runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOutBlock[0], residAzPtr, residRgPtr,
&azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0],
&(rMethods.fintp[0]));
for (int i=0; i<nRowsInTile; i++) slcOutAccObj->setLineSequential((char *)&imgOutBlock[i*outWidth]);
#endif
} else {
for (int i=firstTileRow; i<(firstTileRow+nRowsInTile); i++) {
if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]);
if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]);
#pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \
firstprivate(chip)
for (int j=0; j<outWidth; j++) {
ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j];
ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j];
fraca = modf(i+ao, &k);
if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue;
fracr = modf(j+ro, &kk);
if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue;
dop = dopplerPoly->eval(i+1,j+1);
// Data chip without the carriers
for (int ii=0; ii<SINC_ONE; ii++) {
// Subtracting off firstImageRow removes the offset from the first row in the reference
// image to the first row actually contained in imgIn
chipi = k - firstImageRow + ii - SINC_HALF;
cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (int jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
// Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
}
// Data chip without the carriers
for (int ii=0; ii<SINC_ONE; ii++) {
// Subtracting off firstImageRow removes the offset from the first row in the reference
// image to the first row actually contained in imgIn
chipi = k - firstImageRow + ii - SINC_HALF;
cval = complex<float>(cos((ii-4.)*dop), -sin((ii-4.)*dop));
for (int jj=0; jj<SINC_ONE; jj++) {
chipj = kk + jj - SINC_HALF;
// Take out doppler in azimuth
chip[ii][jj] = imgIn[IDX1D(chipi,chipj,inWidth)] * cval;
}
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
}
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
// Doppler to be added back. Simultaneously evaluate carrier that needs to be added back after interpolation
ph = (dop * fraca) + rgCarrier->eval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro);
// Flatten the carrier if the user wants to
if (flatten) {
ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) +
((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl)));
}
ph = modulo_f(ph, 2.*M_PI);
cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD);
imgOut[j] = cval * complex<float>(cos(ph), sin(ph));
}
slcOutAccObj->setLineSequential((char *)&imgOut[0]);
}
}
printf("Elapsed time: %f\n", (omp_get_wtime()-t0));
}
void ResampSlc::_resamp_gpu()
{
vector<complex<float> > imgIn(inLength*inWidth);
vector<complex<float> > imgOut(outLength*outWidth);
vector<float> residAz(outLength*outWidth), residRg(outLength*outWidth);
ResampMethods rMethods;
DataAccessor *slcInAccObj = (DataAccessor*)slcInAccessor;
DataAccessor *slcOutAccObj = (DataAccessor*)slcOutAccessor;
DataAccessor *residRgAccObj, *residAzAccObj;
if (residRgAccessor != 0) residRgAccObj = (DataAccessor*)residRgAccessor;
else residRgAccObj = NULL;
if (residAzAccessor != 0) residAzAccObj = (DataAccessor*)residAzAccessor;
else residAzAccObj = NULL;
// Moving this here so we don't waste any time
if (!isComplex) {
printf("Real data interpolation not implemented yet.\n");
return;
}
double t0 = omp_get_wtime();
printf("\n << Resample one image to another image coordinates >> \n\n");
printf("Input Image Dimensions: %6d lines, %6d pixels\n\n", inLength, inWidth);
printf("Output Image Dimensions: %6d lines, %6d pixels\n\n", outLength, outWidth);
printf("Complex data interpolation\n");
rMethods.prepareMethods(SINC_METHOD);
printf("Azimuth Carrier Poly\n");
azCarrier->printPoly();
printf("Range Carrier Poly\n");
rgCarrier->printPoly();
printf("Range Offsets Poly\n");
rgOffsetsPoly->printPoly();
printf("Azimuth Offsets Poly\n");
azOffsetsPoly->printPoly();
printf("Doppler Poly\n");
dopplerPoly->printPoly();
printf("Reading in image data ... \n");
// read the whole input SLC image
for (int i=0; i<inLength; i++) {
slcInAccObj->getLineSequential((char *)&imgIn[i*inWidth]);
}
// read the residAz if providied
if (residAzAccessor != 0) {
for (int i=0; i<outLength; i++)
residAzAccObj->getLineSequential((char *)&residAz[i*outWidth]);
}
if (residRgAccessor != 0) {
for (int i=0; i<outLength; i++)
residRgAccObj->getLineSequential((char *)&residRg[i*outWidth]);
}
// set up and copy the Poly objects
vector<double> azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6);
vector<double> rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6);
vector<double> dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6);
vector<double> azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6);
vector<double> rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6);
copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...]
copyPolyToArr(rgOffsetsPoly, rgOffPolyArr);
copyPolyToArr(dopplerPoly, dopPolyArr);
copyPolyToArr(azCarrier, azCarPolyArr);
copyPolyToArr(rgCarrier, rgCarPolyArr);
double gpu_inputs_d[6];
int gpu_inputs_i[8];
gpu_inputs_d[0] = wvl;
gpu_inputs_d[1] = refwvl;
gpu_inputs_d[2] = r0;
gpu_inputs_d[3] = refr0;
gpu_inputs_d[4] = slr;
gpu_inputs_d[5] = refslr;
gpu_inputs_i[0] = inLength;
gpu_inputs_i[1] = inWidth;
gpu_inputs_i[2] = outWidth;
int firstImageRow = 0;
int firstTileRow = 0;
int nRowsInBlock = outLength;
gpu_inputs_i[3] = firstImageRow;
gpu_inputs_i[4] = firstTileRow;
gpu_inputs_i[5] = nRowsInBlock;
gpu_inputs_i[6] = outLength; //LINES_PER_TILE;
gpu_inputs_i[7] = int(flatten);
// call gpu routine
runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOut[0], &residAz[0], &residRg[0],
&azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0],
&(rMethods.fintp[0]));
// write the output file
for (int i=0; i<outLength; i++) slcOutAccObj->setLineSequential((char *)&imgOut[i*outWidth]);
printf("Elapsed time: %f\n", (omp_get_wtime()-t0));
// all done
}