PyCuAmpcor: more code cleanup
* replace tabs with spaces to align the code
* remove extra spaces at the end of lines
* add more docstrings
LT1AB
parent
38646456d3
commit
94caa1ea5c
|
@ -63,17 +63,17 @@ GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useM
|
||||||
if(cacheSizeInGB > 0)
|
if(cacheSizeInGB > 0)
|
||||||
papszOptions = CSLSetNameValue( papszOptions,
|
papszOptions = CSLSetNameValue( papszOptions,
|
||||||
"CACHE_SIZE",
|
"CACHE_SIZE",
|
||||||
std::to_string(_bufferSize).c_str());
|
std::to_string(_bufferSize).c_str());
|
||||||
|
|
||||||
// space between two lines
|
// space between two lines
|
||||||
GIntBig pnLineSpace;
|
GIntBig pnLineSpace;
|
||||||
// set up the virtual mem buffer
|
// set up the virtual mem buffer
|
||||||
_poBandVirtualMem = GDALGetVirtualMemAuto(
|
_poBandVirtualMem = GDALGetVirtualMemAuto(
|
||||||
static_cast<GDALRasterBandH>(_poBand),
|
static_cast<GDALRasterBandH>(_poBand),
|
||||||
GF_Read,
|
GF_Read,
|
||||||
&_pixelSize,
|
&_pixelSize,
|
||||||
&pnLineSpace,
|
&pnLineSpace,
|
||||||
papszOptions);
|
papszOptions);
|
||||||
if(!_poBandVirtualMem)
|
if(!_poBandVirtualMem)
|
||||||
throw;
|
throw;
|
||||||
|
|
||||||
|
|
|
@ -3,20 +3,20 @@ PROJECT = CUAMPCOR
|
||||||
LDFLAGS = -lcuda -lcudart -lcufft -lgdal
|
LDFLAGS = -lcuda -lcudart -lcufft -lgdal
|
||||||
CXXFLAGS = -std=c++11 -fpermissive -DNDEBUG -fPIC -shared
|
CXXFLAGS = -std=c++11 -fpermissive -DNDEBUG -fPIC -shared
|
||||||
NVCCFLAGS = -std=c++11 -m64 -DNDEBUG \
|
NVCCFLAGS = -std=c++11 -m64 -DNDEBUG \
|
||||||
-gencode arch=compute_35,code=sm_35 \
|
-gencode arch=compute_35,code=sm_35 \
|
||||||
-gencode arch=compute_60,code=sm_60 \
|
-gencode arch=compute_60,code=sm_60 \
|
||||||
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
|
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
|
||||||
-ftz=false -prec-div=true -prec-sqrt=true \
|
-ftz=false -prec-div=true -prec-sqrt=true \
|
||||||
-I/usr/include/gdal
|
-I/usr/include/gdal
|
||||||
|
|
||||||
CXX=g++
|
CXX=g++
|
||||||
NVCC=nvcc
|
NVCC=nvcc
|
||||||
|
|
||||||
DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h
|
DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h
|
||||||
OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
|
OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
|
||||||
cuSincOverSampler.o cuDeramp.o cuOffset.o \
|
cuSincOverSampler.o cuDeramp.o cuOffset.o \
|
||||||
cuCorrNormalization.o cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \
|
cuCorrNormalization.o cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \
|
||||||
cuAmpcorChunk.o cuAmpcorController.o cuEstimateStats.o
|
cuAmpcorChunk.o cuAmpcorController.o cuEstimateStats.o
|
||||||
|
|
||||||
all: pyampcor
|
all: pyampcor
|
||||||
|
|
||||||
|
|
|
@ -23,95 +23,97 @@ def version():
|
||||||
cdef extern from "cuAmpcorParameter.h":
|
cdef extern from "cuAmpcorParameter.h":
|
||||||
cdef cppclass cuAmpcorParameter:
|
cdef cppclass cuAmpcorParameter:
|
||||||
cuAmpcorParameter() except +
|
cuAmpcorParameter() except +
|
||||||
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
|
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
|
||||||
int deviceID ## Targeted GPU device ID
|
int deviceID ## Targeted GPU device ID
|
||||||
int nStreams ## Number of streams used to run data transfers and compute kernels asynchronously
|
int nStreams ## Number of streams used to run data transfers and compute kernels asynchronously
|
||||||
int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient
|
int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient
|
||||||
|
|
||||||
## chip or window size for raw data
|
## chip or window size for raw data
|
||||||
int windowSizeHeightRaw ## Template window height (original size)
|
int windowSizeHeightRaw ## Template window height (original size)
|
||||||
int windowSizeWidthRaw ## Template window width (original size)
|
int windowSizeWidthRaw ## Template window width (original size)
|
||||||
int searchWindowSizeHeightRaw ## Search window height (original size)
|
int searchWindowSizeHeightRaw ## Search window height (original size)
|
||||||
int searchWindowSizeWidthRaw ## Search window width (original size)
|
int searchWindowSizeWidthRaw ## Search window width (original size)
|
||||||
int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
||||||
int halfSearchRangeAcrossRaw ##(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
int halfSearchRangeAcrossRaw ##(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
||||||
## chip or window size after oversampling
|
## chip or window size after oversampling
|
||||||
int rawDataOversamplingFactor ## Raw data oversampling factor (from original size to oversampled size)
|
int rawDataOversamplingFactor ## Raw data oversampling factor (from original size to oversampled size)
|
||||||
|
|
||||||
## strides between chips/windows
|
## strides between chips/windows
|
||||||
int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size)
|
int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size)
|
||||||
int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size)
|
int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size)
|
||||||
|
|
||||||
int corrStatWindowSize ## Size of the raw correlation surface extracted for statistics
|
int corrStatWindowSize ## Size of the raw correlation surface extracted for statistics
|
||||||
|
|
||||||
## Zoom in region near location of max correlation
|
## Zoom in region near location of max correlation
|
||||||
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
|
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
|
||||||
int oversamplingFactor ## Oversampling factor for interpolating correlation surface
|
int oversamplingFactor ## Oversampling factor for interpolating correlation surface
|
||||||
int oversamplingMethod ## Correlation surface oversampling method 0=fft, 1=sinc
|
int oversamplingMethod ## Correlation surface oversampling method 0=fft, 1=sinc
|
||||||
|
|
||||||
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
|
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
|
||||||
|
|
||||||
##reference image
|
##reference image
|
||||||
string referenceImageName ## reference SLC image name
|
string referenceImageName ## reference SLC image name
|
||||||
int imageDataType1 ## reference image data type, 2=cfloat=complex=float2 1=float
|
int imageDataType1 ## reference image data type, 2=cfloat=complex=float2 1=float
|
||||||
int referenceImageHeight ## reference image height
|
int referenceImageHeight ## reference image height
|
||||||
int referenceImageWidth ## reference image width
|
int referenceImageWidth ## reference image width
|
||||||
|
|
||||||
##secondary image
|
##secondary image
|
||||||
string secondaryImageName ## secondary SLC image name
|
string secondaryImageName ## secondary SLC image name
|
||||||
int imageDataType2 ## secondary image data type, 2=cfloat=complex=float2 1=float
|
int imageDataType2 ## secondary image data type, 2=cfloat=complex=float2 1=float
|
||||||
int secondaryImageHeight ## secondary image height
|
int secondaryImageHeight ## secondary image height
|
||||||
int secondaryImageWidth ## secondary image width
|
int secondaryImageWidth ## secondary image width
|
||||||
|
|
||||||
int useMmap ## whether to use mmap
|
int useMmap ## whether to use mmap
|
||||||
int mmapSizeInGB ## mmap buffer size in unit of Gigabytes (if not mmap, the buffer size)
|
int mmapSizeInGB ## mmap buffer size in unit of Gigabytes (if not mmap, the buffer size)
|
||||||
|
|
||||||
## total number of chips/windows
|
## total number of chips/windows
|
||||||
int numberWindowDown ## number of total windows (down)
|
int numberWindowDown ## number of total windows (down)
|
||||||
int numberWindowAcross ## number of total windows (across)
|
int numberWindowAcross ## number of total windows (across)
|
||||||
int numberWindows ## numberWindowDown*numberWindowAcross
|
int numberWindows ## numberWindowDown*numberWindowAcross
|
||||||
|
|
||||||
## number of chips/windows in a batch/chunk
|
## number of chips/windows in a batch/chunk
|
||||||
int numberWindowDownInChunk ## number of windows processed in a chunk (down)
|
int numberWindowDownInChunk ## number of windows processed in a chunk (down)
|
||||||
int numberWindowAcrossInChunk ## number of windows processed in a chunk (across)
|
int numberWindowAcrossInChunk ## number of windows processed in a chunk (across)
|
||||||
int numberWindowsInChunk ## numberWindowDownInChunk*numberWindowAcrossInChunk
|
int numberWindowsInChunk ## numberWindowDownInChunk*numberWindowAcrossInChunk
|
||||||
int numberChunkDown ## number of chunks (down)
|
int numberChunkDown ## number of chunks (down)
|
||||||
int numberChunkAcross ## number of chunks (across)
|
int numberChunkAcross ## number of chunks (across)
|
||||||
int numberChunks
|
int numberChunks
|
||||||
|
|
||||||
int *referenceStartPixelDown ## reference starting pixels for each window (down)
|
int *referenceStartPixelDown ## reference starting pixels for each window (down)
|
||||||
int *referenceStartPixelAcross ## reference starting pixels for each window (across)
|
int *referenceStartPixelAcross ## reference starting pixels for each window (across)
|
||||||
int *secondaryStartPixelDown ## secondary starting pixels for each window (down)
|
int *secondaryStartPixelDown ## secondary starting pixels for each window (down)
|
||||||
int *secondaryStartPixelAcross ## secondary starting pixels for each window (across)
|
int *secondaryStartPixelAcross ## secondary starting pixels for each window (across)
|
||||||
int *grossOffsetDown ## Gross offsets between reference and secondary windows (down) : secondaryStartPixel - referenceStartPixel
|
int *grossOffsetDown ## Gross offsets between reference and secondary windows (down) : secondaryStartPixel - referenceStartPixel
|
||||||
int *grossOffsetAcross ## Gross offsets between reference and secondary windows (across)
|
int *grossOffsetAcross ## Gross offsets between reference and secondary windows (across)
|
||||||
int grossOffsetDown0 ## constant gross offset (down)
|
int grossOffsetDown0 ## constant gross offset (down)
|
||||||
int grossOffsetAcross0 ## constant gross offset (across)
|
int grossOffsetAcross0 ## constant gross offset (across)
|
||||||
int referenceStartPixelDown0 ## the first pixel of reference image (down), be adjusted with margins and gross offset
|
int referenceStartPixelDown0 ## the first pixel of reference image (down), be adjusted with margins and gross offset
|
||||||
int referenceStartPixelAcross0 ## the first pixel of reference image (across)
|
int referenceStartPixelAcross0 ## the first pixel of reference image (across)
|
||||||
int *referenceChunkStartPixelDown ## array of starting pixels for all reference chunks (down)
|
int *referenceChunkStartPixelDown ## array of starting pixels for all reference chunks (down)
|
||||||
int *referenceChunkStartPixelAcross ## array of starting pixels for all reference chunks (across)
|
int *referenceChunkStartPixelAcross ## array of starting pixels for all reference chunks (across)
|
||||||
int *secondaryChunkStartPixelDown ## array of starting pixels for all secondary chunks (down)
|
int *secondaryChunkStartPixelDown ## array of starting pixels for all secondary chunks (down)
|
||||||
int *secondaryChunkStartPixelAcross ## array of starting pixels for all secondary chunks (across)
|
int *secondaryChunkStartPixelAcross ## array of starting pixels for all secondary chunks (across)
|
||||||
int *referenceChunkHeight ## array of heights of all reference chunks, required when loading chunk to GPU
|
int *referenceChunkHeight ## array of heights of all reference chunks, required when loading chunk to GPU
|
||||||
int *referenceChunkWidth ## array of width of all reference chunks
|
int *referenceChunkWidth ## array of width of all reference chunks
|
||||||
int *secondaryChunkHeight ## array of heights of all secondary chunks
|
int *secondaryChunkHeight ## array of heights of all secondary chunks
|
||||||
int *secondaryChunkWidth ## array of width of all secondary chunks
|
int *secondaryChunkWidth ## array of width of all secondary chunks
|
||||||
int maxReferenceChunkHeight ## max height for all reference/secondary chunks, determine the size of reading cache in GPU
|
int maxReferenceChunkHeight ## max height for all reference chunks, determine the size of reading cache in GPU
|
||||||
int maxReferenceChunkWidth ## max width for all reference chunks, determine the size of reading cache in GPU
|
int maxReferenceChunkWidth ## max width for all reference chunks, determine the size of reading cache in GPU
|
||||||
int maxSecondaryChunkHeight
|
int maxSecondaryChunkHeight ## max height for secondary chunk
|
||||||
int maxSecondaryChunkWidth
|
int maxSecondaryChunkWidth ## max width for secondary chunk
|
||||||
|
|
||||||
string grossOffsetImageName
|
string grossOffsetImageName ## Output Gross Offset fields filename
|
||||||
string offsetImageName ## Output Offset fields filename
|
string offsetImageName ## Output Offset fields filename
|
||||||
string snrImageName ## Output SNR filename
|
string snrImageName ## Output SNR filename
|
||||||
string covImageName ## Output COV filename
|
string covImageName ## Output COV filename
|
||||||
void setStartPixels(int*, int*, int*, int*)
|
|
||||||
void setStartPixels(int, int, int*, int*)
|
|
||||||
void setStartPixels(int, int, int, int)
|
|
||||||
void checkPixelInImageRange() ## check whether
|
|
||||||
|
|
||||||
void setupParameters() ## Process other parameters after Python Input
|
## set start pixels for reference/secondary windows
|
||||||
|
void setStartPixels(int*, int*, int*, int*) ## varying locations for reference and secondary
|
||||||
|
void setStartPixels(int, int, int*, int*) ## first window location for reference, varying for secondary
|
||||||
|
void setStartPixels(int, int, int, int) ## first window locations for reference and secondary
|
||||||
|
|
||||||
|
void checkPixelInImageRange() ## check whether all windows are within image range
|
||||||
|
void setupParameters() ## Process other parameters after Python Input
|
||||||
|
|
||||||
cdef extern from "cuAmpcorController.h":
|
cdef extern from "cuAmpcorController.h":
|
||||||
cdef cppclass cuAmpcorController:
|
cdef cppclass cuAmpcorController:
|
||||||
|
@ -326,8 +328,7 @@ cdef class PyCuAmpcor(object):
|
||||||
def numberChunks(self):
|
def numberChunks(self):
|
||||||
return self.c_cuAmpcor.param.numberChunks
|
return self.c_cuAmpcor.param.numberChunks
|
||||||
|
|
||||||
|
## gross offset
|
||||||
## gross offets
|
|
||||||
@property
|
@property
|
||||||
def grossOffsetImageName(self):
|
def grossOffsetImageName(self):
|
||||||
return self.c_cuAmpcor.param.grossOffsetImageName.decode("utf-8")
|
return self.c_cuAmpcor.param.grossOffsetImageName.decode("utf-8")
|
||||||
|
@ -449,7 +450,3 @@ cdef class PyCuAmpcor(object):
|
||||||
|
|
||||||
|
|
||||||
# end of file
|
# end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -216,21 +216,21 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
||||||
{
|
{
|
||||||
idxChunkDown = idxDown_;
|
idxChunkDown = idxDown_;
|
||||||
idxChunkAcross = idxAcross_;
|
idxChunkAcross = idxAcross_;
|
||||||
idxChunk = idxChunkAcross + idxChunkDown*param->numberChunkAcross;
|
idxChunk = idxChunkAcross + idxChunkDown*param->numberChunkAcross;
|
||||||
|
|
||||||
if(idxChunkDown == param->numberChunkDown -1) {
|
if(idxChunkDown == param->numberChunkDown -1) {
|
||||||
nWindowsDown = param->numberWindowDown - param->numberWindowDownInChunk*(param->numberChunkDown -1);
|
nWindowsDown = param->numberWindowDown - param->numberWindowDownInChunk*(param->numberChunkDown -1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
nWindowsDown = param->numberWindowDownInChunk;
|
nWindowsDown = param->numberWindowDownInChunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(idxChunkAcross == param->numberChunkAcross -1) {
|
if(idxChunkAcross == param->numberChunkAcross -1) {
|
||||||
nWindowsAcross = param->numberWindowAcross - param->numberWindowAcrossInChunk*(param->numberChunkAcross -1);
|
nWindowsAcross = param->numberWindowAcross - param->numberWindowAcrossInChunk*(param->numberChunkAcross -1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
nWindowsAcross = param->numberWindowAcrossInChunk;
|
nWindowsAcross = param->numberWindowAcrossInChunk;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// obtain the starting pixels for each chip
|
/// obtain the starting pixels for each chip
|
||||||
|
@ -239,14 +239,14 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
||||||
void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff)
|
void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff)
|
||||||
{
|
{
|
||||||
for(int i=0; i<param->numberWindowDownInChunk; ++i) {
|
for(int i=0; i<param->numberWindowDownInChunk; ++i) {
|
||||||
int iDown = i;
|
int iDown = i;
|
||||||
if(i>=nWindowsDown) iDown = nWindowsDown-1;
|
if(i>=nWindowsDown) iDown = nWindowsDown-1;
|
||||||
for(int j=0; j<param->numberWindowAcrossInChunk; ++j){
|
for(int j=0; j<param->numberWindowAcrossInChunk; ++j){
|
||||||
int iAcross = j;
|
int iAcross = j;
|
||||||
if(j>=nWindowsAcross) iAcross = nWindowsAcross-1;
|
if(j>=nWindowsAcross) iAcross = nWindowsAcross-1;
|
||||||
int idxInChunk = iDown*param->numberWindowAcrossInChunk+iAcross;
|
int idxInChunk = iDown*param->numberWindowAcrossInChunk+iAcross;
|
||||||
int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross
|
int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross
|
||||||
+ idxChunkAcross*param->numberWindowAcrossInChunk+iAcross;
|
+ idxChunkAcross*param->numberWindowAcrossInChunk+iAcross;
|
||||||
rStartPixel[idxInChunk] = oStartPixel[idxInAll] - diff;
|
rStartPixel[idxInChunk] = oStartPixel[idxInAll] - diff;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -414,23 +414,23 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
c_secondaryBatchZoomIn->allocate();
|
c_secondaryBatchZoomIn->allocate();
|
||||||
|
|
||||||
c_referenceBatchOverSampled = new cuArrays<float2> (
|
c_referenceBatchOverSampled = new cuArrays<float2> (
|
||||||
param->windowSizeHeight, param->windowSizeWidth,
|
param->windowSizeHeight, param->windowSizeWidth,
|
||||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
c_referenceBatchOverSampled->allocate();
|
c_referenceBatchOverSampled->allocate();
|
||||||
|
|
||||||
c_secondaryBatchOverSampled = new cuArrays<float2> (
|
c_secondaryBatchOverSampled = new cuArrays<float2> (
|
||||||
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
|
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
|
||||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
c_secondaryBatchOverSampled->allocate();
|
c_secondaryBatchOverSampled->allocate();
|
||||||
|
|
||||||
r_referenceBatchOverSampled = new cuArrays<float> (
|
r_referenceBatchOverSampled = new cuArrays<float> (
|
||||||
param->windowSizeHeight, param->windowSizeWidth,
|
param->windowSizeHeight, param->windowSizeWidth,
|
||||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
r_referenceBatchOverSampled->allocate();
|
r_referenceBatchOverSampled->allocate();
|
||||||
|
|
||||||
r_secondaryBatchOverSampled = new cuArrays<float> (
|
r_secondaryBatchOverSampled = new cuArrays<float> (
|
||||||
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
|
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
|
||||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
r_secondaryBatchOverSampled->allocate();
|
r_secondaryBatchOverSampled->allocate();
|
||||||
|
|
||||||
referenceBatchOverSampler = new cuOverSamplerC2C(
|
referenceBatchOverSampler = new cuOverSamplerC2C(
|
||||||
|
@ -442,24 +442,24 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
c_secondaryBatchOverSampled->height, c_secondaryBatchOverSampled->width, c_secondaryBatchRaw->count, stream);
|
c_secondaryBatchOverSampled->height, c_secondaryBatchOverSampled->width, c_secondaryBatchRaw->count, stream);
|
||||||
|
|
||||||
r_corrBatchRaw = new cuArrays<float> (
|
r_corrBatchRaw = new cuArrays<float> (
|
||||||
param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1,
|
param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1,
|
||||||
param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1,
|
param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1,
|
||||||
param->numberWindowDownInChunk,
|
param->numberWindowDownInChunk,
|
||||||
param->numberWindowAcrossInChunk);
|
param->numberWindowAcrossInChunk);
|
||||||
r_corrBatchRaw->allocate();
|
r_corrBatchRaw->allocate();
|
||||||
|
|
||||||
r_corrBatchZoomIn = new cuArrays<float> (
|
r_corrBatchZoomIn = new cuArrays<float> (
|
||||||
param->searchWindowSizeHeight - param->windowSizeHeight+1,
|
param->searchWindowSizeHeight - param->windowSizeHeight+1,
|
||||||
param->searchWindowSizeWidth - param->windowSizeWidth+1,
|
param->searchWindowSizeWidth - param->windowSizeWidth+1,
|
||||||
param->numberWindowDownInChunk,
|
param->numberWindowDownInChunk,
|
||||||
param->numberWindowAcrossInChunk);
|
param->numberWindowAcrossInChunk);
|
||||||
r_corrBatchZoomIn->allocate();
|
r_corrBatchZoomIn->allocate();
|
||||||
|
|
||||||
r_corrBatchZoomInAdjust = new cuArrays<float> (
|
r_corrBatchZoomInAdjust = new cuArrays<float> (
|
||||||
param->searchWindowSizeHeight - param->windowSizeHeight,
|
param->searchWindowSizeHeight - param->windowSizeHeight,
|
||||||
param->searchWindowSizeWidth - param->windowSizeWidth,
|
param->searchWindowSizeWidth - param->windowSizeWidth,
|
||||||
param->numberWindowDownInChunk,
|
param->numberWindowDownInChunk,
|
||||||
param->numberWindowAcrossInChunk);
|
param->numberWindowAcrossInChunk);
|
||||||
r_corrBatchZoomInAdjust->allocate();
|
r_corrBatchZoomInAdjust->allocate();
|
||||||
|
|
||||||
|
|
||||||
|
@ -488,17 +488,17 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
|
|
||||||
// new arrays due to snr estimation
|
// new arrays due to snr estimation
|
||||||
r_corrBatchRawZoomIn = new cuArrays<float> (
|
r_corrBatchRawZoomIn = new cuArrays<float> (
|
||||||
param->corrRawZoomInHeight,
|
param->corrRawZoomInHeight,
|
||||||
param->corrRawZoomInWidth,
|
param->corrRawZoomInWidth,
|
||||||
param->numberWindowDownInChunk,
|
param->numberWindowDownInChunk,
|
||||||
param->numberWindowAcrossInChunk);
|
param->numberWindowAcrossInChunk);
|
||||||
r_corrBatchRawZoomIn->allocate();
|
r_corrBatchRawZoomIn->allocate();
|
||||||
|
|
||||||
i_corrBatchZoomInValid = new cuArrays<int> (
|
i_corrBatchZoomInValid = new cuArrays<int> (
|
||||||
param->corrRawZoomInHeight,
|
param->corrRawZoomInHeight,
|
||||||
param->corrRawZoomInWidth,
|
param->corrRawZoomInWidth,
|
||||||
param->numberWindowDownInChunk,
|
param->numberWindowDownInChunk,
|
||||||
param->numberWindowAcrossInChunk);
|
param->numberWindowAcrossInChunk);
|
||||||
i_corrBatchZoomInValid->allocate();
|
i_corrBatchZoomInValid->allocate();
|
||||||
|
|
||||||
|
|
||||||
|
@ -535,11 +535,11 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize,
|
corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize,
|
||||||
(param->zoomWindowSize)*param->oversamplingFactor,
|
(param->zoomWindowSize)*param->oversamplingFactor,
|
||||||
(param->zoomWindowSize)*param->oversamplingFactor,
|
(param->zoomWindowSize)*param->oversamplingFactor,
|
||||||
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
|
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
|
||||||
stream);
|
stream);
|
||||||
}
|
}
|
||||||
if(param->algorithm == 0) {
|
if(param->algorithm == 0) {
|
||||||
cuCorrFreqDomain = new cuFreqCorrelator(
|
cuCorrFreqDomain = new cuFreqCorrelator(
|
||||||
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
|
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
|
||||||
|
|
|
@ -22,27 +22,27 @@
|
||||||
class cuAmpcorChunk{
|
class cuAmpcorChunk{
|
||||||
private:
|
private:
|
||||||
int idxChunkDown; ///< index of the chunk in total batches, down
|
int idxChunkDown; ///< index of the chunk in total batches, down
|
||||||
int idxChunkAcross; ///< index of the chunk in total batches, across
|
int idxChunkAcross; ///< index of the chunk in total batches, across
|
||||||
int idxChunk; ///<
|
int idxChunk; ///<
|
||||||
int nWindowsDown; ///< number of windows in one chunk, down
|
int nWindowsDown; ///< number of windows in one chunk, down
|
||||||
int nWindowsAcross; ///< number of windows in one chunk, across
|
int nWindowsAcross; ///< number of windows in one chunk, across
|
||||||
|
|
||||||
int devId; ///< GPU device ID to use
|
int devId; ///< GPU device ID to use
|
||||||
cudaStream_t stream; ///< CUDA stream to use
|
cudaStream_t stream; ///< CUDA stream to use
|
||||||
|
|
||||||
GDALImage *referenceImage; ///< reference image object
|
GDALImage *referenceImage; ///< reference image object
|
||||||
GDALImage *secondaryImage; ///< secondary image object
|
GDALImage *secondaryImage; ///< secondary image object
|
||||||
cuAmpcorParameter *param; ///< reference to the (global) parameters
|
cuAmpcorParameter *param; ///< reference to the (global) parameters
|
||||||
cuArrays<float2> *offsetImage; ///< output offsets image
|
cuArrays<float2> *offsetImage; ///< output offsets image
|
||||||
cuArrays<float> *snrImage; ///< snr image
|
cuArrays<float> *snrImage; ///< snr image
|
||||||
cuArrays<float3> *covImage; ///< cov image
|
cuArrays<float3> *covImage; ///< cov image
|
||||||
|
|
||||||
// local variables and workers
|
// local variables and workers
|
||||||
// gpu buffer to load images from file
|
// gpu buffer to load images from file
|
||||||
cuArrays<float2> * c_referenceChunkRaw, * c_secondaryChunkRaw;
|
cuArrays<float2> * c_referenceChunkRaw, * c_secondaryChunkRaw;
|
||||||
cuArrays<float> * r_referenceChunkRaw, * r_secondaryChunkRaw;
|
cuArrays<float> * r_referenceChunkRaw, * r_secondaryChunkRaw;
|
||||||
|
|
||||||
// windows raw (not oversampled) data, complex and real
|
// windows raw (not oversampled) data, complex and real
|
||||||
cuArrays<float2> * c_referenceBatchRaw, * c_secondaryBatchRaw, * c_secondaryBatchZoomIn;
|
cuArrays<float2> * c_referenceBatchRaw, * c_secondaryBatchRaw, * c_secondaryBatchZoomIn;
|
||||||
cuArrays<float> * r_referenceBatchRaw, * r_secondaryBatchRaw;
|
cuArrays<float> * r_referenceBatchRaw, * r_secondaryBatchRaw;
|
||||||
|
|
||||||
|
@ -55,20 +55,20 @@ private:
|
||||||
cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross;
|
cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross;
|
||||||
|
|
||||||
// oversampling processors for complex images
|
// oversampling processors for complex images
|
||||||
cuOverSamplerC2C *referenceBatchOverSampler, *secondaryBatchOverSampler;
|
cuOverSamplerC2C *referenceBatchOverSampler, *secondaryBatchOverSampler;
|
||||||
|
|
||||||
// oversampling processor for correlation surface
|
// oversampling processor for correlation surface
|
||||||
cuOverSamplerR2R *corrOverSampler;
|
cuOverSamplerR2R *corrOverSampler;
|
||||||
cuSincOverSamplerR2R *corrSincOverSampler;
|
cuSincOverSamplerR2R *corrSincOverSampler;
|
||||||
|
|
||||||
// cross-correlation processor with frequency domain algorithm
|
// cross-correlation processor with frequency domain algorithm
|
||||||
cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled;
|
cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled;
|
||||||
|
|
||||||
// save offset results in different stages
|
// save offset results in different stages
|
||||||
cuArrays<int2> *offsetInit;
|
cuArrays<int2> *offsetInit;
|
||||||
cuArrays<int2> *offsetZoomIn;
|
cuArrays<int2> *offsetZoomIn;
|
||||||
cuArrays<float2> *offsetFinal;
|
cuArrays<float2> *offsetFinal;
|
||||||
cuArrays<int2> *maxLocShift; //record the maxloc from the extract center
|
cuArrays<int2> *maxLocShift; // record the maxloc from the extract center
|
||||||
cuArrays<float> *corrMaxValue;
|
cuArrays<float> *corrMaxValue;
|
||||||
cuArrays<int2> *i_maxloc;
|
cuArrays<int2> *i_maxloc;
|
||||||
cuArrays<float> *r_maxval;
|
cuArrays<float> *r_maxval;
|
||||||
|
@ -79,25 +79,25 @@ private:
|
||||||
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
|
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
|
||||||
cuArrays<float> *r_snrValue;
|
cuArrays<float> *r_snrValue;
|
||||||
|
|
||||||
// Variance estimation.
|
// Variance estimation
|
||||||
cuArrays<float3> *r_covValue;
|
cuArrays<float3> *r_covValue;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// constructor
|
// constructor
|
||||||
cuAmpcorChunk(cuAmpcorParameter *param_,
|
cuAmpcorChunk(cuAmpcorParameter *param_,
|
||||||
GDALImage *reference_, GDALImage *secondary_,
|
GDALImage *reference_, GDALImage *secondary_,
|
||||||
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_,
|
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_,
|
||||||
cuArrays<float3> *covImage_, cudaStream_t stream_);
|
cuArrays<float3> *covImage_, cudaStream_t stream_);
|
||||||
|
// destructor
|
||||||
|
~cuAmpcorChunk();
|
||||||
|
|
||||||
//
|
// local methods
|
||||||
void setIndex(int idxDown_, int idxAcross_);
|
void setIndex(int idxDown_, int idxAcross_);
|
||||||
void loadReferenceChunk();
|
void loadReferenceChunk();
|
||||||
void loadSecondaryChunk();
|
void loadSecondaryChunk();
|
||||||
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
|
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
|
||||||
|
// run the given chunk
|
||||||
~cuAmpcorChunk();
|
void run(int, int);
|
||||||
|
|
||||||
void run(int, int);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -154,12 +154,12 @@ void cuAmpcorParameter::setStartPixels(int *mStartD, int *mStartA, int *gOffsetD
|
||||||
{
|
{
|
||||||
for(int i=0; i<numberWindows; i++)
|
for(int i=0; i<numberWindows; i++)
|
||||||
{
|
{
|
||||||
referenceStartPixelDown[i] = mStartD[i];
|
referenceStartPixelDown[i] = mStartD[i];
|
||||||
grossOffsetDown[i] = gOffsetD[i];
|
grossOffsetDown[i] = gOffsetD[i];
|
||||||
secondaryStartPixelDown[i] = referenceStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
|
secondaryStartPixelDown[i] = referenceStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
|
||||||
referenceStartPixelAcross[i] = mStartA[i];
|
referenceStartPixelAcross[i] = mStartA[i];
|
||||||
grossOffsetAcross[i] = gOffsetA[i];
|
grossOffsetAcross[i] = gOffsetA[i];
|
||||||
secondaryStartPixelAcross[i] = referenceStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
secondaryStartPixelAcross[i] = referenceStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
||||||
}
|
}
|
||||||
setChunkStartPixels();
|
setChunkStartPixels();
|
||||||
}
|
}
|
||||||
|
@ -169,16 +169,16 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int *gOffsetD,
|
||||||
{
|
{
|
||||||
for(int row=0; row<numberWindowDown; row++)
|
for(int row=0; row<numberWindowDown; row++)
|
||||||
{
|
{
|
||||||
for(int col = 0; col < numberWindowAcross; col++)
|
for(int col = 0; col < numberWindowAcross; col++)
|
||||||
{
|
{
|
||||||
int i = row*numberWindowAcross + col;
|
int i = row*numberWindowAcross + col;
|
||||||
referenceStartPixelDown[i] = mStartD + row*skipSampleDownRaw;
|
referenceStartPixelDown[i] = mStartD + row*skipSampleDownRaw;
|
||||||
grossOffsetDown[i] = gOffsetD[i];
|
grossOffsetDown[i] = gOffsetD[i];
|
||||||
secondaryStartPixelDown[i] = referenceStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
|
secondaryStartPixelDown[i] = referenceStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
|
||||||
referenceStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
|
referenceStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
|
||||||
grossOffsetAcross[i] = gOffsetA[i];
|
grossOffsetAcross[i] = gOffsetA[i];
|
||||||
secondaryStartPixelAcross[i] = referenceStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
secondaryStartPixelAcross[i] = referenceStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setChunkStartPixels();
|
setChunkStartPixels();
|
||||||
}
|
}
|
||||||
|
@ -188,16 +188,16 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int gOffsetD, i
|
||||||
{
|
{
|
||||||
for(int row=0; row<numberWindowDown; row++)
|
for(int row=0; row<numberWindowDown; row++)
|
||||||
{
|
{
|
||||||
for(int col = 0; col < numberWindowAcross; col++)
|
for(int col = 0; col < numberWindowAcross; col++)
|
||||||
{
|
{
|
||||||
int i = row*numberWindowAcross + col;
|
int i = row*numberWindowAcross + col;
|
||||||
referenceStartPixelDown[i] = mStartD + row*skipSampleDownRaw;
|
referenceStartPixelDown[i] = mStartD + row*skipSampleDownRaw;
|
||||||
grossOffsetDown[i] = gOffsetD;
|
grossOffsetDown[i] = gOffsetD;
|
||||||
secondaryStartPixelDown[i] = referenceStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
|
secondaryStartPixelDown[i] = referenceStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
|
||||||
referenceStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
|
referenceStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
|
||||||
grossOffsetAcross[i] = gOffsetA;
|
grossOffsetAcross[i] = gOffsetA;
|
||||||
secondaryStartPixelAcross[i] = referenceStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
secondaryStartPixelAcross[i] = referenceStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setChunkStartPixels();
|
setChunkStartPixels();
|
||||||
}
|
}
|
||||||
|
@ -227,12 +227,12 @@ void cuAmpcorParameter::setChunkStartPixels()
|
||||||
int sChunkEA = 0;
|
int sChunkEA = 0;
|
||||||
|
|
||||||
int numberWindowDownInChunkRun = numberWindowDownInChunk;
|
int numberWindowDownInChunkRun = numberWindowDownInChunk;
|
||||||
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
|
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
|
||||||
// modify the number of windows in last chunk
|
// modify the number of windows in last chunk
|
||||||
if(ichunk == numberChunkDown -1)
|
if(ichunk == numberChunkDown -1)
|
||||||
numberWindowDownInChunkRun = numberWindowDown - numberWindowDownInChunk*(numberChunkDown -1);
|
numberWindowDownInChunkRun = numberWindowDown - numberWindowDownInChunk*(numberChunkDown -1);
|
||||||
if(jchunk == numberChunkAcross -1)
|
if(jchunk == numberChunkAcross -1)
|
||||||
numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1);
|
numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1);
|
||||||
|
|
||||||
for(int i=0; i<numberWindowDownInChunkRun; i++)
|
for(int i=0; i<numberWindowDownInChunkRun; i++)
|
||||||
{
|
{
|
||||||
|
@ -272,65 +272,65 @@ void cuAmpcorParameter::setChunkStartPixels()
|
||||||
/// check whether reference and secondary windows are within the image range
|
/// check whether reference and secondary windows are within the image range
|
||||||
void cuAmpcorParameter::checkPixelInImageRange()
|
void cuAmpcorParameter::checkPixelInImageRange()
|
||||||
{
|
{
|
||||||
int endPixel;
|
int endPixel;
|
||||||
for(int row=0; row<numberWindowDown; row++)
|
for(int row=0; row<numberWindowDown; row++)
|
||||||
{
|
{
|
||||||
for(int col = 0; col < numberWindowAcross; col++)
|
for(int col = 0; col < numberWindowAcross; col++)
|
||||||
{
|
{
|
||||||
int i = row*numberWindowAcross + col;
|
int i = row*numberWindowAcross + col;
|
||||||
if(referenceStartPixelDown[i] <0)
|
if(referenceStartPixelDown[i] <0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Reference Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, referenceStartPixelDown[i]);
|
fprintf(stderr, "Reference Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, referenceStartPixelDown[i]);
|
||||||
exit(EXIT_FAILURE); //or raise range error
|
exit(EXIT_FAILURE); //or raise range error
|
||||||
}
|
}
|
||||||
if(referenceStartPixelAcross[i] <0)
|
if(referenceStartPixelAcross[i] <0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Reference Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, referenceStartPixelAcross[i]);
|
fprintf(stderr, "Reference Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, referenceStartPixelAcross[i]);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
endPixel = referenceStartPixelDown[i] + windowSizeHeightRaw;
|
endPixel = referenceStartPixelDown[i] + windowSizeHeightRaw;
|
||||||
if(endPixel >= referenceImageHeight)
|
if(endPixel >= referenceImageHeight)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Reference Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
|
fprintf(stderr, "Reference Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
endPixel = referenceStartPixelAcross[i] + windowSizeWidthRaw;
|
endPixel = referenceStartPixelAcross[i] + windowSizeWidthRaw;
|
||||||
if(endPixel >= referenceImageWidth)
|
if(endPixel >= referenceImageWidth)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Reference Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
|
fprintf(stderr, "Reference Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
//secondary
|
//secondary
|
||||||
if(secondaryStartPixelDown[i] <0)
|
if(secondaryStartPixelDown[i] <0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Secondary Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, secondaryStartPixelDown[i]);
|
fprintf(stderr, "Secondary Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, secondaryStartPixelDown[i]);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
if(secondaryStartPixelAcross[i] <0)
|
if(secondaryStartPixelAcross[i] <0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Secondary Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, secondaryStartPixelAcross[i]);
|
fprintf(stderr, "Secondary Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, secondaryStartPixelAcross[i]);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
endPixel = secondaryStartPixelDown[i] + searchWindowSizeHeightRaw;
|
endPixel = secondaryStartPixelDown[i] + searchWindowSizeHeightRaw;
|
||||||
if(endPixel >= secondaryImageHeight)
|
if(endPixel >= secondaryImageHeight)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Secondary Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
|
fprintf(stderr, "Secondary Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
endPixel = secondaryStartPixelAcross[i] + searchWindowSizeWidthRaw;
|
endPixel = secondaryStartPixelAcross[i] + searchWindowSizeWidthRaw;
|
||||||
if(endPixel >= secondaryImageWidth)
|
if(endPixel >= secondaryImageWidth)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Secondary Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
|
fprintf(stderr, "Secondary Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cuAmpcorParameter::~cuAmpcorParameter()
|
cuAmpcorParameter::~cuAmpcorParameter()
|
||||||
{
|
{
|
||||||
deallocateArrays();
|
deallocateArrays();
|
||||||
}
|
}
|
||||||
// end of file
|
// end of file
|
|
@ -44,7 +44,7 @@ public:
|
||||||
|
|
||||||
int halfSearchRangeDownRaw; ///< (searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
int halfSearchRangeDownRaw; ///< (searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
||||||
int halfSearchRangeAcrossRaw; ///< (searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
int halfSearchRangeAcrossRaw; ///< (searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
||||||
// search range is (-halfSearchRangeRaw, halfSearchRangeRaw)
|
// search range is (-halfSearchRangeRaw, halfSearchRangeRaw)
|
||||||
|
|
||||||
int searchWindowSizeHeightRawZoomIn; ///< search window height used for zoom in
|
int searchWindowSizeHeightRawZoomIn; ///< search window height used for zoom in
|
||||||
int searchWindowSizeWidthRawZoomIn; ///< search window width used for zoom in
|
int searchWindowSizeWidthRawZoomIn; ///< search window width used for zoom in
|
||||||
|
@ -141,9 +141,9 @@ public:
|
||||||
|
|
||||||
|
|
||||||
// Three methods to set reference/secondary starting pixels and gross offsets from input reference start pixel(s) and gross offset(s)
|
// Three methods to set reference/secondary starting pixels and gross offsets from input reference start pixel(s) and gross offset(s)
|
||||||
// 1 (int *, int *, int *, int *): varying reference start pixels and gross offsets
|
// 1 (int *, int *, int *, int *): varying reference start pixels and gross offsets
|
||||||
// 2 (int, int, int *, int *): fixed reference start pixel (first window) and varying gross offsets
|
// 2 (int, int, int *, int *): fixed reference start pixel (first window) and varying gross offsets
|
||||||
// 3 (int, int, int, int): fixed reference start pixel(first window) and fixed gross offsets
|
// 3 (int, int, int, int): fixed reference start pixel(first window) and fixed gross offsets
|
||||||
void setStartPixels(int*, int*, int*, int*);
|
void setStartPixels(int*, int*, int*, int*);
|
||||||
void setStartPixels(int, int, int*, int*);
|
void setStartPixels(int, int, int*, int*);
|
||||||
void setStartPixels(int, int, int, int);
|
void setStartPixels(int, int, int, int);
|
||||||
|
|
|
@ -20,11 +20,11 @@
|
||||||
//in cuArraysCopy.cu: various utilities for copy images file in gpu memory
|
//in cuArraysCopy.cu: various utilities for copy images file in gpu memory
|
||||||
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream);
|
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream);
|
||||||
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||||
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
||||||
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||||
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
||||||
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
||||||
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
||||||
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream);
|
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream);
|
||||||
|
|
||||||
// same routine name overloaded for different data type
|
// same routine name overloaded for different data type
|
||||||
|
@ -94,3 +94,5 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
|
||||||
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
|
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
|
@ -58,7 +58,7 @@ public:
|
||||||
size = w*h;
|
size = w*h;
|
||||||
}
|
}
|
||||||
|
|
||||||
// constructor for multiple images with (countH, countW)
|
// constructor for multiple images with (countH, countW)
|
||||||
cuArrays(size_t h, size_t w, size_t ch, size_t cw) : width(w), height(h), countW(cw), countH(ch),
|
cuArrays(size_t h, size_t w, size_t ch, size_t cw) : width(w), height(h), countW(cw), countH(ch),
|
||||||
is_allocated(0), is_allocatedHost(0),
|
is_allocated(0), is_allocatedHost(0),
|
||||||
devData(0), hostData(0)
|
devData(0), hostData(0)
|
||||||
|
@ -77,7 +77,7 @@ public:
|
||||||
void copyToHost(cudaStream_t stream);
|
void copyToHost(cudaStream_t stream);
|
||||||
void copyToDevice(cudaStream_t stream);
|
void copyToDevice(cudaStream_t stream);
|
||||||
|
|
||||||
// get the total size
|
// get the total size
|
||||||
size_t getSize()
|
size_t getSize()
|
||||||
{
|
{
|
||||||
return size*count;
|
return size*count;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -8,24 +8,24 @@
|
||||||
|
|
||||||
// cuda kernel for cuArraysPadding
|
// cuda kernel for cuArraysPadding
|
||||||
__global__ void cuArraysPadding_kernel(
|
__global__ void cuArraysPadding_kernel(
|
||||||
const float2 *image1, const int height1, const int width1,
|
const float2 *image1, const int height1, const int width1,
|
||||||
float2 *image2, const int height2, const int width2)
|
float2 *image2, const int height2, const int width2)
|
||||||
{
|
{
|
||||||
int tx = threadIdx.x + blockDim.x*blockIdx.x;
|
int tx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
int ty = threadIdx.y + blockDim.y*blockIdx.y;
|
int ty = threadIdx.y + blockDim.y*blockIdx.y;
|
||||||
if(tx < height1/2 && ty < width1/2)
|
if(tx < height1/2 && ty < width1/2)
|
||||||
{
|
{
|
||||||
int tx1 = height1 - 1 - tx;
|
int tx1 = height1 - 1 - tx;
|
||||||
int ty1 = width1 -1 -ty;
|
int ty1 = width1 -1 -ty;
|
||||||
int tx2 = height2 -1 -tx;
|
int tx2 = height2 -1 -tx;
|
||||||
int ty2 = width2 -1 -ty;
|
int ty2 = width2 -1 -ty;
|
||||||
|
|
||||||
image2[IDX2R(tx, ty, width2)] = image1[IDX2R(tx, ty, width1)];
|
image2[IDX2R(tx, ty, width2)] = image1[IDX2R(tx, ty, width1)];
|
||||||
image2[IDX2R(tx2, ty, width2)] = image1[IDX2R(tx1, ty, width1)];
|
image2[IDX2R(tx2, ty, width2)] = image1[IDX2R(tx1, ty, width1)];
|
||||||
image2[IDX2R(tx, ty2, width2)] = image1[IDX2R(tx, ty1, width1)];
|
image2[IDX2R(tx, ty2, width2)] = image1[IDX2R(tx, ty1, width1)];
|
||||||
image2[IDX2R(tx2, ty2, width2)] = image1[IDX2R(tx1, ty1, width1)];
|
image2[IDX2R(tx2, ty2, width2)] = image1[IDX2R(tx1, ty1, width1)];
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -36,48 +36,48 @@ __global__ void cuArraysPadding_kernel(
|
||||||
*/
|
*/
|
||||||
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int ThreadsPerBlock = NTHREADS2D;
|
int ThreadsPerBlock = NTHREADS2D;
|
||||||
int BlockPerGridx = IDIVUP (image1->height/2, ThreadsPerBlock);
|
int BlockPerGridx = IDIVUP (image1->height/2, ThreadsPerBlock);
|
||||||
int BlockPerGridy = IDIVUP (image1->width/2, ThreadsPerBlock);
|
int BlockPerGridy = IDIVUP (image1->width/2, ThreadsPerBlock);
|
||||||
dim3 dimBlock(ThreadsPerBlock, ThreadsPerBlock);
|
dim3 dimBlock(ThreadsPerBlock, ThreadsPerBlock);
|
||||||
dim3 dimGrid(BlockPerGridx, BlockPerGridy);
|
dim3 dimGrid(BlockPerGridx, BlockPerGridy);
|
||||||
// set output image to 0
|
// set output image to 0
|
||||||
checkCudaErrors(cudaMemsetAsync(image2->devData, 0, image2->getByteSize(),stream));
|
checkCudaErrors(cudaMemsetAsync(image2->devData, 0, image2->getByteSize(),stream));
|
||||||
// copy the quads of input images to four corners of the output images
|
// copy the quads of input images to four corners of the output images
|
||||||
cuArraysPadding_kernel<<<dimGrid, dimBlock, 0, stream>>>(
|
cuArraysPadding_kernel<<<dimGrid, dimBlock, 0, stream>>>(
|
||||||
image1->devData, image1->height, image1->width,
|
image1->devData, image1->height, image1->width,
|
||||||
image2->devData, image2->height, image2->width);
|
image2->devData, image2->height, image2->width);
|
||||||
getLastCudaError("cuArraysPadding_kernel");
|
getLastCudaError("cuArraysPadding_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
inline __device__ float2 cmplxMul(float2 c, float a)
|
inline __device__ float2 cmplxMul(float2 c, float a)
|
||||||
{
|
{
|
||||||
return make_float2(c.x*a, c.y*a);
|
return make_float2(c.x*a, c.y*a);
|
||||||
}
|
}
|
||||||
|
|
||||||
// cuda kernel for
|
// cuda kernel for
|
||||||
__global__ void cuArraysPaddingMany_kernel(
|
__global__ void cuArraysPaddingMany_kernel(
|
||||||
const float2 *image1, const int height1, const int width1, const int size1,
|
const float2 *image1, const int height1, const int width1, const int size1,
|
||||||
float2 *image2, const int height2, const int width2, const int size2, const float factor )
|
float2 *image2, const int height2, const int width2, const int size2, const float factor )
|
||||||
{
|
{
|
||||||
int tx = threadIdx.x + blockDim.x*blockIdx.x;
|
int tx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
int ty = threadIdx.y + blockDim.y*blockIdx.y;
|
int ty = threadIdx.y + blockDim.y*blockIdx.y;
|
||||||
if(tx < height1/2 && ty < width1/2)
|
if(tx < height1/2 && ty < width1/2)
|
||||||
{
|
{
|
||||||
|
|
||||||
int tx1 = height1 - 1 - tx;
|
int tx1 = height1 - 1 - tx;
|
||||||
int ty1 = width1 -1 -ty;
|
int ty1 = width1 -1 -ty;
|
||||||
int tx2 = height2 -1 -tx;
|
int tx2 = height2 -1 -tx;
|
||||||
int ty2 = width2 -1 -ty;
|
int ty2 = width2 -1 -ty;
|
||||||
|
|
||||||
int stride1 = blockIdx.z*size1;
|
int stride1 = blockIdx.z*size1;
|
||||||
int stride2 = blockIdx.z*size2;
|
int stride2 = blockIdx.z*size2;
|
||||||
|
|
||||||
image2[IDX2R(tx, ty, width2)+stride2] = image1[IDX2R(tx, ty, width1)+stride1]*factor;
|
image2[IDX2R(tx, ty, width2)+stride2] = image1[IDX2R(tx, ty, width1)+stride1]*factor;
|
||||||
image2[IDX2R(tx2, ty, width2)+stride2] = cmplxMul(image1[IDX2R(tx1, ty, width1)+stride1], factor);
|
image2[IDX2R(tx2, ty, width2)+stride2] = cmplxMul(image1[IDX2R(tx1, ty, width1)+stride1], factor);
|
||||||
image2[IDX2R(tx, ty2, width2)+stride2] = cmplxMul(image1[IDX2R(tx, ty1, width1)+stride1], factor);
|
image2[IDX2R(tx, ty2, width2)+stride2] = cmplxMul(image1[IDX2R(tx, ty1, width1)+stride1], factor);
|
||||||
image2[IDX2R(tx2, ty2, width2)+stride2] = cmplxMul(image1[IDX2R(tx1, ty1, width1)+stride1], factor);
|
image2[IDX2R(tx2, ty2, width2)+stride2] = cmplxMul(image1[IDX2R(tx1, ty1, width1)+stride1], factor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -88,18 +88,18 @@ __global__ void cuArraysPaddingMany_kernel(
|
||||||
*/
|
*/
|
||||||
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int ThreadsPerBlock = NTHREADS2D;
|
int ThreadsPerBlock = NTHREADS2D;
|
||||||
int BlockPerGridx = IDIVUP (image1->height/2, ThreadsPerBlock);
|
int BlockPerGridx = IDIVUP (image1->height/2, ThreadsPerBlock);
|
||||||
int BlockPerGridy = IDIVUP (image1->width/2, ThreadsPerBlock);
|
int BlockPerGridy = IDIVUP (image1->width/2, ThreadsPerBlock);
|
||||||
dim3 dimBlock(ThreadsPerBlock, ThreadsPerBlock, 1);
|
dim3 dimBlock(ThreadsPerBlock, ThreadsPerBlock, 1);
|
||||||
dim3 dimGrid(BlockPerGridx, BlockPerGridy, image1->count);
|
dim3 dimGrid(BlockPerGridx, BlockPerGridy, image1->count);
|
||||||
|
|
||||||
checkCudaErrors(cudaMemsetAsync(image2->devData, 0, image2->getByteSize(),stream));
|
checkCudaErrors(cudaMemsetAsync(image2->devData, 0, image2->getByteSize(),stream));
|
||||||
float factor = 1.0f/image1->size;
|
float factor = 1.0f/image1->size;
|
||||||
cuArraysPaddingMany_kernel<<<dimGrid, dimBlock, 0, stream>>>(
|
cuArraysPaddingMany_kernel<<<dimGrid, dimBlock, 0, stream>>>(
|
||||||
image1->devData, image1->height, image1->width, image1->size,
|
image1->devData, image1->height, image1->width, image1->size,
|
||||||
image2->devData, image2->height, image2->width, image2->size, factor);
|
image2->devData, image2->height, image2->width, image2->size, factor);
|
||||||
getLastCudaError("cuArraysPadding_kernel");
|
getLastCudaError("cuArraysPadding_kernel");
|
||||||
}
|
}
|
||||||
//end of file
|
//end of file
|
||||||
|
|
||||||
|
|
|
@ -15,22 +15,22 @@ class cuFreqCorrelator
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
// handles for forward/backward fft
|
// handles for forward/backward fft
|
||||||
cufftHandle forwardPlan;
|
cufftHandle forwardPlan;
|
||||||
cufftHandle backwardPlan;
|
cufftHandle backwardPlan;
|
||||||
// work data
|
// work data
|
||||||
cuArrays<float2> *workFM;
|
cuArrays<float2> *workFM;
|
||||||
cuArrays<float2> *workFS;
|
cuArrays<float2> *workFS;
|
||||||
cuArrays<float> *workT;
|
cuArrays<float> *workT;
|
||||||
// cuda stream
|
// cuda stream
|
||||||
cudaStream_t stream;
|
cudaStream_t stream;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// constructor
|
// constructor
|
||||||
cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaStream_t stream_);
|
cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaStream_t stream_);
|
||||||
// destructor
|
// destructor
|
||||||
~cuFreqCorrelator();
|
~cuFreqCorrelator();
|
||||||
// executor
|
// executor
|
||||||
void execute(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results);
|
void execute(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__CUCORRFREQUENCY_H
|
#endif //__CUCORRFREQUENCY_H
|
||||||
|
|
|
@ -86,12 +86,12 @@ __global__ void cuArraysMean_kernel(float *images, float *image_sum, int imageSi
|
||||||
*/
|
*/
|
||||||
void cuArraysMeanValue(cuArrays<float> *images, cuArrays<float> *mean, cudaStream_t stream)
|
void cuArraysMeanValue(cuArrays<float> *images, cuArrays<float> *mean, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 grid(images->count, 1, 1);
|
const dim3 grid(images->count, 1, 1);
|
||||||
const int imageSize = images->width*images->height;
|
const int imageSize = images->width*images->height;
|
||||||
const float invSize = 1.0f/imageSize;
|
const float invSize = 1.0f/imageSize;
|
||||||
|
|
||||||
cuArraysMean_kernel<NTHREADS> <<<grid,NTHREADS,0,stream>>>(images->devData, mean->devData, imageSize, invSize, images->count);
|
cuArraysMean_kernel<NTHREADS> <<<grid,NTHREADS,0,stream>>>(images->devData, mean->devData, imageSize, invSize, images->count);
|
||||||
getLastCudaError("cuArraysMeanValue kernel error\n");
|
getLastCudaError("cuArraysMeanValue kernel error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// cuda kernel to compute and subtracts mean value from the images
|
// cuda kernel to compute and subtracts mean value from the images
|
||||||
|
@ -130,12 +130,12 @@ __global__ void cuArraysSubtractMean_kernel(float *images, int imageSize, float
|
||||||
*/
|
*/
|
||||||
void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream)
|
void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 grid(images->count, 1, 1);
|
const dim3 grid(images->count, 1, 1);
|
||||||
const int imageSize = images->width*images->height;
|
const int imageSize = images->width*images->height;
|
||||||
const float invSize = 1.0f/imageSize;
|
const float invSize = 1.0f/imageSize;
|
||||||
|
|
||||||
cuArraysSubtractMean_kernel<NTHREADS> <<<grid,NTHREADS,0,stream>>>(images->devData, imageSize, invSize, images->count);
|
cuArraysSubtractMean_kernel<NTHREADS> <<<grid,NTHREADS,0,stream>>>(images->devData, imageSize, invSize, images->count);
|
||||||
getLastCudaError("cuArraysSubtractMean kernel error\n");
|
getLastCudaError("cuArraysSubtractMean kernel error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -229,7 +229,7 @@ __device__ float2 partialSums(const float v, volatile float* shmem, const int st
|
||||||
// cuda kernel for cuCorrNormalize
|
// cuda kernel for cuCorrNormalize
|
||||||
template<const int Nthreads2>
|
template<const int Nthreads2>
|
||||||
__global__ void cuCorrNormalize_kernel(
|
__global__ void cuCorrNormalize_kernel(
|
||||||
int nImages,
|
int nImages,
|
||||||
const float *templateIn, int templateNX, int templateNY, int templateSize,
|
const float *templateIn, int templateNX, int templateNY, int templateSize,
|
||||||
const float *imageIn, int imageNX, int imageNY, int imageSize,
|
const float *imageIn, int imageNX, int imageNY, int imageSize,
|
||||||
float *resultOut, int resultNX, int resultNY, int resultSize,
|
float *resultOut, int resultNX, int resultNY, int resultSize,
|
||||||
|
@ -325,50 +325,50 @@ __global__ void cuCorrNormalize_kernel(
|
||||||
*/
|
*/
|
||||||
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream)
|
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nImages = images->count;
|
const int nImages = images->count;
|
||||||
const int imageNY = images->width;
|
const int imageNY = images->width;
|
||||||
const dim3 grid(1, 1, nImages);
|
const dim3 grid(1, 1, nImages);
|
||||||
const float invTemplateSize = 1.0f/templates->size;
|
const float invTemplateSize = 1.0f/templates->size;
|
||||||
|
|
||||||
if (imageNY <= 64) {
|
if (imageNY <= 64) {
|
||||||
cuCorrNormalize_kernel< 6><<<grid, 64, 0, stream>>>(nImages,
|
cuCorrNormalize_kernel< 6><<<grid, 64, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
getLastCudaError("cuCorrNormalize kernel error");
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 128) {
|
else if (imageNY <= 128) {
|
||||||
cuCorrNormalize_kernel< 7><<<grid, 128, 0, stream>>>(nImages,
|
cuCorrNormalize_kernel< 7><<<grid, 128, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
getLastCudaError("cuCorrNormalize kernel error");
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 256) {
|
else if (imageNY <= 256) {
|
||||||
cuCorrNormalize_kernel< 8><<<grid, 256, 0, stream>>>(nImages,
|
cuCorrNormalize_kernel< 8><<<grid, 256, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
getLastCudaError("cuCorrNormalize kernel error");
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 512) {
|
else if (imageNY <= 512) {
|
||||||
cuCorrNormalize_kernel< 9><<<grid, 512, 0, stream>>>(nImages,
|
cuCorrNormalize_kernel< 9><<<grid, 512, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
getLastCudaError("cuCorrNormalize kernel error");
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 1024) {
|
else if (imageNY <= 1024) {
|
||||||
cuCorrNormalize_kernel<10><<<grid,1024, 0, stream>>>(nImages,
|
cuCorrNormalize_kernel<10><<<grid,1024, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
getLastCudaError("cuCorrNormalize kernel error");
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -11,9 +11,9 @@
|
||||||
// cuda kernel for cuCorrTimeDomain
|
// cuda kernel for cuCorrTimeDomain
|
||||||
template<const int nthreads, const int NPT>
|
template<const int nthreads, const int NPT>
|
||||||
__global__ void cuArraysCorrTime_kernel(
|
__global__ void cuArraysCorrTime_kernel(
|
||||||
const int nImages,
|
const int nImages,
|
||||||
const float *templateIn, const int templateNX, const int templateNY, const int templateSize,
|
const float *templateIn, const int templateNX, const int templateNY, const int templateSize,
|
||||||
const float *imageIn, const int imageNX, const int imageNY, const int imageSize,
|
const float *imageIn, const int imageNX, const int imageNY, const int imageSize,
|
||||||
float *resultOut, const int resultNX, const int resultNY, const int resultSize)
|
float *resultOut, const int resultNX, const int resultNY, const int resultSize)
|
||||||
{
|
{
|
||||||
__shared__ float shmem[nthreads*(1+NPT)];
|
__shared__ float shmem[nthreads*(1+NPT)];
|
||||||
|
@ -99,9 +99,9 @@ __global__ void cuArraysCorrTime_kernel(
|
||||||
* @param[in] stream cudaStream
|
* @param[in] stream cudaStream
|
||||||
*/
|
*/
|
||||||
void cuCorrTimeDomain(cuArrays<float> *templates,
|
void cuCorrTimeDomain(cuArrays<float> *templates,
|
||||||
cuArrays<float> *images,
|
cuArrays<float> *images,
|
||||||
cuArrays<float> *results,
|
cuArrays<float> *results,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
/* compute correlation matrix */
|
/* compute correlation matrix */
|
||||||
const int nImages = images->count;
|
const int nImages = images->count;
|
||||||
|
@ -112,73 +112,73 @@ void cuCorrTimeDomain(cuArrays<float> *templates,
|
||||||
const dim3 grid(nImages, (results->width-1)/NPT+1, 1);
|
const dim3 grid(nImages, (results->width-1)/NPT+1, 1);
|
||||||
if (imageNY <= 64) {
|
if (imageNY <= 64) {
|
||||||
cuArraysCorrTime_kernel< 64,NPT><<<grid, 64, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 64,NPT><<<grid, 64, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 128) {
|
else if (imageNY <= 128) {
|
||||||
cuArraysCorrTime_kernel< 128,NPT><<<grid, 128, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 128,NPT><<<grid, 128, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 192) {
|
else if (imageNY <= 192) {
|
||||||
cuArraysCorrTime_kernel< 192,NPT><<<grid, 192, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 192,NPT><<<grid, 192, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 256) {
|
else if (imageNY <= 256) {
|
||||||
cuArraysCorrTime_kernel< 256,NPT><<<grid, 256, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 256,NPT><<<grid, 256, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 384) {
|
else if (imageNY <= 384) {
|
||||||
cuArraysCorrTime_kernel< 384,NPT><<<grid, 384, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 384,NPT><<<grid, 384, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 512) {
|
else if (imageNY <= 512) {
|
||||||
cuArraysCorrTime_kernel< 512,NPT><<<grid, 512, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 512,NPT><<<grid, 512, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 640) {
|
else if (imageNY <= 640) {
|
||||||
cuArraysCorrTime_kernel< 640,NPT><<<grid, 640, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 640,NPT><<<grid, 640, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 768) {
|
else if (imageNY <= 768) {
|
||||||
cuArraysCorrTime_kernel< 768,NPT><<<grid, 768, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 768,NPT><<<grid, 768, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 896) {
|
else if (imageNY <= 896) {
|
||||||
cuArraysCorrTime_kernel< 896,NPT><<<grid, 896, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 896,NPT><<<grid, 896, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else if (imageNY <= 1024) {
|
else if (imageNY <= 1024) {
|
||||||
cuArraysCorrTime_kernel<1024,NPT><<<grid,1024, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel<1024,NPT><<<grid,1024, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
fprintf(stderr, "The (oversampled) window size along the across direction %d should be smaller than 1024.\n", imageNY);
|
fprintf(stderr, "The (oversampled) window size along the across direction %d should be smaller than 1024.\n", imageNY);
|
||||||
|
|
|
@ -11,10 +11,10 @@
|
||||||
/**
 * Pairwise step of a max / arg-max reduction over two parallel arrays.
 *
 * Compares the entries at gid and strideid. When the entry at strideid is
 * strictly larger, its value and its recorded location overwrite the ones
 * stored at gid; otherwise (including ties) the entry at gid is left untouched.
 *
 * @param[in,out] maxval   candidate maximum values (accessed as volatile)
 * @param[in,out] maxloc   locations recorded alongside maxval (accessed as volatile)
 * @param[in]     gid      index of the entry that receives the winner
 * @param[in]     strideid index of the entry it is compared against
 */
inline static __device__ void maxPairReduce(volatile float* maxval, volatile int* maxloc,
                                            size_t gid, size_t strideid)
{
    // keep the current entry unless the partner is strictly larger
    if (!(maxval[gid] < maxval[strideid]))
        return;

    // the partner wins: take over both its value and its location
    maxval[gid] = maxval[strideid];
    maxloc[gid] = maxloc[strideid];
}
|
||||||
|
|
||||||
// max reduction kernel
|
// max reduction kernel
|
||||||
|
@ -25,21 +25,21 @@ __device__ void max_reduction(const float* const images,
|
||||||
volatile float* shval,
|
volatile float* shval,
|
||||||
volatile int* shloc)
|
volatile int* shloc)
|
||||||
{
|
{
|
||||||
int tid = threadIdx.x;
|
int tid = threadIdx.x;
|
||||||
shval[tid] = -FLT_MAX;
|
shval[tid] = -FLT_MAX;
|
||||||
int imageStart = blockIdx.x*imageSize;
|
int imageStart = blockIdx.x*imageSize;
|
||||||
int imagePixel;
|
int imagePixel;
|
||||||
|
|
||||||
// reduction for intra-block elements
|
// reduction for intra-block elements
|
||||||
// i.e., for elements with i, i+BLOCKSIZE, i+2*BLOCKSIZE ...
|
// i.e., for elements with i, i+BLOCKSIZE, i+2*BLOCKSIZE ...
|
||||||
for(int gid = tid; gid < imageSize; gid+=blockDim.x)
|
for(int gid = tid; gid < imageSize; gid+=blockDim.x)
|
||||||
{
|
{
|
||||||
imagePixel = imageStart+gid;
|
imagePixel = imageStart+gid;
|
||||||
if(shval[tid] < images[imagePixel]) {
|
if(shval[tid] < images[imagePixel]) {
|
||||||
shval[tid] = images[imagePixel];
|
shval[tid] = images[imagePixel];
|
||||||
shloc[tid] = gid;
|
shloc[tid] = gid;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
// reduction within a block
|
// reduction within a block
|
||||||
|
@ -50,12 +50,12 @@ __device__ void max_reduction(const float* const images,
|
||||||
// reduction within a warp
|
// reduction within a warp
|
||||||
if (tid < 32)
|
if (tid < 32)
|
||||||
{
|
{
|
||||||
maxPairReduce(shval, shloc, tid, tid + 32);
|
maxPairReduce(shval, shloc, tid, tid + 32);
|
||||||
maxPairReduce(shval, shloc, tid, tid + 16);
|
maxPairReduce(shval, shloc, tid, tid + 16);
|
||||||
maxPairReduce(shval, shloc, tid, tid + 8);
|
maxPairReduce(shval, shloc, tid, tid + 8);
|
||||||
maxPairReduce(shval, shloc, tid, tid + 4);
|
maxPairReduce(shval, shloc, tid, tid + 4);
|
||||||
maxPairReduce(shval, shloc, tid, tid + 2);
|
maxPairReduce(shval, shloc, tid, tid + 2);
|
||||||
maxPairReduce(shval, shloc, tid, tid + 1);
|
maxPairReduce(shval, shloc, tid, tid + 1);
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
@ -226,16 +226,16 @@ __global__ void cudaKernel_determineSecondaryExtractOffset(int2 * maxLoc, int2 *
|
||||||
const size_t nImages, int xOldRange, int yOldRange, int xNewRange, int yNewRange)
|
const size_t nImages, int xOldRange, int yOldRange, int xNewRange, int yNewRange)
|
||||||
{
|
{
|
||||||
int imageIndex = threadIdx.x + blockDim.x *blockIdx.x; //image index
|
int imageIndex = threadIdx.x + blockDim.x *blockIdx.x; //image index
|
||||||
if (imageIndex < nImages)
|
if (imageIndex < nImages)
|
||||||
{
|
{
|
||||||
// get the starting pixel (stored back to maxloc) and shift
|
// get the starting pixel (stored back to maxloc) and shift
|
||||||
int2 result = dev_adjustOffset(xOldRange, xNewRange, maxLoc[imageIndex].x);
|
int2 result = dev_adjustOffset(xOldRange, xNewRange, maxLoc[imageIndex].x);
|
||||||
maxLoc[imageIndex].x = result.x;
|
maxLoc[imageIndex].x = result.x;
|
||||||
shift[imageIndex].x = result.y;
|
shift[imageIndex].x = result.y;
|
||||||
result = dev_adjustOffset(yOldRange, yNewRange, maxLoc[imageIndex].y);
|
result = dev_adjustOffset(yOldRange, yNewRange, maxLoc[imageIndex].y);
|
||||||
maxLoc[imageIndex].y = result.x;
|
maxLoc[imageIndex].y = result.x;
|
||||||
shift[imageIndex].y = result.y;
|
shift[imageIndex].y = result.y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -250,10 +250,10 @@ __global__ void cudaKernel_determineSecondaryExtractOffset(int2 * maxLoc, int2 *
|
||||||
/**
 * Launch cudaKernel_determineSecondaryExtractOffset on the given stream.
 *
 * One thread is assigned per entry of maxLoc: NTHREADS threads per block and
 * IDIVUP(maxLoc->size, NTHREADS) blocks. The kernel receives the device
 * pointers of both arrays; it rewrites the entries of maxLoc in place and
 * stores the accompanying shift in maxLocShift.
 *
 * @param[in,out] maxLoc       array whose devData is passed as the kernel's maxLoc argument
 * @param[out]    maxLocShift  array whose devData is passed as the kernel's shift argument
 * @param[in]     xOldRange    forwarded to the kernel (x, old range)
 * @param[in]     yOldRange    forwarded to the kernel (y, old range)
 * @param[in]     xNewRange    forwarded to the kernel (x, new range)
 * @param[in]     yNewRange    forwarded to the kernel (y, new range)
 * @param[in]     stream       cuda stream the kernel is launched on
 */
void cuDetermineSecondaryExtractOffset(cuArrays<int2> *maxLoc, cuArrays<int2> *maxLocShift,
    int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream)
{
    const int threadsperblock = NTHREADS;
    // round up so that every entry of maxLoc is covered by a thread
    const int blockspergrid = IDIVUP(maxLoc->size, threadsperblock);

    cudaKernel_determineSecondaryExtractOffset<<<blockspergrid, threadsperblock, 0, stream>>>
        (maxLoc->devData, maxLocShift->devData, maxLoc->size, xOldRange, yOldRange, xNewRange, yNewRange);

    // a kernel launch does not return an error by itself; query it explicitly
    // so that a bad launch configuration is not silently ignored
    getLastCudaError("cuDetermineSecondaryExtractOffset kernel error");
}
|
||||||
|
|
||||||
// end of file
|
// end of file
|
||||||
|
|
|
@ -18,40 +18,40 @@
|
||||||
class cuOverSamplerC2C
{
private:
    cufftHandle forwardPlan;   // forward fft handle
    cufftHandle backwardPlan;  // backward fft handle
    cudaStream_t stream;       // cuda stream
    cuArrays<float2> *workIn;  // work array to hold forward fft data
    cuArrays<float2> *workOut; // work array to hold padded data
public:
    // disable the default constructor
    cuOverSamplerC2C() = delete;
    // disable copying: the object holds fft handles and raw work-array pointers
    // (presumably released by the destructor - confirm in the implementation),
    // so an implicit member-wise copy would leave two objects sharing them
    cuOverSamplerC2C(const cuOverSamplerC2C &) = delete;
    cuOverSamplerC2C & operator=(const cuOverSamplerC2C &) = delete;
    // constructor
    // NOTE(review): inNX/inNY and outNX/outNY look like the per-image input and
    // oversampled output dimensions, nImages the batch size - confirm against
    // the implementation
    cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_);
    // set cuda stream
    void setStream(cudaStream_t stream_);
    // execute oversampling
    // deramp_method defaults to 0; the meaning of its values is defined by the
    // implementation
    void execute(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int deramp_method=0);
    // destructor
    ~cuOverSamplerC2C();
};
|
||||||
|
|
||||||
// FFT Oversampler for real images
class cuOverSamplerR2R
{
private:
    cufftHandle forwardPlan;       // forward fft handle
    cufftHandle backwardPlan;      // backward fft handle
    cudaStream_t stream;           // cuda stream
    cuArrays<float2> *workSizeIn;  // complex work array (input side)
    cuArrays<float2> *workSizeOut; // complex work array (output side)

public:
    // disable the default constructor
    cuOverSamplerR2R() = delete;
    // disable copying: the object holds fft handles and raw work-array pointers
    // (presumably released by the destructor - confirm in the implementation),
    // so an implicit member-wise copy would leave two objects sharing them
    cuOverSamplerR2R(const cuOverSamplerR2R &) = delete;
    cuOverSamplerR2R & operator=(const cuOverSamplerR2R &) = delete;
    // constructor
    // NOTE(review): inNX/inNY and outNX/outNY look like the per-image input and
    // oversampled output dimensions, nImages the batch size - confirm against
    // the implementation
    cuOverSamplerR2R(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_);
    // set cuda stream
    void setStream(cudaStream_t stream_);
    // execute oversampling on real-valued (float) images
    void execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut);
    // destructor
    ~cuOverSamplerR2R();
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -195,6 +195,3 @@ void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *i
|
||||||
}
|
}
|
||||||
|
|
||||||
// end of file
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -61,6 +61,3 @@ class cuSincOverSamplerR2R
|
||||||
|
|
||||||
#endif // _CUSINCOVERSAMPLER_H
|
#endif // _CUSINCOVERSAMPLER_H
|
||||||
// end of file
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -76,7 +76,7 @@ inline int gpuDeviceInit(int devID)
|
||||||
|
|
||||||
if (devID < 0 || devID > device_count-1)
|
if (devID < 0 || devID > device_count-1)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
|
fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ inline __host__ __device__ float2 operator-(float2 &a)
|
||||||
// complex conjugate
|
// complex conjugate
|
||||||
/**
 * Complex conjugate of a number stored as float2 (x: real part, y: imaginary part).
 *
 * @param[in] a input value
 * @return (a.x, -a.y)
 */
inline __host__ __device__ float2 conjugate(float2 a)
{
    // a is a by-value copy, so the imaginary part can be flipped in place
    a.y = -a.y;
    return a;
}
|
||||||
|
|
||||||
// addition
|
// addition
|
||||||
|
@ -92,11 +92,11 @@ inline __host__ __device__ void operator*=(float2 &a, int b)
|
||||||
}
|
}
|
||||||
/**
 * Named wrapper around the float2 operator* overload.
 *
 * @param[in] a left operand
 * @param[in] b right operand
 * @return a*b as computed by operator*(float2, float2)
 */
inline __host__ __device__ float2 complexMul(float2 a, float2 b)
{
    const float2 product = a * b;
    return product;
}
|
||||||
/**
 * Product of a with the complex conjugate of b, for numbers stored as
 * float2 (x: real part, y: imaginary part).
 *
 * @param[in] a left operand
 * @param[in] b right operand (conjugated before multiplication)
 * @return (a.x*b.x + a.y*b.y, a.y*b.x - a.x*b.y)
 */
inline __host__ __device__ float2 complexMulConj(float2 a, float2 b)
{
    const float re = a.x*b.x + a.y*b.y;
    const float im = a.y*b.x - a.x*b.y;
    return make_float2(re, im);
}
|
||||||
|
|
||||||
inline __host__ __device__ float2 operator/(float2 a, float b)
|
inline __host__ __device__ float2 operator/(float2 a, float b)
|
||||||
|
@ -112,17 +112,17 @@ inline __host__ __device__ void operator/=(float2 &a, float b)
|
||||||
// abs, arg
|
// abs, arg
|
||||||
/**
 * Magnitude of a complex number stored as float2 (x: real part, y: imaginary part).
 *
 * @param[in] a input value
 * @return sqrtf(a.x*a.x + a.y*a.y)
 */
inline __host__ __device__ float complexAbs(float2 a)
{
    const float magnitudeSquared = a.x*a.x+a.y*a.y;
    return sqrtf(magnitudeSquared);
}
|
||||||
/**
 * Phase angle of a complex number stored as float2 (x: real part, y: imaginary part).
 *
 * @param[in] a input value
 * @return atan2f(a.y, a.x), in radians
 */
inline __host__ __device__ float complexArg(float2 a)
{
    const float imagPart = a.y;
    const float realPart = a.x;
    return atan2f(imagPart, realPart);
}
|
||||||
|
|
||||||
// make a complex number from phase
|
// make a complex number from phase
|
||||||
/**
 * Unit-magnitude complex number with the given phase, stored as float2
 * (x: real part, y: imaginary part).
 *
 * @param[in] arg phase angle in radians
 * @return (cosf(arg), sinf(arg))
 */
inline __host__ __device__ float2 complexExp(float arg)
{
    const float realPart = cosf(arg);
    const float imagPart = sinf(arg);
    return make_float2(realPart, imagPart);
}
|
||||||
|
|
||||||
#endif //__FLOAT2_H
|
#endif //__FLOAT2_H
|
||||||
|
|
Loading…
Reference in New Issue