2020-11-18 07:22:37 +00:00
|
|
|
/*
|
|
|
|
* @file cuCorrTimetime.cu
|
|
|
|
* @brief Correlation between two sets of images in time domain
|
|
|
|
*
|
|
|
|
* This code is adapted from the nxcor package.
|
2019-01-16 19:40:08 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "cuAmpcorUtil.h"
|
|
|
|
|
2020-11-18 07:22:37 +00:00
|
|
|
|
|
|
|
// cuda kernel for cuCorrTimeDomain
|
2019-01-16 19:40:08 +00:00
|
|
|
template<const int nthreads, const int NPT>
|
|
|
|
__global__ void cuArraysCorrTime_kernel(
|
2020-11-18 07:22:37 +00:00
|
|
|
const int nImages,
|
|
|
|
const float *templateIn, const int templateNX, const int templateNY, const int templateSize,
|
|
|
|
const float *imageIn, const int imageNX, const int imageNY, const int imageSize,
|
|
|
|
float *resultOut, const int resultNX, const int resultNY, const int resultSize)
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
|
|
|
__shared__ float shmem[nthreads*(1+NPT)];
|
|
|
|
const int tid = threadIdx.x;
|
|
|
|
const int bid = blockIdx.x;
|
|
|
|
const int yc = blockIdx.y*NPT;
|
2020-11-18 07:22:37 +00:00
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
const int imageIdx = bid;
|
|
|
|
const int imageOffset = imageIdx * imageSize;
|
|
|
|
const int templateOffset = imageIdx * templateSize;
|
|
|
|
const int resultOffset = imageIdx * resultSize;
|
2020-11-18 07:22:37 +00:00
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
const float * imageD = imageIn + imageOffset + tid;
|
|
|
|
const float *templateD = templateIn + templateOffset + tid;
|
|
|
|
float * resultD = resultOut + resultOffset;
|
2020-11-18 07:22:37 +00:00
|
|
|
|
|
|
|
const int q = min(nthreads/resultNY, 4);
|
2019-01-16 19:40:08 +00:00
|
|
|
const int nt = nthreads/q;
|
|
|
|
const int ty = threadIdx.x / nt;
|
|
|
|
const int tx = threadIdx.x - nt * ty;
|
2020-11-18 07:22:37 +00:00
|
|
|
|
|
|
|
const int templateNYq = templateNY/q;
|
|
|
|
const int jbeg = templateNYq * ty;
|
|
|
|
const int jend = ty+1 >= q ? templateNY : templateNYq + jbeg;
|
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
float *shTemplate = shmem;
|
|
|
|
float *shImage = shmem + nthreads;
|
|
|
|
float *shImage1 = shImage + tx;
|
2020-11-18 07:22:37 +00:00
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
float corrCoeff[NPT];
|
|
|
|
for (int k = 0; k < NPT; k++)
|
|
|
|
corrCoeff[k] = 0.0f;
|
2020-11-18 07:22:37 +00:00
|
|
|
|
|
|
|
int iaddr = yc*imageNY;
|
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
|
|
|
|
float img[NPT];
|
2020-11-18 07:22:37 +00:00
|
|
|
for (int k = 0; k < NPT-1; k++, iaddr += imageNY)
|
|
|
|
img[k] = imageD[iaddr];
|
|
|
|
for (int taddr = 0; taddr < templateSize; taddr += templateNY, iaddr += imageNY)
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
|
|
|
shTemplate[tid] = templateD[taddr];
|
|
|
|
img [NPT-1] = imageD[iaddr];
|
|
|
|
for (int k = 0; k < NPT; k++)
|
|
|
|
shImage[tid + nthreads*k] = img[k];
|
|
|
|
for (int k = 0; k < NPT-1; k++)
|
|
|
|
img[k] = img[k+1];
|
|
|
|
__syncthreads();
|
2020-11-18 07:22:37 +00:00
|
|
|
|
|
|
|
if (tx < resultNY && ty < q)
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
2020-11-18 07:22:37 +00:00
|
|
|
#pragma unroll 8
|
2019-01-16 19:40:08 +00:00
|
|
|
for (int j = jbeg; j < jend; j++)
|
|
|
|
for (int k = 0; k < NPT; k++)
|
|
|
|
corrCoeff[k] += shTemplate[j]*shImage1[j + nthreads*k];
|
|
|
|
}
|
|
|
|
__syncthreads();
|
|
|
|
}
|
|
|
|
|
|
|
|
for (int k = 0; k < NPT; k++)
|
|
|
|
shmem[tid + nthreads*k] = corrCoeff[k];
|
|
|
|
__syncthreads();
|
2020-11-18 07:22:37 +00:00
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
for (int j = tx + nt; j < nthreads; j += nt)
|
|
|
|
for (int k = 0; k < NPT; k++)
|
|
|
|
corrCoeff[k] += shmem[j + nthreads*k];
|
|
|
|
__syncthreads();
|
2020-11-18 07:22:37 +00:00
|
|
|
|
|
|
|
if (tid < resultNY)
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
2020-11-18 07:22:37 +00:00
|
|
|
int raddr = yc*resultNY + tid;
|
|
|
|
for (int k = 0; k < NPT; k++, raddr += resultNY)
|
2019-01-16 19:40:08 +00:00
|
|
|
if (raddr < resultSize)
|
|
|
|
resultD[raddr] = corrCoeff[k];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-18 07:22:37 +00:00
|
|
|
/**
|
|
|
|
* Perform cross correlation in time domain
|
|
|
|
* @param[in] templates Reference images
|
|
|
|
* @param[in] images Secondary images
|
|
|
|
* @param[out] results Output correlation surface
|
|
|
|
* @param[in] stream cudaStream
|
|
|
|
*/
|
2019-01-16 19:40:08 +00:00
|
|
|
void cuCorrTimeDomain(cuArrays<float> *templates,
|
|
|
|
cuArrays<float> *images,
|
|
|
|
cuArrays<float> *results,
|
|
|
|
cudaStream_t stream)
|
|
|
|
{
|
|
|
|
/* compute correlation matrix */
|
|
|
|
const int nImages = images->count;
|
2020-11-18 07:22:37 +00:00
|
|
|
const int imageNY = images->width;
|
2019-01-16 19:40:08 +00:00
|
|
|
const int NPT = 8;
|
2020-11-18 07:22:37 +00:00
|
|
|
|
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
const dim3 grid(nImages, (results->width-1)/NPT+1, 1);
|
2020-11-18 07:22:37 +00:00
|
|
|
if (imageNY <= 64) {
|
|
|
|
cuArraysCorrTime_kernel< 64,NPT><<<grid, 64, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 128) {
|
|
|
|
cuArraysCorrTime_kernel< 128,NPT><<<grid, 128, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 192) {
|
|
|
|
cuArraysCorrTime_kernel< 192,NPT><<<grid, 192, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 256) {
|
|
|
|
cuArraysCorrTime_kernel< 256,NPT><<<grid, 256, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 384) {
|
|
|
|
cuArraysCorrTime_kernel< 384,NPT><<<grid, 384, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 512) {
|
|
|
|
cuArraysCorrTime_kernel< 512,NPT><<<grid, 512, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 640) {
|
|
|
|
cuArraysCorrTime_kernel< 640,NPT><<<grid, 640, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 768) {
|
|
|
|
cuArraysCorrTime_kernel< 768,NPT><<<grid, 768, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 896) {
|
|
|
|
cuArraysCorrTime_kernel< 896,NPT><<<grid, 896, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else if (imageNY <= 1024) {
|
|
|
|
cuArraysCorrTime_kernel<1024,NPT><<<grid,1024, 0, stream>>>(nImages,
|
|
|
|
templates->devData, templates->height, templates->width, templates->size,
|
|
|
|
images->devData, images->height, images->width, images->size,
|
|
|
|
results->devData, results->height, results->width, results->size);
|
|
|
|
getLastCudaError("cuArraysCorrTime error");
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
fprintf(stderr, "The (oversampled) window size along the across direction %d should be smaller than 1024.\n", imageNY);
|
|
|
|
throw;
|
|
|
|
}
|
2019-01-16 19:40:08 +00:00
|
|
|
}
|
2020-11-18 07:22:37 +00:00
|
|
|
// end of file
|