2020-11-12 23:02:44 +00:00
|
|
|
/*
|
2019-01-16 19:40:08 +00:00
|
|
|
* cuSincOverSampler.cu
|
|
|
|
*/
|
|
|
|
#include "cuArrays.h"
|
|
|
|
#include "cuSincOverSampler.h"
|
|
|
|
#include "cuArrays.h"
|
|
|
|
#include "cudaUtil.h"
|
|
|
|
#include "cudaError.h"
|
|
|
|
#include "cuAmpcorUtil.h"
|
|
|
|
|
2020-11-12 23:02:44 +00:00
|
|
|
cuSincOverSamplerR2R::cuSincOverSamplerR2R(const int i_covs_, cudaStream_t stream_)
|
|
|
|
: i_covs(i_covs_)
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
|
|
|
setStream(stream_);
|
2020-11-12 23:02:44 +00:00
|
|
|
i_intplength = int(r_relfiltlen/r_beta+0.5f);
|
2019-01-16 19:40:08 +00:00
|
|
|
i_filtercoef = i_intplength*i_decfactor;
|
|
|
|
checkCudaErrors(cudaMalloc((void **)&r_filter, (i_filtercoef+1)*sizeof(float)));
|
|
|
|
cuSetupSincKernel();
|
|
|
|
}
|
|
|
|
|
|
|
|
void cuSincOverSamplerR2R::setStream(cudaStream_t stream_)
|
|
|
|
{
|
|
|
|
stream = stream_;
|
|
|
|
}
|
|
|
|
|
2020-11-12 23:02:44 +00:00
|
|
|
cuSincOverSamplerR2R::~cuSincOverSamplerR2R()
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
|
|
|
checkCudaErrors(cudaFree(r_filter));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-11-12 23:02:44 +00:00
|
|
|
__global__ void cuSetupSincKernel_kernel(float *r_filter_, const int i_filtercoef_,
|
2019-01-16 19:40:08 +00:00
|
|
|
const float r_soff_, const float r_wgthgt_, const int i_weight_,
|
2020-11-12 23:02:44 +00:00
|
|
|
const float r_soff_inverse_, const float r_beta_, const float r_decfactor_inverse_)
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
|
|
|
int i = threadIdx.x + blockDim.x*blockIdx.x;
|
|
|
|
if(i > i_filtercoef_) return;
|
|
|
|
float r_wa = i - r_soff_;
|
|
|
|
float r_wgt = (1.0f - r_wgthgt_) + r_wgthgt_*cos(PI*r_wa*r_soff_inverse_);
|
|
|
|
float r_s = r_wa*r_beta_*r_decfactor_inverse_*PI;
|
2020-11-12 23:02:44 +00:00
|
|
|
float r_fct;
|
2019-01-16 19:40:08 +00:00
|
|
|
if(r_s != 0.0f) {
|
|
|
|
r_fct = sin(r_s)/r_s;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
r_fct = 1.0f;
|
|
|
|
}
|
|
|
|
if(i_weight_ == 1) {
|
|
|
|
r_filter_[i] = r_fct*r_wgt;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
r_filter_[i] = r_fct;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void cuSincOverSamplerR2R::cuSetupSincKernel()
|
|
|
|
{
|
|
|
|
const int nthreads = 128;
|
2020-11-12 23:02:44 +00:00
|
|
|
const int nblocks = IDIVUP(i_filtercoef+1, nthreads);
|
|
|
|
|
|
|
|
// compute some commonly used constants at first
|
|
|
|
float r_wgthgt = (1.0f - r_pedestal)/2.0f;
|
|
|
|
float r_soff = (i_filtercoef-1.0f)/2.0f;
|
|
|
|
float r_soff_inverse = 1.0f/r_soff;
|
|
|
|
float r_decfactor_inverse = 1.0f/i_decfactor;
|
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
cuSetupSincKernel_kernel<<<nblocks, nthreads, 0, stream>>> (
|
2020-11-12 23:02:44 +00:00
|
|
|
r_filter, i_filtercoef, r_soff, r_wgthgt, i_weight,
|
|
|
|
r_soff_inverse, r_beta, r_decfactor_inverse);
|
2019-01-16 19:40:08 +00:00
|
|
|
getLastCudaError("cuSetupSincKernel_kernel");
|
|
|
|
}
|
|
|
|
|
2020-11-12 23:02:44 +00:00
|
|
|
__global__ void cuSincInterpolation_kernel(const int nImages,
|
2019-01-16 19:40:08 +00:00
|
|
|
const float * imagesIn, const int inNX, const int inNY,
|
2020-11-12 23:02:44 +00:00
|
|
|
float * imagesOut, const int outNX, const int outNY,
|
|
|
|
int2 *centerShift, int factor,
|
|
|
|
const float * r_filter_, const int i_covs_, const int i_decfactor_, const int i_intplength_,
|
2019-01-16 19:40:08 +00:00
|
|
|
const int i_startX, const int i_startY, const int i_int_size)
|
|
|
|
{
|
2020-11-12 23:02:44 +00:00
|
|
|
// get image index
|
2019-01-16 19:40:08 +00:00
|
|
|
int idxImage = blockIdx.z;
|
2020-11-12 23:02:44 +00:00
|
|
|
// get the xy threads for output image pixel indices
|
|
|
|
int idxX = threadIdx.x + blockDim.x*blockIdx.x;
|
2019-01-16 19:40:08 +00:00
|
|
|
int idxY = threadIdx.y + blockDim.y*blockIdx.y;
|
2020-11-12 23:02:44 +00:00
|
|
|
// cuda: to make sure extra allocated threads do nothing
|
2019-01-16 19:40:08 +00:00
|
|
|
if(idxImage >=nImages || idxX >= i_int_size || idxY >= i_int_size) return;
|
2020-11-12 23:02:44 +00:00
|
|
|
// decide the center shift
|
|
|
|
int2 shift = centerShift[idxImage];
|
|
|
|
// determine the output pixel indices
|
|
|
|
int outx = idxX + i_startX + shift.x*factor;
|
|
|
|
if (outx >= outNX) outx-=outNX;
|
|
|
|
int outy = idxY + i_startY + shift.y*factor;
|
|
|
|
if (outy >= outNY) outy-=outNY;
|
|
|
|
// flattened to 1d
|
2019-01-16 19:40:08 +00:00
|
|
|
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
2020-11-12 23:02:44 +00:00
|
|
|
|
|
|
|
// index in input grids
|
2019-01-16 19:40:08 +00:00
|
|
|
float r_xout = (float)outx/i_covs_;
|
2020-11-12 23:02:44 +00:00
|
|
|
// integer part
|
2019-01-16 19:40:08 +00:00
|
|
|
int i_xout = int(r_xout);
|
2020-11-12 23:02:44 +00:00
|
|
|
// factional part
|
2019-01-16 19:40:08 +00:00
|
|
|
float r_xfrac = r_xout - i_xout;
|
2020-11-12 23:02:44 +00:00
|
|
|
// fractional part in terms of the interpolation kernel grids
|
2019-01-16 19:40:08 +00:00
|
|
|
int i_xfrac = int(r_xfrac*i_decfactor_);
|
2020-11-12 23:02:44 +00:00
|
|
|
|
|
|
|
// same procedure for y
|
2019-01-16 19:40:08 +00:00
|
|
|
float r_yout = (float)outy/i_covs_;
|
|
|
|
int i_yout = int(r_yout);
|
|
|
|
float r_yfrac = r_yout - i_yout;
|
|
|
|
int i_yfrac = int(r_yfrac*i_decfactor_);
|
2020-11-12 23:02:44 +00:00
|
|
|
|
|
|
|
// temp variables
|
|
|
|
float intpData = 0.0f; // interpolated value
|
|
|
|
float r_sincwgt = 0.0f; // total filter weight
|
|
|
|
float r_sinc_coef; // filter weight
|
|
|
|
|
|
|
|
// iterate over lines of input image
|
|
|
|
// i=0 -> -i_intplength/2
|
|
|
|
for(int i=0; i < i_intplength_; i++) {
|
|
|
|
// find the corresponding pixel in input(unsampled) image
|
|
|
|
|
|
|
|
int inx = i_xout - i + i_intplength_/2;
|
|
|
|
|
|
|
|
if(inx < 0) inx+= inNX;
|
|
|
|
if(inx >= inNX) inx-= inNY;
|
|
|
|
|
|
|
|
float r_xsinc_coef = r_filter_[i*i_decfactor_+i_xfrac];
|
|
|
|
|
|
|
|
for(int j=0; j< i_intplength_; j++) {
|
|
|
|
// find the corresponding pixel in input(unsampled) image
|
|
|
|
int iny = i_yout - j + i_intplength_/2;
|
|
|
|
if(iny < 0) iny += inNY;
|
|
|
|
if(iny >= inNY) iny -= inNY;
|
|
|
|
|
|
|
|
float r_ysinc_coef = r_filter_[j*i_decfactor_+i_yfrac];
|
|
|
|
// multiply the factors from xy
|
2019-01-16 19:40:08 +00:00
|
|
|
r_sinc_coef = r_xsinc_coef*r_ysinc_coef;
|
2020-11-12 23:02:44 +00:00
|
|
|
// add to total sinc weight
|
2019-01-16 19:40:08 +00:00
|
|
|
r_sincwgt += r_sinc_coef;
|
2020-11-12 23:02:44 +00:00
|
|
|
// multiply by the original signal and add to results
|
|
|
|
intpData += imagesIn[idxImage*inNX*inNY+inx*inNY+iny]*r_sinc_coef;
|
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
imagesOut[idxOut] = intpData/r_sincwgt;
|
|
|
|
//printf("test int kernel %d %d %f %f %f\n", outx, outy, intpData, r_sincwgt, imagesOut[idxOut]);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-11-12 23:02:44 +00:00
|
|
|
void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
|
|
|
|
cuArrays<int2> *centerShift, int oversamplingFactor)
|
2019-01-16 19:40:08 +00:00
|
|
|
{
|
|
|
|
const int nImages = imagesIn->count;
|
|
|
|
const int inNX = imagesIn->height;
|
|
|
|
const int inNY = imagesIn->width;
|
2020-11-12 23:02:44 +00:00
|
|
|
const int outNX = imagesOut->height;
|
2019-01-16 19:40:08 +00:00
|
|
|
const int outNY = imagesOut->width;
|
2020-11-12 23:02:44 +00:00
|
|
|
|
|
|
|
// only compute the overampled signals within a window
|
|
|
|
const int i_int_range = i_sincwindow * i_covs;
|
|
|
|
// set the start pixel, will be shifted by centerShift*oversamplingFactor (from raw image)
|
2019-01-16 19:40:08 +00:00
|
|
|
const int i_int_startX = outNX/2 - i_int_range;
|
|
|
|
const int i_int_startY = outNY/2 - i_int_range;
|
|
|
|
const int i_int_size = 2*i_int_range + 1;
|
2020-11-12 23:02:44 +00:00
|
|
|
// preset all pixels in out image to 0
|
2019-01-16 19:40:08 +00:00
|
|
|
imagesOut->setZero(stream);
|
2020-11-12 23:02:44 +00:00
|
|
|
|
2019-01-16 19:40:08 +00:00
|
|
|
static const int nthreads = 16;
|
|
|
|
dim3 threadsperblock(nthreads, nthreads, 1);
|
|
|
|
dim3 blockspergrid (IDIVUP(i_int_size, nthreads), IDIVUP(i_int_size, nthreads), nImages);
|
2020-11-12 23:02:44 +00:00
|
|
|
cuSincInterpolation_kernel<<<blockspergrid, threadsperblock, 0, stream>>>(nImages,
|
2019-01-16 19:40:08 +00:00
|
|
|
imagesIn->devData, inNX, inNY,
|
|
|
|
imagesOut->devData, outNX, outNY,
|
2020-11-12 23:02:44 +00:00
|
|
|
centerShift->devData, oversamplingFactor,
|
2019-01-16 19:40:08 +00:00
|
|
|
r_filter, i_covs, i_decfactor, i_intplength, i_int_startX, i_int_startY, i_int_size);
|
|
|
|
getLastCudaError("cuSincInterpolation_kernel");
|
|
|
|
}
|
|
|
|
|
|
|
|
// end of file
|
|
|
|
|
|
|
|
|
|
|
|
|