Templatize normalization routines

LT1AB
Ryan Burns 2021-07-19 16:34:35 -07:00
parent b0641ef2f5
commit 14d708863d
4 changed files with 46 additions and 117 deletions

View File

@ -61,11 +61,9 @@ void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cud
//in cuCorrNormalization.cu: utilities to normalize the cross correlation function
void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream);
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
void cuCorrNormalize64(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream);
void cuCorrNormalize128(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream);
void cuCorrNormalize256(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream);
void cuCorrNormalize512(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream);
void cuCorrNormalize1024(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream);
template<int Size>
void cuCorrNormalizeFixed(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream);
// in cuCorrNormalizationSAT.cu: to normalize the cross correlation function with sum area table
void cuCorrNormalizeSAT(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary,

View File

@ -377,64 +377,20 @@ void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArra
}
void cuCorrNormalize64(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
template<int N> struct Log2;
template<> struct Log2<64> { static const int value = 6; };
template<> struct Log2<128> { static const int value = 7; };
template<> struct Log2<256> { static const int value = 8; };
template<> struct Log2<512> { static const int value = 9; };
template<> struct Log2<1024> { static const int value = 10; };
template<int Size>
void cuCorrNormalizeFixed(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
const int nImages = correlation->count;
const dim3 grid(1, 1, nImages);
const float invReferenceSize = 1.0f/reference->size;
cuCorrNormalize_kernel< 6><<<grid, 64, 0, stream>>>(nImages,
reference->devData, reference->height, reference->width, reference->size,
secondary->devData, secondary->height, secondary->width, secondary->size,
correlation->devData, correlation->height, correlation->width, correlation->size,
invReferenceSize);
getLastCudaError("cuCorrNormalize kernel error");
}
void cuCorrNormalize128(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
const int nImages = correlation->count;
const dim3 grid(1, 1, nImages);
const float invReferenceSize = 1.0f/reference->size;
cuCorrNormalize_kernel< 7><<<grid, 128, 0, stream>>>(nImages,
reference->devData, reference->height, reference->width, reference->size,
secondary->devData, secondary->height, secondary->width, secondary->size,
correlation->devData, correlation->height, correlation->width, correlation->size,
invReferenceSize);
getLastCudaError("cuCorrNormalize kernel error");
}
void cuCorrNormalize256(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
const int nImages = correlation->count;
const dim3 grid(1, 1, nImages);
const float invReferenceSize = 1.0f/reference->size;
cuCorrNormalize_kernel< 8><<<grid, 256, 0, stream>>>(nImages,
reference->devData, reference->height, reference->width, reference->size,
secondary->devData, secondary->height, secondary->width, secondary->size,
correlation->devData, correlation->height, correlation->width, correlation->size,
invReferenceSize);
getLastCudaError("cuCorrNormalize kernel error");
}
void cuCorrNormalize512(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
const int nImages = correlation->count;
const dim3 grid(1, 1, nImages);
const float invReferenceSize = 1.0f/reference->size;
cuCorrNormalize_kernel< 9><<<grid, 512, 0, stream>>>(nImages,
reference->devData, reference->height, reference->width, reference->size,
secondary->devData, secondary->height, secondary->width, secondary->size,
correlation->devData, correlation->height, correlation->width, correlation->size,
invReferenceSize);
getLastCudaError("cuCorrNormalize kernel error");
}
void cuCorrNormalize1024(cuArrays<float> *correlation, cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
const int nImages = correlation->count;
const dim3 grid(1, 1, nImages);
const float invReferenceSize = 1.0f/reference->size;
cuCorrNormalize_kernel< 10><<<grid, 1024, 0, stream>>>(nImages,
cuCorrNormalize_kernel<Log2<Size>::value><<<grid, Size, 0, stream>>>(nImages,
reference->devData, reference->height, reference->width, reference->size,
secondary->devData, secondary->height, secondary->width, secondary->size,
correlation->devData, correlation->height, correlation->width, correlation->size,
@ -442,5 +398,20 @@ void cuCorrNormalize1024(cuArrays<float> *correlation, cuArrays<float> *referenc
getLastCudaError("cuCorrNormalize kernel error");
}
template void cuCorrNormalizeFixed<64>(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary,
cudaStream_t stream);
template void cuCorrNormalizeFixed<128>(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary,
cudaStream_t stream);
template void cuCorrNormalizeFixed<256>(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary,
cudaStream_t stream);
template void cuCorrNormalizeFixed<512>(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary,
cudaStream_t stream);
template void cuCorrNormalizeFixed<1024>(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary,
cudaStream_t stream);
// end of file

View File

@ -11,19 +11,19 @@ cuNormalizer::cuNormalizer(int secondaryNX, int secondaryNY, int count)
{
// depending on NY, choose different processor
if(secondaryNY <= 64) {
processor = new cuNormalize64();
processor = new cuNormalizeFixed<64>();
}
else if (secondaryNY <= 128) {
processor = new cuNormalize128();
processor = new cuNormalizeFixed<128>();
}
else if (secondaryNY <= 256) {
processor = new cuNormalize256();
processor = new cuNormalizeFixed<256>();
}
else if (secondaryNY <= 512) {
processor = new cuNormalize512();
processor = new cuNormalizeFixed<512>();
}
else if (secondaryNY <= 1024) {
processor = new cuNormalize1024();
processor = new cuNormalizeFixed<1024>();
}
else {
processor = new cuNormalizeSAT(secondaryNX, secondaryNY, count);
@ -74,34 +74,17 @@ void cuNormalizeSAT::execute(cuArrays<float> *correlation,
referenceSum2, secondarySAT, secondarySAT2, stream);
}
void cuNormalize64::execute(cuArrays<float> *correlation,
template<int Size>
void cuNormalizeFixed<Size>::execute(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
cuCorrNormalize64(correlation, reference, secondary, stream);
cuCorrNormalizeFixed<Size>(correlation, reference, secondary, stream);
}
void cuNormalize128::execute(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
cuCorrNormalize128(correlation, reference, secondary, stream);
}
template class cuNormalizeFixed<64>;
template class cuNormalizeFixed<128>;
template class cuNormalizeFixed<256>;
template class cuNormalizeFixed<512>;
template class cuNormalizeFixed<1024>;
void cuNormalize256::execute(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
cuCorrNormalize256(correlation, reference, secondary, stream);
}
void cuNormalize512::execute(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
cuCorrNormalize512(correlation, reference, secondary, stream);
}
void cuNormalize1024::execute(cuArrays<float> *correlation,
cuArrays<float> *reference, cuArrays<float> *secondary, cudaStream_t stream)
{
cuCorrNormalize1024(correlation, reference, secondary, stream);
}
// end of file
// end of file

View File

@ -4,7 +4,7 @@
*
* cuNormalizeProcessor is an abstract class for processors to normalize the correlation surface.
* It has different implementations wrt different image sizes.
* cuNormalize64, 128, ... 1024 use a shared memory accelerated algorithm, which are limited by the number of cuda threads in a block.
* cuNormalizeFixed<64/128/.../1024> use a shared memory accelerated algorithm, which are limited by the number of cuda threads in a block.
* cuNormalizeSAT uses the sum area table based algorithm, which applies to any size (used for >1024).
* cuNormalizer is a wrapper class which determines which processor to use.
*/
@ -44,31 +44,8 @@ public:
};
class cuNormalize64 : public cuNormalizeProcessor
{
public:
void execute(cuArrays<float> * correlation, cuArrays<float> *reference, cuArrays<float> *search, cudaStream_t stream) override;
};
class cuNormalize128 : public cuNormalizeProcessor
{
public:
void execute(cuArrays<float> * correlation, cuArrays<float> *reference, cuArrays<float> *search, cudaStream_t stream) override;
};
class cuNormalize256 : public cuNormalizeProcessor
{
public:
void execute(cuArrays<float> * correlation, cuArrays<float> *reference, cuArrays<float> *search, cudaStream_t stream) override;
};
class cuNormalize512 : public cuNormalizeProcessor
{
public:
void execute(cuArrays<float> * correlation, cuArrays<float> *reference, cuArrays<float> *search, cudaStream_t stream) override;
};
class cuNormalize1024 : public cuNormalizeProcessor
template<int Size>
class cuNormalizeFixed : public cuNormalizeProcessor
{
public:
void execute(cuArrays<float> * correlation, cuArrays<float> *reference, cuArrays<float> *search, cudaStream_t stream) override;
@ -88,4 +65,4 @@ public:
};
#endif
// end of file
// end of file