From 14d708863de69107bbad04b6c29c6d8f0433e8cd Mon Sep 17 00:00:00 2001 From: Ryan Burns Date: Mon, 19 Jul 2021 16:34:35 -0700 Subject: [PATCH] Templatize normalization routines --- contrib/PyCuAmpcor/src/cuAmpcorUtil.h | 8 +- contrib/PyCuAmpcor/src/cuCorrNormalization.cu | 79 ++++++------------- contrib/PyCuAmpcor/src/cuCorrNormalizer.cu | 45 ++++------- contrib/PyCuAmpcor/src/cuCorrNormalizer.h | 31 +------- 4 files changed, 46 insertions(+), 117 deletions(-) diff --git a/contrib/PyCuAmpcor/src/cuAmpcorUtil.h b/contrib/PyCuAmpcor/src/cuAmpcorUtil.h index ea49534..69f09ce 100644 --- a/contrib/PyCuAmpcor/src/cuAmpcorUtil.h +++ b/contrib/PyCuAmpcor/src/cuAmpcorUtil.h @@ -61,11 +61,9 @@ void cuArraysPaddingMany(cuArrays *image1, cuArrays *image2, cud //in cuCorrNormalization.cu: utilities to normalize the cross correlation function void cuArraysSubtractMean(cuArrays *images, cudaStream_t stream); void cuCorrNormalize(cuArrays *templates, cuArrays *images, cuArrays *results, cudaStream_t stream); -void cuCorrNormalize64(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream); -void cuCorrNormalize128(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream); -void cuCorrNormalize256(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream); -void cuCorrNormalize512(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream); -void cuCorrNormalize1024(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream); + +template +void cuCorrNormalizeFixed(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream); // in cuCorrNormalizationSAT.cu: to normalize the cross correlation function with sum area table void cuCorrNormalizeSAT(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, diff --git a/contrib/PyCuAmpcor/src/cuCorrNormalization.cu b/contrib/PyCuAmpcor/src/cuCorrNormalization.cu index e32fa5e..b6796d2 100644 --- a/contrib/PyCuAmpcor/src/cuCorrNormalization.cu +++ b/contrib/PyCuAmpcor/src/cuCorrNormalization.cu @@ -377,64 +377,20 @@ void cuCorrNormalize(cuArrays *templates, cuArrays *images, cuArra } -void cuCorrNormalize64(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream) +template struct Log2; +template<> struct Log2<64> { static const int value = 6; }; +template<> struct Log2<128> { static const int value = 7; }; +template<> struct Log2<256> { static const int value = 8; }; +template<> struct Log2<512> { static const int value = 9; }; +template<> struct Log2<1024> { static const int value = 10; }; + +template +void cuCorrNormalizeFixed(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream) { const int nImages = correlation->count; const dim3 grid(1, 1, nImages); const float invReferenceSize = 1.0f/reference->size; - cuCorrNormalize_kernel< 6><<>>(nImages, - reference->devData, reference->height, reference->width, reference->size, - secondary->devData, secondary->height, secondary->width, secondary->size, - correlation->devData, correlation->height, correlation->width, correlation->size, - invReferenceSize); - getLastCudaError("cuCorrNormalize kernel error"); -} - -void cuCorrNormalize128(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - const int nImages = correlation->count; - const dim3 grid(1, 1, nImages); - const float invReferenceSize = 1.0f/reference->size; - cuCorrNormalize_kernel< 7><<>>(nImages, - reference->devData, reference->height, reference->width, reference->size, - secondary->devData, secondary->height, secondary->width, secondary->size, - correlation->devData, correlation->height, correlation->width, correlation->size, - invReferenceSize); - getLastCudaError("cuCorrNormalize kernel error"); -} - -void cuCorrNormalize256(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - const int nImages = correlation->count; - const dim3 grid(1, 1, nImages); - const float invReferenceSize = 1.0f/reference->size; - cuCorrNormalize_kernel< 8><<>>(nImages, - reference->devData, reference->height, reference->width, reference->size, - secondary->devData, secondary->height, secondary->width, secondary->size, - correlation->devData, correlation->height, correlation->width, correlation->size, - invReferenceSize); - getLastCudaError("cuCorrNormalize kernel error"); -} - -void cuCorrNormalize512(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - const int nImages = correlation->count; - const dim3 grid(1, 1, nImages); - const float invReferenceSize = 1.0f/reference->size; - cuCorrNormalize_kernel< 9><<>>(nImages, - reference->devData, reference->height, reference->width, reference->size, - secondary->devData, secondary->height, secondary->width, secondary->size, - correlation->devData, correlation->height, correlation->width, correlation->size, - invReferenceSize); - getLastCudaError("cuCorrNormalize kernel error"); -} - -void cuCorrNormalize1024(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - const int nImages = correlation->count; - const dim3 grid(1, 1, nImages); - const float invReferenceSize = 1.0f/reference->size; - cuCorrNormalize_kernel< 10><<>>(nImages, + cuCorrNormalize_kernel::value><<>>(nImages, reference->devData, reference->height, reference->width, reference->size, secondary->devData, secondary->height, secondary->width, secondary->size, correlation->devData, correlation->height, correlation->width, correlation->size, @@ -442,5 +398,20 @@ void cuCorrNormalize1024(cuArrays *correlation, cuArrays *referenc getLastCudaError("cuCorrNormalize kernel error"); } +template void cuCorrNormalizeFixed<64>(cuArrays *correlation, + cuArrays *reference, cuArrays *secondary, + cudaStream_t stream); +template void cuCorrNormalizeFixed<128>(cuArrays *correlation, + cuArrays *reference, cuArrays *secondary, + cudaStream_t stream); +template void cuCorrNormalizeFixed<256>(cuArrays *correlation, + cuArrays *reference, cuArrays *secondary, + cudaStream_t stream); +template void cuCorrNormalizeFixed<512>(cuArrays *correlation, + cuArrays *reference, cuArrays *secondary, + cudaStream_t stream); +template void cuCorrNormalizeFixed<1024>(cuArrays *correlation, + cuArrays *reference, cuArrays *secondary, + cudaStream_t stream); // end of file diff --git a/contrib/PyCuAmpcor/src/cuCorrNormalizer.cu b/contrib/PyCuAmpcor/src/cuCorrNormalizer.cu index b5058b6..783080d 100644 --- a/contrib/PyCuAmpcor/src/cuCorrNormalizer.cu +++ b/contrib/PyCuAmpcor/src/cuCorrNormalizer.cu @@ -11,19 +11,19 @@ cuNormalizer::cuNormalizer(int secondaryNX, int secondaryNY, int count) { // depending on NY, choose different processor if(secondaryNY <= 64) { - processor = new cuNormalize64(); + processor = new cuNormalizeFixed<64>(); } else if (secondaryNY <= 128) { - processor = new cuNormalize128(); + processor = new cuNormalizeFixed<128>(); } else if (secondaryNY <= 256) { - processor = new cuNormalize256(); + processor = new cuNormalizeFixed<256>(); } else if (secondaryNY <= 512) { - processor = new cuNormalize512(); + processor = new cuNormalizeFixed<512>(); } else if (secondaryNY <= 1024) { - processor = new cuNormalize1024(); + processor = new cuNormalizeFixed<1024>(); } else { processor = new cuNormalizeSAT(secondaryNX, secondaryNY, count); @@ -74,34 +74,17 @@ void cuNormalizeSAT::execute(cuArrays *correlation, referenceSum2, secondarySAT, secondarySAT2, stream); } -void cuNormalize64::execute(cuArrays *correlation, +template +void cuNormalizeFixed::execute(cuArrays *correlation, cuArrays *reference, cuArrays *secondary, cudaStream_t stream) { - cuCorrNormalize64(correlation, reference, secondary, stream); + cuCorrNormalizeFixed(correlation, reference, secondary, stream); } -void cuNormalize128::execute(cuArrays *correlation, - cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - cuCorrNormalize128(correlation, reference, secondary, stream); -} +template class cuNormalizeFixed<64>; +template class cuNormalizeFixed<128>; +template class cuNormalizeFixed<256>; +template class cuNormalizeFixed<512>; +template class cuNormalizeFixed<1024>; -void cuNormalize256::execute(cuArrays *correlation, - cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - cuCorrNormalize256(correlation, reference, secondary, stream); -} - -void cuNormalize512::execute(cuArrays *correlation, - cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - cuCorrNormalize512(correlation, reference, secondary, stream); -} - -void cuNormalize1024::execute(cuArrays *correlation, - cuArrays *reference, cuArrays *secondary, cudaStream_t stream) -{ - cuCorrNormalize1024(correlation, reference, secondary, stream); -} - -// end of file \ No newline at end of file +// end of file diff --git a/contrib/PyCuAmpcor/src/cuCorrNormalizer.h b/contrib/PyCuAmpcor/src/cuCorrNormalizer.h index aa45a16..043accd 100644 --- a/contrib/PyCuAmpcor/src/cuCorrNormalizer.h +++ b/contrib/PyCuAmpcor/src/cuCorrNormalizer.h @@ -4,7 +4,7 @@ * * cuNormalizeProcessor is an abstract class for processors to normalize the correlation surface. * It has different implementations wrt different image sizes. - * cuNormalize64, 128, ... 1024 use a shared memory accelerated algorithm, which are limited by the number of cuda threads in a block. + * cuNormalizeFixed<64/128/.../1024> use a shared memory accelerated algorithm, which are limited by the number of cuda threads in a block. * cuNormalizeSAT uses the sum area table based algorithm, which applies to any size (used for >1024). * cuNormalizer is a wrapper class which determines which processor to use. */ @@ -44,31 +44,8 @@ public: }; -class cuNormalize64 : public cuNormalizeProcessor -{ -public: - void execute(cuArrays * correlation, cuArrays *reference, cuArrays *search, cudaStream_t stream) override; -}; - -class cuNormalize128 : public cuNormalizeProcessor -{ -public: - void execute(cuArrays * correlation, cuArrays *reference, cuArrays *search, cudaStream_t stream) override; -}; - -class cuNormalize256 : public cuNormalizeProcessor -{ -public: - void execute(cuArrays * correlation, cuArrays *reference, cuArrays *search, cudaStream_t stream) override; -}; - -class cuNormalize512 : public cuNormalizeProcessor -{ -public: - void execute(cuArrays * correlation, cuArrays *reference, cuArrays *search, cudaStream_t stream) override; -}; - -class cuNormalize1024 : public cuNormalizeProcessor +template +class cuNormalizeFixed : public cuNormalizeProcessor { public: void execute(cuArrays * correlation, cuArrays *reference, cuArrays *search, cudaStream_t stream) override; @@ -88,4 +65,4 @@ public: }; #endif -// end of file \ No newline at end of file +// end of file