PyCuAmpcor: more code cleanup

* replace tabs with spaces to align the code * remove extra spaces at the end of lines * add more docstrings
2020-11-25 12:55:38 -08:00 · 2020-11-25 12:55:38 -08:00 · 94caa1ea5c
parent 38646456d3
commit 94caa1ea5c
22 changed files with 924 additions and 868 deletions
--- a/contrib/PyCuAmpcor/src/PyCuAmpcor.pyx
+++ b/contrib/PyCuAmpcor/src/PyCuAmpcor.pyx
@ -97,20 +97,22 @@ cdef extern from "cuAmpcorParameter.h":
        int *referenceChunkWidth            ## array of width of all reference chunks
        int *secondaryChunkHeight           ## array of width of all reference chunks
        int *secondaryChunkWidth            ## array of width of all secondary chunks
-        int maxReferenceChunkHeight 			## max height for all reference/secondary chunks, determine the size of reading cache in GPU
+        int maxReferenceChunkHeight         ## max height for all reference chunks, determine the size of reading cache in GPU
        int maxReferenceChunkWidth          ## max width for all reference chunks, determine the size of reading cache in GPU
-        int maxSecondaryChunkHeight
-        int maxSecondaryChunkWidth
+        int maxSecondaryChunkHeight         ## max height for secondary chunk
+        int maxSecondaryChunkWidth          ## max width for secondary chunk

-        string grossOffsetImageName
+        string grossOffsetImageName         ## Output Gross Offset fields filename
        string offsetImageName              ## Output Offset fields filename
        string snrImageName                 ## Output SNR filename
        string covImageName                 ## Output COV filename
-        void setStartPixels(int*, int*, int*, int*)
-        void setStartPixels(int, int, int*, int*)
-        void setStartPixels(int, int, int, int)
-        void checkPixelInImageRange()  ## check whether

+        ## set start pixels for reference/secondary windows
+        void setStartPixels(int*, int*, int*, int*)  ## varying locations for reference and secondary
+        void setStartPixels(int, int, int*, int*)    ## first window location for reference, varying for secondary
+        void setStartPixels(int, int, int, int)      ## first window locations for reference and secondary
+
+        void checkPixelInImageRange()       ## check whether all windows are within image range
        void setupParameters()              ## Process other parameters after Python Inpu

 cdef extern from "cuAmpcorController.h":
@ -326,8 +328,7 @@ cdef class PyCuAmpcor(object):
    def numberChunks(self):
        return  self.c_cuAmpcor.param.numberChunks

-
-    ## gross offets
+    ## gross offset
    @property
    def grossOffsetImageName(self):
        return self.c_cuAmpcor.param.grossOffsetImageName.decode("utf-8")
@ -449,7 +450,3 @@ cdef class PyCuAmpcor(object):


 # end of file
-
-
-
-
--- a/contrib/PyCuAmpcor/src/cuAmpcorChunk.h
+++ b/contrib/PyCuAmpcor/src/cuAmpcorChunk.h
@ -68,7 +68,7 @@ private:
    cuArrays<int2> *offsetInit;
    cuArrays<int2> *offsetZoomIn;
    cuArrays<float2> *offsetFinal;
-	cuArrays<int2> *maxLocShift; //record the maxloc from the extract center
+    cuArrays<int2> *maxLocShift; // record the maxloc from the extract center
    cuArrays<float> *corrMaxValue;
    cuArrays<int2> *i_maxloc;
    cuArrays<float> *r_maxval;
@ -79,7 +79,7 @@ private:
    cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
    cuArrays<float> *r_snrValue;

-    // Variance estimation.
+    // Variance estimation
    cuArrays<float3> *r_covValue;

 public:
@ -88,15 +88,15 @@ public:
        GDALImage *reference_, GDALImage *secondary_,
        cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_,
        cuArrays<float3> *covImage_, cudaStream_t stream_);
+    // destructor
+    ~cuAmpcorChunk();

-    //
+    // local methods
    void setIndex(int idxDown_, int idxAcross_);
    void loadReferenceChunk();
    void loadSecondaryChunk();
    void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
-
-    ~cuAmpcorChunk();
-
+    // run the given chunk
    void run(int, int);
 };

--- a/contrib/PyCuAmpcor/src/cuAmpcorUtil.h
+++ b/contrib/PyCuAmpcor/src/cuAmpcorUtil.h
@ -94,3 +94,5 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
 void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);

 #endif
+
+// end of file
--- a/contrib/PyCuAmpcor/src/cuArraysCopy.cu
+++ b/contrib/PyCuAmpcor/src/cuArraysCopy.cu
@ -3,6 +3,20 @@
 * @brief Utilities for copying/converting images to different format
 *
 * All methods are declared in cuAmpcorUtil.h
+ * cuArraysCopyToBatch to extract a batch of windows from the raw image
+ *   various implementations include:
+ *   1. fixed or varying offsets, as start pixels for windows
+ *   2. complex to complex, usually
+ *   3. complex to (amplitude,0), for TOPS
+ *   4. real to complex, for real images
+ * cuArraysCopyExtract to extract(shrink in size) from a batch of windows to another batch
+ *   overloaded for different data types
+ * cuArraysCopyInsert to insert a batch of windows (smaller in size) to another batch
+ *   overloaded for different data types
+ * cuArraysCopyPadded to insert a batch of windows to another batch while padding 0s for rest elements
+ *   used for fft oversampling
+ *   see also cuArraysPadding.cu for other zero-padding utilities
+ * cuArraysR2C cuArraysC2R cuArraysAbs to convert between different data types
 */


@ -29,8 +43,15 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX
    imageOut[idxOut] = imageIn[idxIn];
 }

-// copy a chunk into a batch of chips for a given stride
-// used to extract chips from a raw image
+/**
+ * Copy a chunk into a batch of chips for a given stride
+ * @note used to extract chips from a raw image
+ * @param image1 Input image as a large chunk
+ * @param image2 Output images as a batch of chips
+ * @param strideH stride along height to extract chips
+ * @param strideW stride along width to extract chips
+ * @param stream cudaStream
+ */
 void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
    int strideH, int strideW, cudaStream_t stream)
 {
@ -45,7 +66,7 @@ void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
    getLastCudaError("cuArraysCopyToBatch_kernel");
 }

-// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex
+// kernel for cuArraysCopyToBatchWithOffset
 __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
    float2 *imageOut, const int outNX, const int outNY, const int nImages,
    const int *offsetX, const int *offsetY)
@ -59,7 +80,16 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons
    imageOut[idxOut] = imageIn[idxIn];
 }

-// lda1 (inNY) is the leading dimension of image1, usually, its width
+/**
+ * Copy a chunk into a batch of chips with varying offsets/strides
+ * @note used to extract chips from a raw secondary image with varying offsets
+ * @param image1 Input image as a large chunk
+ * @param lda1 the leading dimension of image1, usually, its width inNY
+ * @param image2 Output images as a batch of chips
+ * @param offsetH (varying) offsets along height to extract chips
+ * @param offsetW (varying) offsets along width to extract chips
+ * @param stream cudaStream
+ */
 void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
    const int *offsetH, const int* offsetW, cudaStream_t stream)
 {
@ -73,7 +103,7 @@ void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuA
    getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
 }

-// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes)
+// same as above, but from complex to real(take amplitudes)
 __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
    float2 *imageOut, const int outNX, const int outNY, const int nImages,
    const int *offsetX, const int *offsetY)
@ -87,6 +117,16 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c
    imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
 }

+/**
+ * Copy a chunk into a batch of chips with varying offsets/strides
+ * @note similar to cuArraysCopyToBatchWithOffset, but take amplitudes instead
+ * @param image1 Input image as a large chunk
+ * @param lda1 the leading dimension of image1, usually, its width inNY
+ * @param image2 Output images as a batch of chips
+ * @param offsetH (varying) offsets along height to extract chips
+ * @param offsetW (varying) offsets along width to extract chips
+ * @param stream cudaStream
+ */
 void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
    const int *offsetH, const int* offsetW, cudaStream_t stream)
 {
@ -100,7 +140,7 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1,
    getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
 }

-// copy a chunk into a batch of chips for a set of offsets (varying strides), from real to complex(to real part)
+// kernel for cuArraysCopyToBatchWithOffsetR2C
 __global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
    float2 *imageOut, const int outNX, const int outNY, const int nImages,
    const int *offsetX, const int *offsetY)
@ -114,6 +154,16 @@ __global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, co
    imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
 }

+/**
+ * Copy a chunk into a batch of chips with varying offsets/strides
+ * @note used to load real images
+ * @param image1 Input image as a large chunk
+ * @param lda1 the leading dimension of image1, usually, its width inNY
+ * @param image2 Output images as a batch of chips
+ * @param offsetH (varying) offsets along height to extract chips
+ * @param offsetW (varying) offsets along width to extract chips
+ * @param stream cudaStream
+ */
 void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
    const int *offsetH, const int* offsetW, cudaStream_t stream)
 {
@ -144,6 +194,15 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co
    imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
 }

+/**
+ * Copy a chunk into a batch of chips for a given stride, from complex to real
+ * @note similar to cuArraysCopyToBatch, but take amplitudes instead
+ * @param image1 Input image as a large chunk
+ * @param image2 Output images as a batch of chips
+ * @param strideH stride along height to extract chips
+ * @param strideW stride along width to extract chips
+ * @param stream cudaStream
+ */
 void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
    int strideH, int strideW, cudaStream_t stream)
 {
@ -176,11 +235,12 @@ __global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int
    }
 }

-///
-/// Copy a tile of images to another image, with starting pixels offsets
-/// @param[in] imageIn inut images
-/// param[out] imageOut output images of dimension nImages*outNX*outNY
-///
+/**
+ * Copy a tile of images to another image, with starting pixels offsets, float to float
+ * @param[in] imagesIn input images of dimension nImages*inNX*inNY
+ * @param[out] imagesOut output images of dimension nImages*outNX*outNY
+ * @param[in] offsets varying offsets for extraction
+ */
 void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
 {
    //assert(imagesIn->height >= imagesOut && inNY >= outNY);
@ -210,8 +270,10 @@ __global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, cons
 }

 /**
- * copy/extract complex images from a large size to a smaller size from the location (offsetX, offsetY)
- * offset is varying for each image
+ * Copy a tile of images to another image, with starting pixels offsets, float2 to float2
+ * @param[in] imagesIn input images of dimension nImages*inNX*inNY
+ * @param[out] imagesOut output images of dimension nImages*outNX*outNY
+ * @param[in] offsets varying offsets for extraction
 */
 void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
 {
@ -317,8 +379,7 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
    getLastCudaError("cuArraysCopyExtract error");
 }

-//
-
+// cuda kernel for cuArraysCopyExtract float2 to float2
 __global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
     float2 *imageOut, const int outNX, const int outNY, const int nImages,
     const int offsetX, const int offsetY)
@ -343,15 +404,12 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
    const int nthreads = NTHREADS2D;
    dim3 threadsperblock(nthreads, nthreads,1);
    dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
-    //std::cout << "debug copyExtract" << imagesOut->width << imagesOut->height << "\n";
-    //imagesIn->debuginfo(stream);
-    //imagesOut->debuginfo(stream);
+
    cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
        (imagesIn->devData, imagesIn->height, imagesIn->width,
        imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
    getLastCudaError("cuArraysCopyExtractC2C error");
 }
-//

 // float3
 __global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
@ -384,8 +442,6 @@ void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut
    getLastCudaError("cuArraysCopyExtractFloat3 error");
 }

-//
-

 __global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
     float *imageOut, const int outNX, const int outNY, const int nImages,
@ -417,7 +473,6 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut,
        imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
    getLastCudaError("cuArraysCopyExtractC2C error");
 }
-//

 __global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
   float2* imageOut, const int outNY, const int offsetX, const int offsetY)
@ -441,7 +496,7 @@ void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, i
    dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
    cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
           imageOut->devData,  imageOut->width, offsetX, offsetY);
-	getLastCudaError("cuArraysCopyInsert error");
+    getLastCudaError("cuArraysCopyInsert float2 error");
 }
 //
 // float3
@ -467,7 +522,7 @@ void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, i
    dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
    cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
           imageOut->devData,  imageOut->width, offsetX, offsetY);
-	getLastCudaError("cuArraysCopyInsert error");
+    getLastCudaError("cuArraysCopyInsert float3 error");
 }

 //
@ -580,6 +635,7 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN

 /**
 * copy complex images from a smaller size to a larger size while padding 0 for extra elements
+ * @note use for zero-padding in fft oversampling
 */
 void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
 {
@ -590,7 +646,7 @@ void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cu
    cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
        (imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
        imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
-	 getLastCudaError("cuArraysCopyInversePadded error");
+     getLastCudaError("cuArraysCopyPadded C2C error");
 }

 // kernel for cuArraysCopyPadded
@ -616,6 +672,7 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY

 /**
 * copy real images to complex images (imaginary part=0) with larger size (pad 0 for extra elements)
+ * @note use for zero-padding in fft oversampling
 */
 void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
 {
@ -626,7 +683,7 @@ void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cud
    cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
        (imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
        imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
-	 getLastCudaError("cuArraysCopyPadded error");
+     getLastCudaError("cuArraysCopyPadded R2C error");
 }

 // cuda kernel for setting a constant value
@ -651,7 +708,7 @@ void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t str

    cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
        (imageIn->devData, imageIn->size, value);
-	 getLastCudaError("cuArraysCopyPadded error");
+     getLastCudaError("cuArraysSetConstant error");
 }


@ -668,6 +725,8 @@ __global__ void cuArraysR2C_kernel(float *image1, float2 *image2, int size)

 /**
 * Convert real images to complex images (set imaginary parts to 0)
+ * @param[in] image1 input images
+ * @param[out] image2 output images
 */
 void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream)
 {
@ -688,7 +747,9 @@ __global__ void cuArraysC2R_kernel(float2 *image1, float *image2, int size)
 }

 /**
- * Take real parts of complex images
+ * Take real part of complex images
+ * @param[in] image1 input images
+ * @param[out] image2 output images
 */
 void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
 {
@ -709,6 +770,8 @@ __global__ void cuArraysAbs_kernel(float2 *image1, float *image2, int size)

 /**
 * Obtain abs (amplitudes) of complex images
+ * @param[in] image1 input images
+ * @param[out] image2 output images
 */
 void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
 {
--- a/contrib/PyCuAmpcor/src/cuSincOverSampler.cu
+++ b/contrib/PyCuAmpcor/src/cuSincOverSampler.cu
@ -195,6 +195,3 @@ void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *i
 }

 // end of file
-
-
-
--- a/contrib/PyCuAmpcor/src/cuSincOverSampler.h
+++ b/contrib/PyCuAmpcor/src/cuSincOverSampler.h
@ -61,6 +61,3 @@ class cuSincOverSamplerR2R

 #endif // _CUSINCOVERSAMPLER_H
 // end of file
-
-
-