PyCuAmpcor: more code cleanup

* replace tabs with spaces to align the code

  * remove extra spaces at the end of lines

  * add more docstrings
LT1AB
Lijun Zhu 2020-11-25 12:55:38 -08:00
parent 38646456d3
commit 94caa1ea5c
22 changed files with 924 additions and 868 deletions

View File

@ -97,20 +97,22 @@ cdef extern from "cuAmpcorParameter.h":
int *referenceChunkWidth ## array of width of all reference chunks
int *secondaryChunkHeight ## array of height of all secondary chunks
int *secondaryChunkWidth ## array of width of all secondary chunks
int maxReferenceChunkHeight ## max height for all reference/secondary chunks, determine the size of reading cache in GPU
int maxReferenceChunkHeight ## max height for all reference chunks, determine the size of reading cache in GPU
int maxReferenceChunkWidth ## max width for all reference chunks, determine the size of reading cache in GPU
int maxSecondaryChunkHeight
int maxSecondaryChunkWidth
int maxSecondaryChunkHeight ## max height for secondary chunk
int maxSecondaryChunkWidth ## max width for secondary chunk
string grossOffsetImageName
string grossOffsetImageName ## Output Gross Offset fields filename
string offsetImageName ## Output Offset fields filename
string snrImageName ## Output SNR filename
string covImageName ## Output COV filename
void setStartPixels(int*, int*, int*, int*)
void setStartPixels(int, int, int*, int*)
void setStartPixels(int, int, int, int)
void checkPixelInImageRange() ## check whether
## set start pixels for reference/secondary windows
void setStartPixels(int*, int*, int*, int*) ## varying locations for reference and secondary
void setStartPixels(int, int, int*, int*) ## first window location for reference, varying for secondary
void setStartPixels(int, int, int, int) ## first window locations for reference and secondary
void checkPixelInImageRange() ## check whether all windows are within image range
void setupParameters() ## Process other parameters after Python Input
cdef extern from "cuAmpcorController.h":
@ -326,8 +328,7 @@ cdef class PyCuAmpcor(object):
def numberChunks(self):
return self.c_cuAmpcor.param.numberChunks
## gross offets
## gross offset
@property
def grossOffsetImageName(self):
return self.c_cuAmpcor.param.grossOffsetImageName.decode("utf-8")
@ -449,7 +450,3 @@ cdef class PyCuAmpcor(object):
# end of file

View File

@ -68,7 +68,7 @@ private:
cuArrays<int2> *offsetInit;
cuArrays<int2> *offsetZoomIn;
cuArrays<float2> *offsetFinal;
cuArrays<int2> *maxLocShift; //record the maxloc from the extract center
cuArrays<int2> *maxLocShift; // record the maxloc from the extract center
cuArrays<float> *corrMaxValue;
cuArrays<int2> *i_maxloc;
cuArrays<float> *r_maxval;
@ -79,7 +79,7 @@ private:
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
cuArrays<float> *r_snrValue;
// Variance estimation.
// Variance estimation
cuArrays<float3> *r_covValue;
public:
@ -88,15 +88,15 @@ public:
GDALImage *reference_, GDALImage *secondary_,
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_,
cuArrays<float3> *covImage_, cudaStream_t stream_);
// destructor
~cuAmpcorChunk();
//
// local methods
void setIndex(int idxDown_, int idxAcross_);
void loadReferenceChunk();
void loadSecondaryChunk();
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
~cuAmpcorChunk();
// run the given chunk
void run(int, int);
};

View File

@ -94,3 +94,5 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
#endif
// end of file

View File

@ -3,6 +3,20 @@
* @brief Utilities for copying/converting images to different format
*
* All methods are declared in cuAmpcorUtil.h
* cuArraysCopyToBatch to extract a batch of windows from the raw image
* various implementations include:
* 1. fixed or varying offsets, as start pixels for windows
* 2. complex to complex, usually
* 3. complex to (amplitude,0), for TOPS
* 4. real to complex, for real images
* cuArraysCopyExtract to extract(shrink in size) from a batch of windows to another batch
* overloaded for different data types
* cuArraysCopyInsert to insert a batch of windows (smaller in size) to another batch
* overloaded for different data types
* cuArraysCopyPadded to insert a batch of windows to another batch while padding 0s for rest elements
* used for fft oversampling
* see also cuArraysPadding.cu for other zero-padding utilities
* cuArraysR2C cuArraysC2R cuArraysAbs to convert between different data types
*/
@ -29,8 +43,15 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX
imageOut[idxOut] = imageIn[idxIn];
}
// copy a chunk into a batch of chips for a given stride
// used to extract chips from a raw image
/**
* Copy a chunk into a batch of chips for a given stride
* @note used to extract chips from a raw image
* @param image1 Input image as a large chunk
* @param image2 Output images as a batch of chips
* @param strideH stride along height to extract chips
* @param strideW stride along width to extract chips
* @param stream cudaStream
*/
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
int strideH, int strideW, cudaStream_t stream)
{
@ -45,7 +66,7 @@ void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
getLastCudaError("cuArraysCopyToBatch_kernel");
}
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex
// kernel for cuArraysCopyToBatchWithOffset
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int *offsetX, const int *offsetY)
@ -59,7 +80,16 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons
imageOut[idxOut] = imageIn[idxIn];
}
// lda1 (inNY) is the leading dimension of image1, usually, its width
/**
* Copy a chunk into a batch of chips with varying offsets/strides
* @note used to extract chips from a raw secondary image with varying offsets
* @param image1 Input image as a large chunk
* @param lda1 the leading dimension of image1, usually its width (inNY in the kernel)
* @param image2 Output images as a batch of chips
* @param offsetH (varying) offsets along height to extract chips
* @param offsetW (varying) offsets along width to extract chips
* @param stream cudaStream
*/
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream)
{
@ -73,7 +103,7 @@ void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuA
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
}
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes)
// same as above, but from complex to real(take amplitudes)
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int *offsetX, const int *offsetY)
@ -87,6 +117,16 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
}
/**
* Copy a chunk into a batch of chips with varying offsets/strides
* @note similar to cuArraysCopyToBatchWithOffset, but take amplitudes instead
* @param image1 Input image as a large chunk
* @param lda1 the leading dimension of image1, usually its width (inNY in the kernel)
* @param image2 Output images as a batch of chips
* @param offsetH (varying) offsets along height to extract chips
* @param offsetW (varying) offsets along width to extract chips
* @param stream cudaStream
*/
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream)
{
@ -100,7 +140,7 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1,
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
}
// copy a chunk into a batch of chips for a set of offsets (varying strides), from real to complex(to real part)
// kernel for cuArraysCopyToBatchWithOffsetR2C
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int *offsetX, const int *offsetY)
@ -114,6 +154,16 @@ __global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, co
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
}
/**
* Copy a chunk into a batch of chips with varying offsets/strides
* @note used to load real images
* @param image1 Input image as a large chunk
* @param lda1 the leading dimension of image1, usually its width (inNY in the kernel)
* @param image2 Output images as a batch of chips
* @param offsetH (varying) offsets along height to extract chips
* @param offsetW (varying) offsets along width to extract chips
* @param stream cudaStream
*/
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
const int *offsetH, const int* offsetW, cudaStream_t stream)
{
@ -144,6 +194,15 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
}
/**
* Copy a chunk into a batch of chips for a given stride, from complex to real
* @note similar to cuArraysCopyToBatch, but takes amplitudes (complex to real) instead
* @param image1 Input image (complex) as a large chunk
* @param image2 Output images (real) as a batch of chips
* @param strideH stride along height to extract chips
* @param strideW stride along width to extract chips
* @param stream cudaStream
*/
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
int strideH, int strideW, cudaStream_t stream)
{
@ -176,11 +235,12 @@ __global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int
}
}
///
/// Copy a tile of images to another image, with starting pixels offsets
/// @param[in] imageIn inut images
/// param[out] imageOut output images of dimension nImages*outNX*outNY
///
/**
* Copy a tile of images to another image, with starting pixels offsets, float to float
* @param[in] imagesIn input images of dimension nImages*inNX*inNY
* @param[out] imagesOut output images of dimension nImages*outNX*outNY
* @param[in] offsets varying offsets (one per image) for extraction
* @param[in] stream cudaStream
*/
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
{
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
@ -210,8 +270,10 @@ __global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, cons
}
/**
* copy/extract complex images from a large size to a smaller size from the location (offsetX, offsetY)
* offset is varying for each image
* Copy a tile of images to another image, with starting pixels offsets, float2 to float2
* @param[in] imagesIn input images of dimension nImages*inNX*inNY
* @param[out] imagesOut output images of dimension nImages*outNX*outNY
* @param[in] offsets varying offsets (one per image) for extraction
* @param[in] stream cudaStream
*/
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
{
@ -317,8 +379,7 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
getLastCudaError("cuArraysCopyExtract error");
}
//
// cuda kernel for cuArraysCopyExtract float2 to float2
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
float2 *imageOut, const int outNX, const int outNY, const int nImages,
const int offsetX, const int offsetY)
@ -343,15 +404,12 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
const int nthreads = NTHREADS2D;
dim3 threadsperblock(nthreads, nthreads,1);
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
//std::cout << "debug copyExtract" << imagesOut->width << imagesOut->height << "\n";
//imagesIn->debuginfo(stream);
//imagesOut->debuginfo(stream);
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
(imagesIn->devData, imagesIn->height, imagesIn->width,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtractC2C error");
}
//
// float3
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
@ -384,8 +442,6 @@ void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut
getLastCudaError("cuArraysCopyExtractFloat3 error");
}
//
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
float *imageOut, const int outNX, const int outNY, const int nImages,
@ -417,7 +473,6 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut,
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
getLastCudaError("cuArraysCopyExtractC2C error");
}
//
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
float2* imageOut, const int outNY, const int offsetX, const int offsetY)
@ -441,7 +496,7 @@ void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, i
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
imageOut->devData, imageOut->width, offsetX, offsetY);
getLastCudaError("cuArraysCopyInsert error");
getLastCudaError("cuArraysCopyInsert float2 error");
}
//
// float3
@ -467,7 +522,7 @@ void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, i
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
imageOut->devData, imageOut->width, offsetX, offsetY);
getLastCudaError("cuArraysCopyInsert error");
getLastCudaError("cuArraysCopyInsert float3 error");
}
//
@ -580,6 +635,7 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN
/**
* copy complex images from a smaller size to a larger size while padding 0 for extra elements
* @note use for zero-padding in fft oversampling
*/
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
{
@ -590,7 +646,7 @@ void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cu
cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
getLastCudaError("cuArraysCopyInversePadded error");
getLastCudaError("cuArraysCopyPadded C2C error");
}
// kernel for cuArraysCopyPadded
@ -616,6 +672,7 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY
/**
* copy real images to complex images (imaginary part=0) with larger size (pad 0 for extra elements)
* @note use for zero-padding in fft oversampling
*/
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
{
@ -626,7 +683,7 @@ void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cud
cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
getLastCudaError("cuArraysCopyPadded error");
getLastCudaError("cuArraysCopyPadded R2C error");
}
// cuda kernel for setting a constant value
@ -651,7 +708,7 @@ void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t str
cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
(imageIn->devData, imageIn->size, value);
getLastCudaError("cuArraysCopyPadded error");
getLastCudaError("cuArraysSetConstant error");
}
@ -668,6 +725,8 @@ __global__ void cuArraysR2C_kernel(float *image1, float2 *image2, int size)
/**
* Convert real images to complex images (set imaginary parts to 0)
* @param[in] image1 input images
* @param[out] image2 output images
*/
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream)
{
@ -688,7 +747,9 @@ __global__ void cuArraysC2R_kernel(float2 *image1, float *image2, int size)
}
/**
* Take real parts of complex images
* Take real part of complex images
* @param[in] image1 input images
* @param[out] image2 output images
*/
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
{
@ -709,6 +770,8 @@ __global__ void cuArraysAbs_kernel(float2 *image1, float *image2, int size)
/**
* Obtain abs (amplitudes) of complex images
* @param[in] image1 input images
* @param[out] image2 output images
*/
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
{

View File

@ -195,6 +195,3 @@ void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *i
}
// end of file

View File

@ -61,6 +61,3 @@ class cuSincOverSamplerR2R
#endif // _CUSINCOVERSAMPLER_H
// end of file