|
|
@ -3,6 +3,20 @@
|
|
|
|
* @brief Utilities for copying/converting images to different format
|
|
|
|
* @brief Utilities for copying/converting images to different format
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* All methods are declared in cuAmpcorUtil.h
|
|
|
|
* All methods are declared in cuAmpcorUtil.h
|
|
|
|
|
|
|
|
* cuArraysCopyToBatch to extract a batch of windows from the raw image
|
|
|
|
|
|
|
|
* various implementations include:
|
|
|
|
|
|
|
|
* 1. fixed or varying offsets, as start pixels for windows
|
|
|
|
|
|
|
|
* 2. complex to complex, usually
|
|
|
|
|
|
|
|
* 3. complex to (amplitude,0), for TOPS
|
|
|
|
|
|
|
|
* 4. real to complex, for real images
|
|
|
|
|
|
|
|
* cuArraysCopyExtract to extract(shrink in size) from a batch of windows to another batch
|
|
|
|
|
|
|
|
* overloaded for different data types
|
|
|
|
|
|
|
|
* cuArraysCopyInsert to insert a batch of windows (smaller in size) to another batch
|
|
|
|
|
|
|
|
* overloaded for different data types
|
|
|
|
|
|
|
|
* cuArraysCopyPadded to insert a batch of windows to another batch while padding 0s for rest elements
|
|
|
|
|
|
|
|
* used for fft oversampling
|
|
|
|
|
|
|
|
* see also cuArraysPadding.cu for other zero-padding utilities
|
|
|
|
|
|
|
|
* cuArraysR2C cuArraysC2R cuArraysAbs to convert between different data types
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -29,8 +43,15 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX
|
|
|
|
imageOut[idxOut] = imageIn[idxIn];
|
|
|
|
imageOut[idxOut] = imageIn[idxIn];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// copy a chunk into a batch of chips for a given stride
|
|
|
|
/**
|
|
|
|
// used to extract chips from a raw image
|
|
|
|
* Copy a chunk into a batch of chips for a given stride
|
|
|
|
|
|
|
|
* @note used to extract chips from a raw image
|
|
|
|
|
|
|
|
* @param image1 Input image as a large chunk
|
|
|
|
|
|
|
|
* @param image2 Output images as a batch of chips
|
|
|
|
|
|
|
|
* @param strideH stride along height to extract chips
|
|
|
|
|
|
|
|
* @param strideW stride along width to extract chips
|
|
|
|
|
|
|
|
* @param stream cudaStream
|
|
|
|
|
|
|
|
*/
|
|
|
|
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
|
|
|
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
|
|
|
int strideH, int strideW, cudaStream_t stream)
|
|
|
|
int strideH, int strideW, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -45,7 +66,7 @@ void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
|
|
|
getLastCudaError("cuArraysCopyToBatch_kernel");
|
|
|
|
getLastCudaError("cuArraysCopyToBatch_kernel");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex
|
|
|
|
// kernel for cuArraysCopyToBatchWithOffset
|
|
|
|
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
|
|
|
|
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
const int *offsetX, const int *offsetY)
|
|
|
|
const int *offsetX, const int *offsetY)
|
|
|
@ -59,7 +80,16 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons
|
|
|
|
imageOut[idxOut] = imageIn[idxIn];
|
|
|
|
imageOut[idxOut] = imageIn[idxIn];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// lda1 (inNY) is the leading dimension of image1, usually, its width
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Copy a chunk into a batch of chips with varying offsets/strides
|
|
|
|
|
|
|
|
* @note used to extract chips from a raw secondary image with varying offsets
|
|
|
|
|
|
|
|
* @param image1 Input image as a large chunk
|
|
|
|
|
|
|
|
* @param lda1 the leading dimension of image1, usually, its width inNY
|
|
|
|
|
|
|
|
* @param image2 Output images as a batch of chips
|
|
|
|
|
|
|
|
* @param offsetH (varying) offsets along height to extract chips
|
|
|
|
|
|
|
|
* @param offsetW (varying) offsets along width to extract chips
|
|
|
|
|
|
|
|
* @param stream cudaStream
|
|
|
|
|
|
|
|
*/
|
|
|
|
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
|
|
|
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
|
|
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
|
|
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -73,7 +103,7 @@ void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuA
|
|
|
|
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
|
|
|
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes)
|
|
|
|
// kernel for cuArraysCopyToBatchAbsWithOffset: same as above, but stores (amplitude, 0) for each complex input pixel
|
|
|
|
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
|
|
|
|
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
const int *offsetX, const int *offsetY)
|
|
|
|
const int *offsetX, const int *offsetY)
|
|
|
@ -87,6 +117,16 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c
|
|
|
|
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
|
|
|
|
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Copy a chunk into a batch of chips with varying offsets/strides
|
|
|
|
|
|
|
|
* @note similar to cuArraysCopyToBatchWithOffset, but take amplitudes instead
|
|
|
|
|
|
|
|
* @param image1 Input image as a large chunk
|
|
|
|
|
|
|
|
* @param lda1 the leading dimension of image1, usually, its width inNY
|
|
|
|
|
|
|
|
* @param image2 Output images as a batch of chips
|
|
|
|
|
|
|
|
* @param offsetH (varying) offsets along height to extract chips
|
|
|
|
|
|
|
|
* @param offsetW (varying) offsets along width to extract chips
|
|
|
|
|
|
|
|
* @param stream cudaStream
|
|
|
|
|
|
|
|
*/
|
|
|
|
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
|
|
|
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
|
|
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
|
|
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -100,7 +140,7 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1,
|
|
|
|
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
|
|
|
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// copy a chunk into a batch of chips for a set of offsets (varying strides), from real to complex(to real part)
|
|
|
|
// kernel for cuArraysCopyToBatchWithOffsetR2C
|
|
|
|
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
|
|
|
|
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
const int *offsetX, const int *offsetY)
|
|
|
|
const int *offsetX, const int *offsetY)
|
|
|
@ -114,6 +154,16 @@ __global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, co
|
|
|
|
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
|
|
|
|
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Copy a chunk into a batch of chips with varying offsets/strides
|
|
|
|
|
|
|
|
* @note used to load real images
|
|
|
|
|
|
|
|
* @param image1 Input image as a large chunk
|
|
|
|
|
|
|
|
* @param lda1 the leading dimension of image1, usually, its width inNY
|
|
|
|
|
|
|
|
* @param image2 Output images as a batch of chips
|
|
|
|
|
|
|
|
* @param offsetH (varying) offsets along height to extract chips
|
|
|
|
|
|
|
|
* @param offsetW (varying) offsets along width to extract chips
|
|
|
|
|
|
|
|
* @param stream cudaStream
|
|
|
|
|
|
|
|
*/
|
|
|
|
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
|
|
|
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
|
|
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
|
|
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -144,6 +194,15 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co
|
|
|
|
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
|
|
|
|
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
* Copy a chunk into a batch of chips for a given stride, from complex to real (amplitudes)
|
|
|
|
|
|
|
|
* @note similar to cuArraysCopyToBatch, but take amplitudes instead
|
|
|
|
|
|
|
|
* @param image1 Input image as a large chunk
|
|
|
|
|
|
|
|
* @param image2 Output images as a batch of chips
|
|
|
|
|
|
|
|
* @param strideH offsets along height to extract chips
|
|
|
|
|
|
|
|
* @param strideW offsets along width to extract chips
|
|
|
|
|
|
|
|
* @param stream cudaStream
|
|
|
|
|
|
|
|
*/
|
|
|
|
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
|
|
|
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
|
|
|
int strideH, int strideW, cudaStream_t stream)
|
|
|
|
int strideH, int strideW, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -176,11 +235,12 @@ __global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
///
|
|
|
|
/**
|
|
|
|
/// Copy a tile of images to another image, with starting pixels offsets
|
|
|
|
* Copy a tile of images to another image, with starting pixels offsets, float to float
|
|
|
|
/// @param[in] imageIn inut images
|
|
|
|
* @param[in] imagesIn input images of dimension nImages*inNX*inNY
|
|
|
|
/// param[out] imageOut output images of dimension nImages*outNX*outNY
|
|
|
|
* @param[out] imagesOut output images of dimension nImages*outNX*outNY
|
|
|
|
///
|
|
|
|
* @param[in] offsets varying offsets for extraction
|
|
|
|
|
|
|
|
*/
|
|
|
|
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
|
|
|
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
|
|
|
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
|
|
@ -210,8 +270,10 @@ __global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, cons
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* copy/extract complex images from a large size to a smaller size from the location (offsetX, offsetY)
|
|
|
|
* Copy a tile of images to another image, with starting pixels offsets, float2 to float2
|
|
|
|
* offset is varying for each image
|
|
|
|
* @param[in] imagesIn input images of dimension nImages*inNX*inNY
|
|
|
|
|
|
|
|
* @param[out] imagesOut output images of dimension nImages*outNX*outNY
|
|
|
|
|
|
|
|
* @param[in] offsets varying offsets for extraction
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
|
|
|
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -317,8 +379,7 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
|
|
|
|
getLastCudaError("cuArraysCopyExtract error");
|
|
|
|
getLastCudaError("cuArraysCopyExtract error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// cuda kernel for cuArraysCopyExtract float2 to float2
|
|
|
|
|
|
|
|
|
|
|
|
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
|
|
|
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
const int offsetX, const int offsetY)
|
|
|
|
const int offsetX, const int offsetY)
|
|
|
@ -343,15 +404,12 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
|
|
|
|
const int nthreads = NTHREADS2D;
|
|
|
|
const int nthreads = NTHREADS2D;
|
|
|
|
dim3 threadsperblock(nthreads, nthreads,1);
|
|
|
|
dim3 threadsperblock(nthreads, nthreads,1);
|
|
|
|
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
|
|
|
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
|
|
|
//std::cout << "debug copyExtract" << imagesOut->width << imagesOut->height << "\n";
|
|
|
|
|
|
|
|
//imagesIn->debuginfo(stream);
|
|
|
|
|
|
|
|
//imagesOut->debuginfo(stream);
|
|
|
|
|
|
|
|
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
|
|
|
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
|
|
|
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
|
|
|
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
|
|
|
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
|
|
|
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
|
|
|
getLastCudaError("cuArraysCopyExtractC2C error");
|
|
|
|
getLastCudaError("cuArraysCopyExtractC2C error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// float3
|
|
|
|
// float3
|
|
|
|
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
|
|
|
|
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
|
|
|
@ -384,8 +442,6 @@ void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut
|
|
|
|
getLastCudaError("cuArraysCopyExtractFloat3 error");
|
|
|
|
getLastCudaError("cuArraysCopyExtractFloat3 error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
|
|
|
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
|
|
|
float *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
|
float *imageOut, const int outNX, const int outNY, const int nImages,
|
|
|
@ -417,7 +473,6 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut,
|
|
|
|
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
|
|
|
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
|
|
|
getLastCudaError("cuArraysCopyExtractC2C error");
|
|
|
|
getLastCudaError("cuArraysCopyExtractC2C error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
|
|
|
|
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
|
|
|
|
float2* imageOut, const int outNY, const int offsetX, const int offsetY)
|
|
|
|
float2* imageOut, const int outNY, const int offsetX, const int offsetY)
|
|
|
@ -441,7 +496,7 @@ void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, i
|
|
|
|
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
|
|
|
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
|
|
|
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
|
|
|
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
|
|
|
imageOut->devData, imageOut->width, offsetX, offsetY);
|
|
|
|
imageOut->devData, imageOut->width, offsetX, offsetY);
|
|
|
|
getLastCudaError("cuArraysCopyInsert error");
|
|
|
|
getLastCudaError("cuArraysCopyInsert float2 error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// float3
|
|
|
|
// float3
|
|
|
@ -467,7 +522,7 @@ void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, i
|
|
|
|
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
|
|
|
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
|
|
|
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
|
|
|
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
|
|
|
imageOut->devData, imageOut->width, offsetX, offsetY);
|
|
|
|
imageOut->devData, imageOut->width, offsetX, offsetY);
|
|
|
|
getLastCudaError("cuArraysCopyInsert error");
|
|
|
|
getLastCudaError("cuArraysCopyInsert float3 error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
//
|
|
|
@ -580,6 +635,7 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* copy complex images from a smaller size to a larger size while padding 0 for extra elements
|
|
|
|
* copy complex images from a smaller size to a larger size while padding 0 for extra elements
|
|
|
|
|
|
|
|
* @note use for zero-padding in fft oversampling
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
|
|
|
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -590,7 +646,7 @@ void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cu
|
|
|
|
cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
|
|
|
cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
|
|
|
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
|
|
|
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
|
|
|
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
|
|
|
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
|
|
|
getLastCudaError("cuArraysCopyInversePadded error");
|
|
|
|
getLastCudaError("cuArraysCopyPadded C2C error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// kernel for cuArraysCopyPadded
|
|
|
|
// kernel for cuArraysCopyPadded
|
|
|
@ -616,6 +672,7 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* copy real images to complex images (imaginary part=0) with larger size (pad 0 for extra elements)
|
|
|
|
* copy real images to complex images (imaginary part=0) with larger size (pad 0 for extra elements)
|
|
|
|
|
|
|
|
* @note use for zero-padding in fft oversampling
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
|
|
|
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -626,7 +683,7 @@ void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cud
|
|
|
|
cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
|
|
|
cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
|
|
|
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
|
|
|
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
|
|
|
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
|
|
|
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
|
|
|
getLastCudaError("cuArraysCopyPadded error");
|
|
|
|
getLastCudaError("cuArraysCopyPadded R2C error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// cuda kernel for setting a constant value
|
|
|
|
// cuda kernel for setting a constant value
|
|
|
@ -651,7 +708,7 @@ void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t str
|
|
|
|
|
|
|
|
|
|
|
|
cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
|
|
|
|
cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
|
|
|
|
(imageIn->devData, imageIn->size, value);
|
|
|
|
(imageIn->devData, imageIn->size, value);
|
|
|
|
getLastCudaError("cuArraysCopyPadded error");
|
|
|
|
getLastCudaError("cuArraysSetConstant error");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -668,6 +725,8 @@ __global__ void cuArraysR2C_kernel(float *image1, float2 *image2, int size)
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* Convert real images to complex images (set imaginary parts to 0)
|
|
|
|
* Convert real images to complex images (set imaginary parts to 0)
|
|
|
|
|
|
|
|
* @param[in] image1 input images
|
|
|
|
|
|
|
|
* @param[out] image2 output images
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
|
|
|
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -688,7 +747,9 @@ __global__ void cuArraysC2R_kernel(float2 *image1, float *image2, int size)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* Take real parts of complex images
|
|
|
|
* Take real part of complex images
|
|
|
|
|
|
|
|
* @param[in] image1 input images
|
|
|
|
|
|
|
|
* @param[out] image2 output images
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
|
|
|
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -709,6 +770,8 @@ __global__ void cuArraysAbs_kernel(float2 *image1, float *image2, int size)
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* Obtain abs (amplitudes) of complex images
|
|
|
|
* Obtain abs (amplitudes) of complex images
|
|
|
|
|
|
|
|
* @param[in] image1 input images
|
|
|
|
|
|
|
|
* @param[out] image2 output images
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
|
|
|
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
|
|
|
{
|
|
|
|
{
|
|
|
|