PyCuAmpcor: compile files as pure C++ when possible

This speeds up compilation, and brings it closer in line with the CPU port
LT1AB
Ryan Burns 2022-11-15 13:32:18 -08:00
parent bb4d3b545b
commit ca462283f3
20 changed files with 139 additions and 129 deletions

View File

@ -11,24 +11,26 @@ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
# Sources of the PyCuAmpcor extension module.
# Files that can be compiled as pure C++ use the .cpp extension; the rest
# remain .cu. Each source is listed exactly once.
pybind11_add_module(PyCuAmpcor
    src/PyCuAmpcor.cpp
    src/GDALImage.cpp
    src/SConscript
    src/cuAmpcorChunk.cpp
    src/cuAmpcorController.cpp
    src/cuAmpcorParameter.cpp
    src/cuArrays.cpp
    src/cuArraysCopy.cu
    src/cuArraysPadding.cu
    src/cuCorrFrequency.cu
    src/cuCorrNormalization.cu
    src/cuCorrNormalizationSAT.cu
    src/cuCorrNormalizer.cpp
    src/cuCorrTimeDomain.cu
    src/cuDeramp.cu
    src/cuEstimateStats.cu
    src/cuOffset.cu
    src/cuOverSampler.cpp
    src/cuSincOverSampler.cu
    src/cudaError.cpp
    src/cudaUtil.cpp
    )
target_include_directories(PyCuAmpcor PRIVATE
src

View File

@ -8,6 +8,7 @@
#include "GDALImage.h"
// dependencies
#include <cuda_runtime.h>
#include <iostream>
#include "cudaError.h"

View File

@ -12,6 +12,7 @@
#define __GDALIMAGE_H
// dependencies
#include <driver_types.h>
#include <string>
#include <gdal_priv.h>
#include <cpl_conv.h>

View File

@ -7,14 +7,15 @@ package = envPyCuAmpcor['PACKAGE']
project = envPyCuAmpcor['PROJECT']
build = envPyCuAmpcor['PRJ_LIB_DIR']
install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project
# Sources of the PyCuAmpcor shared library. Files that can be compiled as
# pure C++ use .cpp; the rest remain .cu. The extensions agree with the
# CMake source list: cuOverSampler is the file that became .cpp, while
# cuSincOverSampler stays .cu.
listFiles = ['GDALImage.cpp', 'cuArrays.cpp', 'cuArraysCopy.cu',
             'cudaError.cpp', 'cudaUtil.cpp',
             'cuArraysPadding.cu', 'cuOverSampler.cpp',
             'cuSincOverSampler.cu', 'cuDeramp.cu',
             'cuOffset.cu', 'cuCorrNormalization.cu',
             'cuCorrNormalizationSAT.cu', 'cuCorrNormalizer.cpp',
             'cuAmpcorParameter.cpp', 'cuCorrTimeDomain.cu',
             'cuAmpcorController.cpp', 'cuCorrFrequency.cu',
             'cuAmpcorChunk.cpp', 'cuEstimateStats.cu']
lib = envPyCuAmpcor.SharedLibrary(target = 'PyCuAmpcor', source= listFiles, SHLIBPREFIX='')

View File

@ -1,5 +1,8 @@
#include "cuAmpcorChunk.h"
#include "cuAmpcorUtil.h"
#include <cufft.h>
#include <iostream>
/**
* Run ampcor process for a batch of images (a chunk)

View File

@ -12,6 +12,7 @@
#include "cudaUtil.h"
#include "cuAmpcorChunk.h"
#include "cuAmpcorUtil.h"
#include <cuda_runtime.h>
#include <iostream>
// constructor

View File

@ -7,6 +7,9 @@
// dependencies
#include "cuArrays.h"
#include "cudaError.h"
#include <cuda_runtime.h>
#include <fstream>
#include <iostream>
// allocate arrays in device memory
template <typename T>

View File

@ -12,14 +12,9 @@
#define __CUARRAYS_H
// cuda dependencies
#include <cuda.h>
#include <driver_types.h>
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <ctime>
#include <string>
template <typename T>
class cuArrays{

View File

@ -109,4 +109,4 @@ void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2>
cudaKernel_elementMulConjugate<<<blockspergrid, threadsperblock, 0, stream>>>(image1->devData, image2->devData, size, coef );
getLastCudaError("cuArraysElementMultiply error\n");
}
//end of file
//end of file

View File

@ -8,8 +8,8 @@
#define __CUCORRFREQUENCY_H
// dependencies
#include "cudaUtil.h"
#include "cuArrays.h"
#include <cufft.h>
class cuFreqCorrelator
{
@ -34,4 +34,4 @@ public:
};
#endif //__CUCORRFREQUENCY_H
// end of file
// end of file

View File

@ -13,7 +13,6 @@
#define __CUNORMALIZER_H
#include "cuArrays.h"
#include "cudaUtil.h"
/**
* Abstract class interface for correlation surface normalization processor

View File

@ -12,7 +12,7 @@
#define __CUOVERSAMPLER_H
#include "cuArrays.h"
#include "cudaUtil.h"
#include <cufft.h>
// FFT Oversampler for complex images
class cuOverSamplerC2C

View File

@ -0,0 +1,44 @@
#include "cudaError.h"
#include <cuda_runtime.h>
#include <cufft.h>
#include <stdio.h>
#include <stdlib.h>
// DEVICE_RESET expands to a cudaDeviceReset() call when __DRIVER_TYPES_H__
// is defined (presumably the include guard of CUDA's driver_types.h --
// confirm) and to nothing otherwise. A definition supplied earlier wins.
#ifndef DEVICE_RESET
#ifdef __DRIVER_TYPES_H__
#define DEVICE_RESET cudaDeviceReset();
#else
#define DEVICE_RESET
#endif
#endif

/**
 * Abort the process when a status code signals failure.
 *
 * @param result status value to test; any non-zero value counts as an error
 * @param func   text identifying the checked call, echoed in the diagnostic
 * @param file   source file name to report
 * @param line   source line number to report
 *
 * On failure a diagnostic is written to stderr, DEVICE_RESET is executed and
 * the process exits with EXIT_FAILURE. On success the function returns
 * without side effects.
 */
template<typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
    if (result) {
        // Cast to int so the argument type matches the %d conversion.
        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n",
                file, line, static_cast<int>(result), func);
        // Make sure we call CUDA Device Reset before exiting
        DEVICE_RESET
        exit(EXIT_FAILURE);
    }
}
// Explicit instantiations of check() for cudaError_t and cufftResult_t.
// cudaError.h only declares the template, so the definitions emitted here
// are the ones available to other translation units.
template void check(cudaError_t, char const *const, const char *const, int const);
template void check(cufftResult_t, char const *const, const char *const, int const);
void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err)
{
fprintf(stderr, "%s(%i) : CUDA error : %s : (%d) %s.\n",
file, line, errorMessage, (int)err, cudaGetErrorString(err));
DEVICE_RESET
exit(EXIT_FAILURE);
}
}

View File

@ -10,51 +10,13 @@
#pragma once
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <cufft.h>
#include "debug.h"
#include <cuda.h>
// DEVICE_RESET: a cudaDeviceReset() call when __DRIVER_TYPES_H__ is defined,
// empty otherwise. An earlier definition of DEVICE_RESET is left untouched.
#ifndef DEVICE_RESET
#ifdef __DRIVER_TYPES_H__
#define DEVICE_RESET cudaDeviceReset();
#else
#define DEVICE_RESET
#endif
#endif

/**
 * Terminate the process if `result` is non-zero.
 *
 * @param result status value to test; zero means success
 * @param func   text identifying the checked call, echoed in the diagnostic
 * @param file   source file name to report
 * @param line   source line number to report
 *
 * The failure path prints to stderr, executes DEVICE_RESET and calls
 * exit(EXIT_FAILURE).
 */
template<typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
    if (result)
    {
        // %d expects an int, so convert the status code to int (not unsigned).
        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n",
                file, line, static_cast<int>(result), func);
        // Make sure we call CUDA Device Reset before exiting
        DEVICE_RESET
        exit(EXIT_FAILURE);
    }
}
void check(T result, char const *const func, const char *const file, int const line);
// This will output the proper error string when calling cudaGetLastError
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err)
{
fprintf(stderr, "%s(%i) : CUDA error : %s : (%d) %s.\n",
file, line, errorMessage, (int)err, cudaGetErrorString(err));
DEVICE_RESET
exit(EXIT_FAILURE);
}
}
void __getLastCudaError(const char *errorMessage, const char *file, const int line);
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#ifdef CUDA_ERROR_CHECK

View File

@ -0,0 +1,58 @@
#include "cudaUtil.h"
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include "cudaError.h"
/**
 * Validate a device index and make it the current CUDA device.
 *
 * @param devID index of the device to select
 * @return devID, after cudaSetDevice(devID) has been issued
 *
 * Exits with EXIT_FAILURE when no device is reported or when devID lies
 * outside [0, device_count - 1].
 */
int gpuDeviceInit(int devID)
{
    // Initialised to zero, as gpuDeviceList() does, so the checks below never
    // read an indeterminate value should the count query fail without
    // aborting (checkCudaErrors appears to depend on CUDA_ERROR_CHECK --
    // confirm in cudaError.h).
    int device_count = 0;
    checkCudaErrors(cudaGetDeviceCount(&device_count));

    if (device_count == 0) {
        fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    if (devID < 0 || devID > device_count - 1) {
        fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
        exit(EXIT_FAILURE);
    }

    checkCudaErrors(cudaSetDevice(devID));
    printf("Using CUDA Device %d ...\n", devID);
    return devID;
}
void gpuDeviceList()
{
int device_count = 0;
int current_device = 0;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&device_count));
fprintf(stderr, "Detecting all CUDA devices ...\n");
if (device_count == 0) {
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
while (current_device < device_count) {
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
if (deviceProp.computeMode == cudaComputeModeProhibited) {
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: "
"device is running in <Compute Mode Prohibited> \n",
current_device, deviceProp.name);
} else if (deviceProp.major < 1) {
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: "
"device does not support CUDA \n",
current_device, deviceProp.name);
} else {
fprintf(stderr, "CUDA Device [%d]: \"%s\" is available.\n",
current_device, deviceProp.name);
}
current_device++;
}
}

View File

@ -10,9 +10,6 @@
#ifndef __CUDAUTIL_H
#define __CUDAUTIL_H
#include <cuda_runtime.h>
#include "cudaError.h"
// for 2D FFT
#define NRANK 2
@ -47,12 +44,6 @@
#define MIN(a,b) (a > b ? b: a)
#endif
// Round a float to the nearest int; halfway cases move away from zero.
// The 0.5 literals are doubles, so the sum is formed in double precision
// before the truncating conversion to int.
inline int ftoi(float value)
{
    const double bias = (value >= 0) ? 0.5 : -0.5;
    return static_cast<int>(value + bias);
}
// compute the next integer in power of 2
inline int nextpower2(int value)
{
@ -61,63 +52,11 @@ inline int nextpower2(int value)
return r;
}
// General GPU Device CUDA Initialization
// Checks that devID is within [0, device_count - 1], issues
// cudaSetDevice(devID) and returns devID. Exits with EXIT_FAILURE when no
// device is reported or devID is out of range.
inline int gpuDeviceInit(int devID)
{
    // Zero-initialised, like the counter in gpuDeviceList(), so a count query
    // that fails without aborting cannot leave an indeterminate value here.
    int device_count = 0;
    checkCudaErrors(cudaGetDeviceCount(&device_count));

    if (device_count == 0)
    {
        fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    if (devID < 0 || devID > device_count-1)
    {
        fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
        exit(EXIT_FAILURE);
    }

    checkCudaErrors(cudaSetDevice(devID));
    printf("Using CUDA Device %d ...\n", devID);
    return devID;
}
int gpuDeviceInit(int devID);
// This function lists all available GPUs
inline void gpuDeviceList()
{
int device_count = 0;
int current_device = 0;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&device_count));
fprintf(stderr, "Detecting all CUDA devices ...\n");
if (device_count == 0)
{
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
while (current_device < device_count)
{
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
if (deviceProp.computeMode == cudaComputeModeProhibited)
{
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: device is running in <Compute Mode Prohibited> \n", current_device, deviceProp.name);
}
else if (deviceProp.major < 1)
{
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: device does not support CUDA \n", current_device, deviceProp.name);
}
else {
fprintf(stderr, "CUDA Device [%d]: \"%s\" is available.\n", current_device, deviceProp.name);
}
current_device++;
}
}
void gpuDeviceList();
#endif //__CUDAUTIL_H
//end of file

View File

@ -7,7 +7,8 @@
#ifndef __FLOAT2_H
#define __FLOAT2_H
#include <vector_types.h>
#include <cuda_runtime.h>
#include <math.h>
inline __host__ __device__ void zero(float2 &a) { a.x = 0.0f; a.y = 0.0f; }
@ -126,4 +127,4 @@ inline __host__ __device__ float2 complexExp(float arg)
}
#endif //__FLOAT2_H
// end of file
// end of file