#ifndef _GPUTOOL_H_ #define _GPUTOOL_H_ #include "GPUBaseLibAPI.h" #include "BaseConstVariable.h" #ifdef __CUDANVCC___ #include #include #include #include #define __CUDADEBUG__ #define CUDAMEMORY Memory1MB*100 #define LAMP_CUDA_PI 3.141592653589793238462643383279 #define PI4POW2 157.91367041742973 // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE = SHAREMEMORY_FLOAT_HALF /** CUDA 调用参数 ************************************************************************************/ #define BLOCK_SIZE 256 #define SHAREMEMORY_BYTE 49152 #define SHAREMEMORY_FLOAT_HALF_STEP 2 #define SHAREMEMORY_FLOAT_HALF SHAREMEMORY_FLOAT_HALF_STEP*BLOCK_SIZE #define SHAREMEMORY_DEM_STEP 768 #define SHAREMEMORY_Reflect 612 enum LAMPGPUDATETYPE { LAMP_LONG, LAMP_FLOAT, LAMP_COMPLEXFLOAT }; extern "C" struct CUDAVector { float x; float y; float z; }; extern "C" struct CUDAVectorEllipsoidal { float theta; float phi; float Rho; }; extern "C" struct CUDAComplex { float x; float y; }; //// 定义设备函数 //extern __device__ cuComplex cuCexpf(cuComplex x); //extern __device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B); //extern __device__ float GPU_VectorNorm2(CUDAVector A); //extern __device__ float GPU_dotVector(CUDAVector A, CUDAVector B); //extern __device__ float GPU_CosAngle_VectorA_VectorB(CUDAVector A, CUDAVector B); // //// 定义全局函数 //extern __global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len); //extern __global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len); //extern __global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len); //extern __global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len); //extern __global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len); //extern __global__ void CUDA_GridPoint_Linear_Interp1(float* v, float* q, float* qv, long xlen, long qlen); //extern __global__ void CUDA_D_sin(double* y, double* X, int n); //extern __global__ void CUDA_D_cos(double* y, double* X, int n); //extern __global__ void CUDAKernel_MemsetBlock(cuComplex* data, cuComplex init0, long len); // //extern __global__ void CUDAKernel_MemsetBlock(float* data, float init0, long len); // 打印GPU参数 extern "C" GPUBASELIBAPI void printDeviceInfo(int deviceId); // 误差处理函数 extern "C" GPUBASELIBAPI void checkCudaError(cudaError_t err, const char* msg); // GPU 内存函数 extern "C" GPUBASELIBAPI void* mallocCUDAHost(size_t memsize); // 主机内存声明 extern "C" GPUBASELIBAPI void FreeCUDAHost(void* ptr); extern "C" GPUBASELIBAPI void* mallocCUDADevice(size_t memsize, int device_id = 0); // GPU内存声明 extern "C" GPUBASELIBAPI void FreeCUDADevice(void* ptr); extern "C" GPUBASELIBAPI void HostToDevice(void* hostptr, void* deviceptr, size_t memsize);//GPU 内存数据转移 设备 -> GPU extern "C" GPUBASELIBAPI void DeviceToHost(void* hostptr, void* deviceptr, size_t memsize);//GPU 内存数据转移 GPU -> 设备 extern "C" GPUBASELIBAPI void DeviceToDevice(void* s_deviceptr, void* t_deviceptr, size_t memsize);//GPU 内存数据转移 GPU -> 设备 extern "C" GPUBASELIBAPI void CUDA_MemsetBlock(cuComplex* data, cuComplex init0, long len); // 矢量基础运算函数 extern "C" GPUBASELIBAPI void CUDAdistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long member); extern "C" GPUBASELIBAPI void CUDABdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long member); extern "C" GPUBASELIBAPI void CUDAmake_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long member); extern "C" GPUBASELIBAPI void CUDANorm_Vector(float* Vx, float* Vy, float* Vz, float* R, long member); extern "C" GPUBASELIBAPI void CUDAcosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len); // 常见插值算法 extern "C" GPUBASELIBAPI void CUDAGridPointLinearInterp1(float* v, float* q, float* qv, long xlen, long qlen); extern "C" GPUBASELIBAPI void CUDADSin(double* y, double* X, int n); extern "C" GPUBASELIBAPI void CUDADCos(double* y, double* X, int n); // 估算分块整数 extern "C" GPUBASELIBAPI long NextBlockPad(long num, long blocksize); extern "C" GPUBASELIBAPI void PrintLasterError(const char* s); extern "C" GPUBASELIBAPI void CUDAIFFTScale(cuComplex* inArr, cuComplex* outArr,long InRowCount,long InColCount,long outColCount); extern "C" GPUBASELIBAPI void CUDAIFFT(cuComplex* inArr, cuComplex* outArr, long InRowCount, long InColCount, long outColCount); extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length); extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex* src, cuComplex* dst, size_t len); extern "C" GPUBASELIBAPI void HostCuComplexToshared_complexPtr(cuComplex* src, std::complex* dst, size_t len); #endif #endif