RasterProcessTool/GPUTool/GPUTool.cu

317 lines
9.3 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#ifdef __CUDANVCC___
// ś¨Ňĺ˛ÎĘý
__device__ cuComplex cuCexpf(cuComplex x)
{
float factor = exp(x.x);
return make_cuComplex(factor * cos(x.y), factor * sin(x.y));
}
__device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B) {
CUDAVector C;
C.x = B.x - A.x;
C.y = B.y - A.y;
C.z = B.z - A.z;
return C;
}
__device__ float GPU_VectorNorm2(CUDAVector A) {
return sqrtf(A.x * A.x + A.y * A.y + A.z * A.z);
}
__device__ float GPU_dotVector(CUDAVector A, CUDAVector B) {
return A.x * B.x + A.y * B.y + A.z * B.z;
}
__device__ float GPU_CosAngle_VectorA_VectorB(CUDAVector A, CUDAVector B) {
return GPU_dotVector(A, B) / (GPU_VectorNorm2(A) * GPU_VectorNorm2(B));
}
__global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Ax[idx] - Bx[idx], 2) + powf(Ay[idx] - By[idx], 2) + powf(Az[idx] - Bz[idx], 2));
}
}
__global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Ax[idx] - Bx, 2) + powf(Ay[idx] - By, 2) + powf(Az[idx] - Bz, 2));
}
}
__global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
RstX[idx] = sX - tX[idx]; // ľŘĂć->Ěě
RstY[idx] = sY - tY[idx];
RstZ[idx] = sZ - tZ[idx];
}
}
__global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Vx[idx], 2) + powf(Vy[idx], 2) + powf(Vz[idx], 2));
}
}
__global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float tAx = Ax[idx];
float tAy = Ay[idx];
float tAz = Az[idx];
float tBx = Bx[idx];
float tBy = By[idx];
float tBz = Bz[idx];
float AR = sqrtf(powf(tAx, 2) + powf(tAy, 2) + powf(tAz, 2));
float BR = sqrtf(powf(tBx, 2) + powf(tBy, 2) + powf(tBz, 2));
float dotAB = tAx * tBx + tAy * tBy + tAz * tBz;
float result = acosf(dotAB / (AR * BR));
anglecos[idx] = result;
}
}
__global__ void CUDA_GridPoint_Linear_Interp1(float* v, float* q, float* qv, long xlen, long qlen)
{
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < qlen) {
float qx = q[idx];
// źěË÷Ń­ťˇ
if (qx < 0 || qx > xlen - 1) {}
else {
long x1 = floor(qx);
long x2 = ceil(qx);
if (x1 >= 0 && x2 < xlen) {
float y1 = v[x1];
float y2 = v[x2];
float y = y1 + (y2 - y1) * (qx - x1) / (x2 - x1);
qv[idx] = y;
}
else {
}
}
}
}
//´íÎóĚáĘž
extern "C" void checkCudaError(cudaError_t err, const char* msg) {
if (err != cudaSuccess) {
std::cerr << "CUDA error: " << msg << " (" << cudaGetErrorString(err) << ")" << std::endl;
exit(EXIT_FAILURE);
}
}
// Ö÷ťú˛ÎĘýÄÚ´ćÉůĂ÷
extern "C" void* mallocCUDAHost(long memsize) {
void* ptr;
cudaMallocHost(&ptr, memsize);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("mallocCUDAHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
return ptr;
}
// Ö÷ťú˛ÎĘýÄÚ´ćĘ͡Ĺ
extern "C" void FreeCUDAHost(void* ptr) {
cudaFreeHost(ptr);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("FreeCUDAHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// GPU˛ÎĘýÄÚ´ćÉůĂ÷
extern "C" void* mallocCUDADevice(long memsize) {
void* ptr;
cudaMalloc(&ptr, memsize);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("mallocCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
return ptr;
}
// GPU˛ÎĘýÄÚ´ćĘ͡Ĺ
extern "C" void FreeCUDADevice(void* ptr) {
cudaFree(ptr);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("FreeCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// GPU ÄÚ´ćĘýžÝתŇĆ
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize) {
cudaMemcpy(deviceptr, hostptr, memsize, cudaMemcpyHostToDevice);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("HostToDevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize) {
cudaMemcpy(hostptr, deviceptr, memsize, cudaMemcpyDeviceToHost);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("DeviceToHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// ťů´ĄÔËË㺯Ęý
extern "C" void CUDAdistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
// ÉčÖĂ CUDA şËşŻĘýľÄÍř¸ńşÍżéľÄłß´ç
int blockSize = 256; // Ăż¸öżéľÄĎßłĚĘý
int numBlocks = (len + blockSize - 1) / blockSize; // ¸ůžÝ pixelcount źĆËăÍř¸ń´óĐĄ
// ľ÷ÓĂ CUDA şËşŻĘý
CUDA_DistanceAB << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDAdistanceAB CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDABdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
// ÉčÖĂ CUDA şËşŻĘýľÄÍř¸ńşÍżéľÄłß´ç
int blockSize = 256; // Ăż¸öżéľÄĎßłĚĘý
int numBlocks = (len + blockSize - 1) / blockSize; // ¸ůžÝ pixelcount źĆËăÍř¸ń´óĐĄ
// ľ÷ÓĂ CUDA şËşŻĘý
CUDA_B_DistanceA << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDABdistanceAs CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDAmake_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
// ÉčÖĂ CUDA şËşŻĘýľÄÍř¸ńşÍżéľÄłß´ç
int blockSize = 256; // Ăż¸öżéľÄĎßłĚĘý
int numBlocks = (len + blockSize - 1) / blockSize; // ¸ůžÝ pixelcount źĆËăÍř¸ń´óĐĄ
// ľ÷ÓĂ CUDA şËşŻĘý
CUDA_make_VectorA_B << <numBlocks, blockSize >> > (sX, sY, sZ, tX, tY, tZ, RstX, RstY, RstZ, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDAmake_VectorA_B CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDANorm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
// ÉčÖĂ CUDA şËşŻĘýľÄÍř¸ńşÍżéľÄłß´ç
int blockSize = 256; // Ăż¸öżéľÄĎßłĚĘý
int numBlocks = (len + blockSize - 1) / blockSize; // ¸ůžÝ pixelcount źĆËăÍř¸ń´óĐĄ
// ľ÷ÓĂ CUDA şËşŻĘý
CUDA_Norm_Vector << <numBlocks, blockSize >> > (Vx, Vy, Vz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDANorm_Vector CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDAcosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
int blockSize = 256; // Ăż¸öżéľÄĎßłĚĘý
int numBlocks = (len + blockSize - 1) / blockSize; // ¸ůžÝ pixelcount źĆËăÍř¸ń´óĐĄ
// ľ÷ÓĂ CUDA şËşŻĘý
CUDA_cosAngle_VA_AB << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, anglecos, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDAcosAngle_VA_AB CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDAGridPointLinearInterp1(float* v, float* q, float* qv, long xlen, long qlen)
{
int blockSize = 256; // Ăż¸öżéľÄĎßłĚĘý
int numBlocks = (qlen + blockSize - 1) / blockSize; // ¸ůžÝ pixelcount źĆËăÍř¸ń´óĐĄ
// ľ÷ÓĂ CUDA şËşŻĘý
CUDA_GridPoint_Linear_Interp1 << <numBlocks, blockSize >> > ( v, q,qv, xlen, qlen);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDALinearInterp1 CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
#endif