274 lines
8.3 KiB
Plaintext
274 lines
8.3 KiB
Plaintext
|
|
|
|||
|
|
|
|||
|
|
#include <iostream>
|
|||
|
|
#include <memory>
|
|||
|
|
#include <cmath>
|
|||
|
|
#include <complex>
|
|||
|
|
#include <device_launch_parameters.h>
|
|||
|
|
#include <cuda_runtime.h>
|
|||
|
|
#include <cublas_v2.h>
|
|||
|
|
#include <cuComplex.h>
|
|||
|
|
|
|||
|
|
#include "BaseConstVariable.h"
|
|||
|
|
#include "GPUTool.cuh"
|
|||
|
|
|
|||
|
|
#ifdef __CUDANVCC___
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Device-side helper functions
|
|||
|
|
// Complex exponential of a single-precision cuComplex value:
//   exp(a + i*b) = e^a * (cos(b) + i*sin(b))
// x.x is read as the real part and x.y as the imaginary part.
__device__ cuComplex cuCexpf(cuComplex x)
{
    // Magnitude of the result comes from the real part only.
    const float magnitude = exp(x.x);

    // The imaginary part of the input supplies the phase.
    const float realPart = magnitude * cos(x.y);
    const float imagPart = magnitude * sin(x.y);

    return make_cuComplex(realPart, imagPart);
}
|
|||
|
|
|
|||
|
|
// Returns the vector pointing from A to B, i.e. the component-wise difference B - A.
__device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B) {
    CUDAVector fromAToB;
    fromAToB.x = -A.x + B.x;
    fromAToB.y = -A.y + B.y;
    fromAToB.z = -A.z + B.z;
    return fromAToB;
}
|
|||
|
|
|
|||
|
|
// Euclidean (L2) length of the 3-component vector A.
__device__ float GPU_VectorNorm2(CUDAVector A) {
    const float squaredLength = A.x * A.x + A.y * A.y + A.z * A.z;
    return sqrtf(squaredLength);
}
|
|||
|
|
|
|||
|
|
// Dot product of the 3-component vectors A and B.
__device__ float GPU_dotVector(CUDAVector A, CUDAVector B) {
    const float dotProduct = A.x * B.x + A.y * B.y + A.z * B.z;
    return dotProduct;
}
|
|||
|
|
|
|||
|
|
// Cosine of the angle between A and B: dot(A, B) / (|A| * |B|).
// No guard against zero-length inputs: the division is performed as-is.
__device__ float GPU_CosAngle_VectorA_VectorB(CUDAVector A, CUDAVector B) {
    const float numerator = GPU_dotVector(A, B);
    const float denominator = GPU_VectorNorm2(A) * GPU_VectorNorm2(B);
    return numerator / denominator;
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Element-wise Euclidean distance between two point sets stored as separate
// coordinate arrays:
//   R[i] = sqrt((Ax[i]-Bx[i])^2 + (Ay[i]-By[i])^2 + (Az[i]-Bz[i])^2)
//
// Parameters:
//   Ax, Ay, Az - coordinates of the A points (read, len elements each)
//   Bx, By, Bz - coordinates of the B points (read, len elements each)
//   R          - output distances (written, len elements)
//   len        - number of points
//
// Launch layout: one thread per element; only the x components of
// blockIdx/blockDim/threadIdx are used. Threads with index >= len do nothing.
__global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic and can wrap for very large inputs.
    const long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const float dx = Ax[idx] - Bx[idx];
    const float dy = Ay[idx] - By[idx];
    const float dz = Az[idx] - Bz[idx];

    // Plain multiplication instead of powf(d, 2): cheaper and exactly rounded.
    R[idx] = sqrtf(dx * dx + dy * dy + dz * dz);
}
|
|||
|
|
|
|||
|
|
// Distance from every point of set A to one fixed point B:
//   R[i] = sqrt((Ax[i]-Bx)^2 + (Ay[i]-By)^2 + (Az[i]-Bz)^2)
//
// Parameters:
//   Ax, Ay, Az - coordinates of the A points (read, len elements each)
//   Bx, By, Bz - coordinates of the single point B (passed by value)
//   R          - output distances (written, len elements)
//   len        - number of A points
//
// Launch layout: one thread per element; only the x components of
// blockIdx/blockDim/threadIdx are used. Threads with index >= len do nothing.
__global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic and can wrap for very large inputs.
    const long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const float dx = Ax[idx] - Bx;
    const float dy = Ay[idx] - By;
    const float dz = Az[idx] - Bz;

    // Plain multiplication instead of powf(d, 2): cheaper and exactly rounded.
    R[idx] = sqrtf(dx * dx + dy * dy + dz * dz);
}
|
|||
|
|
|
|||
|
|
// Builds, for every element i, the difference vector between the fixed point
// (sX, sY, sZ) and the point (tX[i], tY[i], tZ[i]):
//   Rst[i] = s - t[i]
//
// Parameters:
//   sX, sY, sZ       - coordinates of the fixed point (passed by value)
//   tX, tY, tZ       - coordinates of the per-element points (read, len elements each)
//   RstX, RstY, RstZ - output vector components (written, len elements each)
//   len              - number of elements
//
// Launch layout: one thread per element; only the x components of
// blockIdx/blockDim/threadIdx are used. Threads with index >= len do nothing.
__global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic and can wrap for very large inputs.
    const long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    // The result points from t[idx] towards s (s minus t).
    RstX[idx] = sX - tX[idx];
    RstY[idx] = sY - tY[idx];
    RstZ[idx] = sZ - tZ[idx];
}
|
|||
|
|
|
|||
|
|
// Element-wise Euclidean length of vectors stored as separate component arrays:
//   R[i] = sqrt(Vx[i]^2 + Vy[i]^2 + Vz[i]^2)
//
// Parameters:
//   Vx, Vy, Vz - vector components (read, len elements each)
//   R          - output lengths (written, len elements)
//   len        - number of vectors
//
// Launch layout: one thread per element; only the x components of
// blockIdx/blockDim/threadIdx are used. Threads with index >= len do nothing.
__global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic and can wrap for very large inputs.
    const long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const float vx = Vx[idx];
    const float vy = Vy[idx];
    const float vz = Vz[idx];

    // Plain multiplication instead of powf(v, 2): cheaper and exactly rounded.
    R[idx] = sqrtf(vx * vx + vy * vy + vz * vz);
}
|
|||
|
|
|
|||
|
|
// Element-wise angle between the vectors A[i] and B[i].
//
// NOTE: despite the "cosAngle"/"anglecos" naming, the value stored is the
// angle itself, acosf(dot(A,B) / (|A|*|B|)), not its cosine. That behaviour
// is kept unchanged here.
// NOTE(review): confirm with callers whether the angle or the cosine is expected.
//
// Parameters:
//   Ax, Ay, Az - components of the A vectors (read, len elements each)
//   Bx, By, Bz - components of the B vectors (read, len elements each)
//   anglecos   - output, acosf of the normalised dot product (written, len elements)
//   len        - number of vector pairs
//
// Launch layout: one thread per element; only the x components of
// blockIdx/blockDim/threadIdx are used. Threads with index >= len do nothing.
__global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic and can wrap for very large inputs.
    const long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const float ax = Ax[idx];
    const float ay = Ay[idx];
    const float az = Az[idx];
    const float bx = Bx[idx];
    const float by = By[idx];
    const float bz = Bz[idx];

    const float normA = sqrtf(ax * ax + ay * ay + az * az);
    const float normB = sqrtf(bx * bx + by * by + bz * bz);
    const float dotAB = ax * bx + ay * by + az * bz;

    float cosine = dotAB / (normA * normB);

    // Rounding can push the ratio slightly outside [-1, 1] for (anti)parallel
    // vectors, which would make acosf return NaN. Clamp it back. The comparisons
    // are false for NaN, so a zero-length input still yields NaN as before.
    if (cosine > 1.0f) {
        cosine = 1.0f;
    } else if (cosine < -1.0f) {
        cosine = -1.0f;
    }

    anglecos[idx] = acosf(cosine);
}
|
|||
|
|
|
|||
|
|
// Error reporting
|
|||
|
|
// Terminates the program when a CUDA call failed.
//
// Parameters:
//   err - status code returned by a CUDA runtime call
//   msg - caller-supplied context printed in front of the CUDA error string
//
// On success (cudaSuccess) the function returns without doing anything.
// Otherwise it writes the message and the CUDA error description to stderr
// and exits with EXIT_FAILURE.
extern "C" void checkCudaError(cudaError_t err, const char* msg) {
    if (cudaSuccess == err) {
        return;
    }

    const char* description = cudaGetErrorString(err);
    std::cerr << "CUDA error: " << msg << " (" << description << ")" << std::endl;
    exit(EXIT_FAILURE);
}
|
|||
|
|
|
|||
|
|
// Host memory allocation
|
|||
|
|
// Allocates memsize bytes of host memory through cudaMallocHost.
//
// Parameters:
//   memsize - number of bytes to allocate
//
// Returns the allocated pointer, or nullptr when the allocation failed.
// (Previously the pointer was left uninitialised on failure, so callers
// received an indeterminate value.)
//
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and printed.
// The function synchronises the device before returning.
extern "C" void* mallocCUDAHost(long memsize) {
    void* ptr = nullptr;
    const cudaError_t allocStatus = cudaMallocHost(&ptr, memsize);
    if (allocStatus != cudaSuccess) {
        // Never hand back whatever the runtime may have left in ptr.
        ptr = nullptr;
    }

#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("mallocCUDAHost CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
    return ptr;
}
|
|||
|
|
|
|||
|
|
// Host memory release
|
|||
|
|
// Releases host memory through cudaFreeHost.
//
// Parameters:
//   ptr - pointer handed to cudaFreeHost
//
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and, if it is
// not cudaSuccess, printed. The function synchronises the device before
// returning.
extern "C" void FreeCUDAHost(void* ptr) {
    cudaFreeHost(ptr);

#ifdef __CUDADEBUG__
    const cudaError_t lastStatus = cudaGetLastError();
    if (cudaSuccess != lastStatus) {
        // Reported only; execution continues after the message.
        printf("FreeCUDAHost CUDA Error: %s\n", cudaGetErrorString(lastStatus));
    }
#endif // __CUDADEBUG__

    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// GPU memory allocation
|
|||
|
|
// Allocates memsize bytes of device memory through cudaMalloc.
//
// Parameters:
//   memsize - number of bytes to allocate
//
// Returns the allocated device pointer, or nullptr when the allocation
// failed. (Previously the pointer was left uninitialised on failure, so
// callers received an indeterminate value.)
//
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and printed.
// The function synchronises the device before returning.
extern "C" void* mallocCUDADevice(long memsize) {
    void* ptr = nullptr;
    const cudaError_t allocStatus = cudaMalloc(&ptr, memsize);
    if (allocStatus != cudaSuccess) {
        // Never hand back whatever the runtime may have left in ptr.
        ptr = nullptr;
    }

#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("mallocCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
    return ptr;
}
|
|||
|
|
|
|||
|
|
// GPU memory release
|
|||
|
|
// Releases device memory through cudaFree.
//
// Parameters:
//   ptr - pointer handed to cudaFree
//
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and, if it is
// not cudaSuccess, printed. The function synchronises the device before
// returning.
extern "C" void FreeCUDADevice(void* ptr) {
    cudaFree(ptr);

#ifdef __CUDADEBUG__
    const cudaError_t lastStatus = cudaGetLastError();
    if (cudaSuccess != lastStatus) {
        // Reported only; execution continues after the message.
        printf("FreeCUDADevice CUDA Error: %s\n", cudaGetErrorString(lastStatus));
    }
#endif // __CUDADEBUG__

    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// GPU memory data transfer
|
|||
|
|
// Copies memsize bytes from hostptr to deviceptr using cudaMemcpy with
// cudaMemcpyHostToDevice.
//
// Parameters:
//   hostptr   - source buffer
//   deviceptr - destination buffer
//   memsize   - number of bytes to copy
//
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and, if it is
// not cudaSuccess, printed. The function synchronises the device before
// returning.
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize) {
    cudaMemcpy(deviceptr, hostptr, memsize, cudaMemcpyHostToDevice);

#ifdef __CUDADEBUG__
    const cudaError_t lastStatus = cudaGetLastError();
    if (cudaSuccess != lastStatus) {
        // Reported only; execution continues after the message.
        printf("HostToDevice CUDA Error: %s\n", cudaGetErrorString(lastStatus));
    }
#endif // __CUDADEBUG__

    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// Copies memsize bytes from deviceptr to hostptr using cudaMemcpy with
// cudaMemcpyDeviceToHost.
//
// Parameters:
//   hostptr   - destination buffer
//   deviceptr - source buffer
//   memsize   - number of bytes to copy
//
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and, if it is
// not cudaSuccess, printed. The function synchronises the device before
// returning.
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize) {
    cudaMemcpy(hostptr, deviceptr, memsize, cudaMemcpyDeviceToHost);

#ifdef __CUDADEBUG__
    const cudaError_t lastStatus = cudaGetLastError();
    if (cudaSuccess != lastStatus) {
        // Reported only; execution continues after the message.
        printf("DeviceToHost CUDA Error: %s\n", cudaGetErrorString(lastStatus));
    }
#endif // __CUDADEBUG__

    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// Basic vector operation functions
|
|||
|
|
// Host wrapper that launches CUDA_DistanceAB over len elements and waits for
// the device to finish.
//
// Parameters:
//   Ax, Ay, Az - coordinates of the A points, forwarded unchanged to the kernel
//   Bx, By, Bz - coordinates of the B points, forwarded unchanged to the kernel
//   R          - output distances, forwarded unchanged to the kernel
//   len        - number of elements
//
// The pointers are handed straight to the kernel, so they presumably must be
// device-accessible -- confirm against callers.
//
// Nothing is launched when len <= 0: a grid of zero blocks is an invalid
// launch configuration and would only leave a CUDA error behind.
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and printed.
extern "C" void CUDAdistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
    // Set the grid and block dimensions for the kernel.
    const int blockSize = 256; // threads per block

    if (len > 0) {
        // Ceiling division so that a partially filled last block is included.
        const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);

        // Launch the kernel.
        CUDA_DistanceAB<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, R, len);
    }

#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        // Message now names this function instead of a copy-pasted label.
        printf("CUDAdistanceAB CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// Host wrapper that launches CUDA_B_DistanceA over len elements and waits for
// the device to finish.
//
// Parameters:
//   Ax, Ay, Az - coordinates of the A points, forwarded unchanged to the kernel
//   Bx, By, Bz - coordinates of the single point B (passed by value)
//   R          - output distances, forwarded unchanged to the kernel
//   len        - number of elements
//
// The pointers are handed straight to the kernel, so they presumably must be
// device-accessible -- confirm against callers.
//
// Nothing is launched when len <= 0: a grid of zero blocks is an invalid
// launch configuration and would only leave a CUDA error behind.
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and printed.
extern "C" void CUDABdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
    // Set the grid and block dimensions for the kernel.
    const int blockSize = 256; // threads per block

    if (len > 0) {
        // Ceiling division so that a partially filled last block is included.
        const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);

        // Launch the kernel.
        CUDA_B_DistanceA<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, R, len);
    }

#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        // Message now names this function instead of a copy-pasted label.
        printf("CUDABdistanceAs CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// Host wrapper that launches CUDA_make_VectorA_B over len elements and waits
// for the device to finish.
//
// Parameters:
//   sX, sY, sZ       - coordinates of the fixed point (passed by value)
//   tX, tY, tZ       - per-element point coordinates, forwarded unchanged to the kernel
//   RstX, RstY, RstZ - output vector components, forwarded unchanged to the kernel
//   len              - number of elements
//
// The pointers are handed straight to the kernel, so they presumably must be
// device-accessible -- confirm against callers.
//
// Nothing is launched when len <= 0: a grid of zero blocks is an invalid
// launch configuration and would only leave a CUDA error behind.
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and printed.
extern "C" void CUDAmake_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
    // Set the grid and block dimensions for the kernel.
    const int blockSize = 256; // threads per block

    if (len > 0) {
        // Ceiling division so that a partially filled last block is included.
        const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);

        // Launch the kernel.
        CUDA_make_VectorA_B<<<numBlocks, blockSize>>>(sX, sY, sZ, tX, tY, tZ, RstX, RstY, RstZ, len);
    }

#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        // Message now names this function instead of a copy-pasted label.
        printf("CUDAmake_VectorA_B CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// Host wrapper that launches CUDA_Norm_Vector over len elements and waits for
// the device to finish.
//
// Parameters:
//   Vx, Vy, Vz - vector components, forwarded unchanged to the kernel
//   R          - output lengths, forwarded unchanged to the kernel
//   len        - number of elements
//
// The pointers are handed straight to the kernel, so they presumably must be
// device-accessible -- confirm against callers.
//
// Nothing is launched when len <= 0: a grid of zero blocks is an invalid
// launch configuration and would only leave a CUDA error behind.
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and printed.
extern "C" void CUDANorm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
    // Set the grid and block dimensions for the kernel.
    const int blockSize = 256; // threads per block

    if (len > 0) {
        // Ceiling division so that a partially filled last block is included.
        const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);

        // Launch the kernel.
        CUDA_Norm_Vector<<<numBlocks, blockSize>>>(Vx, Vy, Vz, R, len);
    }

#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        // Message now names this function instead of a copy-pasted label.
        printf("CUDANorm_Vector CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
// Host wrapper that launches CUDA_cosAngle_VA_AB over len elements and waits
// for the device to finish.
//
// Parameters:
//   Ax, Ay, Az - components of the A vectors, forwarded unchanged to the kernel
//   Bx, By, Bz - components of the B vectors, forwarded unchanged to the kernel
//   anglecos   - output buffer, forwarded unchanged to the kernel
//   len        - number of elements
//
// The pointers are handed straight to the kernel, so they presumably must be
// device-accessible -- confirm against callers.
//
// Nothing is launched when len <= 0: a grid of zero blocks is an invalid
// launch configuration and would only leave a CUDA error behind.
// When __CUDADEBUG__ is defined, the last CUDA error is fetched and printed.
extern "C" void CUDAcosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
    const int blockSize = 256; // threads per block

    if (len > 0) {
        // Ceiling division so that a partially filled last block is included.
        const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);

        // Launch the kernel.
        CUDA_cosAngle_VA_AB<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, anglecos, len);
    }

#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        // Message now names this function instead of a copy-pasted label.
        printf("CUDAcosAngle_VA_AB CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
}
|
|||
|
|
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
|