BaseLibraryCPP/GPUTool.cu

279 lines
9.6 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#ifdef __CUDANVCC___
#define CUDAMEMORY Memory1MB*100
#define LAMP_CUDA_PI 3.141592653589793238462643383279
// Parameter definitions
// Complex exponential of a single-precision cuComplex value.
// For x = a + i*b this returns e^a * (cos(b) + i*sin(b)),
// where a is x.x (real part) and b is x.y (imaginary part).
__device__ cuComplex cuCexpf(cuComplex x)
{
    const float magnitude = exp(x.x);
    const float realPart = magnitude * cos(x.y);
    const float imagPart = magnitude * sin(x.y);
    return make_cuComplex(realPart, imagPart);
}
// Kernel: element-wise Euclidean distance between two point sets.
// One thread handles one element: R[i] = |A[i] - B[i]| for i in [0, len).
// Threads whose flat index is >= len do nothing. Uses 1D block/thread indices only.
__global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz,float *R, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return;
    }
    const float dx = Ax[i] - Bx[i];
    const float dy = Ay[i] - By[i];
    const float dz = Az[i] - Bz[i];
    R[i] = sqrtf(powf(dx, 2) + powf(dy, 2) + powf(dz, 2));
}
// Kernel: distance from every point of set A to the single point (Bx, By, Bz).
// One thread handles one element: R[i] = |A[i] - B| for i in [0, len).
// Threads whose flat index is >= len do nothing. Uses 1D block/thread indices only.
__global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return;
    }
    const float dx = Ax[i] - Bx;
    const float dy = Ay[i] - By;
    const float dz = Az[i] - Bz;
    R[i] = sqrtf(powf(dx, 2) + powf(dy, 2) + powf(dz, 2));
}
// Kernel: builds the vector from each point t[i] to the fixed point s.
// One thread handles one element: Rst[i] = s - t[i], written component-wise
// into RstX/RstY/RstZ for i in [0, len). Threads with index >= len do nothing.
__global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return;
    }
    const float dx = sX - tX[i];
    const float dy = sY - tY[i];
    const float dz = sZ - tZ[i];
    RstX[i] = dx;
    RstY[i] = dy;
    RstZ[i] = dz;
}
// Kernel: Euclidean length of each 3-component vector.
// One thread handles one element: R[i] = sqrt(Vx[i]^2 + Vy[i]^2 + Vz[i]^2)
// for i in [0, len). Threads with index >= len do nothing.
__global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz,float *R, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return;
    }
    const float x = Vx[i];
    const float y = Vy[i];
    const float z = Vz[i];
    R[i] = sqrtf(powf(x, 2) + powf(y, 2) + powf(z, 2));
}
// Kernel: angle between the vectors A[i] and B[i].
// One thread handles one element, for i in [0, len):
//   anglecos[i] = acos( (A[i] . B[i]) / (|A[i]| * |B[i]|) )
// Despite the output name, the stored value is the angle in radians (result of
// acosf), not its cosine.
// If either vector has zero length the quotient is 0/0 and NaN is stored.
__global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos,long len) {
    const long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const float tAx = Ax[idx];
    const float tAy = Ay[idx];
    const float tAz = Az[idx];
    const float tBx = Bx[idx];
    const float tBy = By[idx];
    const float tBz = Bz[idx];

    const float AR = sqrtf(powf(tAx, 2) + powf(tAy, 2) + powf(tAz, 2));
    const float BR = sqrtf(powf(tBx, 2) + powf(tBy, 2) + powf(tBz, 2));
    const float dotAB = tAx * tBx + tAy * tBy + tAz * tBz;

    // Rounding can push the quotient marginally outside [-1, 1] for (anti)parallel
    // vectors, which would make acosf return NaN. Clamp with comparisons (not
    // fminf/fmaxf) so that a genuine NaN from a zero-length vector still propagates.
    float cosAB = dotAB / (AR * BR);
    if (cosAB > 1.0f) {
        cosAB = 1.0f;
    } else if (cosAB < -1.0f) {
        cosAB = -1.0f;
    }

    anglecos[idx] = acosf(cosAB);
}
// Kernel: converts each vector Rst[i] into spherical angles (theta, phi) relative
// to the antenna axes given by antXaxis*, antYaxis*, antZaxis*.
// One thread handles one element, for i in [0, len):
//   1. the input vector is negated (original comment: satellite --> ground);
//   2. (Xant, Yant, Zant) are computed from the negated vector and the axis cofactors;
//   3. thetaAnt[i] = angle between (Xant, Yant, Zant) and the Z axis, in [0, pi];
//      phiAnt[i]   = atan2(Yant, Xant), in [-pi, pi].
// antDirectX/Y/Z are accepted but not used by this kernel.
// If the axis determinant is zero or the projected vector has zero length, the
// outputs are NaN/Inf-derived.
__global__ void CUDA_SatelliteAntDirectNormal(float* RstX,float* RstY,float* RstZ,
    float antXaxisX,float antXaxisY,float antXaxisZ,
    float antYaxisX,float antYaxisY,float antYaxisZ,
    float antZaxisX,float antZaxisY,float antZaxisZ,
    float antDirectX,float antDirectY,float antDirectZ,
    float* thetaAnt,float* phiAnt
    , long len) {
    const long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    // Reverse the direction of the input vector (satellite --> ground).
    const float Xst = -1 * RstX[idx];
    const float Yst = -1 * RstY[idx];
    const float Zst = -1 * RstZ[idx];

    // Determinant of the 3x3 matrix whose columns are the antenna X, Y and Z axes.
    // It is identical for all three components, so compute it once.
    const float det = antXaxisX * (antYaxisY * antZaxisZ - antZaxisY * antYaxisZ)
                    - antYaxisX * (antXaxisY * antZaxisZ - antXaxisZ * antZaxisY)
                    + antZaxisX * (antXaxisY * antYaxisZ - antXaxisZ * antYaxisY);

    // Components in the antenna coordinate system.
    // NOTE(review): each numerator multiplies all three cofactors by the SAME input
    // component (Xst three times, Yst three times, Zst three times). A Cramer's-rule
    // change of basis would mix Xst, Yst and Zst in every row. The original
    // arithmetic is kept unchanged because the intended convention cannot be
    // determined from this file -- confirm against the derivation.
    const float Xant = (Xst * (antYaxisY * antZaxisZ - antYaxisZ * antZaxisY)
                      + Xst * (antXaxisZ * antZaxisY - antXaxisY * antZaxisZ)
                      + Xst * (antXaxisY * antYaxisZ - antXaxisZ * antYaxisY)) / det;
    const float Yant = (Yst * (antYaxisZ * antZaxisX - antYaxisX * antZaxisZ)
                      + Yst * (antXaxisX * antZaxisZ - antXaxisZ * antZaxisX)
                      + Yst * (antYaxisX * antXaxisZ - antXaxisX * antYaxisZ)) / det;
    const float Zant = (Zst * (antYaxisX * antZaxisY - antYaxisY * antZaxisX)
                      + Zst * (antXaxisY * antZaxisX - antXaxisX * antZaxisY)
                      + Zst * (antXaxisX * antYaxisY - antYaxisX * antXaxisY)) / det;

    // Radius of the projected vector.
    const float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant);

    // theta: angle from the Z axis. Clamp the cosine because rounding can push
    // Zant / Norm marginally outside [-1, 1]; comparisons keep a genuine NaN intact.
    float cosTheta = Zant / Norm;
    if (cosTheta > 1.0f) {
        cosTheta = 1.0f;
    } else if (cosTheta < -1.0f) {
        cosTheta = -1.0f;
    }
    const float ThetaAnt = acosf(cosTheta);

    // phi: signed azimuth in the XY plane. atan2f equals the previous
    // sign(Y) * acos(X / (Norm * sin(theta))) wherever that was defined, and stays
    // finite when Yant == 0 or theta == 0, where the old expression evaluated 0/0.
    const float PhiAnt = atan2f(Yant, Xant);

    thetaAnt[idx] = ThetaAnt;
    phiAnt[idx] = PhiAnt;
}
// Kernel: per-element complex echo and range-sample index.
// One thread handles one element, for i in [0, len):
//   amp        = Pt * TransAnt[i] * ReciveAnt[i] * sigma0[i] / ((4*pi)^2 * R[i]^4)
//   phase      = -4*pi / lamda * R[i]
//   echoArr[i] = amp * exp(j * phase)
//   timeID     = floor( 2 * (R[i] - nearRange) / LIGHTSPEED * Fs )
//   FreqID[i]  = timeID
// When timeID falls outside [0, FreqIDmax) the sample is discarded: timeID is
// reset to 0 and the amplitude to 0, so echoArr[i] becomes 0 + 0j.
// localangle and slopeangle are accepted but not used by this kernel.
// LIGHTSPEED is not defined in this file (presumably from BaseConstVariable.h).
// NOTE(review): R and the phase are single precision; for large R / small lamda
// the phase magnitude may exceed what a float can resolve to within one cycle --
// confirm the required accuracy.
__global__ void CUDA_calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
    float* localangle, float* R,float* slopeangle,
    float nearRange, float Fs,float Pt,float lamda,long FreqIDmax,
    cuComplex* echoArr , long* FreqID,
    long len) {
    const long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const float r = R[idx];

    // Reflected intensity.
    float amp = Pt * TransAnt[idx] * ReciveAnt[idx];
    amp = amp * sigma0[idx];
    amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4));

    // Phase term exp(j * phi).
    const float phi = (-4 * LAMP_CUDA_PI / lamda) * r;
    const cuComplex echophiexp = cuCexpf(make_cuComplex(0, phi));

    // Range sample index for this element.
    const float timeR = 2 * (r - nearRange) / LIGHTSPEED * Fs;
    long timeID = floorf(timeR);
    if (timeID < 0 || timeID >= FreqIDmax) {
        timeID = 0;
        amp = 0;
    }

    cuComplex echo;
    echo.x = echophiexp.x * amp;
    echo.y = echophiexp.y * amp;
    echoArr[idx] = echo;

    // Previously timeID was computed and range-checked but never stored, leaving
    // the FreqID output untouched. The null check keeps callers that pass no
    // buffer working.
    if (FreqID != nullptr) {
        FreqID[idx] = timeID;
    }
}
// Error reporting
// Terminates the process if `err` is not cudaSuccess.
// On failure, prints "CUDA error: <msg> (<CUDA error string>)" to stderr and
// calls exit(EXIT_FAILURE). Returns normally when err == cudaSuccess.
void checkCudaError(cudaError_t err, const char* msg) {
    if (err == cudaSuccess) {
        return;
    }
    const char* reason = cudaGetErrorString(err);
    std::cerr << "CUDA error: " << msg << " (" << reason << ")" << std::endl;
    exit(EXIT_FAILURE);
}
// Host memory allocation
// Requests `memsize` bytes from cudaMallocHost (pinned host memory).
// NOTE(review): `ptr` is received BY VALUE, so cudaMallocHost stores the new
// address in this function's local copy only. The caller's pointer is never
// updated and the allocation becomes unreachable once the function returns
// (it cannot be freed). Returning the pointer or taking a void** would fix
// this, but requires changing the exported signature and every caller.
// The cudaError_t returned by cudaMallocHost is ignored.
extern "C" void mallocCUDAHost(void* ptr, long memsize) {
cudaMallocHost(&ptr, memsize);
}
// Host memory release
// Releases host memory through cudaFreeHost.
// `ptr` is passed to cudaFreeHost unchanged. A failure is reported on stderr
// instead of being silently dropped; the function still returns normally.
extern "C" void FreeCUDAHost(void* ptr) {
    const cudaError_t status = cudaFreeHost(ptr);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: FreeCUDAHost (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// GPU (device) memory allocation
// Requests `memsize` bytes of device memory from cudaMalloc.
// NOTE(review): `ptr` is received BY VALUE, so cudaMalloc stores the new
// address in this function's local copy only. The caller's pointer is never
// updated and the allocation becomes unreachable once the function returns
// (it cannot be freed). Returning the pointer or taking a void** would fix
// this, but requires changing the exported signature and every caller.
// The cudaError_t returned by cudaMalloc is ignored.
extern "C" void mallocCUDADevice(void* ptr, long memsize) {
cudaMalloc(&ptr, memsize);
}
// GPU (device) memory release
// Releases device memory through cudaFree.
// `ptr` is passed to cudaFree unchanged. A failure is reported on stderr
// instead of being silently dropped; the function still returns normally.
extern "C" void FreeCUDADevice(void* ptr) {
    const cudaError_t status = cudaFree(ptr);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: FreeCUDADevice (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host <-> GPU memory transfers
// Copies `memsize` bytes from `hostptr` to `deviceptr` with a blocking
// cudaMemcpy (cudaMemcpyHostToDevice).
// A failed copy is reported on stderr instead of being silently dropped; the
// function still returns normally, so the destination contents are not
// guaranteed after an error message.
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize) {
    const cudaError_t status = cudaMemcpy(deviceptr, hostptr, memsize, cudaMemcpyHostToDevice);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: HostToDevice (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Copies `memsize` bytes from `deviceptr` to `hostptr` with a blocking
// cudaMemcpy (cudaMemcpyDeviceToHost).
// Note the parameter order: the host pointer comes first, the device pointer second.
// A failed copy is reported on stderr instead of being silently dropped; the
// function still returns normally, so the destination contents are not
// guaranteed after an error message.
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize) {
    const cudaError_t status = cudaMemcpy(hostptr, deviceptr, memsize, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: DeviceToHost (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host launcher for CUDA_DistanceAB: R[i] = |A[i] - B[i]| for i in [0, len).
// All pointers are forwarded unchanged to the kernel, which dereferences them in
// device code, so they must be device-accessible.
// Does nothing when len <= 0. Blocks until the device has finished
// (cudaDeviceSynchronize), like the other launchers in this file. Launch and
// execution errors are reported on stderr; the function still returns normally.
extern "C" void distanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R,long len) {
    if (len <= 0) {
        return;  // a zero-sized grid is an invalid launch configuration
    }

    const int blockSize = 256;  // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);  // ceil(len / blockSize)

    // Execution configuration is <<<grid, block>>>. The two values were previously
    // swapped, which requested numBlocks threads per block and made the launch
    // fail without any message as soon as numBlocks exceeded 1024.
    CUDA_DistanceAB<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, R, len);

    const cudaError_t launchStatus = cudaGetLastError();    // bad launch configuration
    const cudaError_t syncStatus = cudaDeviceSynchronize(); // faults raised while running
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: distanceAB (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host launcher for CUDA_B_DistanceA: R[i] = distance from A[i] to the single
// point (Bx, By, Bz), for i in [0, len).
// Ax/Ay/Az/R are forwarded unchanged to the kernel, which dereferences them in
// device code, so they must be device-accessible.
// Does nothing when len <= 0. Blocks until the device has finished
// (cudaDeviceSynchronize). Launch and execution errors are reported on stderr;
// the function still returns normally.
extern "C" void BdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
    if (len <= 0) {
        return;  // a zero-sized grid is an invalid launch configuration
    }

    const int blockSize = 256;  // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);  // ceil(len / blockSize)

    // Execution configuration is <<<grid, block>>>. The two values were previously
    // swapped, which requested numBlocks threads per block and made the launch
    // fail without any message as soon as numBlocks exceeded 1024.
    CUDA_B_DistanceA<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, R, len);

    const cudaError_t launchStatus = cudaGetLastError();    // bad launch configuration
    const cudaError_t syncStatus = cudaDeviceSynchronize(); // faults raised while running
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: BdistanceAs (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host launcher for CUDA_make_VectorA_B: Rst[i] = s - t[i] (component-wise),
// for i in [0, len).
// tX/tY/tZ and RstX/RstY/RstZ are forwarded unchanged to the kernel, which
// dereferences them in device code, so they must be device-accessible.
// Does nothing when len <= 0. Blocks until the device has finished
// (cudaDeviceSynchronize). Launch and execution errors are reported on stderr;
// the function still returns normally.
extern "C" void make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
    if (len <= 0) {
        return;  // a zero-sized grid is an invalid launch configuration
    }

    const int blockSize = 256;  // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);  // ceil(len / blockSize)

    // Execution configuration is <<<grid, block>>>. The two values were previously
    // swapped, which requested numBlocks threads per block and made the launch
    // fail without any message as soon as numBlocks exceeded 1024.
    CUDA_make_VectorA_B<<<numBlocks, blockSize>>>(sX, sY, sZ, tX, tY, tZ, RstX, RstY, RstZ, len);

    const cudaError_t launchStatus = cudaGetLastError();    // bad launch configuration
    const cudaError_t syncStatus = cudaDeviceSynchronize(); // faults raised while running
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: make_VectorA_B (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host launcher for CUDA_Norm_Vector: R[i] = length of (Vx[i], Vy[i], Vz[i]),
// for i in [0, len).
// All pointers are forwarded unchanged to the kernel, which dereferences them in
// device code, so they must be device-accessible.
// Does nothing when len <= 0. Blocks until the device has finished
// (cudaDeviceSynchronize). Launch and execution errors are reported on stderr;
// the function still returns normally.
extern "C" void Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
    if (len <= 0) {
        return;  // a zero-sized grid is an invalid launch configuration
    }

    const int blockSize = 256;  // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);  // ceil(len / blockSize)

    // Execution configuration is <<<grid, block>>>. The two values were previously
    // swapped, which requested numBlocks threads per block and made the launch
    // fail without any message as soon as numBlocks exceeded 1024.
    CUDA_Norm_Vector<<<numBlocks, blockSize>>>(Vx, Vy, Vz, R, len);

    const cudaError_t launchStatus = cudaGetLastError();    // bad launch configuration
    const cudaError_t syncStatus = cudaDeviceSynchronize(); // faults raised while running
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: Norm_Vector (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host launcher for CUDA_cosAngle_VA_AB: anglecos[i] receives the value the
// kernel computes from A[i] and B[i], for i in [0, len).
// All pointers are forwarded unchanged to the kernel, which dereferences them in
// device code, so they must be device-accessible.
// Does nothing when len <= 0. Blocks until the device has finished
// (cudaDeviceSynchronize). Launch and execution errors are reported on stderr;
// the function still returns normally.
extern "C" void cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
    if (len <= 0) {
        return;  // a zero-sized grid is an invalid launch configuration
    }

    const int blockSize = 256;  // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);  // ceil(len / blockSize)

    // Execution configuration is <<<grid, block>>>. The two values were previously
    // swapped, which requested numBlocks threads per block and made the launch
    // fail without any message as soon as numBlocks exceeded 1024.
    CUDA_cosAngle_VA_AB<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, anglecos, len);

    const cudaError_t launchStatus = cudaGetLastError();    // bad launch configuration
    const cudaError_t syncStatus = cudaDeviceSynchronize(); // faults raised while running
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: cosAngle_VA_AB (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host launcher for CUDA_SatelliteAntDirectNormal: fills thetaAnt[i] and
// phiAnt[i] from Rst[i] and the antenna axis vectors, for i in [0, len).
// RstX/RstY/RstZ, thetaAnt and phiAnt are forwarded unchanged to the kernel,
// which dereferences them in device code, so they must be device-accessible.
// The axis and antDirect* scalars are passed through by value.
// Does nothing when len <= 0. Blocks until the device has finished
// (cudaDeviceSynchronize). Launch and execution errors are reported on stderr;
// the function still returns normally.
extern "C" void SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ,
    float antXaxisX, float antXaxisY, float antXaxisZ,
    float antYaxisX, float antYaxisY, float antYaxisZ,
    float antZaxisX, float antZaxisY, float antZaxisZ,
    float antDirectX, float antDirectY, float antDirectZ,
    float* thetaAnt, float* phiAnt
    , long len) {
    if (len <= 0) {
        return;  // a zero-sized grid is an invalid launch configuration
    }

    const int blockSize = 256;  // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);  // ceil(len / blockSize)

    // Execution configuration is <<<grid, block>>>. The two values were previously
    // swapped, which requested numBlocks threads per block and made the launch
    // fail without any message as soon as numBlocks exceeded 1024.
    CUDA_SatelliteAntDirectNormal<<<numBlocks, blockSize>>>(RstX, RstY, RstZ,
        antXaxisX, antXaxisY, antXaxisZ,
        antYaxisX, antYaxisY, antYaxisZ,
        antZaxisX, antZaxisY, antZaxisZ,
        antDirectX, antDirectY, antDirectZ,
        thetaAnt, phiAnt,
        len);

    const cudaError_t launchStatus = cudaGetLastError();    // bad launch configuration
    const cudaError_t syncStatus = cudaDeviceSynchronize(); // faults raised while running
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: SatelliteAntDirectNormal (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
// Host launcher for CUDA_calculationEcho over len elements.
// All pointer arguments are forwarded unchanged to the kernel, which runs in
// device code, so they must be device-accessible. The scalar arguments are
// passed through by value (`pt` is the kernel's `Pt`, `echoAmp` its `echoArr`).
// Does nothing when len <= 0. Blocks until the device has finished
// (cudaDeviceSynchronize). Launch and execution errors are reported on stderr;
// the function still returns normally.
extern "C" void calculationEcho(float* sigma0,float* TransAnt,float* ReciveAnt,
    float* localangle,float* R, float* slopeangle,
    float nearRange,float Fs, float pt, float lamda, long FreqIDmax,
    cuComplex* echoAmp,long* FreqID,
    long len)
{
    if (len <= 0) {
        return;  // a zero-sized grid is an invalid launch configuration
    }

    const int blockSize = 256;  // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize);  // ceil(len / blockSize)

    // Execution configuration is <<<grid, block>>>. The two values were previously
    // swapped, which requested numBlocks threads per block and made the launch
    // fail without any message as soon as numBlocks exceeded 1024.
    CUDA_calculationEcho<<<numBlocks, blockSize>>>(sigma0, TransAnt, ReciveAnt,
        localangle, R, slopeangle,
        nearRange, Fs, pt, lamda, FreqIDmax,
        echoAmp, FreqID,
        len);

    const cudaError_t launchStatus = cudaGetLastError();    // bad launch configuration
    const cudaError_t syncStatus = cudaDeviceSynchronize(); // faults raised while running
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: calculationEcho (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
#endif