279 lines
9.6 KiB
Plaintext
279 lines
9.6 KiB
Plaintext
|
|
|||
|
|
|||
|
#include <iostream>
|
|||
|
#include <memory>
|
|||
|
#include <cmath>
|
|||
|
#include <complex>
|
|||
|
#include <device_launch_parameters.h>
|
|||
|
#include <cuda_runtime.h>
|
|||
|
#include <cublas_v2.h>
|
|||
|
#include <cuComplex.h>
|
|||
|
|
|||
|
#include "BaseConstVariable.h"
|
|||
|
#include "GPUTool.cuh"
|
|||
|
|
|||
|
#ifdef __CUDANVCC___
|
|||
|
|
|||
|
// 100 * Memory1MB. Memory1MB is declared outside this file (presumably one
// mebibyte in bytes - confirm). The expansion is parenthesized so the macro
// keeps its value inside larger expressions such as `x / CUDAMEMORY`.
#define CUDAMEMORY (Memory1MB * 100)

// Pi as a double-precision literal; expressions that use it are evaluated in double.
#define LAMP_CUDA_PI 3.141592653589793238462643383279
|
|||
|
|
|||
|
|
|||
|
|
|||
|
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
// Complex exponential of a single-precision complex value:
//   exp(a + i*b) = exp(a) * (cos(b) + i*sin(b))
// x.x is the real part, x.y the imaginary part.
__device__ cuComplex cuCexpf(cuComplex x)
{
    const float magnitude = expf(x.x);
    const float realPart = magnitude * cosf(x.y);
    const float imagPart = magnitude * sinf(x.y);
    return make_cuComplex(realPart, imagPart);
}
|
|||
|
|
|||
|
// Element-wise Euclidean distance between two point sets:
//   R[i] = sqrt((Ax[i]-Bx[i])^2 + (Ay[i]-By[i])^2 + (Az[i]-Bz[i])^2)
// One thread handles one element; threads whose index is >= len do nothing.
__global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz,float *R, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return; // surplus thread past the end of the data
    }

    const float dx = Ax[i] - Bx[i];
    const float dy = Ay[i] - By[i];
    const float dz = Az[i] - Bz[i];
    R[i] = sqrtf(powf(dx, 2) + powf(dy, 2) + powf(dz, 2));
}
|
|||
|
|
|||
|
// Euclidean distance from every point (Ax[i], Ay[i], Az[i]) to the single
// point (Bx, By, Bz); the result is written to R[i].
// One thread handles one element; threads whose index is >= len do nothing.
__global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return; // surplus thread past the end of the data
    }

    const float dx = Ax[i] - Bx;
    const float dy = Ay[i] - By;
    const float dz = Az[i] - Bz;
    R[i] = sqrtf(powf(dx, 2) + powf(dy, 2) + powf(dz, 2));
}
|
|||
|
|
|||
|
// Builds, for every element, the vector (sX, sY, sZ) - (tX[i], tY[i], tZ[i])
// and stores its components in RstX[i], RstY[i], RstZ[i].
// One thread handles one element; threads whose index is >= len do nothing.
__global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return; // surplus thread past the end of the data
    }

    const float diffX = sX - tX[i];
    const float diffY = sY - tY[i];
    const float diffZ = sZ - tZ[i];

    RstX[i] = diffX;
    RstY[i] = diffY;
    RstZ[i] = diffZ;
}
|
|||
|
|
|||
|
// Euclidean length of every vector (Vx[i], Vy[i], Vz[i]); written to R[i].
// One thread handles one element; threads whose index is >= len do nothing.
__global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz,float *R, long len) {
    const long i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= len) {
        return; // surplus thread past the end of the data
    }

    const float sumOfSquares = powf(Vx[i], 2) + powf(Vy[i], 2) + powf(Vz[i], 2);
    R[i] = sqrtf(sumOfSquares);
}
|
|||
|
|
|||
|
// Angle between the vectors A[i] = (Ax, Ay, Az)[i] and B[i] = (Bx, By, Bz)[i].
//
// Despite the kernel and output names, the value stored in anglecos[i] is
// acosf(dot(A, B) / (|A| * |B|)), i.e. the angle in radians, not its cosine.
//
// The cosine is clamped to [-1, 1] before acosf: for (anti-)parallel vectors
// rounding can push the quotient slightly outside that interval, which would
// otherwise produce NaN. A zero-length input still yields NaN (0/0), because
// the comparisons used for clamping are false for NaN.
//
// One thread handles one element; threads whose index is >= len do nothing.
__global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos,long len) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < len) {
        float tAx = Ax[idx];
        float tAy = Ay[idx];
        float tAz = Az[idx];
        float tBx = Bx[idx];
        float tBy = By[idx];
        float tBz = Bz[idx];

        float AR = sqrtf(powf(tAx, 2) + powf(tAy, 2) + powf(tAz, 2));
        float BR = sqrtf(powf(tBx, 2) + powf(tBy, 2) + powf(tBz, 2));
        float dotAB = tAx * tBx + tAy * tBy + tAz * tBz;

        float cosAB = dotAB / (AR * BR);
        if (cosAB > 1.0f) {
            cosAB = 1.0f;
        }
        else if (cosAB < -1.0f) {
            cosAB = -1.0f;
        }

        anglecos[idx] = acosf(cosAB);
    }
}
|
|||
|
|
|||
|
// For every element, negates the input vector (RstX, RstY, RstZ)[i], expresses
// it with respect to the antenna axes (antXaxis*, antYaxis*, antZaxis*) and
// writes the spherical angles of the result:
//   thetaAnt[i] = angle from the antenna Z axis, in [0, pi]
//   phiAnt[i]   = azimuth atan2(Yant, Xant), in [-pi, pi]
//
// phi was previously sign(Yant) * acos(Xant / (Norm * sin(theta))), which is
// NaN whenever Yant == 0 or sin(theta) == 0. atan2f gives the same angle for
// all other inputs and is well defined in those cases.
// The cosine passed to acosf is clamped to [-1, 1] to absorb rounding; NaN
// (e.g. a zero-length vector or a singular axis matrix) is passed through.
//
// antDirectX/Y/Z are accepted for interface compatibility but are not used.
// One thread handles one element; threads whose index is >= len do nothing.
__global__ void CUDA_SatelliteAntDirectNormal(float* RstX,float* RstY,float* RstZ,
    float antXaxisX,float antXaxisY,float antXaxisZ,
    float antYaxisX,float antYaxisY,float antYaxisZ,
    float antZaxisX,float antZaxisY,float antZaxisZ,
    float antDirectX,float antDirectY,float antDirectZ,
    float* thetaAnt,float* phiAnt
    , long len) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    // Reverse the direction of the input vector.
    float Xst = -1 * RstX[idx];
    float Yst = -1 * RstY[idx];
    float Zst = -1 * RstZ[idx];

    // Determinant of the 3x3 antenna-axis matrix; it is identical for all
    // three components, so it is computed once.
    float det = antXaxisX * (antYaxisY * antZaxisZ - antZaxisY * antYaxisZ)
              - antYaxisX * (antXaxisY * antZaxisZ - antXaxisZ * antZaxisY)
              + antZaxisX * (antXaxisY * antYaxisZ - antXaxisZ * antYaxisY);

    // Components in the antenna coordinate system.
    // NOTE(review): each component multiplies all three cofactors by the SAME
    // input component (Xst only, Yst only, Zst only). A 3x3 inverse would mix
    // Xst, Yst and Zst in every row, so this is very likely a bug. The intended
    // formula cannot be derived from this file, so the arithmetic is kept
    // unchanged - confirm against the derivation before relying on the angles.
    float Xant = (Xst * (antYaxisY * antZaxisZ - antYaxisZ * antZaxisY)
                + Xst * (antXaxisZ * antZaxisY - antXaxisY * antZaxisZ)
                + Xst * (antXaxisY * antYaxisZ - antXaxisZ * antYaxisY)) / det;
    float Yant = (Yst * (antYaxisZ * antZaxisX - antYaxisX * antZaxisZ)
                + Yst * (antXaxisX * antZaxisZ - antXaxisZ * antZaxisX)
                + Yst * (antYaxisX * antXaxisZ - antXaxisX * antYaxisZ)) / det;
    float Zant = (Zst * (antYaxisX * antZaxisY - antYaxisY * antZaxisX)
                + Zst * (antXaxisY * antZaxisX - antXaxisX * antZaxisY)
                + Zst * (antXaxisX * antYaxisY - antYaxisX * antXaxisY)) / det;

    // Spherical angles of (Xant, Yant, Zant).
    float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant);
    float cosTheta = Zant / Norm;
    if (cosTheta > 1.0f) {
        cosTheta = 1.0f;
    }
    else if (cosTheta < -1.0f) {
        cosTheta = -1.0f;
    }

    thetaAnt[idx] = acosf(cosTheta);   // angle from the Z axis
    phiAnt[idx] = atan2f(Yant, Xant);  // signed azimuth in the XY plane
}
|
|||
|
|
|||
|
|
|||
|
// Computes one complex echo sample and its range-sample index per element.
//
//   amplitude = Pt * TransAnt[i] * ReciveAnt[i] * sigma0[i] / ((4*pi)^2 * R[i]^4)
//   phase     = -4*pi * R[i] / lamda
//   echoArr[i] = amplitude * exp(i * phase)
//   FreqID[i]  = floor(2 * (R[i] - nearRange) / LIGHTSPEED * Fs)
//
// When the sample index falls outside [0, FreqIDmax) the index is set to 0 and
// the amplitude to 0, so the element contributes nothing.
//
// The index used to be computed and range-checked but never stored, leaving
// the FreqID output untouched. It is now written when FreqID is non-null.
//
// LIGHTSPEED is declared outside this file. localangle and slopeangle are
// accepted for interface compatibility but are not used.
// One thread handles one element; threads whose index is >= len do nothing.
__global__ void CUDA_calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
    float* localangle, float* R,float* slopeangle,
    float nearRange, float Fs,float Pt,float lamda,long FreqIDmax,
    cuComplex* echoArr , long* FreqID,
    long len) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    float r = R[idx];

    // Echo amplitude.
    float amp = Pt * TransAnt[idx] * ReciveAnt[idx];
    amp = amp * sigma0[idx];
    amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4));

    // Echo phase and the matching unit phasor.
    float phi = (-4 * LAMP_CUDA_PI / lamda) * r;
    cuComplex echophi = make_cuComplex(0, phi);
    cuComplex echophiexp = cuCexpf(echophi);

    // Range-sample index of this return.
    float timeR = 2 * (r - nearRange) / LIGHTSPEED * Fs;
    long timeID = floorf(timeR);
    if (timeID < 0 || timeID >= FreqIDmax) {
        timeID = 0;
        amp = 0;
    }

    cuComplex echo;
    echo.x = echophiexp.x * amp;
    echo.y = echophiexp.y * amp;

    echoArr[idx] = echo;
    if (FreqID) {
        FreqID[idx] = timeID;
    }
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
// Error reporting
|
|||
|
// Terminates the process when a CUDA call failed.
//   err - status returned by the CUDA call
//   msg - caller-supplied context printed in front of the CUDA error string
// On cudaSuccess this is a no-op; otherwise the message is written to
// std::cerr and the process exits with EXIT_FAILURE.
void checkCudaError(cudaError_t err, const char* msg) {
    if (err == cudaSuccess) {
        return;
    }

    std::cerr << "CUDA error: " << msg << " (" << cudaGetErrorString(err) << ")" << std::endl;
    exit(EXIT_FAILURE);
}
|
|||
|
|
|||
|
// Host (pinned) memory allocation
|
|||
|
// Requests memsize bytes of page-locked host memory via cudaMallocHost.
// A failure is reported on std::cerr instead of being silently ignored.
//
// NOTE(review): `ptr` is received by value, so the address produced by
// cudaMallocHost is stored in the local copy only and never reaches the
// caller - the allocation can be neither used nor freed. Fixing this needs a
// signature change (take `void**` or return the pointer) that must be made
// together with the declaration in the header and all callers.
extern "C" void mallocCUDAHost(void* ptr, long memsize) {
    const cudaError_t status = cudaMallocHost(&ptr, memsize);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: mallocCUDAHost (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// Host (pinned) memory release
|
|||
|
// Releases host memory through cudaFreeHost.
// A failure is reported on std::cerr instead of being silently ignored.
extern "C" void FreeCUDAHost(void* ptr) {
    const cudaError_t status = cudaFreeHost(ptr);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: FreeCUDAHost (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// GPU (device) memory allocation
|
|||
|
// Requests memsize bytes of device memory via cudaMalloc.
// A failure is reported on std::cerr instead of being silently ignored.
//
// NOTE(review): `ptr` is received by value, so the device address produced by
// cudaMalloc is stored in the local copy only and never reaches the caller -
// the allocation can be neither used nor freed. Fixing this needs a signature
// change (take `void**` or return the pointer) that must be made together
// with the declaration in the header and all callers.
extern "C" void mallocCUDADevice(void* ptr, long memsize) {
    const cudaError_t status = cudaMalloc(&ptr, memsize);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: mallocCUDADevice (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// GPU (device) memory release
|
|||
|
// Releases device memory through cudaFree.
// A failure is reported on std::cerr instead of being silently ignored.
extern "C" void FreeCUDADevice(void* ptr) {
    const cudaError_t status = cudaFree(ptr);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: FreeCUDADevice (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// GPU memory data transfer
|
|||
|
// Copies memsize bytes from host memory (hostptr) to device memory (deviceptr)
// with a blocking cudaMemcpy.
// A failure is reported on std::cerr instead of being silently ignored.
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize) {
    const cudaError_t status = cudaMemcpy(deviceptr, hostptr, memsize, cudaMemcpyHostToDevice);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: HostToDevice (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// Copies memsize bytes from device memory (deviceptr) to host memory (hostptr)
// with a blocking cudaMemcpy.
// A failure is reported on std::cerr instead of being silently ignored.
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize) {
    const cudaError_t status = cudaMemcpy(hostptr, deviceptr, memsize, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: DeviceToHost (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
// Host wrapper: launches CUDA_DistanceAB over len elements and waits for it.
// All pointer arguments are forwarded to the kernel unchanged, so they must be
// addresses the device can dereference.
//
// Fixes relative to the previous version:
//  - the execution configuration is <<<grid, block>>>; the two values were
//    swapped, so numBlocks was used as the threads-per-block count and the
//    launch failed as soon as it exceeded the per-block thread limit;
//  - launch and execution errors are reported on std::cerr;
//  - the call now synchronizes like the other wrappers in this file;
//  - len <= 0 returns immediately instead of requesting an empty grid.
extern "C" void distanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R,long len) {
    if (len <= 0) {
        return;
    }

    const int blockSize = 256; // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize); // ceil(len / blockSize)

    CUDA_DistanceAB<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, R, len);

    cudaError_t status = cudaGetLastError(); // launch-configuration errors
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();    // errors raised while the kernel ran
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: distanceAB (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// Host wrapper: launches CUDA_B_DistanceA over len elements and waits for it.
// Pointer arguments are forwarded to the kernel unchanged, so they must be
// addresses the device can dereference.
//
// Fixes relative to the previous version:
//  - the execution configuration is <<<grid, block>>>; the two values were
//    swapped, so numBlocks was used as the threads-per-block count and the
//    launch failed as soon as it exceeded the per-block thread limit;
//  - launch and execution errors are reported on std::cerr;
//  - len <= 0 returns immediately instead of requesting an empty grid.
extern "C" void BdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
    if (len <= 0) {
        return;
    }

    const int blockSize = 256; // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize); // ceil(len / blockSize)

    CUDA_B_DistanceA<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, R, len);

    cudaError_t status = cudaGetLastError(); // launch-configuration errors
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();    // errors raised while the kernel ran
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: BdistanceAs (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// Host wrapper: launches CUDA_make_VectorA_B over len elements and waits for it.
// Pointer arguments are forwarded to the kernel unchanged, so they must be
// addresses the device can dereference.
//
// Fixes relative to the previous version:
//  - the execution configuration is <<<grid, block>>>; the two values were
//    swapped, so numBlocks was used as the threads-per-block count and the
//    launch failed as soon as it exceeded the per-block thread limit;
//  - launch and execution errors are reported on std::cerr;
//  - len <= 0 returns immediately instead of requesting an empty grid.
extern "C" void make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
    if (len <= 0) {
        return;
    }

    const int blockSize = 256; // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize); // ceil(len / blockSize)

    CUDA_make_VectorA_B<<<numBlocks, blockSize>>>(sX, sY, sZ, tX, tY, tZ, RstX, RstY, RstZ, len);

    cudaError_t status = cudaGetLastError(); // launch-configuration errors
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();    // errors raised while the kernel ran
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: make_VectorA_B (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// Host wrapper: launches CUDA_Norm_Vector over len elements and waits for it.
// Pointer arguments are forwarded to the kernel unchanged, so they must be
// addresses the device can dereference.
//
// Fixes relative to the previous version:
//  - the execution configuration is <<<grid, block>>>; the two values were
//    swapped, so numBlocks was used as the threads-per-block count and the
//    launch failed as soon as it exceeded the per-block thread limit;
//  - launch and execution errors are reported on std::cerr;
//  - len <= 0 returns immediately instead of requesting an empty grid.
extern "C" void Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
    if (len <= 0) {
        return;
    }

    const int blockSize = 256; // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize); // ceil(len / blockSize)

    CUDA_Norm_Vector<<<numBlocks, blockSize>>>(Vx, Vy, Vz, R, len);

    cudaError_t status = cudaGetLastError(); // launch-configuration errors
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();    // errors raised while the kernel ran
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: Norm_Vector (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// Host wrapper: launches CUDA_cosAngle_VA_AB over len elements and waits for it.
// Pointer arguments are forwarded to the kernel unchanged, so they must be
// addresses the device can dereference.
//
// Fixes relative to the previous version:
//  - the execution configuration is <<<grid, block>>>; the two values were
//    swapped, so numBlocks was used as the threads-per-block count and the
//    launch failed as soon as it exceeded the per-block thread limit;
//  - launch and execution errors are reported on std::cerr;
//  - len <= 0 returns immediately instead of requesting an empty grid.
extern "C" void cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
    if (len <= 0) {
        return;
    }

    const int blockSize = 256; // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize); // ceil(len / blockSize)

    CUDA_cosAngle_VA_AB<<<numBlocks, blockSize>>>(Ax, Ay, Az, Bx, By, Bz, anglecos, len);

    cudaError_t status = cudaGetLastError(); // launch-configuration errors
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();    // errors raised while the kernel ran
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: cosAngle_VA_AB (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
// Host wrapper: launches CUDA_SatelliteAntDirectNormal over len elements and
// waits for it. All arguments are forwarded to the kernel unchanged, so the
// pointers must be addresses the device can dereference.
//
// Fixes relative to the previous version:
//  - the execution configuration is <<<grid, block>>>; the two values were
//    swapped, so numBlocks was used as the threads-per-block count and the
//    launch failed as soon as it exceeded the per-block thread limit;
//  - launch and execution errors are reported on std::cerr;
//  - len <= 0 returns immediately instead of requesting an empty grid.
extern "C" void SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ,
    float antXaxisX, float antXaxisY, float antXaxisZ,
    float antYaxisX, float antYaxisY, float antYaxisZ,
    float antZaxisX, float antZaxisY, float antZaxisZ,
    float antDirectX, float antDirectY, float antDirectZ,
    float* thetaAnt, float* phiAnt
    , long len) {
    if (len <= 0) {
        return;
    }

    const int blockSize = 256; // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize); // ceil(len / blockSize)

    CUDA_SatelliteAntDirectNormal<<<numBlocks, blockSize>>>(RstX, RstY, RstZ,
        antXaxisX, antXaxisY, antXaxisZ,
        antYaxisX, antYaxisY, antYaxisZ,
        antZaxisX, antZaxisY, antZaxisZ,
        antDirectX, antDirectY, antDirectZ,
        thetaAnt, phiAnt,
        len);

    cudaError_t status = cudaGetLastError(); // launch-configuration errors
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();    // errors raised while the kernel ran
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: SatelliteAntDirectNormal (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
// Host wrapper: launches CUDA_calculationEcho over len elements and waits for
// it. All arguments are forwarded to the kernel unchanged (echoAmp is the
// kernel's echoArr, pt its Pt), so the pointers must be addresses the device
// can dereference.
//
// Fixes relative to the previous version:
//  - the execution configuration is <<<grid, block>>>; the two values were
//    swapped, so numBlocks was used as the threads-per-block count and the
//    launch failed as soon as it exceeded the per-block thread limit;
//  - launch and execution errors are reported on std::cerr;
//  - len <= 0 returns immediately instead of requesting an empty grid.
extern "C" void calculationEcho(float* sigma0,float* TransAnt,float* ReciveAnt,
    float* localangle,float* R, float* slopeangle,
    float nearRange,float Fs, float pt, float lamda, long FreqIDmax,
    cuComplex* echoAmp,long* FreqID,
    long len)
{
    if (len <= 0) {
        return;
    }

    const int blockSize = 256; // threads per block
    const int numBlocks = static_cast<int>((len + blockSize - 1) / blockSize); // ceil(len / blockSize)

    CUDA_calculationEcho<<<numBlocks, blockSize>>>(sigma0, TransAnt, ReciveAnt,
        localangle, R, slopeangle,
        nearRange, Fs, pt, lamda, FreqIDmax,
        echoAmp, FreqID,
        len);

    cudaError_t status = cudaGetLastError(); // launch-configuration errors
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();    // errors raised while the kernel ran
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: calculationEcho (" << cudaGetErrorString(status) << ")" << std::endl;
    }
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
#endif
|