从BaseTool 中拆分除GPU的Base、RTPC、TBPImage 等GPU文件

pull/3/head
陈增辉 2024-12-24 15:27:09 +08:00
parent 16ed299e9f
commit 7ad0acdd5d
11 changed files with 995 additions and 1417 deletions

View File

@ -1,953 +0,0 @@
#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#ifdef __CUDANVCC___
#define CUDAMEMORY Memory1MB*100
#define LAMP_CUDA_PI 3.141592653589793238462643383279
// 定义参数
__device__ cuComplex cuCexpf(cuComplex x)
{
float factor = exp(x.x);
return make_cuComplex(factor * cos(x.y), factor * sin(x.y));
}
// 定义仿真所需参数
__device__ float GPU_getSigma0dB(CUDASigmaParam param, float theta) {
float sigma= param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
return sigma;
}
__device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B) {
CUDAVector C;
C.x = B.x - A.x;
C.y = B.y - A.y;
C.z = B.z - A.z;
return C;
}
__device__ float GPU_VectorNorm2(CUDAVector A) {
return sqrtf(A.x * A.x + A.y * A.y + A.z * A.z);
}
__device__ float GPU_dotVector(CUDAVector A, CUDAVector B) {
return A.x * B.x + A.y * B.y + A.z * B.z;
}
__device__ float GPU_CosAngle_VectorA_VectorB(CUDAVector A, CUDAVector B) {
return GPU_dotVector(A, B) / (GPU_VectorNorm2(A) * GPU_VectorNorm2(B));
}
__device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(float RstX, float RstY, float RstZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ
) {
CUDAVectorEllipsoidal result{ 0,0,-1 };
float Xst = -1 * RstX; // 卫星 --> 地面
float Yst = -1 * RstY;
float Zst = -1 * RstZ;
float AntXaxisX = antXaxisX;
float AntXaxisY = antXaxisY;
float AntXaxisZ = antXaxisZ;
float AntYaxisX = antYaxisX;
float AntYaxisY = antYaxisY;
float AntYaxisZ = antYaxisZ;
float AntZaxisX = antZaxisX;
float AntZaxisY = antZaxisY;
float AntZaxisZ = antZaxisZ;
// 天线指向在天线坐标系下的值
float Xant = (Xst * (AntYaxisY * AntZaxisZ - AntYaxisZ * AntZaxisY) + Xst * (AntXaxisZ * AntZaxisY - AntXaxisY * AntZaxisZ) + Xst * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY)) / (AntXaxisX * (AntYaxisY * AntZaxisZ - AntZaxisY * AntYaxisZ) - AntYaxisX * (AntXaxisY * AntZaxisZ - AntXaxisZ * AntZaxisY) + AntZaxisX * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY));
float Yant = (Yst * (AntYaxisZ * AntZaxisX - AntYaxisX * AntZaxisZ) + Yst * (AntXaxisX * AntZaxisZ - AntXaxisZ * AntZaxisX) + Yst * (AntYaxisX * AntXaxisZ - AntXaxisX * AntYaxisZ)) / (AntXaxisX * (AntYaxisY * AntZaxisZ - AntZaxisY * AntYaxisZ) - AntYaxisX * (AntXaxisY * AntZaxisZ - AntXaxisZ * AntZaxisY) + AntZaxisX * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY));
float Zant = (Zst * (AntYaxisX * AntZaxisY - AntYaxisY * AntZaxisX) + Zst * (AntXaxisY * AntZaxisX - AntXaxisX * AntZaxisY) + Zst * (AntXaxisX * AntYaxisY - AntYaxisX * AntXaxisY)) / (AntXaxisX * (AntYaxisY * AntZaxisZ - AntZaxisY * AntYaxisZ) - AntYaxisX * (AntXaxisY * AntZaxisZ - AntXaxisZ * AntZaxisY) + AntZaxisX * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY));
// 计算theta 与 phi
float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
float ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
float YsinTheta = Yant / sinf(ThetaAnt);
float PhiAnt = (YsinTheta / abs(YsinTheta)) * acosf(Xant / (Norm * sinf(ThetaAnt)));
result.theta = ThetaAnt;
result.phi = PhiAnt;
result.pho = Norm;
return result;
}
/**
天线方向图插值方法以双线性插值算法为基础由theta与phi组合得到的矩阵图为基础数据通过插值计算的方法获取目标点的数据。
其中行是theta、列是phi
*/
__device__ float GPU_BillerInterpAntPattern(float* antpattern,
float starttheta, float startphi, float dtheta, float dphi,
long thetapoints, long phipoints,
float searththeta, float searchphi) {
float stheta = searththeta;
float sphi = searchphi;
if (stheta > 90) {
return 0;
}
else {}
float pthetaid = (stheta - starttheta) / dtheta;//
float pphiid = (sphi - startphi) / dphi;
long lasttheta = floorf(pthetaid);
long nextTheta = lasttheta + 1;
long lastphi = floorf(pphiid);
long nextPhi = lastphi + 1;
if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
{
return 0;
}
else {
float x = stheta;
float y = sphi;
float x1 = lasttheta * dtheta + starttheta;
float x2 = nextTheta * dtheta + starttheta;
float y1 = lastphi * dphi + startphi;
float y2 = nextPhi * dphi + startphi;
float z11 = antpattern[lasttheta * phipoints + lastphi];
float z12 = antpattern[lasttheta * phipoints + nextPhi];
float z21 = antpattern[nextTheta * phipoints + lastphi];
float z22 = antpattern[nextTheta * phipoints + nextPhi];
//z11 = powf(10, z11 / 10); // dB-> 线性
//z12 = powf(10, z12 / 10);
//z21 = powf(10, z21 / 10);
//z22 = powf(10, z22 / 10);
float GainValue = (z11 * (x2 - x) * (y2 - y)
+ z21 * (x - x1) * (y2 - y)
+ z12 * (x2 - x) * (y - y1)
+ z22 * (x - x1) * (y - y1));
GainValue = GainValue / ((x2 - x1) * (y2 - y1));
return GainValue;
}
}
__device__ cuComplex GPU_calculationEcho(float sigma0, float TransAnt, float ReciveAnt,
float localangle, float R, float slopeangle, float Pt, float lamda) {
float r = R;
float amp = Pt * TransAnt * ReciveAnt;
amp = amp * sigma0;
amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4)); // 反射强度
float phi = (-4 * LAMP_CUDA_PI / lamda) * r;
cuComplex echophi = make_cuComplex(0, phi);
cuComplex echophiexp = cuCexpf(echophi);
cuComplex echo;
echo.x = echophiexp.x * amp;
echo.y = echophiexp.y * amp;
return echo;
}
__global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Ax[idx] - Bx[idx], 2) + powf(Ay[idx] - By[idx], 2) + powf(Az[idx] - Bz[idx], 2));
}
}
__global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Ax[idx] - Bx, 2) + powf(Ay[idx] - By, 2) + powf(Az[idx] - Bz, 2));
}
}
__global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
RstX[idx] = sX - tX[idx]; // 地面->天
RstY[idx] = sY - tY[idx];
RstZ[idx] = sZ - tZ[idx];
}
}
__global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Vx[idx], 2) + powf(Vy[idx], 2) + powf(Vz[idx], 2));
}
}
__global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float tAx = Ax[idx];
float tAy = Ay[idx];
float tAz = Az[idx];
float tBx = Bx[idx];
float tBy = By[idx];
float tBz = Bz[idx];
float AR = sqrtf(powf(tAx, 2) + powf(tAy, 2) + powf(tAz, 2));
float BR = sqrtf(powf(tBx, 2) + powf(tBy, 2) + powf(tBz, 2));
float dotAB = tAx * tBx + tAy * tBy + tAz * tBz;
float result = acosf(dotAB / (AR * BR));
anglecos[idx] = result;
}
}
__global__ void CUDA_SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* thetaAnt, float* phiAnt
, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float Xst = -1 * RstX[idx]; // 卫星 --> 地面
float Yst = -1 * RstY[idx];
float Zst = -1 * RstZ[idx];
float AntXaxisX = antXaxisX;
float AntXaxisY = antXaxisY;
float AntXaxisZ = antXaxisZ;
float AntYaxisX = antYaxisX;
float AntYaxisY = antYaxisY;
float AntYaxisZ = antYaxisZ;
float AntZaxisX = antZaxisX;
float AntZaxisY = antZaxisY;
float AntZaxisZ = antZaxisZ;
// 归一化
float RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
float AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
float AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
float AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);
float Rx = Xst / RstNorm;
float Ry = Yst / RstNorm;
float Rz = Zst / RstNorm;
float Xx = AntXaxisX / AntXaxisNorm;
float Xy = AntXaxisY / AntXaxisNorm;
float Xz = AntXaxisZ / AntXaxisNorm;
float Yx = AntYaxisX / AntYaxisNorm;
float Yy = AntYaxisY / AntYaxisNorm;
float Yz = AntYaxisZ / AntYaxisNorm;
float Zx = AntZaxisX / AntZaxisNorm;
float Zy = AntZaxisY / AntZaxisNorm;
float Zz = AntZaxisZ / AntZaxisNorm;
float Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
float Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
float Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
// 计算theta 与 phi
float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
float ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
float PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2
if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
PhiAnt = 0;
}
else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上原点
if (Yant > 0) {
PhiAnt = PI / 2;
}
else {
PhiAnt = -PI / 2;
}
}
else if (Xant < 0) {
if (Yant > 0) {
PhiAnt = PI + PhiAnt;
}
else {
PhiAnt = -PI+PhiAnt ;
}
}
else { // Xant>0 X 正轴
}
if (isnan(PhiAnt)) {
printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant,Norm, ThetaAnt, PhiAnt);
}
//if (abs(ThetaAnt - 0) < PRECISIONTOLERANCE) {
// PhiAnt = 0;
//}
//else {}
thetaAnt[idx] = ThetaAnt*r2d;
phiAnt[idx] = PhiAnt*r2d;
//printf("Rst=[%f,%f,%f];AntXaxis = [%f, %f, %f];AntYaxis=[%f,%f,%f];AntZaxis=[%f,%f,%f];phiAnt=%f;thetaAnt=%f;\n", Xst, Yst, Zst
// , AntXaxisX, AntXaxisY, AntXaxisZ
// , AntYaxisX, AntYaxisY, AntYaxisZ
// , AntZaxisX, AntZaxisY, AntZaxisZ
// , phiAnt[idx]
// , thetaAnt[idx]
//);
}
}
__global__ void CUDA_BillerInterpAntPattern(float* antpattern,
float starttheta, float startphi, float dtheta, float dphi,
long thetapoints, long phipoints,
float* searththeta, float* searchphi, float* searchantpattern,
long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float stheta = searththeta[idx];
float sphi = searchphi[idx];
float pthetaid = (stheta - starttheta) / dtheta;//
float pphiid = (sphi - startphi) / dphi;
long lasttheta = floorf(pthetaid);
long nextTheta = lasttheta + 1;
long lastphi = floorf(pphiid);
long nextPhi = lastphi + 1;
if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
{
searchantpattern[idx] = 0;
}
else {
float x = stheta;
float y = sphi;
float x1 = lasttheta * dtheta + starttheta;
float x2 = nextTheta * dtheta + starttheta;
float y1 = lastphi * dphi + startphi;
float y2 = nextPhi * dphi + startphi;
float z11 = antpattern[lasttheta * phipoints + lastphi];
float z12 = antpattern[lasttheta * phipoints + nextPhi];
float z21 = antpattern[nextTheta * phipoints + lastphi];
float z22 = antpattern[nextTheta * phipoints + nextPhi];
z11 = powf(10, z11 / 10);
z12 = powf(10, z12 / 10);
z21 = powf(10, z21 / 10);
z22 = powf(10, z22 / 10);
float GainValue = (z11 * (x2 - x) * (y2 - y)
+ z21 * (x - x1) * (y2 - y)
+ z12 * (x2 - x) * (y - y1)
+ z22 * (x - x1) * (y - y1));
GainValue = GainValue / ((x2 - x1) * (y2 - y1));
searchantpattern[idx] = GainValue;
}
}
}
__global__ void CUDA_Test_HelloWorld(float a, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
printf("\nidx:\t %d %d \n", idx, len);
}
__global__ void CUDA_RTPC(
float antPx, float antPy, float antPz,// 天线坐标
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* demx, float* demy, float* demz, long* demcls,
float* demslopex, float* demslopey, float* demslopez, float* demslopeangle,
float* Tantpattern, float Tstarttheta, float Tstartphi, float Tdtheta, float Tdphi, long Tthetapoints, long Tphipoints,
float* Rantpattern, float Rstarttheta, float Rstartphi, float Rdtheta, float Rdphi, long Rthetapoints, long Rphipoints,
float lamda, float fs, float nearrange, float Pt, long Freqnumbers, // 参数
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 地表覆盖类型-sigma插值对应函数-ulaby
cuComplex* outecho, int* d_echoAmpFID,
int linecount,int plusepoint) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
//printf("\nidx:\t %d %d %d\n", idx, linecount, plusepoint);
if (idx < linecount* plusepoint) {
long clsid = demcls[idx];
CUDAVector Rs{ antPx,antPy,antPz };
CUDAVector Rt{ demx[idx],demy[idx],demz[idx] };
CUDAVector Rst{ Rs.x - Rt.x,Rs.y - Rt.y,Rs.z - Rt.z };
CUDAVector Vslope{ demslopex[idx],demslopey[idx],demslopez[idx] };
float R = GPU_VectorNorm2(Rst); // 斜距
float slopeangle = demslopeangle[idx];
CUDAVectorEllipsoidal Rtanttheta = GPU_SatelliteAntDirectNormal( // 地面目标在天线的位置
Rst.x, Rst.y, Rst.z,
antXaxisX, antXaxisY, antXaxisZ,
antYaxisX, antYaxisY, antYaxisZ,
antZaxisX, antZaxisY, antZaxisZ,
antDirectX, antDirectY, antDirectZ);
float localangle = GPU_CosAngle_VectorA_VectorB(Rst, Vslope); // 距地入射角
float sigma = GPU_getSigma0dB(sigma0Paramslist[clsid], localangle * r2d);
sigma = powf(10.0, sigma / 10.0);// 后向散射系数
//printf("\ntheta: %f\t,%f ,%f ,%f ,%f ,%f ,%f \n", localangle * r2d, sigma0Paramslist[clsid].p1, sigma0Paramslist[clsid].p2, sigma0Paramslist[clsid].p3,
// sigma0Paramslist[clsid].p4, sigma0Paramslist[clsid].p5, sigma0Paramslist[clsid].p6);
// 发射方向图
float transPattern = GPU_BillerInterpAntPattern(Tantpattern,
Tstarttheta, Tstartphi, Tdtheta, Tdphi, Tthetapoints, Tphipoints,
Rtanttheta.theta, Rtanttheta.phi) * r2d;
// 接收方向图
float receivePattern = GPU_BillerInterpAntPattern(Rantpattern,
Rstarttheta, Rstartphi, Rdtheta, Rdphi, Rthetapoints, Rphipoints,
Rtanttheta.theta, Rtanttheta.phi) * r2d;
// 计算振幅、相位
float amp = Pt * transPattern * receivePattern * sigma * (1 / cos(slopeangle) * sin(localangle));
amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4));
float phi = (-4 * LAMP_CUDA_PI / lamda) * R;
// 构建回波
cuComplex echophi = make_cuComplex(0, phi);
cuComplex echophiexp = cuCexpf(echophi);
float timeR = 2 * (R - nearrange) / LIGHTSPEED * fs;
long timeID = floorf(timeR);
if (timeID < 0 || timeID >= Freqnumbers) {
timeID = 0;
amp = 0;
}
else {}
cuComplex echo;
echo.x = echophiexp.x * amp;
echo.y = echophiexp.y * amp;
outecho[idx] = echo;
d_echoAmpFID[idx] = timeID;
}
}
__global__ void CUDA_TBPImage(
float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz,
cuComplex* echoArr, cuComplex* imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount
) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
//printf("\nidx:\t %d %d %d\n", idx, linecount, plusepoint);
if (idx < rowcount * colcount) {
float R = sqrtf(powf(antPx[prfid] - imgx[idx], 2) + powf(antPy[prfid] - imgy[idx], 2) + powf(antPz[prfid] - imgz[idx], 2));
float Ridf = ((R - Rnear) * 2 / LIGHTSPEED) * fs;
long Rid = floorf(Ridf);
if(Rid <0|| Rid >= freqcount){}
else {
float factorj = freq * 4 * PI / LIGHTSPEED;
cuComplex Rphi =cuCexpf(make_cuComplex(0, factorj * R));// 校正项
imgArr[idx] = cuCaddf(imgArr[idx], cuCmulf(echoArr[Rid] , Rphi));// 矫正
}
}
}
__global__ void CUDA_calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
float* localangle, float* R, float* slopeangle,
float nearRange, float Fs, float Pt, float lamda, long FreqIDmax,
cuComplex* echoArr, long* FreqID,
long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float r = R[idx];
float amp = Pt * TransAnt[idx] * ReciveAnt[idx];
amp = amp * sigma0[idx];
amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4)); // 反射强度
// 处理相位
float phi = (-4 * LAMP_CUDA_PI / lamda) * r;
cuComplex echophi = make_cuComplex(0, phi);
cuComplex echophiexp = cuCexpf(echophi);
float timeR = 2 * (r - nearRange) / LIGHTSPEED * Fs;
long timeID = floorf(timeR);
if (timeID < 0 || timeID >= FreqIDmax) {
timeID = 0;
amp = 0;
}
cuComplex echo;
echo.x = echophiexp.x * amp;
echo.y = echophiexp.y * amp;
echoArr[idx] = echo;
FreqID[idx] = timeID;
}
}
__global__ void CUDA_AntPatternInterpGain(float* anttheta, float* antphi, float* gain,
float* antpattern, float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints, long len) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float temptheta = anttheta[idx];
float tempphi = antphi[idx];
float antPatternGain = GPU_BillerInterpAntPattern(antpattern,
starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
temptheta, tempphi) ;
gain[idx] = antPatternGain;
}
}
//__global__ void Sigma0InterpPixel(long* demcls, float* demslopeangle, CUDASigmaParam* sigma0Paramslist, float* localangle, float* sigma0list, long sigmaparamslistlen, long len)
//{
// long idx = blockIdx.x * blockDim.x + threadIdx.x;
// if (idx < len) {
// long clsid = demcls[idx];
// if(clsid<=)
// sigma0list[idx] = 0;
// }
//}
__global__ void CUDA_InterpSigma(
long* demcls, float* sigmaAmp, float* localanglearr, long len,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
long clsid = demcls[idx];
float localangle = localanglearr[idx] * r2d;
CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
//printf("cls:%d;localangle=%f;\n",clsid, localangle);
if (localangle < 0 || localangle >= 90) {
sigmaAmp[idx] = 0;
}
else {}
if (abs(tempsigma.p1)< PRECISIONTOLERANCE&&
abs(tempsigma.p2) < PRECISIONTOLERANCE &&
abs(tempsigma.p3) < PRECISIONTOLERANCE &&
abs(tempsigma.p4) < PRECISIONTOLERANCE&&
abs(tempsigma.p5) < PRECISIONTOLERANCE&&
abs(tempsigma.p6) < PRECISIONTOLERANCE
) {
sigmaAmp[idx] = 0;
}
else {
float sigma = GPU_getSigma0dB(tempsigma, localangle);
sigma = powf(10.0, sigma / 10.0);// 后向散射系数
//printf("cls:%d;localangle=%f;sigma0=%f;\n", clsid, localangle, sigma);
sigmaAmp[idx] = sigma;
}
}
}
//错误提示
void checkCudaError(cudaError_t err, const char* msg) {
if (err != cudaSuccess) {
std::cerr << "CUDA error: " << msg << " (" << cudaGetErrorString(err) << ")" << std::endl;
exit(EXIT_FAILURE);
}
}
// 主机参数内存声明
extern "C" void* mallocCUDAHost(long memsize) {
void* ptr;
cudaMallocHost(&ptr, memsize);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("mallocCUDAHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
return ptr;
}
// 主机参数内存释放
extern "C" void FreeCUDAHost(void* ptr) {
cudaFreeHost(ptr);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("FreeCUDAHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// GPU参数内存声明
extern "C" void* mallocCUDADevice(long memsize) {
void* ptr;
cudaMalloc(&ptr, memsize);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("mallocCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
return ptr;
}
// GPU参数内存释放
extern "C" void FreeCUDADevice(void* ptr) {
cudaFree(ptr);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("FreeCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// GPU 内存数据转移
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize) {
cudaMemcpy(deviceptr, hostptr, memsize, cudaMemcpyHostToDevice);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("HostToDevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize) {
cudaMemcpy(hostptr, deviceptr, memsize, cudaMemcpyDeviceToHost);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("DeviceToHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDATestHelloWorld(float a,long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_Test_HelloWorld << <numBlocks, blockSize >> > (a, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("FreeCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
void CUDATBPImage(float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz,
cuComplex* echoArr, cuComplex* imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount)
{
int blockSize = 256; // 每个块的线程数
int numBlocks = (rowcount * colcount + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
//printf("\nCUDA_RTPC_SiglePRF blockSize:%d ,numBlock:%d\n",blockSize,numBlocks);
// 调用 CUDA 核函数 CUDA_RTPC_Kernel
CUDA_TBPImage << <numBlocks, blockSize >> > (
antPx, antPy, antPz,
imgx, imgy, imgz,
echoArr, imgArr,
freq, fs, Rnear, Rfar,
rowcount, colcount,
prfid, freqcount
);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDATBPImage CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void distanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_DistanceAB << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void BdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_B_DistanceA << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_make_VectorA_B << <numBlocks, blockSize >> > (sX, sY, sZ, tX, tY, tZ, RstX, RstY, RstZ, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_Norm_Vector << <numBlocks, blockSize >> > (Vx, Vy, Vz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_cosAngle_VA_AB << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, anglecos, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* thetaAnt, float* phiAnt
, long len) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_SatelliteAntDirectNormal << <numBlocks, blockSize >> > (RstX, RstY, RstZ,
antXaxisX, antXaxisY, antXaxisZ,
antYaxisX, antYaxisY, antYaxisZ,
antZaxisX, antZaxisY, antZaxisZ,
antDirectX, antDirectY, antDirectZ,
thetaAnt, phiAnt
, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void AntPatternInterpGain(float* anttheta, float* antphi, float* gain,
float* antpattern, float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints, long len) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
//printf("\nCUDA_RTPC_SiglePRF blockSize:%d ,numBlock:%d\n", blockSize, numBlocks);
CUDA_AntPatternInterpGain << <numBlocks, blockSize >> > ( anttheta,antphi, gain,
antpattern,
starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDARTPCPRF(float antPx, long len) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
printf("\nCUDA_RTPC_SiglePRF blockSize:%d ,numBlock:%d\n", blockSize, numBlocks);
CUDA_Test_HelloWorld << <numBlocks, blockSize >> > (antPx, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
float* localangle, float* R, float* slopeangle,
float nearRange, float Fs, float pt, float lamda, long FreqIDmax,
cuComplex* echoAmp, long* FreqID,
long len)
{
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_calculationEcho << <numBlocks, blockSize >> > (sigma0, TransAnt, ReciveAnt,
localangle, R, slopeangle,
nearRange, Fs, pt, lamda, FreqIDmax,
echoAmp, FreqID,
len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDA_RTPC_SiglePRF(
float antPx, float antPy, float antPZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* demx, float* demy, float* demz, long* demcls,
float* demslopex, float* demslopey, float* demslopez, float* demslopeangle,
float* Tantpattern, float Tstarttheta, float Tstartphi, float Tdtheta, float Tdphi, int Tthetapoints, int Tphipoints,
float* Rantpattern, float Rstarttheta, float Rstartphi, float Rdtheta, float Rdphi, int Rthetapoints, int Rphipoints,
float lamda, float fs, float nearrange, float Pt, int Freqnumbers,
CUDASigmaParam* sigma0Paramslist, int sigmaparamslistlen,
cuComplex* outecho, int* d_echoAmpFID,
int linecount,int colcount) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (linecount* colcount + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
//printf("\nCUDA_RTPC_SiglePRF blockSize:%d ,numBlock:%d\n",blockSize,numBlocks);
// 调用 CUDA 核函数 CUDA_RTPC_Kernel
CUDA_RTPC << <numBlocks, blockSize >> > (
antPx, antPy, antPZ,// 天线坐标
antXaxisX, antXaxisY, antXaxisZ, // 天线坐标系
antYaxisX, antYaxisY, antYaxisZ, //
antZaxisX, antZaxisY, antZaxisZ,
antDirectX, antDirectY, antDirectZ,// 天线指向
demx, demy, demz,
demcls, // 地面坐标
demslopex, demslopey, demslopez, demslopeangle,// 地面坡度
Tantpattern, Tstarttheta, Tstartphi, Tdtheta, Tdphi, Tthetapoints, Tphipoints,// 天线方向图相关
Rantpattern, Rstarttheta, Rstartphi, Rdtheta, Rdphi, Rthetapoints, Rphipoints,// 天线方向图相关
lamda, fs, nearrange, Pt, Freqnumbers, // 参数
sigma0Paramslist, sigmaparamslistlen,// 地表覆盖类型-sigma插值对应函数-ulaby
outecho, d_echoAmpFID,
linecount, colcount
);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDAInterpSigma(
long* demcls,float* sigmaAmp, float* localanglearr,long len,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {// 地表覆盖类型-sigma插值对应函数-ulaby
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_InterpSigma << <numBlocks, blockSize >> > (
demcls, sigmaAmp, localanglearr, len,
sigma0Paramslist, sigmaparamslistlen
);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
#endif

View File

@ -1,452 +0,0 @@
#ifndef GPUTOOL_H
#define GPUTOOL_H
#ifdef __CUDANVCC___
#include "BaseConstVariable.h"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#define __CUDADEBUG__
// 默认显存分布
enum LAMPGPUDATETYPE {
LAMP_LONG,
LAMP_FLOAT,
LAMP_COMPLEXFLOAT
};
extern "C" struct CUDASigmaParam {
float p1;
float p2;
float p3;
float p4;
float p5;
float p6;
};
extern "C" struct CUDAVector {
float x;
float y;
float z;
};
extern "C" struct CUDAVectorEllipsoidal {
float theta;
float phi;
float pho;
};
// GPU 内存函数
extern "C" void* mallocCUDAHost( long memsize); // 主机内存声明
extern "C" void FreeCUDAHost(void* ptr);
extern "C" void* mallocCUDADevice( long memsize); // GPU内存声明
extern "C" void FreeCUDADevice(void* ptr);
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize);//GPU 内存数据转移 设备 -> GPU
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize);//GPU 内存数据转移 GPU -> 设备
// 仿真所需的常用函数
extern "C" void distanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long member);
extern "C" void BdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long member);
extern "C" void make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long member);
extern "C" void Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long member);
extern "C" void cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len);
extern "C" void SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ, float antXaxisX, float antXaxisY, float antXaxisZ, float antYaxisX, float antYaxisY, float antYaxisZ, float antZaxisX, float antZaxisY, float antZaxisZ, float antDirectX, float antDirectY, float antDirectZ, float* thetaAnt, float* phiAnt, long len);
extern "C" void AntPatternInterpGain(float* anttheta, float* antphi, float* gain, float* antpattern, float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints,long len);
extern "C" void CUDA_RTPC_SiglePRF(
float antPx, float antPy, float antPZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* demx, float* demy, float* demz, long* demcls,
float* demslopex, float* demslopey, float* demslopez, float* demslopeangle,
float* Tantpattern, float Tstarttheta, float Tstartphi, float Tdtheta, float Tdphi, int Tthetapoints, int Tphipoints,
float* Rantpattern, float Rstarttheta, float Rstartphi, float Rdtheta, float Rdphi, int Rthetapoints, int Rphipoints,
float lamda, float fs, float nearrange, float Pt, int Freqnumbers,
CUDASigmaParam* sigma0Paramslist, int sigmaparamslistlen,
cuComplex* outecho, int* d_echoAmpFID,
int linecount, int colcount
);
extern "C" void CUDARTPCPRF(float antPx, long len);
extern "C" void CUDATestHelloWorld(float a, long len);
extern "C" void CUDATBPImage(
float* antPx,
float* antPy,
float* antPz,
float* imgx,
float* imgy,
float* imgz,
cuComplex* echoArr,
cuComplex* imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount
);
extern "C" void calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
float* localangle, float* R, float* slopeangle,
float nearRange, float Fs, float pt, float lamda, long FreqIDmax,
cuComplex* echoAmp, long* FreqID,
long len);
extern "C" void CUDAInterpSigma(
long* demcls, float* sigmaAmp, float* localanglearr, long len,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen);
#endif
#endif
/**
*
double* databuffer = new double[nXSize * nYSize * 2];
poBand->RasterIO(GF_Read, start_col, start_row, cols_count, rows_count, databuffer, cols_count,
rows_count, GDT_CFloat64, 0, 0);
GDALClose((GDALDatasetH)poDataset);
Eigen::MatrixXcd rasterData(nYSize, nXSize); // 使用Eigen的MatrixXcd
for(size_t i = 0; i < nYSize; i++) {
for(size_t j = 0; j < nXSize; j++) {
rasterData(i, j) = std::complex<double>(databuffer[i * nXSize * 2 + j * 2],
databuffer[i * nXSize * 2 + j * 2 + 1]);
}
}
delete[] databuffer;
gdalImage demxyz(demxyzPath);// 地面点坐标
gdalImage demlandcls(this->LandCoverPath);// 地表覆盖类型
gdalImage demsloperxyz(this->demsloperPath);// 地面坡向
omp_lock_t lock; // 定义锁
omp_init_lock(&lock); // 初始化锁
long start_ids = 1250;
for (start_ids = 1; start_ids < demxyz.height; start_ids = start_ids + line_invert) { // 8+ 17 + 0.3 MB
QDateTime current = QDateTime::currentDateTime();
long pluseStep = Memory1MB * 100 / 3 / PlusePoint;
if (pluseStep * num_thread * 3 > this->PluseCount) {
pluseStep = this->PluseCount / num_thread / 3;
}
pluseStep = pluseStep > 50 ? pluseStep : 50;
qDebug() << current.toString("yyyy-MM-dd HH:mm:ss.zzz") << " \tstart \t " << start_ids << " - " << start_ids + line_invert << "\t" << demxyz.height << "\t pluseCount:\t" << pluseStep;
// 文件读取
Eigen::MatrixXd dem_x = demxyz.getData(start_ids - 1, 0, line_invert + 1, demxyz.width, 1); //
Eigen::MatrixXd dem_y = demxyz.getData(start_ids - 1, 0, line_invert + 1, demxyz.width, 2); //
Eigen::MatrixXd dem_z = demxyz.getData(start_ids - 1, 0, line_invert + 1, demxyz.width, 3); //
// 地表覆盖
std::shared_ptr<long> dem_landcls = readDataArr<long>(demlandcls, start_ids - 1, 0, line_invert + 1, demxyz.width, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); // 地表覆盖类型
long* dem_landcls_ptr = dem_landcls.get();
double localAngle = 30.0;
bool sigmaNoZeroFlag = true;
for (long ii = 0; ii < dem_x.rows(); ii++) {
for (long jj = 0; jj < dem_y.cols(); jj++) {
if (0 != this->SigmaDatabasePtr->getAmp(dem_landcls_ptr[dem_x.cols() * ii + jj], localAngle, polartype)) {
sigmaNoZeroFlag = false;
break;
}
}
if (!sigmaNoZeroFlag) {
break;
}
}
if (sigmaNoZeroFlag) {
continue;
}
//#ifdef DEBUGSHOWDIALOG
// dialog->load_double_MatrixX_data(dem_z, "dem_z");
//#endif
Eigen::MatrixXd demsloper_x = demsloperxyz.getData(start_ids - 1, 0, line_invert + 1, demxyz.width, 1); //
Eigen::MatrixXd demsloper_y = demsloperxyz.getData(start_ids - 1, 0, line_invert + 1, demxyz.width, 2); //
Eigen::MatrixXd demsloper_z = demsloperxyz.getData(start_ids - 1, 0, line_invert + 1, demxyz.width, 3); //
Eigen::MatrixXd sloperAngle = demsloperxyz.getData(start_ids - 1, 0, line_invert + 1, demxyz.width, 4); //
sloperAngle = sloperAngle.array() * T180_PI;
long dem_rows = dem_x.rows();
long dem_cols = dem_x.cols();
long freqidx = 0;//
#ifdef DEBUGSHOWDIALOG
ImageShowDialogClass* dialog = new ImageShowDialogClass(nullptr);
dialog->show();
Eigen::MatrixXd landaArr = Eigen::MatrixXd::Zero(dem_rows, dem_cols);
for (long i = 0; i < dem_rows; i++) {
for (long j = 0; j < dem_cols; j++) {
landaArr(i, j) = dem_landcls.get()[i * dem_cols + j];
}
}
dialog->load_double_MatrixX_data(landaArr, "landCover");
#endif
//qDebug() << " pluse bolck size :\t " << pluseStep << " all size:\t" << this->PluseCount;
long processNumber = 0;
#pragma omp parallel for
for (long startprfidx = 0; startprfidx < this->PluseCount; startprfidx = startprfidx + pluseStep) { // 17 + 0.3 MB
long prfcount_step = startprfidx + pluseStep < this->PluseCount ? pluseStep : this->PluseCount - startprfidx;
Eigen::MatrixXcd echoPluse = Eigen::MatrixXcd::Zero(prfcount_step, PlusePoint); // 当前脉冲的回波积分情况
// 内存预分配
Eigen::MatrixXd Rst_x = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd Rst_y = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd Rst_z = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd R = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd localangle = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd Vst_x = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd Vst_y = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd Vst_z = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd fde = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd fr = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd Rx = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd sigam = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd echoAmp = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols()).array() + Pt;
Eigen::MatrixXd Rphi = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd TimeRange = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd TransAnt = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd ReciveAnt = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd AntTheta = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
Eigen::MatrixXd AntPhi = Eigen::MatrixXd::Zero(dem_x.rows(), dem_x.cols());
double minR = 0, maxR = 0;
double minLocalAngle = 0, maxLocalAngle = 0;
Vector3D Rt = { 0,0,0 };
SatelliteOribtNode oRs = SatelliteOribtNode{ 0 };;
Vector3D p0 = {}, slopeVector = {}, sateAntDirect = {};
Vector3D Rs = {}, Vs = {}, Ast = {};
SatelliteAntDirect antdirectNode = {};
std::complex<double> echofreq;
std::complex<double> Imag1(0, 1);
double TAntPattern = 1; // 发射天线方向图
double RAntPanttern = 1;// 接收天线方向图
double maxechoAmp = 1;
double tempAmp = 1;
for (long prfidx = 0; prfidx < prfcount_step; prfidx++)
{
oRs = sateOirbtNodes[prfidx + startprfidx];
// 计算天线方向图
for (long jj = 1; jj < dem_cols - 1; jj++) {
for (long ii = 1; ii < dem_rows - 1; ii++) {
p0.x = dem_x(ii, jj);
p0.y = dem_y(ii, jj);
p0.z = dem_z(ii, jj);
this->TaskSetting->getSatelliteAntDirectNormal(oRs, p0, antdirectNode);
//antdirectNode.ThetaAnt = antdirectNode.ThetaAnt * r2d;
//antdirectNode.PhiAnt = antdirectNode.PhiAnt * r2d;
AntTheta(ii, jj) = antdirectNode.ThetaAnt * r2d;
AntPhi(ii, jj) = antdirectNode.PhiAnt * r2d;
}
}
// 计算发射天线方向图
for (long jj = 1; jj < dem_cols - 1; jj++) {
for (long ii = 1; ii < dem_rows - 1; ii++) {
TransformPattern->getGainLinear(AntTheta(ii, jj), AntPhi(ii, jj), TransAnt(ii, jj));
//TransAnt(ii, jj) = TAntPattern;
}
}
// 计算接收天线方向图
for (long jj = 1; jj < dem_cols - 1; jj++) {
for (long ii = 1; ii < dem_rows - 1; ii++) {
TransformPattern->getGainLinear(AntTheta(ii, jj), AntPhi(ii, jj), ReciveAnt(ii, jj));
//ReciveAnt(ii, jj) = RAntPanttern;
}
}
// 计算经过增益的能量
echoAmp = Pt * TransAnt.array() * ReciveAnt.array();
maxechoAmp = echoAmp.maxCoeff();
if (std::abs(maxechoAmp) < PRECISIONTOLERANCE) { // 这种情况下,不在合成孔径范围中
continue;
}
Rs.x = sateOirbtNodes[prfidx + startprfidx].Px; // 卫星位置
Rs.y = sateOirbtNodes[prfidx + startprfidx].Py;
Rs.z = sateOirbtNodes[prfidx + startprfidx].Pz;
Vs.x = sateOirbtNodes[prfidx + startprfidx].Vx; // 卫星速度
Vs.y = sateOirbtNodes[prfidx + startprfidx].Vy;
Vs.z = sateOirbtNodes[prfidx + startprfidx].Vz;
Ast.x = sateOirbtNodes[prfidx + startprfidx].AVx;// 卫星加速度
Ast.y = sateOirbtNodes[prfidx + startprfidx].AVy;
Ast.z = sateOirbtNodes[prfidx + startprfidx].AVz;
Rst_x = Rs.x - dem_x.array(); // Rst = Rs - Rt;
Rst_y = Rs.y - dem_y.array();
Rst_z = Rs.z - dem_z.array();
R = (Rst_x.array().pow(2) + Rst_y.array().pow(2) + Rst_z.array().pow(2)).array().sqrt(); // R
minR = R.minCoeff();
maxR = R.maxCoeff();
//qDebug() << "minR:\t" << minR << " maxR:\t" << maxR;
if (maxR<NearRange || minR>FarRange) {
continue;
}
else {}
// getCosAngle
// double c = dot(a, b) / (getlength(a) * getlength(b));
// return acos(c > 1 ? 1 : c < -1 ? -1 : c) * r2d;
// localangle = getCosAngle(Rst, slopeVector) * T180_PI; // 注意这个只能实时计算,因为非实时计算代价太大
localangle = (Rst_x.array() * demsloper_x.array() + Rst_y.array() * demsloper_y.array() + Rst_z.array() * demsloper_z.array()).array(); // dot(a, b)
localangle = localangle.array() / R.array();
localangle = localangle.array() / (demsloper_x.array().pow(2) + demsloper_y.array().pow(2) + demsloper_z.array().pow(2)).array().sqrt().array();
localangle = localangle.array().acos(); // 弧度值
minLocalAngle = localangle.minCoeff();
maxLocalAngle = localangle.maxCoeff();
if (maxLocalAngle<0 || minLocalAngle>PI / 2) {
continue;
}
else {}
//Vst_x = Vs.x + 1 * earthRoute * dem_y.array(); // Vst = Vs - Vt;
//Vst_y = Vs.y - 1 * earthRoute * dem_x.array();
//Vst_z = Vs.z - Eigen::MatrixXd::Zero(dem_x.rows(), dem_y.cols()).array();
//// 计算多普勒中心频率 Rst, Vst : ( - 2 / lamda) * dot(Rs - Rt, Vs - Vt) / R; // 星载合成孔径雷达原始回波数据模拟研究 3.18
//fde = (-2 / lamda) * (Rst_x.array() * Vst_x.array() + Rst_y.array() * Vst_y.array() + Rst_z.array() * Vst_z.array()).array() / (R.array());
//// 计算多普勒频率斜率 // 星载合成孔径雷达原始回波数据模拟研究 3.19
//// -(2/lamda)*( dot(Vs - Vt, Vs - Vt)/R + dot(Ast, Rs - Rt)/R - std::pow(dot(Vs - Vt, Rs - Rt),2 )/std::pow(R,3));
//fr = (-2 / lamda) *
// (Vst_x.array() * Vst_x.array() + Vst_y.array() * Vst_y.array() + Vst_z.array() * Vst_z.array()).array() / (R.array()) +
// (-2 / lamda) *
// (Ast.x * Rst_x.array() + Ast.y * Rst_y.array() + Ast.z * Rst_z.array()).array() / (R.array()) -
// (-2 / lamda) *
// (Vst_x.array() * Rst_x.array() + Vst_y.array() * Rst_y.array() + Vst_z.array() * Rst_z.array()).array().pow(2) / (R.array().pow(3));
// 计算回波
Rx = R;// -(lamda / 2) * (fde * TRx + 0.5 * fr * TRx * TRx); // 斜距历程值
// 逐点计算 this->SigmaDatabasePtr->getAmp(covercls, localangle, polartype); // 后向散射系数 HH
for (long ii = 0; ii < dem_x.rows(); ii++) {
for (long jj = 0; jj < dem_y.cols(); jj++) {
sigam(ii, jj) = this->SigmaDatabasePtr->getAmp(dem_landcls_ptr[dem_x.cols() * ii + jj], localangle(ii, jj) * r2d, polartype);
}
}
if (sigam.maxCoeff() > 0) {}
else {
continue;
}
// projArea = 1 / std::cos(sloperAngle) * std::sin(localangle); // 投影面积系数,单位投影面积 1m x 1m --注意这里是假设,后期再补充
// echoAmp = projArea*TAntPattern * RAntPanttern * sigam / (4 * PI * R * R);
echoAmp = echoAmp.array() * sigam.array() * (1 / sloperAngle.array().cos() * localangle.array().sin()); // 反射强度
echoAmp = echoAmp.array() / (4 * PI * R.array().pow(2)); // 距离衰减
Rphi = -4 * PI / lamda * Rx.array();// 距离徙动相位
// 积分
TimeRange = ((2 * R.array() / LIGHTSPEED - TimgNearRange).array() * Fs).array();
double localAnglepoint = -1;
long prf_freq_id = 0;
for (long jj = 1; jj < dem_cols - 1; jj++) {
for (long ii = 1; ii < dem_rows - 1; ii++) {
prf_freq_id = std::floor(TimeRange(ii, jj));
if (prf_freq_id < 0 || prf_freq_id >= PlusePoint || localangle(ii, jj) < 0 || localangle(ii, jj) > PI / 2 || echoAmp(ii, jj) == 0) {
continue;
}
echofreq = echoAmp(ii, jj) * std::exp(Rphi(ii, jj) * Imag1);
echoPluse(prfidx, prf_freq_id) = echoPluse(prfidx, prf_freq_id) + echofreq;
}
}
#ifdef DEBUGSHOWDIALOG
ImageShowDialogClass* localangledialog = new ImageShowDialogClass(dialog);
localangledialog->show();
localangledialog->load_double_MatrixX_data(localangle.array() * r2d, "localangle");
ImageShowDialogClass* sigamdialog = new ImageShowDialogClass(dialog);
sigamdialog->show();
sigamdialog->load_double_MatrixX_data(TimeRange, "TimeRange");
ImageShowDialogClass* ampdialog = new ImageShowDialogClass(dialog);
ampdialog->show();
ampdialog->load_double_MatrixX_data(echoAmp, "echoAmp");
Eigen::MatrixXd echoPluseamp = echoPluse.array().abs().cast<double>().array();
ImageShowDialogClass* echoampdialog = new ImageShowDialogClass(dialog);
echoampdialog->show();
echoampdialog->load_double_MatrixX_data(echoPluseamp, "echoPluseamp");
dialog->exec();
#endif
//qDebug() << QDateTime::currentDateTime().toString("yyyy-MM-dd HH:mm:ss.zzz") << " end " << prfidx;
}
//qDebug() << QDateTime::currentDateTime().toString("yyyy-MM-dd HH:mm:ss.zzz")<<" step "<< prfcount_step;
omp_set_lock(&lock); // 回波整体赋值处理
for (long prfidx = 0; prfidx < prfcount_step; prfidx++) {
for (long freqidx = 0; freqidx < PlusePoint; freqidx++)
{
//qDebug() << prfidx << " " << freqidx << " " << echoPluse(prfidx, freqidx).real() << " + " << echoPluse(prfidx, freqidx).imag() << " j";
echo.get()[(prfidx + startprfidx) * PlusePoint + freqidx] = echo.get()[(prfidx + startprfidx) * PlusePoint + freqidx] + echoPluse(prfidx, freqidx);
}
}
//this->EchoSimulationData->saveEchoArr(echo, 0, PluseCount);
omp_unset_lock(&lock); // 解锁
//qDebug() << QDateTime::currentDateTime().toString("yyyy-MM-dd HH:mm:ss.zzz") << " step 2" << prfcount_step;
}
omp_set_lock(&lock); // 保存文件
processNumber = processNumber + pluseStep;
this->EchoSimulationData->saveEchoArr(echo, 0, PluseCount);
omp_unset_lock(&lock); // 解锁
qDebug() << QDateTime::currentDateTime().toString("yyyy-MM-dd HH:mm:ss.zzz") << " \t " << start_ids << "\t--\t " << start_ids + line_invert << "\t/\t" << demxyz.height;
}
omp_destroy_lock(&lock); // 销毁锁
*/

464
GPUTool/GPURTPC.cu Normal file
View File

@ -0,0 +1,464 @@
#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPURTPC.cuh"
#ifdef __CUDANVCC___
__device__ float GPU_getSigma0dB(CUDASigmaParam param, float theta) {//线性值
float sigma= param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
return sigma;
}
__device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(float RstX, float RstY, float RstZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ
) {
CUDAVectorEllipsoidal result{ 0,0,-1 };
float Xst = -1 * RstX; // 卫星 --> 地面
float Yst = -1 * RstY;
float Zst = -1 * RstZ;
float AntXaxisX = antXaxisX;
float AntXaxisY = antXaxisY;
float AntXaxisZ = antXaxisZ;
float AntYaxisX = antYaxisX;
float AntYaxisY = antYaxisY;
float AntYaxisZ = antYaxisZ;
float AntZaxisX = antZaxisX;
float AntZaxisY = antZaxisY;
float AntZaxisZ = antZaxisZ;
// 天线指向在天线坐标系下的值
float Xant = (Xst * (AntYaxisY * AntZaxisZ - AntYaxisZ * AntZaxisY) + Xst * (AntXaxisZ * AntZaxisY - AntXaxisY * AntZaxisZ) + Xst * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY)) / (AntXaxisX * (AntYaxisY * AntZaxisZ - AntZaxisY * AntYaxisZ) - AntYaxisX * (AntXaxisY * AntZaxisZ - AntXaxisZ * AntZaxisY) + AntZaxisX * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY));
float Yant = (Yst * (AntYaxisZ * AntZaxisX - AntYaxisX * AntZaxisZ) + Yst * (AntXaxisX * AntZaxisZ - AntXaxisZ * AntZaxisX) + Yst * (AntYaxisX * AntXaxisZ - AntXaxisX * AntYaxisZ)) / (AntXaxisX * (AntYaxisY * AntZaxisZ - AntZaxisY * AntYaxisZ) - AntYaxisX * (AntXaxisY * AntZaxisZ - AntXaxisZ * AntZaxisY) + AntZaxisX * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY));
float Zant = (Zst * (AntYaxisX * AntZaxisY - AntYaxisY * AntZaxisX) + Zst * (AntXaxisY * AntZaxisX - AntXaxisX * AntZaxisY) + Zst * (AntXaxisX * AntYaxisY - AntYaxisX * AntXaxisY)) / (AntXaxisX * (AntYaxisY * AntZaxisZ - AntZaxisY * AntYaxisZ) - AntYaxisX * (AntXaxisY * AntZaxisZ - AntXaxisZ * AntZaxisY) + AntZaxisX * (AntXaxisY * AntYaxisZ - AntXaxisZ * AntYaxisY));
// 计算theta 与 phi
float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
float ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
float YsinTheta = Yant / sinf(ThetaAnt);
float PhiAnt = (YsinTheta / abs(YsinTheta)) * acosf(Xant / (Norm * sinf(ThetaAnt)));
result.theta = ThetaAnt;
result.phi = PhiAnt;
result.pho = Norm;
return result;
}
__device__ float GPU_BillerInterpAntPattern(float* antpattern,
float starttheta, float startphi, float dtheta, float dphi,
long thetapoints, long phipoints,
float searththeta, float searchphi) {
float stheta = searththeta;
float sphi = searchphi;
if (stheta > 90) {
return 0;
}
else {}
float pthetaid = (stheta - starttheta) / dtheta;//
float pphiid = (sphi - startphi) / dphi;
long lasttheta = floorf(pthetaid);
long nextTheta = lasttheta + 1;
long lastphi = floorf(pphiid);
long nextPhi = lastphi + 1;
if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
{
return 0;
}
else {
float x = stheta;
float y = sphi;
float x1 = lasttheta * dtheta + starttheta;
float x2 = nextTheta * dtheta + starttheta;
float y1 = lastphi * dphi + startphi;
float y2 = nextPhi * dphi + startphi;
float z11 = antpattern[lasttheta * phipoints + lastphi];
float z12 = antpattern[lasttheta * phipoints + nextPhi];
float z21 = antpattern[nextTheta * phipoints + lastphi];
float z22 = antpattern[nextTheta * phipoints + nextPhi];
//z11 = powf(10, z11 / 10); // dB-> 线性
//z12 = powf(10, z12 / 10);
//z21 = powf(10, z21 / 10);
//z22 = powf(10, z22 / 10);
float GainValue = (z11 * (x2 - x) * (y2 - y)
+ z21 * (x - x1) * (y2 - y)
+ z12 * (x2 - x) * (y - y1)
+ z22 * (x - x1) * (y - y1));
GainValue = GainValue / ((x2 - x1) * (y2 - y1));
return GainValue;
}
}
__device__ cuComplex GPU_calculationEcho(float sigma0, float TransAnt, float ReciveAnt,
float localangle, float R, float slopeangle, float Pt, float lamda) {
float r = R;
float amp = Pt * TransAnt * ReciveAnt;
amp = amp * sigma0;
amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4)); // 反射强度
float phi = (-4 * LAMP_CUDA_PI / lamda) * r;
cuComplex echophi = make_cuComplex(0, phi);
cuComplex echophiexp = cuCexpf(echophi);
cuComplex echo;
echo.x = echophiexp.x * amp;
echo.y = echophiexp.y * amp;
return echo;
}
__global__ void CUDA_SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* thetaAnt, float* phiAnt
, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float Xst = -1 * RstX[idx]; // 卫星 --> 地面
float Yst = -1 * RstY[idx];
float Zst = -1 * RstZ[idx];
float AntXaxisX = antXaxisX;
float AntXaxisY = antXaxisY;
float AntXaxisZ = antXaxisZ;
float AntYaxisX = antYaxisX;
float AntYaxisY = antYaxisY;
float AntYaxisZ = antYaxisZ;
float AntZaxisX = antZaxisX;
float AntZaxisY = antZaxisY;
float AntZaxisZ = antZaxisZ;
// 归一化
float RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
float AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
float AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
float AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);
float Rx = Xst / RstNorm;
float Ry = Yst / RstNorm;
float Rz = Zst / RstNorm;
float Xx = AntXaxisX / AntXaxisNorm;
float Xy = AntXaxisY / AntXaxisNorm;
float Xz = AntXaxisZ / AntXaxisNorm;
float Yx = AntYaxisX / AntYaxisNorm;
float Yy = AntYaxisY / AntYaxisNorm;
float Yz = AntYaxisZ / AntYaxisNorm;
float Zx = AntZaxisX / AntZaxisNorm;
float Zy = AntZaxisY / AntZaxisNorm;
float Zz = AntZaxisZ / AntZaxisNorm;
float Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
float Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
float Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
// 计算theta 与 phi
float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
float ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
float PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2
if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
PhiAnt = 0;
}
else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上原点
if (Yant > 0) {
PhiAnt = PI / 2;
}
else {
PhiAnt = -PI / 2;
}
}
else if (Xant < 0) {
if (Yant > 0) {
PhiAnt = PI + PhiAnt;
}
else {
PhiAnt = -PI+PhiAnt ;
}
}
else { // Xant>0 X 正轴
}
if (isnan(PhiAnt)) {
printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant,Norm, ThetaAnt, PhiAnt);
}
//if (abs(ThetaAnt - 0) < PRECISIONTOLERANCE) {
// PhiAnt = 0;
//}
//else {}
thetaAnt[idx] = ThetaAnt*r2d;
phiAnt[idx] = PhiAnt*r2d;
//printf("Rst=[%f,%f,%f];AntXaxis = [%f, %f, %f];AntYaxis=[%f,%f,%f];AntZaxis=[%f,%f,%f];phiAnt=%f;thetaAnt=%f;\n", Xst, Yst, Zst
// , AntXaxisX, AntXaxisY, AntXaxisZ
// , AntYaxisX, AntYaxisY, AntYaxisZ
// , AntZaxisX, AntZaxisY, AntZaxisZ
// , phiAnt[idx]
// , thetaAnt[idx]
//);
}
}
__global__ void CUDA_BillerInterpAntPattern(float* antpattern,
float starttheta, float startphi, float dtheta, float dphi,
long thetapoints, long phipoints,
float* searththeta, float* searchphi, float* searchantpattern,
long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float stheta = searththeta[idx];
float sphi = searchphi[idx];
float pthetaid = (stheta - starttheta) / dtheta;//
float pphiid = (sphi - startphi) / dphi;
long lasttheta = floorf(pthetaid);
long nextTheta = lasttheta + 1;
long lastphi = floorf(pphiid);
long nextPhi = lastphi + 1;
if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
{
searchantpattern[idx] = 0;
}
else {
float x = stheta;
float y = sphi;
float x1 = lasttheta * dtheta + starttheta;
float x2 = nextTheta * dtheta + starttheta;
float y1 = lastphi * dphi + startphi;
float y2 = nextPhi * dphi + startphi;
float z11 = antpattern[lasttheta * phipoints + lastphi];
float z12 = antpattern[lasttheta * phipoints + nextPhi];
float z21 = antpattern[nextTheta * phipoints + lastphi];
float z22 = antpattern[nextTheta * phipoints + nextPhi];
z11 = powf(10, z11 / 10);
z12 = powf(10, z12 / 10);
z21 = powf(10, z21 / 10);
z22 = powf(10, z22 / 10);
float GainValue = (z11 * (x2 - x) * (y2 - y)
+ z21 * (x - x1) * (y2 - y)
+ z12 * (x2 - x) * (y - y1)
+ z22 * (x - x1) * (y - y1));
GainValue = GainValue / ((x2 - x1) * (y2 - y1));
searchantpattern[idx] = GainValue;
}
}
}
__global__ void CUDA_calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
float* localangle, float* R, float* slopeangle,
float nearRange, float Fs, float Pt, float lamda, long FreqIDmax,
cuComplex* echoArr, long* FreqID,
long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float r = R[idx];
float amp = Pt * TransAnt[idx] * ReciveAnt[idx];
amp = amp * sigma0[idx];
amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4)); // 反射强度
// 处理相位
float phi = (-4 * LAMP_CUDA_PI / lamda) * r;
cuComplex echophi = make_cuComplex(0, phi);
cuComplex echophiexp = cuCexpf(echophi);
float timeR = 2 * (r - nearRange) / LIGHTSPEED * Fs;
long timeID = floorf(timeR);
if (timeID < 0 || timeID >= FreqIDmax) {
timeID = 0;
amp = 0;
}
cuComplex echo;
echo.x = echophiexp.x * amp;
echo.y = echophiexp.y * amp;
echoArr[idx] = echo;
FreqID[idx] = timeID;
}
}
__global__ void CUDA_AntPatternInterpGain(float* anttheta, float* antphi, float* gain,
float* antpattern, float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints, long len) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float temptheta = anttheta[idx];
float tempphi = antphi[idx];
float antPatternGain = GPU_BillerInterpAntPattern(antpattern,
starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
temptheta, tempphi) ;
gain[idx] = antPatternGain;
}
}
__global__ void CUDA_InterpSigma(
long* demcls, float* sigmaAmp, float* localanglearr, long len,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
long clsid = demcls[idx];
float localangle = localanglearr[idx];
CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
//printf("cls:%d;localangle=%f;\n",clsid, localangle);
if (localangle < 0 || localangle >= 90) {
sigmaAmp[idx] = 0;
}
else {}
if (abs(tempsigma.p1)< PRECISIONTOLERANCE&&
abs(tempsigma.p2) < PRECISIONTOLERANCE &&
abs(tempsigma.p3) < PRECISIONTOLERANCE &&
abs(tempsigma.p4) < PRECISIONTOLERANCE&&
abs(tempsigma.p5) < PRECISIONTOLERANCE&&
abs(tempsigma.p6) < PRECISIONTOLERANCE
) {
sigmaAmp[idx] = 0;
}
else {
float sigma = GPU_getSigma0dB(tempsigma, localangle);
sigma = powf(10.0, sigma / 10.0);// 后向散射系数
//printf("cls:%d;localangle=%f;sigma0=%f;\n", clsid, localangle, sigma);
sigmaAmp[idx] = sigma;
}
}
}
extern "C" void SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* thetaAnt, float* phiAnt
, long len) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_SatelliteAntDirectNormal << <numBlocks, blockSize >> > (RstX, RstY, RstZ,
antXaxisX, antXaxisY, antXaxisZ,
antYaxisX, antYaxisY, antYaxisZ,
antZaxisX, antZaxisY, antZaxisZ,
antDirectX, antDirectY, antDirectZ,
thetaAnt, phiAnt
, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void AntPatternInterpGain(float* anttheta, float* antphi, float* gain,
float* antpattern, float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints, long len) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
//printf("\nCUDA_RTPC_SiglePRF blockSize:%d ,numBlock:%d\n", blockSize, numBlocks);
CUDA_AntPatternInterpGain << <numBlocks, blockSize >> > ( anttheta,antphi, gain,
antpattern,
starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
float* localangle, float* R, float* slopeangle,
float nearRange, float Fs, float pt, float lamda, long FreqIDmax,
cuComplex* echoAmp, long* FreqID,
long len)
{
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_calculationEcho << <numBlocks, blockSize >> > (sigma0, TransAnt, ReciveAnt,
localangle, R, slopeangle,
nearRange, Fs, pt, lamda, FreqIDmax,
echoAmp, FreqID,
len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDAInterpSigma(
long* demcls,float* sigmaAmp, float* localanglearr,long len,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {// 地表覆盖类型-sigma插值对应函数-ulaby
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_InterpSigma << <numBlocks, blockSize >> > (
demcls, sigmaAmp, localanglearr, len,
sigma0Paramslist, sigmaparamslistlen
);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
#endif

47
GPUTool/GPURTPC.cuh Normal file
View File

@ -0,0 +1,47 @@
#ifndef _GPURTPC_H_
#define _GPURTPC_H_
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cublas_v2.h>
#include <cuComplex.h>
extern "C" struct CUDASigmaParam {
float p1;
float p2;
float p3;
float p4;
float p5;
float p6;
};
extern "C" void SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ,
float antXaxisX, float antXaxisY, float antXaxisZ,
float antYaxisX, float antYaxisY, float antYaxisZ,
float antZaxisX, float antZaxisY, float antZaxisZ,
float antDirectX, float antDirectY, float antDirectZ,
float* thetaAnt, float* phiAnt, long len);
extern "C" void AntPatternInterpGain(float* anttheta, float* antphi, float* gain,
float* antpattern,
float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints,
long len);
extern "C" void calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt,
float* localangle, float* R, float* slopeangle,
float nearRange, float Fs, float pt, float lamda, long FreqIDmax,
cuComplex* echoAmp, long* FreqID,
long len);
extern "C" void CUDAInterpSigma(
long* demcls, float* sigmaAmp, float* localanglearr, long len,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen);
#endif

80
GPUTool/GPUTBPImage.cu Normal file
View File

@ -0,0 +1,80 @@
#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#include "GPUTBPImage.cuh"
#ifdef __CUDANVCC___
__global__ void CUDA_TBPImage(
float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz,
cuComplex* echoArr, cuComplex* imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount
) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
//printf("\nidx:\t %d %d %d\n", idx, linecount, plusepoint);
if (idx < rowcount * colcount) {
float R = sqrtf(powf(antPx[prfid] - imgx[idx], 2) + powf(antPy[prfid] - imgy[idx], 2) + powf(antPz[prfid] - imgz[idx], 2));
float Ridf = ((R - Rnear) * 2 / LIGHTSPEED) * fs;
long Rid = floorf(Ridf);
if(Rid <0|| Rid >= freqcount){}
else {
float factorj = freq * 4 * PI / LIGHTSPEED;
cuComplex Rphi =cuCexpf(make_cuComplex(0, factorj * R));// 校正项
imgArr[idx] = cuCaddf(imgArr[idx], cuCmulf(echoArr[Rid] , Rphi));// 矫正
}
}
}
extern "C" void CUDATBPImage(float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz,
cuComplex* echoArr, cuComplex* imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount)
{
int blockSize = 256; // 每个块的线程数
int numBlocks = (rowcount * colcount + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
//printf("\nCUDA_RTPC_SiglePRF blockSize:%d ,numBlock:%d\n",blockSize,numBlocks);
// 调用 CUDA 核函数 CUDA_RTPC_Kernel
CUDA_TBPImage << <numBlocks, blockSize >> > (
antPx, antPy, antPz,
imgx, imgy, imgz,
echoArr, imgArr,
freq, fs, Rnear, Rfar,
rowcount, colcount,
prfid, freqcount
);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDATBPImage CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
#endif

37
GPUTool/GPUTBPImage.cuh Normal file
View File

@ -0,0 +1,37 @@
#ifndef _GPUTBPIMAGE_H_
#define _GPUTBPIMAGE_H_
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "GPUTool.cuh"
extern __global__ void CUDA_TBPImage(
float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz,
cuComplex* echoArr, cuComplex* imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount
);
extern "C" void CUDATBPImage(
float* antPx,
float* antPy,
float* antPz,
float* imgx,
float* imgy,
float* imgz,
cuComplex* echoArr,
cuComplex* imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount
);
#endif

274
GPUTool/GPUTool.cu Normal file
View File

@ -0,0 +1,274 @@
#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#ifdef __CUDANVCC___
// 定义参数
__device__ cuComplex cuCexpf(cuComplex x)
{
float factor = exp(x.x);
return make_cuComplex(factor * cos(x.y), factor * sin(x.y));
}
__device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B) {
CUDAVector C;
C.x = B.x - A.x;
C.y = B.y - A.y;
C.z = B.z - A.z;
return C;
}
__device__ float GPU_VectorNorm2(CUDAVector A) {
return sqrtf(A.x * A.x + A.y * A.y + A.z * A.z);
}
__device__ float GPU_dotVector(CUDAVector A, CUDAVector B) {
return A.x * B.x + A.y * B.y + A.z * B.z;
}
__device__ float GPU_CosAngle_VectorA_VectorB(CUDAVector A, CUDAVector B) {
return GPU_dotVector(A, B) / (GPU_VectorNorm2(A) * GPU_VectorNorm2(B));
}
__global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Ax[idx] - Bx[idx], 2) + powf(Ay[idx] - By[idx], 2) + powf(Az[idx] - Bz[idx], 2));
}
}
__global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Ax[idx] - Bx, 2) + powf(Ay[idx] - By, 2) + powf(Az[idx] - Bz, 2));
}
}
__global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
RstX[idx] = sX - tX[idx]; // 地面->天
RstY[idx] = sY - tY[idx];
RstZ[idx] = sZ - tZ[idx];
}
}
__global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
R[idx] = sqrtf(powf(Vx[idx], 2) + powf(Vy[idx], 2) + powf(Vz[idx], 2));
}
}
__global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
long idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < len) {
float tAx = Ax[idx];
float tAy = Ay[idx];
float tAz = Az[idx];
float tBx = Bx[idx];
float tBy = By[idx];
float tBz = Bz[idx];
float AR = sqrtf(powf(tAx, 2) + powf(tAy, 2) + powf(tAz, 2));
float BR = sqrtf(powf(tBx, 2) + powf(tBy, 2) + powf(tBz, 2));
float dotAB = tAx * tBx + tAy * tBy + tAz * tBz;
float result = acosf(dotAB / (AR * BR));
anglecos[idx] = result;
}
}
//错误提示
extern "C" void checkCudaError(cudaError_t err, const char* msg) {
if (err != cudaSuccess) {
std::cerr << "CUDA error: " << msg << " (" << cudaGetErrorString(err) << ")" << std::endl;
exit(EXIT_FAILURE);
}
}
// 主机参数内存声明
extern "C" void* mallocCUDAHost(long memsize) {
void* ptr;
cudaMallocHost(&ptr, memsize);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("mallocCUDAHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
return ptr;
}
// 主机参数内存释放
extern "C" void FreeCUDAHost(void* ptr) {
cudaFreeHost(ptr);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("FreeCUDAHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// GPU参数内存声明
extern "C" void* mallocCUDADevice(long memsize) {
void* ptr;
cudaMalloc(&ptr, memsize);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("mallocCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
return ptr;
}
// GPU参数内存释放
extern "C" void FreeCUDADevice(void* ptr) {
cudaFree(ptr);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("FreeCUDADevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// GPU 内存数据转移
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize) {
cudaMemcpy(deviceptr, hostptr, memsize, cudaMemcpyHostToDevice);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("HostToDevice CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize) {
cudaMemcpy(hostptr, deviceptr, memsize, cudaMemcpyDeviceToHost);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("DeviceToHost CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// 基础运算函数
extern "C" void CUDAdistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_DistanceAB << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDABdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_B_DistanceA << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDAmake_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_make_VectorA_B << <numBlocks, blockSize >> > (sX, sY, sZ, tX, tY, tZ, RstX, RstY, RstZ, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDANorm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len) {
// 设置 CUDA 核函数的网格和块的尺寸
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_Norm_Vector << <numBlocks, blockSize >> > (Vx, Vy, Vz, R, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
extern "C" void CUDAcosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len) {
int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDA_cosAngle_VA_AB << <numBlocks, blockSize >> > (Ax, Ay, Az, Bx, By, Bz, anglecos, len);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA_RTPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
#endif

74
GPUTool/GPUTool.cuh Normal file
View File

@ -0,0 +1,74 @@
#ifndef _GPUTOOL_H_
#define _GPUTOOL_H_
#ifdef __CUDANVCC___
#include "BaseConstVariable.h"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#define __CUDADEBUG__
#define CUDAMEMORY Memory1MB*100
#define LAMP_CUDA_PI 3.141592653589793238462643383279
// 默认显存分布
enum LAMPGPUDATETYPE {
LAMP_LONG,
LAMP_FLOAT,
LAMP_COMPLEXFLOAT
};
extern "C" struct CUDAVector {
float x;
float y;
float z;
};
extern "C" struct CUDAVectorEllipsoidal {
float theta;
float phi;
float pho;
};
// 定义设备函数
extern __device__ cuComplex cuCexpf(cuComplex x);
extern __device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B);
extern __device__ float GPU_VectorNorm2(CUDAVector A);
extern __device__ float GPU_dotVector(CUDAVector A, CUDAVector B);
extern __device__ float GPU_CosAngle_VectorA_VectorB(CUDAVector A, CUDAVector B);
// 定义全局函数
extern __global__ void CUDA_DistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long len);
extern __global__ void CUDA_B_DistanceA(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long len);
extern __global__ void CUDA_make_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len);
extern __global__ void CUDA_Norm_Vector(float* Vx, float* Vy, float* Vz, float* R, long len);
extern __global__ void CUDA_cosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len);
// 误差处理函数
extern "C" void checkCudaError(cudaError_t err, const char* msg);
// GPU 内存函数
extern "C" void* mallocCUDAHost( long memsize); // 主机内存声明
extern "C" void FreeCUDAHost(void* ptr);
extern "C" void* mallocCUDADevice( long memsize); // GPU内存声明
extern "C" void FreeCUDADevice(void* ptr);
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize);//GPU 内存数据转移 设备 -> GPU
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize);//GPU 内存数据转移 GPU -> 设备
// 基础运算函数
extern "C" void CUDAdistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long member);
extern "C" void CUDABdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long member);
extern "C" void CUDAmake_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long member);
extern "C" void CUDANorm_Vector(float* Vx, float* Vy, float* Vz, float* R, long member);
extern "C" void CUDAcosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* anglecos, long len);
#endif
#endif

View File

@ -68,7 +68,7 @@
<IncludePath>.\SimulationSAR;.\GF3ProcessToolbox;.\BaseTool;$(IncludePath)</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<IncludePath>.\SimulationSAR;.\GF3ProcessToolbox;.\BaseTool;$(oneMKLIncludeDir);$(IncludePath)</IncludePath>
<IncludePath>.\GPUTool;.\SimulationSAR;.\GF3ProcessToolbox;.\BaseTool;$(oneMKLIncludeDir);$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
@ -184,13 +184,15 @@
<ClInclude Include="BaseTool\EchoDataFormat.h" />
<ClInclude Include="BaseTool\FileOperator.h" />
<ClInclude Include="BaseTool\GeoOperator.h" />
<ClInclude Include="BaseTool\GPUTool.cuh" />
<ClInclude Include="BaseTool\ImageOperatorBase.h" />
<ClInclude Include="BaseTool\LogInfoCls.h" />
<QtMoc Include="BaseTool\QToolProcessBarDialog.h" />
<ClInclude Include="BaseTool\RasterToolBase.h" />
<ClInclude Include="BaseTool\SARSimulationImageL1.h" />
<ClInclude Include="BaseTool\stdafx.h" />
<ClInclude Include="GPUTool\GPURTPC.cuh" />
<ClInclude Include="GPUTool\GPUTBPImage.cuh" />
<ClInclude Include="GPUTool\GPUTool.cuh" />
<ClInclude Include="SimulationSAR\TBPImageAlgCls.h" />
<QtMoc Include="QSimulationRTPCGUI.h" />
<QtMoc Include="GF3ProcessToolbox\QOrthSlrRaster.h" />
@ -213,10 +215,18 @@
<QtMoc Include="QMergeRasterProcessDialog.h" />
</ItemGroup>
<ItemGroup>
<CudaCompile Include="BaseTool\GPUTool.cu" />
<None Include="cpp.hint" />
</ItemGroup>
<ItemGroup>
<None Include="cpp.hint" />
<CudaCompile Include="GPUTool\GPURTPC.cu">
<GenerateRelocatableDeviceCode Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</GenerateRelocatableDeviceCode>
</CudaCompile>
<CudaCompile Include="GPUTool\GPUTBPImage.cu">
<GenerateRelocatableDeviceCode Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</GenerateRelocatableDeviceCode>
</CudaCompile>
<CudaCompile Include="GPUTool\GPUTool.cu">
<GenerateRelocatableDeviceCode Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</GenerateRelocatableDeviceCode>
</CudaCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Condition="Exists('$(QtMsBuild)\qt.targets')">

View File

@ -21,6 +21,7 @@
#ifdef __CUDANVCC___
#include "GPUTool.cuh"
#include "GPURTPC.cuh"
#endif // __CUDANVCC___
#include <Imageshow/ImageShowDialogClass.h>
@ -827,9 +828,9 @@ ErrorCode RTPCProcessCls::RTPCMainProcess_GPU( )
std::cout << "ant Position=[" << antpx << "," << antpy << "," << antpz << "]" << std::endl;
#endif // __PRFDEBUG__
make_VectorA_B(antpx, antpy, antpz, d_dem_x, d_dem_y, d_dem_z, d_RstX, d_RstY, d_RstZ, pixelcount); // Rst = Rs - Rt; 華醱-> 硌砃
Norm_Vector(d_RstX, d_RstY, d_RstZ, d_R, pixelcount); // R
cosAngle_VA_AB(d_RstX, d_RstY, d_RstZ, d_demsloper_x, d_demsloper_y, d_demsloper_z, d_localangle, pixelcount); // 擁窒⻌扞褒
CUDAmake_VectorA_B(antpx, antpy, antpz, d_dem_x, d_dem_y, d_dem_z, d_RstX, d_RstY, d_RstZ, pixelcount); // Rst = Rs - Rt; 華醱-> 硌砃
CUDANorm_Vector(d_RstX, d_RstY, d_RstZ, d_R, pixelcount); // R
CUDAcosAngle_VA_AB(d_RstX, d_RstY, d_RstZ, d_demsloper_x, d_demsloper_y, d_demsloper_z, d_localangle, pixelcount); // 擁窒⻌扞褒
SatelliteAntDirectNormal(d_RstX, d_RstY, d_RstZ,
antXaxisX, antXaxisY, antXaxisZ,
antYaxisX, antYaxisY, antYaxisZ,

View File

@ -7,6 +7,7 @@
#include <QProgressDialog>
#include <QMessageBox>
#include "GPUTool.cuh"
#include "GPUTBPImage.cuh"
void CreatePixelXYZ(std::shared_ptr<EchoL0Dataset> echoL0ds, QString outPixelXYZPath)
{
@ -400,9 +401,6 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
FreeCUDADevice(d_echoArr);
FreeCUDADevice(d_imgArr);
// ΚΝ·ΕGPU±δΑΏ
}
@ -464,8 +462,6 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
/**
ErrorCode TBPImageAlgCls::ProcessCPU(long num_thread)
{
omp_set_num_threads(num_thread);