RasterProcessTool/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC_single.cu

469 lines
15 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include <cuda.h>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPURFPC_single.cuh"
#ifdef __CUDANVCC___
/* 机器函数 ****************************************************************************************************************************/
extern __device__ float GPU_getSigma0dB_single(CUDASigmaParam_single param, float theta) {//线性值
float sigma = param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
return sigma;
}
extern __device__ float GPU_getSigma0dB_single(const float p1, const float p2, const float p3, const float p4, const float p5, const float p6, float theta)
{
return p1 + p2 * expf(-p3 * theta) + p4 * cosf(p5 * theta + p6);
}
extern __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal_single(
float RstX, float RstY, float RstZ,
float AntXaxisX, float AntXaxisY, float AntXaxisZ,
float AntYaxisX, float AntYaxisY, float AntYaxisZ,
float AntZaxisX, float AntZaxisY, float AntZaxisZ,
float AntDirectX, float AntDirectY, float AntDirectZ
) {
CUDAVectorEllipsoidal result{ 0,0,-1 };
// 求解天线增益
float Xst = -1 * RstX; // 卫星 --> 地面
float Yst = -1 * RstY;
float Zst = -1 * RstZ;
// 归一化
float RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
float AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
float AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
float AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);
float Rx = Xst / RstNorm;
float Ry = Yst / RstNorm;
float Rz = Zst / RstNorm;
float Xx = AntXaxisX / AntXaxisNorm;
float Xy = AntXaxisY / AntXaxisNorm;
float Xz = AntXaxisZ / AntXaxisNorm;
float Yx = AntYaxisX / AntYaxisNorm;
float Yy = AntYaxisY / AntYaxisNorm;
float Yz = AntYaxisZ / AntYaxisNorm;
float Zx = AntZaxisX / AntZaxisNorm;
float Zy = AntZaxisY / AntZaxisNorm;
float Zz = AntZaxisZ / AntZaxisNorm;
float Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
float Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
float Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
// 计算theta 与 phi
float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
float Zn = Zant / Norm;
float ThetaAnt = ( - 1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));// acosf(Zant / Norm); // theta 与 Z轴的夹角
float PhiAnt = abs(Xant)<PRECISIONTOLERANCE ?0: atanf(Yant / Xant); // -pi/2 ~pi/2
if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
PhiAnt = 0;
}
else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上原点
if (Yant > 0) {
PhiAnt = PI / 2;
}
else {
PhiAnt = -PI / 2;
}
}
else if (Xant < 0) {
if (Yant > 0) {
PhiAnt = PI + PhiAnt;
}
else {
PhiAnt = -PI + PhiAnt;
}
}
else { // Xant>0 X 正轴
}
if (isnan(PhiAnt)) {
printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
}
result.theta = ThetaAnt;
result.phi = PhiAnt;
result.Rho = Norm;
return result;
}
extern __device__ float GPU_BillerInterpAntPattern_single(float* antpattern,
float starttheta, float startphi, float dtheta, float dphi,
long thetapoints, long phipoints,
float searththeta, float searchphi) {
float stheta = searththeta;
float sphi = searchphi;
if (stheta > 90) {
return 0;
}
else {}
float pthetaid = (stheta - starttheta) / dtheta;//
float pphiid = (sphi - startphi) / dphi;
long lasttheta = floorf(pthetaid);
long nextTheta = lasttheta + 1;
long lastphi = floorf(pphiid);
long nextPhi = lastphi + 1;
if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
{
return 0;
}
else {
float x = stheta;
float y = sphi;
float x1 = lasttheta * dtheta + starttheta;
float x2 = nextTheta * dtheta + starttheta;
float y1 = lastphi * dphi + startphi;
float y2 = nextPhi * dphi + startphi;
float z11 = antpattern[lasttheta * phipoints + lastphi];
float z12 = antpattern[lasttheta * phipoints + nextPhi];
float z21 = antpattern[nextTheta * phipoints + lastphi];
float z22 = antpattern[nextTheta * phipoints + nextPhi];
//z11 = powf(10, z11 / 10); // dB-> 线性
//z12 = powf(10, z12 / 10);
//z21 = powf(10, z21 / 10);
//z22 = powf(10, z22 / 10);
float GainValue = (z11 * (x2 - x) * (y2 - y)
+ z21 * (x - x1) * (y2 - y)
+ z12 * (x2 - x) * (y - y1)
+ z22 * (x - x1) * (y - y1));
GainValue = GainValue / ((x2 - x1) * (y2 - y1));
return GainValue;
}
}
/* 核函数 ****************************************************************************************************************************/
// 计算每块
__global__ void CUDA_Kernel_Computer_R_amp_single(
float* antX, float* antY, float* antZ,
float* antXaxisX, float* antXaxisY, float* antXaxisZ,
float* antYaxisX, float* antYaxisY, float* antYaxisZ,
float* antZaxisX, float* antZaxisY, float* antZaxisZ,
float* antDirectX, float* antDirectY, float* antDirectZ,
long PRFCount, // 整体的脉冲数,
float* targetX, float* targetY, float* targetZ, long* demCls,
float* demSlopeX, float* demSlopeY, float* demSlopeZ ,
long startPosId, long pixelcount,
CUDASigmaParam_single* sigma0Paramslist, long sigmaparamslistlen,
float Pt,
float refPhaseRange,
float* TransAntpattern,
float Transtarttheta, float Transstartphi, float Transdtheta, float Transdphi, int Transthetapoints, int Transphipoints,
float* ReceiveAntpattern,
float Receivestarttheta, float Receivestartphi, float Receivedtheta, float Receivedphi, int Receivethetapoints, int Receivephipoints,
float maxTransAntPatternValue, float maxReceiveAntPatternValue,
float NearR, float FarR,
float* d_temp_R, float* d_temp_amps// 计算输出
) {
long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
long prfId = idx / SHAREMEMORY_FLOAT_HALF;
long posId = idx % SHAREMEMORY_FLOAT_HALF+ startPosId; // 当前线程对应的影像点
if (prfId < PRFCount && posId < pixelcount) {
float RstX = antX[prfId] - targetX[posId]; // 计算坐标矢量
float RstY = antY[prfId] - targetY[posId];
float RstZ = antZ[prfId] - targetZ[posId];
float RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // 矢量距离
if (RstR<NearR || RstR>FarR) {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
else {
float slopeX = demSlopeX[posId];
float slopeY = demSlopeY[posId];
float slopeZ = demSlopeZ[posId];
float slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
if (abs(slopR - 0) > 1e-3) {
float dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
float localangle = acos(dotAB / (RstR * slopR));
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2|| isnan(localangle)) {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
else {}
float ampGain = 0;
// 求解天线方向图指向
CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal_single(
RstX, RstY, RstZ,
antXaxisX[prfId], antXaxisY[prfId], antXaxisZ[prfId],
antYaxisX[prfId], antYaxisY[prfId], antYaxisZ[prfId],
antZaxisX[prfId], antZaxisY[prfId], antZaxisZ[prfId],
antDirectX[prfId], antDirectY[prfId], antDirectZ[prfId]
);
antVector.theta = antVector.theta * r2d;
antVector.phi = antVector.phi * r2d;
//printf("theta: %f , phi: %f \n", antVector.theta, antVector.phi);
if (antVector.Rho > 0) {
//float TansantPatternGain = GPU_BillerInterpAntPattern(
// TransAntpattern,
// Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
// antVector.theta, antVector.phi);
//float antPatternGain = GPU_BillerInterpAntPattern(
// ReceiveAntpattern,
// Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
// antVector.theta, antVector.phi);
float sigma0 = 0;
{
long clsid = demCls[posId];
//printf("clsid=%d\n", clsid);
CUDASigmaParam_single tempsigma = sigma0Paramslist[clsid];
if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
abs(tempsigma.p2) < PRECISIONTOLERANCE &&
abs(tempsigma.p3) < PRECISIONTOLERANCE &&
abs(tempsigma.p4) < PRECISIONTOLERANCE &&
abs(tempsigma.p5) < PRECISIONTOLERANCE &&
abs(tempsigma.p6) < PRECISIONTOLERANCE
) {
sigma0 = 0;
}
else {
float sigma = GPU_getSigma0dB_single(tempsigma, localangle);
sigma0 = powf(10.0, sigma / 10.0);
}
}
//ampGain = TansantPatternGain * antPatternGain;
ampGain = 1;
//if (10 * log10(ampGain / maxReceiveAntPatternValue / maxTransAntPatternValue) < -3) { // 小于-3dB
// d_temp_R[idx] = 0;
// d_temp_amps[idx] = 0;
// return;
//}
//else {}
ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
float temp_amp = float(ampGain * Pt * sigma0);
float temp_R = float(RstR - refPhaseRange);
if (isnan(temp_amp) || isnan(temp_R)|| isinf(temp_amp) || isinf(temp_R)) {
printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
else {}
d_temp_amps[idx] = temp_amp;
d_temp_R[idx] = temp_R;
return;
}
else {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
}
else {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
}
}
}
__global__ void CUDA_Kernel_Computer_echo_single(
float* d_temp_R, float* d_temp_amps, long posNum,
float f0, float dfreq,
long FreqPoints, // 当前频率的分块
long maxfreqnum, // 最大脉冲值
float* d_temp_echo_real, float* d_temp_echo_imag,
long temp_PRF_Count
) {
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // 注意一个完整的block_size 共享相同内存
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
long tid = threadIdx.x;
long bid = blockIdx.x;
long idx = bid * blockDim.x + tid;
long prfId = idx / FreqPoints; // 脉冲ID
long fId = idx % FreqPoints;//频率ID
long psid = 0;
long pixelId = 0;
for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) { // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE=SHAREMEMORY_FLOAT_HALF
psid = tid * SHAREMEMORY_FLOAT_HALF_STEP + ii;
pixelId = prfId * posNum + psid; //
if (psid < posNum) {
s_R[psid] = d_temp_R[pixelId];
s_amp[psid] = d_temp_amps[pixelId];
}
else {
s_R[psid] = 0;
s_amp[psid] = 0;
}
}
__syncthreads(); // 确定所有待处理数据都已经进入程序中
if (fId < maxfreqnum && prfId < temp_PRF_Count) {
long echo_ID = prfId * maxfreqnum + fId; // 计算对应的回波位置
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
float temp_real = 0;
float temp_imag = 0;
float temp_phi = 0;
float temp_amp = 0;
for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
temp_phi = s_R[dataid] * factorjTemp;
temp_amp = s_amp[dataid];
temp_real += (temp_amp * cosf(temp_phi));
temp_imag += (temp_amp * sinf(temp_phi));
//if (dataid > 5000) {
// printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
//}
if (isnan(temp_phi) || isnan(temp_amp) || isnan(temp_real) || isnan(temp_imag)
|| isinf(temp_phi) || isinf(temp_amp) || isinf(temp_real) || isinf(temp_imag)
) {
printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n",temp_amp,temp_phi,temp_real,temp_imag);
}
}
//printf("echo_ID=%d; ehodata=(%f,%f)\n", echo_ID, temp_real, temp_imag);
//printf("(%f %f %f) ", factorjTemp, s_amp[0], s_R[0]);
d_temp_echo_real[echo_ID] += /*d_temp_echo_real[echo_ID] + */temp_real;
d_temp_echo_imag[echo_ID] += /*d_temp_echo_imag[echo_ID] +*/ temp_imag;
}
}
/**
* 分块计算主流程
*/
void CUDA_RFPC_MainProcess_single(
float* antX, float* antY, float* antZ,
float* antXaxisX, float* antXaxisY, float* antXaxisZ,
float* antYaxisX, float* antYaxisY, float* antYaxisZ,
float* antZaxisX, float* antZaxisY, float* antZaxisZ,
float* antDirectX, float* antDirectY, float* antDirectZ,
long PRFCount, long FreqNum,
float f0, float dfreq,
float Pt,
float refPhaseRange,
float* TransAntpattern,
float Transtarttheta, float Transstartphi, float Transdtheta, float Transdphi, int Transthetapoints, int Transphipoints,
float* ReceiveAntpattern,
float Receivestarttheta, float Receivestartphi, float Receivedtheta, float Receivedphi, int Receivethetapoints, int Receivephipoints,
float maxTransAntPatternValue, float maxReceiveAntPatternValue,
float NearR, float FarR,
float* targetX, float* targetY, float* targetZ, long* demCls, long TargetNumber,
float* demSlopeX, float* demSlopeY, float* demSlopeZ,
CUDASigmaParam_single* sigma0Paramslist, long sigmaparamslistlen,
float* out_echoReal, float* out_echoImag,
float* d_temp_R, float* d_temp_amp
)
{
long BLOCK_FREQNUM = NextBlockPad(FreqNum, BLOCK_SIZE); // 256*freqBlockID
long cudaBlocknum = 0;
long freqpoints = BLOCK_FREQNUM;
printf("freqpoints:%d\n", freqpoints);
long process = 0;
for (long sTi = 0; sTi < TargetNumber; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
cudaBlocknum = (PRFCount * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDA_Kernel_Computer_R_amp_single << <cudaBlocknum, BLOCK_SIZE >> > (
antX, antY, antZ,
antXaxisX, antXaxisY, antXaxisZ,
antYaxisX, antYaxisY, antYaxisZ,
antZaxisX, antZaxisY, antZaxisZ,
antDirectX, antDirectY, antDirectZ,
PRFCount,
targetX, targetY, targetZ, demCls,
demSlopeX, demSlopeY, demSlopeZ,
sTi, TargetNumber,
sigma0Paramslist, sigmaparamslistlen,
Pt,
refPhaseRange,
TransAntpattern,
Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
ReceiveAntpattern,
Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
maxTransAntPatternValue, maxReceiveAntPatternValue,
NearR, FarR,
d_temp_R, d_temp_amp// 计算输出
);
PrintLasterError("CUDA_Kernel_Computer_R_amp");
cudaBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDA_Kernel_Computer_echo_single << <cudaBlocknum, BLOCK_SIZE >> > (
d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF,
f0, dfreq,
freqpoints, FreqNum,
out_echoReal, out_echoImag,
PRFCount
);
PrintLasterError("CUDA_Kernel_Computer_echo");
if ((sTi * 100.0 / TargetNumber ) - process >= 1) {
process = sTi * 100.0 / TargetNumber;
PRINT("TargetID [%f]: %d / %d finished\n", sTi*100.0/ TargetNumber,sTi, TargetNumber);
}
}
cudaDeviceSynchronize();
}
#endif