RasterProcessTool/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu

806 lines
24 KiB
Plaintext
Raw Normal View History

#include <cuda.h>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPURFPC.cuh"
#ifdef __CUDANVCC___
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> ****************************************************************************************************************************/
2025-03-28 11:06:59 +00:00
__device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta)
{
return param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
}
extern __device__ float GPU_getSigma0dB(CUDASigmaParam param, float theta) {//<2F><><EFBFBD><EFBFBD>ֵ
return param.p1 + param.p2 * expf(-param.p3 * theta) + param.p4 * cosf(param.p5 * theta + param.p6);;
}
__device__ double GPU_getSigma0dB_params(
2025-03-24 02:36:46 +00:00
const double p1, const double p2, const double p3, const double p4, const double p5, const double p6,
double theta) {//<2F><><EFBFBD><EFBFBD>ֵ
2025-03-05 09:10:21 +00:00
return p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6);
}
extern __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
2025-01-14 01:25:23 +00:00
double RstX, double RstY, double RstZ,
2025-01-28 03:01:01 +00:00
double AntXaxisX, double AntXaxisY, double AntXaxisZ,
double AntYaxisX, double AntYaxisY, double AntYaxisZ,
double AntZaxisX, double AntZaxisY, double AntZaxisZ,
double AntDirectX, double AntDirectY, double AntDirectZ
) {
CUDAVectorEllipsoidal result{ 0,0,-1 };
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2025-01-14 01:25:23 +00:00
double Xst = -1 * RstX; // <20><><EFBFBD><EFBFBD> --> <20><><EFBFBD><EFBFBD>
double Yst = -1 * RstY;
double Zst = -1 * RstZ;
// <20><>һ<EFBFBD><D2BB>
2025-01-14 01:25:23 +00:00
double RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
double AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
double AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
double AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);
double Rx = Xst / RstNorm;
double Ry = Yst / RstNorm;
double Rz = Zst / RstNorm;
double Xx = AntXaxisX / AntXaxisNorm;
double Xy = AntXaxisY / AntXaxisNorm;
double Xz = AntXaxisZ / AntXaxisNorm;
double Yx = AntYaxisX / AntYaxisNorm;
double Yy = AntYaxisY / AntYaxisNorm;
double Yz = AntYaxisZ / AntYaxisNorm;
double Zx = AntZaxisX / AntZaxisNorm;
double Zy = AntZaxisY / AntZaxisNorm;
double Zz = AntZaxisZ / AntZaxisNorm;
double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
2025-01-28 03:01:01 +00:00
// <20><><EFBFBD><EFBFBD>theta <20><> phi
2025-01-14 01:25:23 +00:00
double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // <20><><EFBFBD><EFBFBD> pho
2025-01-28 03:01:01 +00:00
double Zn = Zant / Norm;
2025-03-24 02:36:46 +00:00
double ThetaAnt = (-1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));// acosf(Zant / Norm); // theta <20><> Z<><5A><EFBFBD>ļн<C4BC>
double PhiAnt = abs(Xant) < PRECISIONTOLERANCE ? 0 : atanf(Yant / Xant); // -pi/2 ~pi/2
if (abs(Yant) < PRECISIONTOLERANCE) { // X<><58><EFBFBD><EFBFBD>
PhiAnt = 0;
}
else if (abs(Xant) < PRECISIONTOLERANCE) { // Y<><59><EFBFBD>ϣ<EFBFBD>ԭ<EFBFBD><D4AD>
if (Yant > 0) {
PhiAnt = PI / 2;
}
else {
PhiAnt = -PI / 2;
}
}
else if (Xant < 0) {
if (Yant > 0) {
PhiAnt = PI + PhiAnt;
}
else {
PhiAnt = -PI + PhiAnt;
}
}
else { // Xant>0 X <20><><EFBFBD><EFBFBD>
}
if (isnan(PhiAnt)) {
printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
}
2025-01-02 16:15:08 +00:00
result.theta = ThetaAnt;
result.phi = PhiAnt;
result.Rho = Norm;
return result;
}
extern __device__ double GPU_BillerInterpAntPattern(double* antpattern,
2025-01-14 01:25:23 +00:00
double starttheta, double startphi, double dtheta, double dphi,
long thetapoints, long phipoints,
2025-01-14 01:25:23 +00:00
double searththeta, double searchphi) {
double stheta = searththeta;
double sphi = searchphi;
if (stheta > 90) {
return 0;
}
else {}
2025-01-14 01:25:23 +00:00
double pthetaid = (stheta - starttheta) / dtheta;//
double pphiid = (sphi - startphi) / dphi;
long lasttheta = floorf(pthetaid);
long nextTheta = lasttheta + 1;
long lastphi = floorf(pphiid);
long nextPhi = lastphi + 1;
2025-01-02 16:15:08 +00:00
if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
{
return 0;
}
else {
2025-01-14 01:25:23 +00:00
double x = stheta;
double y = sphi;
2025-01-14 01:25:23 +00:00
double x1 = lasttheta * dtheta + starttheta;
double x2 = nextTheta * dtheta + starttheta;
double y1 = lastphi * dphi + startphi;
double y2 = nextPhi * dphi + startphi;
2025-01-14 01:25:23 +00:00
double z11 = antpattern[lasttheta * phipoints + lastphi];
double z12 = antpattern[lasttheta * phipoints + nextPhi];
double z21 = antpattern[nextTheta * phipoints + lastphi];
double z22 = antpattern[nextTheta * phipoints + nextPhi];
//z11 = powf(10, z11 / 10); // dB-> <20><><EFBFBD><EFBFBD>
//z12 = powf(10, z12 / 10);
//z21 = powf(10, z21 / 10);
//z22 = powf(10, z22 / 10);
2025-01-14 01:25:23 +00:00
double GainValue = (z11 * (x2 - x) * (y2 - y)
+ z21 * (x - x1) * (y2 - y)
+ z12 * (x2 - x) * (y - y1)
+ z22 * (x - x1) * (y - y1));
GainValue = GainValue / ((x2 - x1) * (y2 - y1));
return GainValue;
}
}
2025-01-15 03:35:48 +00:00
2025-03-24 02:36:46 +00:00
/* <20>˺<EFBFBD><CBBA><EFBFBD> ****************************************************************************************************************************/
2025-01-20 07:49:54 +00:00
// <20><><EFBFBD><EFBFBD>ÿ<EFBFBD><C3BF>
__global__ void CUDA_Kernel_Computer_R_amp(
double* antX, double* antY, double* antZ,
double* antXaxisX, double* antXaxisY, double* antXaxisZ,
double* antYaxisX, double* antYaxisY, double* antYaxisZ,
double* antZaxisX, double* antZaxisY, double* antZaxisZ,
double* antDirectX, double* antDirectY, double* antDirectZ,
long PRFCount, // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2025-03-24 02:36:46 +00:00
double* targetX, double* targetY, double* targetZ, long* demCls,
double* demSlopeX, double* demSlopeY, double* demSlopeZ,
long startPosId, long pixelcount,
2025-01-20 07:49:54 +00:00
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
double Pt,
double refPhaseRange,
double* TransAntpattern,
double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
double* ReceiveAntpattern,
double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
double maxTransAntPatternValue, double maxReceiveAntPatternValue,
2025-01-20 07:49:54 +00:00
double NearR, double FarR,
float* d_temp_R, float* d_temp_amps// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
) {
long idx = blockIdx.x * blockDim.x + threadIdx.x; // <20><>ȡ<EFBFBD><C8A1>ǰ<EFBFBD><C7B0><EFBFBD>̱߳<DFB3><CCB1><EFBFBD>
long prfId = idx / SHAREMEMORY_FLOAT_HALF;
2025-03-24 02:36:46 +00:00
long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // <20><>ǰ<EFBFBD>̶߳<DFB3>Ӧ<EFBFBD><D3A6>Ӱ<EFBFBD><D3B0><EFBFBD><EFBFBD>
if (prfId < PRFCount && posId < pixelcount) {
double RstX = antX[prfId] - targetX[posId]; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʸ<EFBFBD><CAB8>
double RstY = antY[prfId] - targetY[posId];
double RstZ = antZ[prfId] - targetZ[posId];
2025-01-20 07:49:54 +00:00
double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // ʸ<><CAB8><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
if (RstR<NearR || RstR>FarR) {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
2025-01-20 07:49:54 +00:00
}
else {
double slopeX = demSlopeX[posId];
double slopeY = demSlopeY[posId];
double slopeZ = demSlopeZ[posId];
2025-03-24 02:36:46 +00:00
2025-01-20 07:49:54 +00:00
double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
if (abs(slopR - 0) > 1e-3) {
double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
2025-03-24 02:36:46 +00:00
double localangle = acos(dotAB / (RstR * slopR));
2025-03-24 02:36:46 +00:00
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
else {}
double ampGain = 0;
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>߷<EFBFBD><DFB7><EFBFBD>ͼָ<CDBC><D6B8>
CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
RstX, RstY, RstZ,
antXaxisX[prfId], antXaxisY[prfId], antXaxisZ[prfId],
antYaxisX[prfId], antYaxisY[prfId], antYaxisZ[prfId],
antZaxisX[prfId], antZaxisY[prfId], antZaxisZ[prfId],
antDirectX[prfId], antDirectY[prfId], antDirectZ[prfId]
);
antVector.theta = antVector.theta * r2d;
antVector.phi = antVector.phi * r2d;
//printf("theta: %f , phi: %f \n", antVector.theta, antVector.phi);
if (antVector.Rho > 0) {
2025-02-24 10:53:35 +00:00
//double TansantPatternGain = GPU_BillerInterpAntPattern(
// TransAntpattern,
// Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
// antVector.theta, antVector.phi);
//double antPatternGain = GPU_BillerInterpAntPattern(
// ReceiveAntpattern,
// Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
// antVector.theta, antVector.phi);
2025-01-20 07:49:54 +00:00
double sigma0 = 0;
{
long clsid = demCls[posId];
//printf("clsid=%d\n", clsid);
CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
2025-03-24 02:36:46 +00:00
if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
abs(tempsigma.p2) < PRECISIONTOLERANCE &&
abs(tempsigma.p3) < PRECISIONTOLERANCE &&
abs(tempsigma.p4) < PRECISIONTOLERANCE &&
abs(tempsigma.p5) < PRECISIONTOLERANCE &&
abs(tempsigma.p6) < PRECISIONTOLERANCE
) {
sigma0 = 0;
}
else {
double sigma = GPU_getSigma0dB(tempsigma, localangle);
sigma0 = powf(10.0, sigma / 10.0);
}
2025-01-20 07:49:54 +00:00
}
2025-02-24 10:53:35 +00:00
//ampGain = TansantPatternGain * antPatternGain;
ampGain = 1;
2025-02-01 11:58:12 +00:00
//if (10 * log10(ampGain / maxReceiveAntPatternValue / maxTransAntPatternValue) < -3) { // С<><D0A1>-3dB
// d_temp_R[idx] = 0;
// d_temp_amps[idx] = 0;
// return;
//}
//else {}
2025-01-14 01:25:23 +00:00
ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // <20><><EFBFBD><EFBFBD>ǿ<EFBFBD><C7BF>
2025-03-24 02:36:46 +00:00
float temp_amp = float(ampGain * Pt * sigma0);
float temp_R = float(RstR - refPhaseRange);
2025-03-24 02:36:46 +00:00
if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
else {}
2025-03-24 02:36:46 +00:00
d_temp_amps[idx] = temp_amp;
d_temp_R[idx] = temp_R;
return;
}
else {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
2025-01-20 07:49:54 +00:00
}
else {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
2025-01-20 07:49:54 +00:00
}
2025-01-16 02:12:08 +00:00
}
2025-01-20 07:49:54 +00:00
}
}
__global__ void CUDA_Kernel_Computer_echo(
float* d_temp_R, float* d_temp_amps, long posNum,
2025-03-24 02:36:46 +00:00
float f0, float dfreq,
long FreqPoints, // <20><>ǰƵ<C7B0>ʵķֿ<C4B7>
long maxfreqnum, // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ
2025-01-20 07:49:54 +00:00
float* d_temp_echo_real, float* d_temp_echo_imag,
long temp_PRF_Count
2025-03-24 02:36:46 +00:00
) {
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // ע<><D7A2>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>block_size <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͬ<EFBFBD>ڴ<EFBFBD>
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
2025-01-20 07:49:54 +00:00
long tid = threadIdx.x;
2025-01-20 07:49:54 +00:00
long bid = blockIdx.x;
long idx = bid * blockDim.x + tid;
long prfId = idx / FreqPoints; // <20><><EFBFBD><EFBFBD>ID
long fId = idx % FreqPoints;//Ƶ<><C6B5>ID
2025-01-20 07:49:54 +00:00
long psid = 0;
long pixelId = 0;
for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) { // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE=SHAREMEMORY_FLOAT_HALF
psid = tid * SHAREMEMORY_FLOAT_HALF_STEP + ii;
pixelId = prfId * posNum + psid; //
if (psid < posNum) {
s_R[psid] = d_temp_R[pixelId];
s_amp[psid] = d_temp_amps[pixelId];
}
else {
s_R[psid] = 0;
s_amp[psid] = 0;
}
2025-01-20 07:49:54 +00:00
}
__syncthreads(); // ȷ<><C8B7><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݶ<EFBFBD><DDB6>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
if (fId < maxfreqnum && prfId < temp_PRF_Count) {
2025-03-24 02:36:46 +00:00
long echo_ID = prfId * maxfreqnum + fId; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ӧ<EFBFBD>Ļز<C4BB>λ<EFBFBD><CEBB>
2025-01-20 07:49:54 +00:00
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
float temp_real = 0;
float temp_imag = 0;
float temp_phi = 0;
float temp_amp = 0;
for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
2025-01-20 07:49:54 +00:00
temp_phi = s_R[dataid] * factorjTemp;
temp_amp = s_amp[dataid];
temp_real += (temp_amp * cosf(temp_phi));
temp_imag += (temp_amp * sinf(temp_phi));
//if (dataid > 5000) {
// printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
//}
if (isnan(temp_phi) || isnan(temp_amp) || isnan(temp_real) || isnan(temp_imag)
|| isinf(temp_phi) || isinf(temp_amp) || isinf(temp_real) || isinf(temp_imag)
) {
2025-03-24 02:36:46 +00:00
printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, temp_real, temp_imag);
}
2025-03-24 02:36:46 +00:00
2025-01-16 02:12:08 +00:00
}
//printf("echo_ID=%d; ehodata=(%f,%f)\n", echo_ID, temp_real, temp_imag);
//printf("(%f %f %f) ", factorjTemp, s_amp[0], s_R[0]);
d_temp_echo_real[echo_ID] += /*d_temp_echo_real[echo_ID] + */temp_real;
d_temp_echo_imag[echo_ID] += /*d_temp_echo_imag[echo_ID] +*/ temp_imag;
2025-01-15 03:35:48 +00:00
}
2025-01-20 07:49:54 +00:00
}
/**
* <20>ֿ<EFBFBD><D6BF><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2025-01-20 07:49:54 +00:00
*/
void CUDA_RFPC_MainProcess(
double* antX, double* antY, double* antZ,
double* antXaxisX, double* antXaxisY, double* antXaxisZ,
double* antYaxisX, double* antYaxisY, double* antYaxisZ,
double* antZaxisX, double* antZaxisY, double* antZaxisZ,
double* antDirectX, double* antDirectY, double* antDirectZ,
2025-03-24 02:36:46 +00:00
long PRFCount, long FreqNum,
float f0, float dfreq,
double Pt,
double refPhaseRange,
double* TransAntpattern,
double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
double* ReceiveAntpattern,
double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
double maxTransAntPatternValue, double maxReceiveAntPatternValue,
double NearR, double FarR,
double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber,
2025-03-24 02:36:46 +00:00
double* demSlopeX, double* demSlopeY, double* demSlopeZ,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
float* out_echoReal, float* out_echoImag,
float* d_temp_R, float* d_temp_amp
)
2025-01-20 07:49:54 +00:00
{
long BLOCK_FREQNUM = NextBlockPad(FreqNum, BLOCK_SIZE); // 256*freqBlockID
2025-01-20 07:49:54 +00:00
long cudaBlocknum = 0;
long freqpoints = BLOCK_FREQNUM;
printf("freqpoints:%d\n", freqpoints);
long process = 0;
for (long sTi = 0; sTi < TargetNumber; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
cudaBlocknum = (PRFCount * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDA_Kernel_Computer_R_amp << <cudaBlocknum, BLOCK_SIZE >> > (
antX, antY, antZ,
antXaxisX, antXaxisY, antXaxisZ,
antYaxisX, antYaxisY, antYaxisZ,
antZaxisX, antZaxisY, antZaxisZ,
antDirectX, antDirectY, antDirectZ,
2025-03-24 02:36:46 +00:00
PRFCount,
targetX, targetY, targetZ, demCls,
demSlopeX, demSlopeY, demSlopeZ,
sTi, TargetNumber,
sigma0Paramslist, sigmaparamslistlen,
Pt,
refPhaseRange,
TransAntpattern,
Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
ReceiveAntpattern,
Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
maxTransAntPatternValue, maxReceiveAntPatternValue,
NearR, FarR,
d_temp_R, d_temp_amp// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
);
2025-01-20 07:49:54 +00:00
PrintLasterError("CUDA_Kernel_Computer_R_amp");
2025-03-24 02:36:46 +00:00
cudaBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDA_Kernel_Computer_echo << <cudaBlocknum, BLOCK_SIZE >> > (
d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF,
2025-03-24 02:36:46 +00:00
f0, dfreq,
freqpoints, FreqNum,
out_echoReal, out_echoImag,
PRFCount
);
PrintLasterError("CUDA_Kernel_Computer_echo");
2025-03-24 02:36:46 +00:00
if ((sTi * 100.0 / TargetNumber) - process >= 1) {
process = sTi * 100.0 / TargetNumber;
2025-03-24 02:36:46 +00:00
PRINT("TargetID [%f]: %d / %d finished\n", sTi * 100.0 / TargetNumber, sTi, TargetNumber);
2025-01-20 07:49:54 +00:00
}
2025-01-15 03:35:48 +00:00
}
2025-01-20 07:49:54 +00:00
2025-01-14 01:25:23 +00:00
cudaDeviceSynchronize();
2025-01-14 01:25:23 +00:00
}
2025-03-23 08:01:28 +00:00
2025-03-25 16:27:22 +00:00
2025-03-23 08:01:28 +00:00
/* <20>˺<EFBFBD><CBBA><EFBFBD> ****************************************************************************************************************************/
2025-03-26 02:14:42 +00:00
inline double SincTarg(double x) {
return 1 - (x * x / 6) + (x * x * x * x / 120) - (x * x * x * x * x * x / 5040);
2025-03-25 16:27:22 +00:00
}
2025-03-24 02:36:46 +00:00
__global__ void Kernel_Computer_R_amp_NoAntPattern(
SateState* antlist,
long PRFCount,
GoalState* goallist,
long demLen,
long startPosId, long pixelcount,
CUDASigmaParam sigma0Params,
double Pt,
double refPhaseRange,
double NearR, double FarR,
2025-03-25 16:27:22 +00:00
double maxGain,double GainWeight,
2025-03-24 02:36:46 +00:00
double* d_temp_R, double* d_temp_amps// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
) {
2025-03-24 06:33:21 +00:00
long long idx = blockIdx.x * blockDim.x + threadIdx.x; // <20><>ȡ<EFBFBD><C8A1>ǰ<EFBFBD><C7B0><EFBFBD>̱߳<DFB3><CCB1><EFBFBD>
long long prfId = idx / SHAREMEMORY_FLOAT_HALF;
long long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // <20><>ǰ<EFBFBD>̶߳<DFB3>Ӧ<EFBFBD><D3A6>Ӱ<EFBFBD><D3B0><EFBFBD><EFBFBD>
2025-03-24 02:36:46 +00:00
2025-03-25 15:54:47 +00:00
//if (prfId > 20000) {
// printf("prfid %d,PRFCount : %d\n", prfId, PRFCount);
//}
2025-03-24 02:36:46 +00:00
if (prfId < PRFCount && posId < pixelcount) {
2025-03-25 16:27:22 +00:00
SateState antp = antlist[prfId];
GoalState gp = goallist[posId];
double RstX = antp.Px - gp.Tx; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʸ<EFBFBD><CAB8> T->S
double RstY = antp.Py - gp.Ty;
double RstZ = antp.Pz - gp.Tz;
2025-03-24 02:36:46 +00:00
double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // ʸ<><CAB8><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2025-03-25 15:54:47 +00:00
2025-03-25 16:27:22 +00:00
2025-03-25 15:54:47 +00:00
2025-03-24 02:36:46 +00:00
if (RstR<NearR || RstR>FarR) {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
else {
2025-03-28 11:06:59 +00:00
RstX = RstX / RstR;
RstY = RstY / RstR;
RstZ = RstZ / RstR;
2025-03-25 16:27:22 +00:00
double slopeX = gp.TsX;
double slopeY = gp.TsY;
double slopeZ = gp.TsZ;
2025-03-24 02:36:46 +00:00
double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
if (abs(slopR - 0) > 1e-3) {
2025-03-28 11:06:59 +00:00
float dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
float localangle = acosf(dotAB / ( slopR));
2025-03-24 02:36:46 +00:00
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
d_temp_R[idx] = 0;
d_temp_amps[idx] = 0;
return;
}
else {}
2025-03-25 16:27:22 +00:00
// <20><><EFBFBD><EFBFBD>б<EFBFBD><D0B1>˥<EFBFBD><CBA5>
2025-03-28 11:06:59 +00:00
float antDirectR = sqrtf(antp.antDirectX * antp.antDirectX
2025-03-25 16:27:22 +00:00
+ antp.antDirectY * antp.antDirectY
+ antp.antDirectZ * antp.antDirectZ);
2025-03-28 11:06:59 +00:00
float diectAngle = -1*(RstX*antp.antDirectX+
2025-03-25 16:27:22 +00:00
RstY*antp.antDirectY+
2025-03-28 11:06:59 +00:00
RstZ*antp.antDirectZ) / (antDirectR );
2025-03-25 16:27:22 +00:00
2025-03-26 02:14:42 +00:00
diectAngle = acosf(diectAngle);// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2025-03-28 11:06:59 +00:00
//if (diectAngle * r2d <3) {
// printf("idx: %d, antAngle : %e \n", prfId, diectAngle * r2d);
//}
2025-03-26 02:14:42 +00:00
diectAngle = diectAngle * GainWeight;
2025-03-28 11:06:59 +00:00
float ampGain = 2 * maxGain * (1 - (diectAngle * diectAngle / 6)
2025-03-26 02:14:42 +00:00
+ (diectAngle * diectAngle * diectAngle * diectAngle / 120)
2025-03-28 11:06:59 +00:00
- (diectAngle * diectAngle * diectAngle * diectAngle * diectAngle * diectAngle / 5040)); //dB
ampGain = powf(10.0, ampGain / 10.0);
2025-03-25 16:27:22 +00:00
2025-03-24 02:36:46 +00:00
ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // <20><><EFBFBD><EFBFBD>ǿ<EFBFBD><C7BF>
double sigma = GPU_getSigma0dB(sigma0Params, localangle);
sigma = powf(10.0, sigma / 10.0);
double temp_amp = double(ampGain * Pt * sigma);
double temp_R = double(RstR - refPhaseRange);
bool isNan = !(isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R));
d_temp_amps[idx] = temp_amp * isNan;
d_temp_R[idx] = temp_R * isNan;
2025-03-25 15:54:47 +00:00
2025-03-24 02:36:46 +00:00
return;
}
}
}
}
2025-03-25 16:27:22 +00:00
2025-03-24 02:36:46 +00:00
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
double* d_temp_R, double* d_temp_amps, long posNum,
double f0, double dfreq,
long FreqPoints, // <20><>ǰƵ<C7B0>ʵķֿ<C4B7>
long maxfreqnum, // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ
cuComplex* echodata,
long temp_PRF_Count
) {
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // ע<><D7A2>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>block_size <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͬ<EFBFBD>ڴ<EFBFBD>
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
2025-03-24 06:33:21 +00:00
long long tid = threadIdx.x;
long long bid = blockIdx.x;
long long idx = bid * blockDim.x + tid;
long long prfId = idx / FreqPoints; // <20><><EFBFBD><EFBFBD>ID
long long fId = idx % FreqPoints;//Ƶ<><C6B5>ID
2025-03-24 02:36:46 +00:00
2025-03-24 06:33:21 +00:00
long long psid = 0;
long long pixelId = 0;
2025-03-24 02:36:46 +00:00
for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) { // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE=SHAREMEMORY_FLOAT_HALF
psid = tid * SHAREMEMORY_FLOAT_HALF_STEP + ii;
pixelId = prfId * posNum + psid; //
if (psid < posNum) {
s_R[psid] = d_temp_R[pixelId];
s_amp[psid] = d_temp_amps[pixelId];
}
else {
s_R[psid] = 0;
s_amp[psid] = 0;
}
}
__syncthreads(); // ȷ<><C8B7><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݶ<EFBFBD><DDB6>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2025-03-25 15:54:47 +00:00
if (fId < maxfreqnum && prfId < temp_PRF_Count) {
2025-03-24 02:36:46 +00:00
long echo_ID = prfId * maxfreqnum + fId; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ӧ<EFBFBD>Ļز<C4BB>λ<EFBFBD><CEBB>
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
cuComplex echo = make_cuComplex(0, 0);
float temp_phi = 0;
float temp_amp = 0;
2025-03-25 03:23:14 +00:00
2025-03-24 02:36:46 +00:00
for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
temp_phi = s_R[dataid] * factorjTemp;
2025-03-25 15:54:47 +00:00
temp_amp = s_amp[dataid];
2025-03-24 02:36:46 +00:00
echo.x += (temp_amp * cosf(temp_phi));
echo.y += (temp_amp * sinf(temp_phi));
//if (dataid > 5000) {
// printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
//}
if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y)
|| isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y)
) {
printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y);
}
}
2025-03-25 15:54:47 +00:00
2025-03-24 02:36:46 +00:00
echodata[echo_ID] = cuCaddf(echodata[echo_ID], echo);
}
}
2025-03-23 08:01:28 +00:00
__global__ void CUDA_Kernel_RFPC(
SateState* antlist,
long PRFCount, long Freqcount, // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
GoalState* goallist,
long demLen,
double StartFreqGHz, double FreqStep,
double refPhaseRange,
double NearR, double FarR,
CUDASigmaParam clsSigma0,
cuComplex* echodata
2025-03-24 02:36:46 +00:00
)
2025-03-23 08:01:28 +00:00
{
__shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];
2025-03-23 10:07:40 +00:00
size_t threadid = threadIdx.x;
2025-03-24 02:36:46 +00:00
2025-03-23 10:07:40 +00:00
size_t idx = blockIdx.x * blockDim.x + threadIdx.x; // <20><>ȡ<EFBFBD><C8A1>ǰ<EFBFBD><C7B0><EFBFBD>̱߳<DFB3><CCB1><EFBFBD>
size_t prfid = floorf(idx / Freqcount);
size_t freqid = idx % Freqcount;
2025-03-23 08:01:28 +00:00
// printf("%d,%d ",prfid,freqid);
if (prfid < PRFCount && freqid < Freqcount)
{
SateState antPos = antlist[prfid];
double factorjTemp = RFPCPIDIVLIGHT * (StartFreqGHz + freqid * FreqStep);
double Tx = 0;
double Ty = 0;
double Tz = 0;
double R = 0;
double incAngle = 0;
double echo_real = 0;
double echo_imag = 0;
cuComplex echo = make_cuComplex(0, 0);
2025-03-23 10:49:27 +00:00
2025-03-24 02:36:46 +00:00
for (long tid = 0; tid < demLen; tid++) {
GoalState p = goallist[tid];
Tx = p.Tx;
Ty = p.Ty;
Tz = p.Tz;
2025-03-23 10:49:27 +00:00
2025-03-24 02:36:46 +00:00
Tx = antPos.Px - Tx; // T->P
Ty = antPos.Py - Ty;
Tz = antPos.Pz - Tz;
2025-03-23 10:49:27 +00:00
2025-03-24 02:36:46 +00:00
R = sqrt(Tx * Tx + Ty * Ty + Tz * Tz);
bool isNearFar = (R < NearR || R > FarR) && ((abs(p.TsX) > 1000) || (abs(p.TsY) > 1000) || (abs(p.TsZ) > 1000));
2025-03-23 10:49:27 +00:00
2025-03-24 02:36:46 +00:00
incAngle = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ);
incAngle = acos((Tx * p.TsX + Ty * p.TsY + Tz * p.TsZ) / (R * incAngle));
incAngle = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3, clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle); // sigma
incAngle = pow(10.0, incAngle / 10.0); // amp
incAngle = incAngle / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); //
2025-03-23 10:49:27 +00:00
2025-03-24 02:36:46 +00:00
R = (R - refPhaseRange);
R = factorjTemp * R;
2025-03-23 10:49:27 +00:00
2025-03-24 02:36:46 +00:00
echo_real = incAngle * cos(R) * isNearFar;
echo_imag = incAngle * sin(R) * isNearFar;
echo.x = echo.x + echo_real;
echo.y = echo.y + echo_imag;
2025-03-23 10:49:27 +00:00
2025-03-24 02:36:46 +00:00
if (idx == 0 && tid % (10 * SHAREMEMORY_DEM_STEP) == 0) {
printf("Idx:%d , TsID: %d, TSCOUNT: %d \n", idx, tid, demLen);
}
2025-03-23 08:01:28 +00:00
}
echodata[idx] = cuCaddf(echodata[idx], echo);
}
}
/** <20>ֿ鴦<D6BF><E9B4A6> ****************************************************************************************************************/
2025-03-24 02:36:46 +00:00
extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
2025-03-23 08:01:28 +00:00
{
2025-03-23 10:07:40 +00:00
size_t pixelcount = task.prfNum * task.freqNum;
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
2025-03-25 15:54:47 +00:00
printf("computer pixelcount goalnum gridsize blocksize prfnum %zu,%zu ,%zu,%d ,%d \n", pixelcount, task.targetnum, grid_size, BLOCK_SIZE,task.prfNum);
2025-03-24 02:36:46 +00:00
double* d_R = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
double* d_amps = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
long BLOCK_FREQNUM = NextBlockPad(task.freqNum, BLOCK_SIZE); // 256*freqBlockID
long cudaBlocknum = 0;
long freqpoints = BLOCK_FREQNUM;
2025-03-25 03:39:28 +00:00
2025-03-24 02:36:46 +00:00
printf("freqpoints:%d\n", freqpoints);
2025-03-25 15:54:47 +00:00
long prfcount = task.prfNum;
2025-03-24 02:36:46 +00:00
long process = 0;
for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
Kernel_Computer_R_amp_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> >(
task.antlist,
2025-03-25 15:54:47 +00:00
prfcount,
2025-03-24 02:36:46 +00:00
task.goallist,
task.targetnum,
sTi, task.targetnum,
task.sigma0_cls,
2025-03-25 04:34:35 +00:00
task.Pt,
2025-03-24 02:36:46 +00:00
task.Rref,
task.Rnear, task.Rfar,
2025-03-25 16:27:22 +00:00
task.maxGain,task.GainWeight,
2025-03-24 02:36:46 +00:00
d_R, d_amps// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2025-03-23 08:01:28 +00:00
);
2025-03-24 02:36:46 +00:00
PrintLasterError("CUDA_Kernel_Computer_R_amp");
2025-03-24 06:03:54 +00:00
cudaDeviceSynchronize();
2025-03-24 02:36:46 +00:00
cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDA_Kernel_Computer_echo_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
d_R, d_amps, SHAREMEMORY_FLOAT_HALF,
task.startFreq, task.stepFreq,
freqpoints, task.freqNum,
task.d_echoData,
task.prfNum
);
PrintLasterError("CUDA_Kernel_Computer_echo");
2025-03-24 06:03:54 +00:00
cudaDeviceSynchronize();
2025-03-24 02:36:46 +00:00
if ((sTi * 100.0 / task.targetnum) - process >= 1) {
process = sTi * 100.0 / task.targetnum;
2025-03-24 06:33:21 +00:00
PRINT("TargetID [%f]: %d / %d finished %d\n", sTi * 100.0 / task.targetnum, sTi, task.targetnum,devid);
2025-03-24 02:36:46 +00:00
}
}
2025-03-23 08:01:28 +00:00
cudaDeviceSynchronize();
2025-03-24 02:36:46 +00:00
FreeCUDADevice(d_R);
FreeCUDADevice(d_amps);
2025-03-23 08:01:28 +00:00
}
#endif