RasterProcessTool/GPUTool/GPURFPC.cu

887 lines
30 KiB
Plaintext
Raw Normal View History

2025-01-15 03:35:48 +00:00
#include <time.h>
#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPURFPC.cuh"
2025-01-15 03:35:48 +00:00
#ifdef __CUDANVCC___
2025-01-14 01:25:23 +00:00
/**
 * Evaluates the six-parameter backscatter model for one angle:
 *
 *     sigma(theta) = p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6)
 *
 * Callers in this file treat the result as a dB value and convert it with
 * 10^(sigma / 10).
 *
 * @param param  Model coefficients p1..p6.
 * @param theta  Angle fed to the model (callers pass the local incidence angle
 *               produced by acos, i.e. radians).
 * @return       The model value for `theta`.
 */
__device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta) {
    const double expTerm = param.p2 * exp(-param.p3 * theta);
    const double cosTerm = param.p4 * cos(param.p5 * theta + param.p6);
    return param.p1 + expTerm + cosTerm;
}
/**
 * Expresses a line-of-sight vector in the antenna coordinate frame and returns
 * it in spherical form.
 *
 * The input vector (RstX, RstY, RstZ) is negated first; callers in this file
 * pass "antenna position minus target position", so the negated vector points
 * from the antenna towards the target. The negated vector and the three antenna
 * axes are normalised, and the components (Xant, Yant, Zant) satisfying
 *
 *     Xant * X + Yant * Y + Zant * Z = R
 *
 * are obtained with Cramer's rule, so the axes do not have to be orthogonal.
 *
 * All math is done in double precision. The previous float intrinsics
 * (sqrtf / acosf / atanf) limited the returned angles to float precision even
 * though every input is a double.
 *
 * @param RstX,RstY,RstZ          Vector to be negated and projected.
 * @param antXaxis*,antYaxis*,antZaxis*  Antenna frame axes (any non-zero length).
 * @param antDirectX,antDirectY,antDirectZ  Unused; kept for interface compatibility.
 * @return theta = angle from the antenna Z axis (radians, [0, pi]),
 *         phi   = azimuth in the antenna XY plane (radians, (-pi, pi]),
 *         Rho   = length of (Xant, Yant, Zant).
 *         Degenerate input (zero-length vector or singular axes) yields NaN
 *         fields; callers guard with `Rho > 0`.
 */
__device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
    double RstX, double RstY, double RstZ,
    double antXaxisX, double antXaxisY, double antXaxisZ,
    double antYaxisX, double antYaxisY, double antYaxisZ,
    double antZaxisX, double antZaxisY, double antZaxisZ,
    double antDirectX, double antDirectY, double antDirectZ
) {
    CUDAVectorEllipsoidal result{ 0,0,-1 };

    // Reverse the direction of the incoming vector.
    const double Xst = -1 * RstX;
    const double Yst = -1 * RstY;
    const double Zst = -1 * RstZ;

    // Normalise the line of sight and the antenna axes.
    const double RstNorm = sqrt(Xst * Xst + Yst * Yst + Zst * Zst);
    const double AntXaxisNorm = sqrt(antXaxisX * antXaxisX + antXaxisY * antXaxisY + antXaxisZ * antXaxisZ);
    const double AntYaxisNorm = sqrt(antYaxisX * antYaxisX + antYaxisY * antYaxisY + antYaxisZ * antYaxisZ);
    const double AntZaxisNorm = sqrt(antZaxisX * antZaxisX + antZaxisY * antZaxisY + antZaxisZ * antZaxisZ);

    const double Rx = Xst / RstNorm;
    const double Ry = Yst / RstNorm;
    const double Rz = Zst / RstNorm;
    const double Xx = antXaxisX / AntXaxisNorm;
    const double Xy = antXaxisY / AntXaxisNorm;
    const double Xz = antXaxisZ / AntXaxisNorm;
    const double Yx = antYaxisX / AntYaxisNorm;
    const double Yy = antYaxisY / AntYaxisNorm;
    const double Yz = antYaxisZ / AntYaxisNorm;
    const double Zx = antZaxisX / AntZaxisNorm;
    const double Zy = antZaxisY / AntZaxisNorm;
    const double Zz = antZaxisZ / AntZaxisNorm;

    // Cramer's rule: the determinant of the axis matrix is shared by all three components.
    const double det = Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx;
    const double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / det;
    const double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / det;
    const double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / det;

    // Spherical coordinates of the projected vector.
    const double Norm = sqrt(Xant * Xant + Yant * Yant + Zant * Zant);

    // Rounding can push |Zant / Norm| a hair above 1, which would make acos return NaN.
    // The comparisons are false for NaN, so a genuinely degenerate input still yields NaN.
    double cosTheta = Zant / Norm;
    if (cosTheta > 1.0) {
        cosTheta = 1.0;
    }
    else if (cosTheta < -1.0) {
        cosTheta = -1.0;
    }
    const double ThetaAnt = acos(cosTheta);   // angle from the antenna Z axis

    double PhiAnt = atan(Yant / Xant);        // principal value in (-pi/2, pi/2)
    if (fabs(Yant) < PRECISIONTOLERANCE) {
        // On the X axis.
        PhiAnt = 0;
    }
    else if (fabs(Xant) < PRECISIONTOLERANCE) {
        // On the Y axis.
        PhiAnt = (Yant > 0) ? (PI / 2) : (-PI / 2);
    }
    else if (Xant < 0) {
        // Second / third quadrant: shift the principal value by +-pi.
        PhiAnt = (Yant > 0) ? (PI + PhiAnt) : (-PI + PhiAnt);
    }
    // Xant > 0: the principal value is already correct.

    if (isnan(PhiAnt)) {
        printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
    }

    result.theta = ThetaAnt;
    result.phi = PhiAnt;
    result.Rho = Norm;
    return result;
}
2025-01-14 01:25:23 +00:00
/**
 * Bilinear interpolation of a regularly sampled antenna pattern.
 *
 * The pattern is stored row-major as antpattern[thetaIndex * phipoints + phiIndex],
 * with sample (i, j) located at (starttheta + i * dtheta, startphi + j * dphi).
 * Table values are interpolated exactly as stored (no dB <-> linear conversion).
 *
 * The cell index is computed with the double-precision floor(). The previous
 * floorf() rounded the fractional index to float first, so a query just below a
 * grid line could be assigned to the next cell.
 *
 * @param antpattern               Pattern table (thetapoints * phipoints values).
 * @param starttheta,startphi      Coordinates of sample (0, 0).
 * @param dtheta,dphi              Grid spacing (must be non-zero).
 * @param thetapoints,phipoints    Grid dimensions.
 * @param searththeta,searchphi    Query point, in the same units as the grid.
 * @return Interpolated value, or 0 when searththeta > 90 or when the query is not
 *         strictly inside a cell whose four corners all exist in the table.
 */
__device__ double GPU_BillerInterpAntPattern(double* antpattern,
    double starttheta, double startphi, double dtheta, double dphi,
    long thetapoints, long phipoints,
    double searththeta, double searchphi) {
    const double stheta = searththeta;
    const double sphi = searchphi;

    // Directions more than 90 from the antenna Z axis get no gain.
    if (stheta > 90) {
        return 0;
    }

    // Lower-left corner of the enclosing cell.
    const long lasttheta = (long)floor((stheta - starttheta) / dtheta);
    const long nextTheta = lasttheta + 1;
    const long lastphi = (long)floor((sphi - startphi) / dphi);
    const long nextPhi = lastphi + 1;

    // All four corners must exist (next = last + 1, so these four tests suffice).
    if (lasttheta < 0 || lastphi < 0 || nextTheta >= thetapoints || nextPhi >= phipoints) {
        return 0;
    }

    const double x = stheta;
    const double y = sphi;
    const double x1 = lasttheta * dtheta + starttheta;
    const double x2 = nextTheta * dtheta + starttheta;
    const double y1 = lastphi * dphi + startphi;
    const double y2 = nextPhi * dphi + startphi;

    const double z11 = antpattern[lasttheta * phipoints + lastphi];
    const double z12 = antpattern[lasttheta * phipoints + nextPhi];
    const double z21 = antpattern[nextTheta * phipoints + lastphi];
    const double z22 = antpattern[nextTheta * phipoints + nextPhi];

    // Area-weighted sum of the corner values, normalised by the cell area.
    double GainValue = (z11 * (x2 - x) * (y2 - y)
        + z21 * (x - x1) * (y2 - y)
        + z12 * (x2 - x) * (y - y1)
        + z22 * (x - x1) * (y - y1));
    GainValue = GainValue / ((x2 - x1) * (y2 - y1));
    return GainValue;
}
2025-01-14 01:25:23 +00:00
/**
 * Computes the complex echo of a single scatterer.
 *
 *     amplitude = Pt * TransAnt * ReciveAnt * sigma0 / ((4*pi)^2 * R^4)
 *     phase     = -4 * pi * R / lamda
 *     echo      = amplitude * (cos(phase) + j * sin(phase))
 *
 * The phase is evaluated in double precision. Previously it was stored in a
 * float complex number before the exponential was taken; for a range of
 * hundreds of kilometres and a centimetre-scale wavelength the phase is of the
 * order of 1e8 rad, where a float can no longer resolve a single cycle.
 * NOTE(review): this assumes the project's cuCexpf(0 + j*phi) returns
 * cos(phi) + j*sin(phi) - confirm against its definition.
 *
 * @param sigma0      Backscatter coefficient (linear).
 * @param TransAnt    Transmit antenna gain.
 * @param ReciveAnt   Receive antenna gain.
 * @param localangle  Unused; kept for interface compatibility.
 * @param R           Range to the scatterer.
 * @param slopeangle  Unused; kept for interface compatibility.
 * @param Pt          Transmit power.
 * @param lamda       Wavelength, in the same length unit as R.
 * @return Complex echo in single precision.
 */
__device__ cuComplex GPU_calculationEcho(double sigma0, double TransAnt, double ReciveAnt,
    double localangle, double R, double slopeangle, double Pt, double lamda) {
    // Amplitude part of the expression above.
    const double fourPi = 4 * LAMP_CUDA_PI;
    double amp = Pt * TransAnt * ReciveAnt;
    amp = amp * sigma0;
    amp = amp / ((fourPi * fourPi) * (R * R * R * R));

    // Two-way propagation phase, kept in double until after sin/cos.
    const double phi = (-4 * LAMP_CUDA_PI / lamda) * R;
    double sinPhi = 0;
    double cosPhi = 0;
    sincos(phi, &sinPhi, &cosPhi);

    return make_cuComplex(float(cosPhi * amp), float(sinPhi * amp));
}
2025-01-14 01:25:23 +00:00
/**
 * Kernel: converts `len` vectors to antenna-frame spherical angles in degrees.
 *
 * The body used to be a line-for-line copy of GPU_SatelliteAntDirectNormal();
 * it now calls that helper so the two can no longer drift apart.
 *
 * Launch layout: 1-D grid, one thread per element; threads with idx >= len do nothing.
 *
 * @param RstX,RstY,RstZ   Input vectors (len elements each); they are negated by
 *                         the helper before being projected.
 * @param antXaxis*,antYaxis*,antZaxis*  Antenna frame axes, shared by all elements.
 * @param antDirectX,antDirectY,antDirectZ  Forwarded to the helper, which ignores them.
 * @param thetaAnt         Output: angle from the antenna Z axis, multiplied by r2d.
 * @param phiAnt           Output: azimuth in the antenna XY plane, multiplied by r2d.
 * @param len              Number of elements.
 */
__global__ void CUDA_SatelliteAntDirectNormal(double* RstX, double* RstY, double* RstZ,
    double antXaxisX, double antXaxisY, double antXaxisZ,
    double antYaxisX, double antYaxisY, double antYaxisZ,
    double antZaxisX, double antZaxisY, double antZaxisZ,
    double antDirectX, double antDirectY, double antDirectZ,
    double* thetaAnt, double* phiAnt
    , long len) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < len) {
        const CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
            RstX[idx], RstY[idx], RstZ[idx],
            antXaxisX, antXaxisY, antXaxisZ,
            antYaxisX, antYaxisY, antYaxisZ,
            antZaxisX, antZaxisY, antZaxisZ,
            antDirectX, antDirectY, antDirectZ
        );

        // The helper returns radians; the outputs are scaled by r2d.
        thetaAnt[idx] = antVector.theta * r2d;
        phiAnt[idx] = antVector.phi * r2d;
    }
}
2025-01-14 01:25:23 +00:00
/**
 * Kernel: bilinear interpolation of an antenna pattern for `len` query points.
 *
 * Unlike the device helper GPU_BillerInterpAntPattern(), this kernel converts the
 * four corner samples with 10^(z / 10) before interpolating, and it has no
 * "theta > 90" cut-off.
 *
 * The cell index uses the double-precision floor() (floorf() could move a query
 * just below a grid line into the next cell) and the conversion uses pow() so
 * that double data is not squeezed through float.
 *
 * Launch layout: 1-D grid, one thread per query; threads with idx >= len do nothing.
 *
 * @param antpattern             Pattern table, row-major [theta][phi].
 * @param starttheta,startphi    Coordinates of sample (0, 0).
 * @param dtheta,dphi            Grid spacing (must be non-zero).
 * @param thetapoints,phipoints  Grid dimensions.
 * @param searththeta,searchphi  Query coordinates (len elements each).
 * @param searchantpattern       Output (len elements); 0 when the query is outside
 *                               the interpolable part of the grid.
 * @param len                    Number of queries.
 */
__global__ void CUDA_BillerInterpAntPattern(double* antpattern,
    double starttheta, double startphi, double dtheta, double dphi,
    long thetapoints, long phipoints,
    double* searththeta, double* searchphi, double* searchantpattern,
    long len) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const double stheta = searththeta[idx];
    const double sphi = searchphi[idx];

    // Lower-left corner of the enclosing cell.
    const long lasttheta = (long)floor((stheta - starttheta) / dtheta);
    const long nextTheta = lasttheta + 1;
    const long lastphi = (long)floor((sphi - startphi) / dphi);
    const long nextPhi = lastphi + 1;

    // All four corners must exist (next = last + 1, so these four tests suffice).
    if (lasttheta < 0 || lastphi < 0 || nextTheta >= thetapoints || nextPhi >= phipoints) {
        searchantpattern[idx] = 0;
        return;
    }

    const double x = stheta;
    const double y = sphi;
    const double x1 = lasttheta * dtheta + starttheta;
    const double x2 = nextTheta * dtheta + starttheta;
    const double y1 = lastphi * dphi + startphi;
    const double y2 = nextPhi * dphi + startphi;

    // Corner samples, converted with 10^(z / 10) before interpolation.
    const double z11 = pow(10.0, antpattern[lasttheta * phipoints + lastphi] / 10.0);
    const double z12 = pow(10.0, antpattern[lasttheta * phipoints + nextPhi] / 10.0);
    const double z21 = pow(10.0, antpattern[nextTheta * phipoints + lastphi] / 10.0);
    const double z22 = pow(10.0, antpattern[nextTheta * phipoints + nextPhi] / 10.0);

    // Area-weighted sum of the corner values, normalised by the cell area.
    double GainValue = (z11 * (x2 - x) * (y2 - y)
        + z21 * (x - x1) * (y2 - y)
        + z12 * (x2 - x) * (y - y1)
        + z22 * (x - x1) * (y - y1));
    GainValue = GainValue / ((x2 - x1) * (y2 - y1));
    searchantpattern[idx] = GainValue;
}
2025-01-14 01:25:23 +00:00
/**
 * Kernel: looks up the antenna gain for `len` directions by calling
 * GPU_BillerInterpAntPattern() once per element.
 *
 * Launch layout: 1-D grid, one thread per element; threads past `len` exit early.
 *
 * @param anttheta,antphi        Query angles (len elements each).
 * @param gain                   Output gains (len elements).
 * @param antpattern             Pattern table handed to the interpolator.
 * @param starttheta,startphi    Coordinates of the first table sample.
 * @param dtheta,dphi            Table spacing.
 * @param thetapoints,phipoints  Table dimensions.
 * @param len                    Number of elements.
 */
__global__ void CUDA_AntPatternInterpGain(double* anttheta, double* antphi, double* gain,
    double* antpattern, double starttheta, double startphi, double dtheta, double dphi, int thetapoints, int phipoints, long len) {
    const int element = blockIdx.x * blockDim.x + threadIdx.x;
    if (element >= len) {
        return;
    }

    const double queryTheta = anttheta[element];
    const double queryPhi = antphi[element];
    gain[element] = GPU_BillerInterpAntPattern(
        antpattern,
        starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
        queryTheta, queryPhi);
}
/**
 * Kernel: evaluates the linear backscatter coefficient for `len` cells.
 *
 * For each cell the class id selects a CUDASigmaParam entry, the model is
 * evaluated at the cell's local angle with GPU_getSigma0dB(), and the dB value
 * is converted with 10^(sigma / 10).
 *
 * The output is 0 when
 *   - the local angle is outside [0, pi/2) (or NaN),
 *   - the class id is outside [0, sigmaparamslistlen), or
 *   - all six model coefficients are (numerically) zero.
 *
 * Fixes over the previous version: the zero written for an out-of-range angle
 * was unconditionally overwritten by the following if/else, and the class id was
 * used as an index without any bounds check.
 *
 * Launch layout: 1-D grid, one thread per cell; threads with idx >= len do nothing.
 *
 * @param demcls              Class id per cell.
 * @param sigmaAmp            Output: linear backscatter coefficient per cell.
 * @param localanglearr       Local angle per cell (radians).
 * @param len                 Number of cells.
 * @param sigma0Paramslist    Model coefficients indexed by class id.
 * @param sigmaparamslistlen  Number of entries in sigma0Paramslist.
 */
__global__ void CUDA_InterpSigma(
    long* demcls, double* sigmaAmp, double* localanglearr, long len,
    CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    const long clsid = demcls[idx];
    const double localangle = localanglearr[idx];
    double sigmaLinear = 0;

    // Written so that a NaN angle fails the test and yields 0.
    const bool angleValid = (localangle >= 0) && (localangle < LAMP_CUDA_PI / 2);
    const bool classValid = (clsid >= 0) && (clsid < sigmaparamslistlen);

    if (angleValid && classValid) {
        const CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
        // An all-zero coefficient set marks a class without a scattering model.
        const bool emptyModel =
            fabs(tempsigma.p1) < PRECISIONTOLERANCE &&
            fabs(tempsigma.p2) < PRECISIONTOLERANCE &&
            fabs(tempsigma.p3) < PRECISIONTOLERANCE &&
            fabs(tempsigma.p4) < PRECISIONTOLERANCE &&
            fabs(tempsigma.p5) < PRECISIONTOLERANCE &&
            fabs(tempsigma.p6) < PRECISIONTOLERANCE;
        if (!emptyModel) {
            const double sigmaDb = GPU_getSigma0dB(tempsigma, localangle);
            sigmaLinear = pow(10.0, sigmaDb / 10.0);   // dB -> linear
        }
    }

    sigmaAmp[idx] = sigmaLinear;
}
2025-01-15 03:35:48 +00:00
/**
 * Kernel: for one antenna position, computes the relative range and the echo
 * amplitude of every target.
 *
 * Per target:
 *   R      = |antenna - target|
 *   outR   = R - refPhaseRange
 *   outAmp = Pt * Gt * Gr * sigma0 / ((4*pi)^2 * R^4)
 * where Gt / Gr are interpolated from the transmit / receive patterns at the
 * target direction in the antenna frame (degrees), and sigma0 is the class
 * backscatter model evaluated at the local angle between the
 * antenna-minus-target vector and the slope vector.
 *
 * Both outputs are 0 when R is outside [NearR, FarR] or the antenna-frame
 * direction is degenerate (Rho <= 0). The amplitude is 0 when the local angle
 * is outside [0, pi/2), the slope vector has zero length, the class id is
 * outside [0, sigmaparamslistlen), or the class has an all-zero model.
 *
 * Fixes over the previous version:
 *   - sigma0 was reset to 0 for an out-of-range local angle and then
 *     unconditionally overwritten by the model value;
 *   - nothing was written to outR / outAmp when Rho <= 0, leaving stale data;
 *   - the class id was used as an index without a bounds check;
 *   - float intrinsics (sqrtf / acosf / powf) were applied to double data.
 *
 * Launch layout: 1-D grid, one thread per target; threads with idx >= len do nothing.
 *
 * @param antX,antY,antZ              Antenna position.
 * @param targetX,targetY,targetZ     Target positions (len elements each).
 * @param len                         Number of targets.
 * @param demCls                      Class id per target.
 * @param demSlopeX,demSlopeY,demSlopeZ  Slope vector per target.
 * @param antXaxis*,antYaxis*,antZaxis*  Antenna frame axes.
 * @param antDirectX,antDirectY,antDirectZ  Forwarded to GPU_SatelliteAntDirectNormal().
 * @param Pt                          Transmit power.
 * @param refPhaseRange               Reference range subtracted from R.
 * @param TransAntpattern, Trans*     Transmit pattern table and its grid description.
 * @param ReceiveAntpattern, Receive* Receive pattern table and its grid description.
 * @param NearR,FarR                  Accepted range interval.
 * @param sigma0Paramslist            Backscatter model coefficients per class.
 * @param sigmaparamslistlen          Number of entries in sigma0Paramslist.
 * @param outR                        Output: R - refPhaseRange (len elements).
 * @param outAmp                      Output: echo amplitude (len elements).
 */
__global__ void CUDAKernel_RFPC_Computer_R_Gain(
    double antX, double antY, double antZ,
    double* targetX, double* targetY, double* targetZ, long len,
    long* demCls,
    double* demSlopeX, double* demSlopeY, double* demSlopeZ,
    double antXaxisX, double antXaxisY, double antXaxisZ,
    double antYaxisX, double antYaxisY, double antYaxisZ,
    double antZaxisX, double antZaxisY, double antZaxisZ,
    double antDirectX, double antDirectY, double antDirectZ,
    double Pt,
    double refPhaseRange,
    double* TransAntpattern, double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
    double* ReceiveAntpattern, double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
    double NearR, double FarR,
    CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
    float* outR,
    float* outAmp
) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= len) {
        return;
    }

    // Every path below ends with a store, so no stale value can survive.
    float resultR = 0;
    float resultAmp = 0;

    // Antenna-minus-target vector and its length.
    const double RstX = antX - targetX[idx];
    const double RstY = antY - targetY[idx];
    const double RstZ = antZ - targetZ[idx];
    const double RstR2 = RstX * RstX + RstY * RstY + RstZ * RstZ;
    const double RstR = sqrt(RstR2);

    if (!(RstR < NearR || RstR > FarR)) {
        // Local angle between the antenna-minus-target vector and the slope vector.
        const double slopeX = demSlopeX[idx];
        const double slopeY = demSlopeY[idx];
        const double slopeZ = demSlopeZ[idx];
        const double slopR = sqrt(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ);
        const double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
        const double normProduct = RstR * slopR;
        double localangle = -1;   // stays invalid when either vector has zero length
        if (normProduct > 0) {
            double cosLocal = dotAB / normProduct;
            // Guard acos against rounding slightly outside [-1, 1].
            cosLocal = fmax(-1.0, fmin(1.0, cosLocal));
            localangle = acos(cosLocal);
        }

        // Direction of the target in the antenna frame.
        const CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
            RstX, RstY, RstZ,
            antXaxisX, antXaxisY, antXaxisZ,
            antYaxisX, antYaxisY, antYaxisZ,
            antZaxisX, antZaxisY, antZaxisZ,
            antDirectX, antDirectY, antDirectZ
        );

        if (antVector.Rho > 0) {
            const double temptheta = antVector.theta * r2d;
            const double tempphi = antVector.phi * r2d;

            // Transmit pattern gain.
            const double TansantPatternGain = GPU_BillerInterpAntPattern(
                TransAntpattern,
                Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
                temptheta, tempphi);
            // Receive pattern gain.
            const double antPatternGain = GPU_BillerInterpAntPattern(
                ReceiveAntpattern,
                Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
                temptheta, tempphi);

            // Backscatter coefficient (linear); 0 unless every precondition holds.
            double sigma0 = 0;
            const long clsid = demCls[idx];
            const bool angleValid = (localangle >= 0) && (localangle < LAMP_CUDA_PI / 2);
            const bool classValid = (clsid >= 0) && (clsid < sigmaparamslistlen);
            if (angleValid && classValid) {
                const CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
                const bool emptyModel =
                    fabs(tempsigma.p1) < PRECISIONTOLERANCE &&
                    fabs(tempsigma.p2) < PRECISIONTOLERANCE &&
                    fabs(tempsigma.p3) < PRECISIONTOLERANCE &&
                    fabs(tempsigma.p4) < PRECISIONTOLERANCE &&
                    fabs(tempsigma.p5) < PRECISIONTOLERANCE &&
                    fabs(tempsigma.p6) < PRECISIONTOLERANCE;
                if (!emptyModel) {
                    sigma0 = pow(10.0, GPU_getSigma0dB(tempsigma, localangle) / 10.0);   // dB -> linear
                }
            }

            if (sigma0 > 0) {
                const double fourPi = 4 * LAMP_CUDA_PI;
                double ampGain = TansantPatternGain * antPatternGain;
                ampGain = ampGain / ((fourPi * fourPi) * (RstR2 * RstR2));   // spreading loss
                resultAmp = float(ampGain * Pt * sigma0);
            }
            resultR = float(RstR - refPhaseRange);
        }
    }

    outAmp[idx] = resultAmp;
    outR[idx] = resultR;
}
2025-01-14 01:25:23 +00:00
2025-01-15 10:48:43 +00:00
/**
 * Kernel: accumulates the frequency-domain echo of one pulse.
 *
 * Each thread owns GPU_SHARE_STEP consecutive pixels starting at
 * (global thread index) * GPU_SHARE_STEP. For every frequency
 * f = f0 + fid * dfreq it sums amp * exp(j * RFPCPIDIVLIGHT * f * R) over its
 * pixels and adds the partial sum to echo_real / echo_imag at
 * [prfid * freqnum + fid] with atomicAdd.
 *
 * Shared memory is used purely as per-thread scratch: a thread only reads the
 * slots (i * BLOCK_SIZE + threadIdx.x) that it wrote itself, so no block
 * barrier is needed between the staging loop and the accumulation loop.
 *
 * Preconditions (not checked here):
 *   - 1-D launch with blockDim.x <= BLOCK_SIZE;
 *   - GPU_SHARE_MEMORY >= GPU_SHARE_STEP * BLOCK_SIZE;
 *   - echo_real / echo_imag are initialised by the caller.
 *
 * Fixes over the previous version: the pixel offset was computed in 32-bit int
 * arithmetic (overflowing for large pixel counts), double literals were stored
 * into float arrays, and several locals were unused.
 *
 * @param Rarr       Relative range per pixel.
 * @param ampArr     Amplitude per pixel.
 * @param pixelcount Number of pixels.
 * @param f0         First frequency.
 * @param dfreq      Frequency step.
 * @param freqnum    Number of frequencies.
 * @param echo_real  Real part of the echo, indexed [prfid * freqnum + fid].
 * @param echo_imag  Imaginary part of the echo, same indexing.
 * @param prfid      Index of the pulse being accumulated.
 */
__global__ void CUDAKernel_PRF_GeneratorEcho(float* Rarr, float* ampArr,
    long pixelcount,
    float f0, float dfreq, long freqnum,
    float* echo_real, float* echo_imag, long prfid)
{
    __shared__ float s_R[GPU_SHARE_MEMORY];
    __shared__ float s_Amp[GPU_SHARE_MEMORY];

    const int tid = threadIdx.x;
    // 64-bit arithmetic: (global thread index) * GPU_SHARE_STEP can exceed the int range.
    const long long globalThread = (long long)blockIdx.x * blockDim.x + tid;
    const long long startPIX = globalThread * GPU_SHARE_STEP;

    // Stage this thread's pixels; slots past the end of the data are zero-filled
    // so they contribute nothing to the sums below.
    for (long i = 0; i < GPU_SHARE_STEP; i++) {
        const long slot = i * BLOCK_SIZE + tid;
        const bool inRange = (startPIX + i) < pixelcount;
        s_R[slot] = inRange ? Rarr[startPIX + i] : 0.0f;
        s_Amp[slot] = inRange ? ampArr[startPIX + i] : 0.0f;
    }

    // Threads whose first pixel is already out of range have nothing to add.
    if (startPIX < pixelcount) {
        for (long fid = 0; fid < freqnum; fid++) {
            const float factorjTemp = RFPCPIDIVLIGHT * (f0 + fid * dfreq);
            float temp_real = 0;
            float temp_imag = 0;
            for (long j = 0; j < GPU_SHARE_STEP; j++) {
                const long slot = j * BLOCK_SIZE + tid;
                const float temp_phi = s_R[slot] * factorjTemp;
                const float temp_amp = s_Amp[slot];
                temp_real += temp_amp * cosf(temp_phi);
                temp_imag += temp_amp * sinf(temp_phi);
            }
            // Many threads contribute to the same frequency bin.
            atomicAdd(&echo_real[prfid * freqnum + fid], temp_real);
            atomicAdd(&echo_imag[prfid * freqnum + fid], temp_imag);
        }
    }
}
2025-01-20 07:49:54 +00:00
// <20><><EFBFBD><EFBFBD>ÿ<EFBFBD><C3BF>
/**
 * Kernel: fills the [pulse][target] tables of relative range and echo amplitude
 * for one block of pulses and one chunk of targets.
 *
 * Thread idx maps to prfId = idx / BlockPostions and posId = idx % BlockPostions;
 * results are stored at d_temp_R[idx] / d_temp_amps[idx], i.e. row-major with a
 * row length of BlockPostions. The antenna arrays are indexed with the absolute
 * pulse id (sPid + prfId); the target arrays with the chunk-relative posId.
 *
 * Per valid (pulse, target) pair:
 *   R    = |antenna - target|
 *   d_temp_R    = R - refPhaseRange
 *   d_temp_amps = Pt * Gt * Gr * sigma0 / ((4*pi)^2 * R^4)
 *
 * A slot is written as 0 / 0 when the pulse is past PRFCount, the target is past
 * the staged chunk (posId >= TargetNumber) or past the overall target count
 * (sPosId + posId >= pixelcount), R is outside [NearR, FarR], or the
 * antenna-frame direction is degenerate. The amplitude alone is 0 when the local
 * angle is outside [0, pi/2), the slope vector has zero length, the class id is
 * outside [0, sigmaparamslistlen), or the class has an all-zero model.
 *
 * Fixes over the previous version:
 *   - the class id was read with demCls[idx] (the flattened pulse*target index)
 *     instead of demCls[posId], reading out of bounds for every pulse but the first;
 *   - sigma0 was reset to 0 for an out-of-range local angle and then overwritten;
 *   - slots belonging to pulses / targets beyond the valid range were never
 *     written, so stale values from the previous launch leaked into the echo sum;
 *   - sPosId / pixelcount / TargetNumber were ignored;
 *   - float intrinsics were applied to double data.
 *
 * Launch layout: 1-D grid with at least BlockPRFCount * BlockPostions threads.
 *
 * @param antX..antDirectZ   Per-pulse antenna position, frame axes and pointing
 *                           (indexed by absolute pulse id).
 * @param sPid               Absolute id of the first pulse of this block.
 * @param PRFCount           Total number of pulses.
 * @param targetX,targetY,targetZ,demCls  Staged target chunk (indexed by posId).
 * @param TargetNumber       Number of targets staged in the chunk.
 * @param demSlopeX,demSlopeY,demSlopeZ   Staged slope vectors (indexed by posId).
 * @param sPosId             Absolute id of the first target of this chunk.
 * @param pixelcount         Total number of targets.
 * @param sigma0Paramslist, sigmaparamslistlen  Backscatter models per class.
 * @param Pt, refPhaseRange  Transmit power and reference range.
 * @param TransAntpattern.., ReceiveAntpattern..  Antenna pattern tables and grids.
 * @param NearR,FarR         Accepted range interval.
 * @param BlockPRFCount      Number of pulse rows in the output tables.
 * @param BlockPostions      Row length of the output tables.
 * @param d_temp_R,d_temp_amps  Output tables (BlockPRFCount * BlockPostions floats each).
 */
__global__ void CUDA_Kernel_Computer_R_amp(
    double* antX, double* antY, double* antZ,
    double* antXaxisX, double* antXaxisY, double* antXaxisZ,
    double* antYaxisX, double* antYaxisY, double* antYaxisZ,
    double* antZaxisX, double* antZaxisY, double* antZaxisZ,
    double* antDirectX, double* antDirectY, double* antDirectZ,
    long sPid, long PRFCount,
    double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber,
    double* demSlopeX, double* demSlopeY, double* demSlopeZ,
    long sPosId, long pixelcount,
    CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
    double Pt,
    double refPhaseRange,
    double* TransAntpattern,
    double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
    double* ReceiveAntpattern,
    double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
    double NearR, double FarR,
    long BlockPRFCount,
    long BlockPostions,
    float* d_temp_R, float* d_temp_amps
) {
    const long idx = (long)blockIdx.x * blockDim.x + threadIdx.x;
    if (BlockPostions <= 0) {
        return;
    }
    const long prfId = idx / BlockPostions;
    const long posId = idx % BlockPostions;
    if (prfId >= BlockPRFCount) {
        return;   // past the end of the output tables
    }

    const long aprfId = sPid + prfId;   // absolute pulse id
    const long aposId = posId;          // index into the staged target chunk

    // Every slot inside the tables is written, so the echo kernel never sees stale data.
    float resultR = 0;
    float resultAmp = 0;

    const bool pairValid =
        (aprfId < PRFCount) &&
        (posId < TargetNumber) &&
        (sPosId + posId < pixelcount);

    if (pairValid) {
        // Antenna-minus-target vector and its length.
        const double RstX = antX[aprfId] - targetX[aposId];
        const double RstY = antY[aprfId] - targetY[aposId];
        const double RstZ = antZ[aprfId] - targetZ[aposId];
        const double RstR2 = RstX * RstX + RstY * RstY + RstZ * RstZ;
        const double RstR = sqrt(RstR2);

        if (!(RstR < NearR || RstR > FarR)) {
            // Local angle between the antenna-minus-target vector and the slope vector.
            const double slopeX = demSlopeX[aposId];
            const double slopeY = demSlopeY[aposId];
            const double slopeZ = demSlopeZ[aposId];
            const double slopR = sqrt(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ);
            const double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
            const double normProduct = RstR * slopR;
            double localangle = -1;   // stays invalid when either vector has zero length
            if (normProduct > 0) {
                double cosLocal = dotAB / normProduct;
                // Guard acos against rounding slightly outside [-1, 1].
                cosLocal = fmax(-1.0, fmin(1.0, cosLocal));
                localangle = acos(cosLocal);
            }

            // Direction of the target in the antenna frame, converted with r2d.
            CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
                RstX, RstY, RstZ,
                antXaxisX[aprfId], antXaxisY[aprfId], antXaxisZ[aprfId],
                antYaxisX[aprfId], antYaxisY[aprfId], antYaxisZ[aprfId],
                antZaxisX[aprfId], antZaxisY[aprfId], antZaxisZ[aprfId],
                antDirectX[aprfId], antDirectY[aprfId], antDirectZ[aprfId]
            );
            antVector.theta = antVector.theta * r2d;
            antVector.phi = antVector.phi * r2d;

            if (antVector.Rho > 0) {
                const double TansantPatternGain = GPU_BillerInterpAntPattern(
                    TransAntpattern,
                    Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
                    antVector.theta, antVector.phi);
                const double antPatternGain = GPU_BillerInterpAntPattern(
                    ReceiveAntpattern,
                    Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
                    antVector.theta, antVector.phi);

                // Backscatter coefficient (linear); 0 unless every precondition holds.
                double sigma0 = 0;
                const long clsid = demCls[aposId];   // class of the TARGET, not of the (pulse, target) slot
                const bool angleValid = (localangle >= 0) && (localangle < LAMP_CUDA_PI / 2);
                const bool classValid = (clsid >= 0) && (clsid < sigmaparamslistlen);
                if (angleValid && classValid) {
                    const CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
                    const bool emptyModel =
                        fabs(tempsigma.p1) < PRECISIONTOLERANCE &&
                        fabs(tempsigma.p2) < PRECISIONTOLERANCE &&
                        fabs(tempsigma.p3) < PRECISIONTOLERANCE &&
                        fabs(tempsigma.p4) < PRECISIONTOLERANCE &&
                        fabs(tempsigma.p5) < PRECISIONTOLERANCE &&
                        fabs(tempsigma.p6) < PRECISIONTOLERANCE;
                    if (!emptyModel) {
                        sigma0 = pow(10.0, GPU_getSigma0dB(tempsigma, localangle) / 10.0);   // dB -> linear
                    }
                }

                if (sigma0 > 0) {
                    const double fourPi = 4 * LAMP_CUDA_PI;
                    double ampGain = TansantPatternGain * antPatternGain;
                    ampGain = ampGain / ((fourPi * fourPi) * (RstR2 * RstR2));   // spreading loss
                    resultAmp = float(ampGain * Pt * sigma0);
                }
                resultR = float(RstR - refPhaseRange);
            }
        }
    }

    d_temp_R[idx] = resultR;
    d_temp_amps[idx] = resultAmp;
}
/**
 * Kernel: accumulates the frequency-domain echo of a block of pulses from the
 * [pulse][target] range / amplitude tables.
 *
 * Thread idx maps to prfId = idx / FreqPoints and fId = idx % FreqPoints. For
 * f = f0 + fId * dfreq the thread sums
 *
 *     amp[prfId][pos] * exp(j * RFPCPIDIVLIGHT * f * R[prfId][pos])
 *
 * over pos in [0, posNum) and adds the result to d_temp_echo_real[idx] /
 * d_temp_echo_imag[idx]. Each output element is owned by exactly one thread, so
 * a plain += is sufficient.
 *
 * Fixes over the previous version: the shared-memory staging loaded the first
 * BLOCK_SIZE * BLOCK_SIZE table entries regardless of which pulse the thread
 * belonged to, and the sum ran over SHAREMEMORY_FLOAT_HALF entries instead of
 * posNum. Every pulse therefore summed the same (partly unrelated or
 * uninitialised) data, and the loads could run past the tables. The tables are
 * now read directly, row prfId, length posNum.
 *
 * Launch layout: 1-D grid with at least temp_PRF_Count * FreqPoints threads.
 *
 * @param d_temp_R,d_temp_amps  Input tables, row-major with row length posNum.
 * @param posNum                Number of targets per pulse row.
 * @param f0,dfreq              First frequency and frequency step.
 * @param FreqPoints            Row length of the echo buffers (padded frequency count).
 * @param maxfreqnum            Number of frequencies actually computed per row.
 * @param d_temp_echo_real,d_temp_echo_imag  Echo accumulators, indexed
 *                              [prfId * FreqPoints + fId].
 * @param temp_PRF_Count        Number of pulse rows to process.
 */
__global__ void CUDA_Kernel_Computer_echo(
    float* d_temp_R, float* d_temp_amps, long posNum,
    float f0, float dfreq, long FreqPoints, long maxfreqnum,
    float* d_temp_echo_real, float* d_temp_echo_imag,
    long temp_PRF_Count
) {
    const long idx = (long)blockIdx.x * blockDim.x + threadIdx.x;
    if (FreqPoints <= 0) {
        return;
    }
    const long prfId = idx / FreqPoints;   // pulse row
    const long fId = idx % FreqPoints;     // frequency bin
    if (fId >= maxfreqnum || prfId >= temp_PRF_Count) {
        return;   // padding bin or past the last pulse
    }

    const float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);

    // Row of this pulse in the [pulse][target] tables.
    const float* rowR = d_temp_R + prfId * posNum;
    const float* rowAmp = d_temp_amps + prfId * posNum;

    float temp_real = 0;
    float temp_imag = 0;
    for (long pos = 0; pos < posNum; pos++) {
        const float temp_phi = rowR[pos] * factorjTemp;
        const float temp_amp = rowAmp[pos];
        temp_real += temp_amp * cosf(temp_phi);
        temp_imag += temp_amp * sinf(temp_phi);
    }

    d_temp_echo_real[idx] += temp_real;
    d_temp_echo_imag[idx] += temp_imag;
}
/**
 * Host driver: simulates the frequency-domain echo of every pulse by looping
 * over blocks of pulses and chunks of targets.
 *
 * For each pulse block the device echo accumulators are cleared; for each
 * target chunk the targets are staged to the device, CUDA_Kernel_Computer_R_amp
 * fills the [pulse][target] range / amplitude tables, and
 * CUDA_Kernel_Computer_echo adds their contribution to the accumulators. After
 * the last chunk the accumulators are copied back and ADDED to
 * out_echoReal / out_echoImag (the caller is responsible for initialising them).
 *
 * Pointer conventions, as implied by how the arguments are used here:
 *   - ant*, sigma0Paramslist, TransAntpattern and ReceiveAntpattern are passed
 *     straight to the kernels and must therefore be device-accessible;
 *   - targetX/Y/Z, demCls, demSlopeX/Y/Z and out_echoReal/Imag are dereferenced
 *     on the host.
 *
 * Fixes over the previous version:
 *   - the host staging buffers (one chunk long) were indexed with sTi + ii and
 *     overflowed for every chunk after the first;
 *   - the inner accumulation loop incremented ii instead of jj;
 *   - results were fetched with DeviceToDevice() into host memory; an explicit
 *     device-to-host cudaMemcpy is used instead;
 *   - the host read the echo rows with a stride of FreqNum although the device
 *     rows are freqpoints (padded) long;
 *   - the last pulse block wrote past out_echoReal/Imag, and the last target
 *     chunk re-uploaded stale targets; both are now clipped to the valid count,
 *     which is also what the kernels receive as table dimensions;
 *   - `long` values were printed with %d;
 *   - unused locals were removed.
 *
 * @param antX..antDirectZ   Per-pulse antenna position, frame axes and pointing.
 * @param PRFCount           Number of pulses.
 * @param FreqNum            Number of frequency points per pulse.
 * @param f0,dfreq           First frequency and frequency step.
 * @param Pt                 Transmit power.
 * @param refPhaseRange      Reference range subtracted from every range.
 * @param TransAntpattern.., ReceiveAntpattern..  Antenna pattern tables and grids.
 * @param NearR,FarR         Accepted range interval.
 * @param targetX,targetY,targetZ,demCls  Target positions and class ids.
 * @param TargetNumber       Number of targets.
 * @param demSlopeX,demSlopeY,demSlopeZ   Target slope vectors.
 * @param sigma0Paramslist, sigmaparamslistlen  Backscatter models per class.
 * @param out_echoReal,out_echoImag  Host output, PRFCount * FreqNum floats each,
 *                           indexed [pulse * FreqNum + frequency].
 */
void CUDA_RFPC_MainProcess(
    double* antX, double* antY, double* antZ,
    double* antXaxisX, double* antXaxisY, double* antXaxisZ,
    double* antYaxisX, double* antYaxisY, double* antYaxisZ,
    double* antZaxisX, double* antZaxisY, double* antZaxisZ,
    double* antDirectX, double* antDirectY, double* antDirectZ,
    long PRFCount, long FreqNum,
    float f0, float dfreq,
    double Pt,
    double refPhaseRange,
    double* TransAntpattern,
    double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
    double* ReceiveAntpattern,
    double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
    double NearR, double FarR,
    double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber,
    double* demSlopeX, double* demSlopeY, double* demSlopeZ,
    CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
    float* out_echoReal, float* out_echoImag)
{
    // Block geometry: padded frequency row length, pulses per block, targets per chunk.
    long freqpoints = NextBlockPad(FreqNum, BLOCK_SIZE);
    long BlockPRFCount = getBlockRows(2000, freqpoints, sizeof(double));
    long BlockTarlist = getBlockRows(2000, BlockPRFCount, sizeof(double));
    BlockTarlist = BlockTarlist > SHAREMEMORY_FLOAT_HALF ? SHAREMEMORY_FLOAT_HALF : BlockTarlist;

    // Nothing to do, or a degenerate block geometry that would never advance the loops.
    if (PRFCount <= 0 || FreqNum <= 0 || TargetNumber <= 0 ||
        freqpoints <= 0 || BlockPRFCount <= 0 || BlockTarlist <= 0) {
        return;
    }

    // Host staging buffers for one target chunk.
    double* h_tX = (double*)mallocCUDAHost(sizeof(double) * BlockTarlist);
    double* h_tY = (double*)mallocCUDAHost(sizeof(double) * BlockTarlist);
    double* h_tZ = (double*)mallocCUDAHost(sizeof(double) * BlockTarlist);
    double* h_sloperX = (double*)mallocCUDAHost(sizeof(double) * BlockTarlist);
    double* h_sloperY = (double*)mallocCUDAHost(sizeof(double) * BlockTarlist);
    double* h_sloperZ = (double*)mallocCUDAHost(sizeof(double) * BlockTarlist);
    long* h_cls = (long*)mallocCUDAHost(sizeof(long) * BlockTarlist);

    // Device copies of the target chunk.
    double* d_tX = (double*)mallocCUDADevice(sizeof(double) * BlockTarlist);
    double* d_tY = (double*)mallocCUDADevice(sizeof(double) * BlockTarlist);
    double* d_tZ = (double*)mallocCUDADevice(sizeof(double) * BlockTarlist);
    double* d_sloperX = (double*)mallocCUDADevice(sizeof(double) * BlockTarlist);
    double* d_sloperY = (double*)mallocCUDADevice(sizeof(double) * BlockTarlist);
    double* d_sloperZ = (double*)mallocCUDADevice(sizeof(double) * BlockTarlist);
    long* d_cls = (long*)mallocCUDADevice(sizeof(long) * BlockTarlist);

    // [pulse][target] range / amplitude tables and [pulse][frequency] echo accumulators.
    float* d_temp_R = (float*)mallocCUDADevice(sizeof(float) * BlockPRFCount * BlockTarlist);
    float* d_temp_amp = (float*)mallocCUDADevice(sizeof(float) * BlockPRFCount * BlockTarlist);
    float* d_temp_echo_real = (float*)mallocCUDADevice(sizeof(float) * BlockPRFCount * freqpoints);
    float* d_temp_echo_imag = (float*)mallocCUDADevice(sizeof(float) * BlockPRFCount * freqpoints);
    float* h_temp_echo_real = (float*)mallocCUDAHost(sizeof(float) * BlockPRFCount * freqpoints);
    float* h_temp_echo_imag = (float*)mallocCUDAHost(sizeof(float) * BlockPRFCount * freqpoints);

    long cudaBlocknum = 0;
    for (long spid = 0; spid < PRFCount; spid = spid + BlockPRFCount) {
        // Number of pulses really present in this block (the last one may be short).
        const long curPrfCount = (PRFCount - spid) < BlockPRFCount ? (PRFCount - spid) : BlockPRFCount;

        // step 0: clear the echo accumulators
        {
            cudaBlocknum = (BlockPRFCount * freqpoints + BLOCK_SIZE - 1) / BLOCK_SIZE;
            CUDAKernel_MemsetBlock << < cudaBlocknum, BLOCK_SIZE >> > (d_temp_echo_real, 0, BlockPRFCount * freqpoints);
            CUDAKernel_MemsetBlock << < cudaBlocknum, BLOCK_SIZE >> > (d_temp_echo_imag, 0, BlockPRFCount * freqpoints);
        }

        for (long sTi = 0; sTi < TargetNumber; sTi = sTi + BlockTarlist) {
            // Number of targets really present in this chunk (the last one may be short).
            const long curTarCount = (TargetNumber - sTi) < BlockTarlist ? (TargetNumber - sTi) : BlockTarlist;

            // step 1: stage the target chunk and upload it
            {
                for (long ii = 0; ii < curTarCount; ii++) {
                    // Staging buffers are chunk-relative; only the source is offset by sTi.
                    h_tX[ii] = targetX[sTi + ii];
                    h_tY[ii] = targetY[sTi + ii];
                    h_tZ[ii] = targetZ[sTi + ii];
                    h_sloperX[ii] = demSlopeX[sTi + ii];
                    h_sloperY[ii] = demSlopeY[sTi + ii];
                    h_sloperZ[ii] = demSlopeZ[sTi + ii];
                    h_cls[ii] = demCls[sTi + ii];
                }

                PRINT("Host -> Device start ,BlockTarlist %ld \n", BlockTarlist);
                HostToDevice(h_tX, d_tX, sizeof(double) * curTarCount);
                HostToDevice(h_tY, d_tY, sizeof(double) * curTarCount);
                HostToDevice(h_tZ, d_tZ, sizeof(double) * curTarCount);
                HostToDevice(h_sloperX, d_sloperX, sizeof(double) * curTarCount);
                HostToDevice(h_sloperY, d_sloperY, sizeof(double) * curTarCount);
                HostToDevice(h_sloperZ, d_sloperZ, sizeof(double) * curTarCount);
                HostToDevice(h_cls, d_cls, sizeof(long) * curTarCount);
                PRINT("Host -> Device finished \n");
            }

            // step 2: range / amplitude tables, laid out [curPrfCount][curTarCount]
            {
                cudaBlocknum = (curPrfCount * curTarCount + BLOCK_SIZE - 1) / BLOCK_SIZE;
                CUDA_Kernel_Computer_R_amp << <cudaBlocknum, BLOCK_SIZE >> > (
                    antX, antY, antZ,
                    antXaxisX, antXaxisY, antXaxisZ,
                    antYaxisX, antYaxisY, antYaxisZ,
                    antZaxisX, antZaxisY, antZaxisZ,
                    antDirectX, antDirectY, antDirectZ,
                    spid, PRFCount,
                    d_tX, d_tY, d_tZ, d_cls, curTarCount,
                    d_sloperX, d_sloperY, d_sloperZ,
                    sTi, TargetNumber,
                    sigma0Paramslist, sigmaparamslistlen,
                    Pt,
                    refPhaseRange,
                    TransAntpattern,
                    Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
                    ReceiveAntpattern,
                    Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
                    NearR, FarR,
                    curPrfCount,
                    curTarCount,
                    d_temp_R, d_temp_amp
                    );
            }

            // step 3: accumulate the echo of this chunk
            {
                cudaBlocknum = (curPrfCount * freqpoints + BLOCK_SIZE - 1) / BLOCK_SIZE;
                CUDA_Kernel_Computer_echo << <cudaBlocknum, BLOCK_SIZE >> > (
                    d_temp_R, d_temp_amp, curTarCount,
                    f0, dfreq, freqpoints, FreqNum,
                    d_temp_echo_real, d_temp_echo_imag,
                    curPrfCount
                    );
            }

            PRINT("PRF %ld / %ld , TargetID: %ld / %ld \n", spid, PRFCount, sTi, sTi + curTarCount);
        }

        // Fetch the accumulated block. cudaMemcpy blocks until the kernels queued
        // above have finished, and its status also reports any earlier kernel failure.
        cudaError_t copyStatus = cudaMemcpy(h_temp_echo_real, d_temp_echo_real,
            sizeof(float) * curPrfCount * freqpoints, cudaMemcpyDeviceToHost);
        if (copyStatus == cudaSuccess) {
            copyStatus = cudaMemcpy(h_temp_echo_imag, d_temp_echo_imag,
                sizeof(float) * curPrfCount * freqpoints, cudaMemcpyDeviceToHost);
        }
        if (copyStatus != cudaSuccess) {
            PRINT("CUDA_RFPC_MainProcess: device -> host copy failed: %s \n", cudaGetErrorString(copyStatus));
            break;   // fall through to the clean-up below
        }

        // Device rows are freqpoints long (padded); output rows are FreqNum long.
        for (long ii = 0; ii < curPrfCount; ii++) {
            for (long jj = 0; jj < FreqNum; jj++) {
                out_echoReal[(ii + spid) * FreqNum + jj] += h_temp_echo_real[ii * freqpoints + jj];
                out_echoImag[(ii + spid) * FreqNum + jj] += h_temp_echo_imag[ii * freqpoints + jj];
            }
        }
    }

    // Release host and device buffers.
    FreeCUDAHost(h_tX);
    FreeCUDAHost(h_tY);
    FreeCUDAHost(h_tZ);
    FreeCUDAHost(h_sloperX);
    FreeCUDAHost(h_sloperY);
    FreeCUDAHost(h_sloperZ);
    FreeCUDAHost(h_cls);
    FreeCUDADevice(d_tX);
    FreeCUDADevice(d_tY);
    FreeCUDADevice(d_tZ);
    FreeCUDADevice(d_sloperX);
    FreeCUDADevice(d_sloperY);
    FreeCUDADevice(d_sloperZ);
    FreeCUDADevice(d_cls);
    FreeCUDADevice(d_temp_R);
    FreeCUDADevice(d_temp_amp);
    FreeCUDAHost(h_temp_echo_real);
    FreeCUDAHost(h_temp_echo_imag);
    FreeCUDADevice(d_temp_echo_real);
    FreeCUDADevice(d_temp_echo_imag);
}
2025-01-15 03:35:48 +00:00
2025-01-14 01:25:23 +00:00
#endif