469 lines
15 KiB
Plaintext
469 lines
15 KiB
Plaintext
|
#include <cuda.h>
|
|||
|
#include <device_launch_parameters.h>
|
|||
|
#include <cuda_runtime.h>
|
|||
|
#include <cublas_v2.h>
|
|||
|
#include <cuComplex.h>
|
|||
|
|
|||
|
#include "BaseConstVariable.h"
|
|||
|
#include "GPURFPC_single.cuh"
|
|||
|
|
|||
|
|
|||
|
#ifdef __CUDANVCC___
|
|||
|
|
|||
|
|
|||
|
/* Device-side helper functions *******************************************************************************************************/
|
|||
|
|
|||
|
|
|||
|
/**
 * Evaluates the six-parameter backscatter model
 *     p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6)
 * for one parameter set.
 *
 * Uses the single-precision math functions (expf / cosf) so that this overload
 * matches the scalar-argument overload of the same name in this file.
 *
 * @param param  Model coefficients p1..p6.
 * @param theta  Angle fed to the model. The kernel in this file passes the local
 *               incidence angle returned by acos, i.e. radians.
 * @return The model value. The kernel in this file treats it as dB and converts
 *         it with 10^(value / 10).
 */
extern __device__ float GPU_getSigma0dB_single(CUDASigmaParam_single param, float theta) {
    return param.p1 + param.p2 * expf(-param.p3 * theta) + param.p4 * cosf(param.p5 * theta + param.p6);
}
|
|||
|
|
|||
|
|
|||
|
/**
 * Scalar-argument form of the backscatter model
 *     p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6).
 *
 * @param p1..p6  Model coefficients.
 * @param theta   Angle fed to the model (same unit the coefficients were fitted for).
 * @return The model value in single precision.
 */
extern __device__ float GPU_getSigma0dB_single(const float p1, const float p2, const float p3, const float p4, const float p5, const float p6, float theta)
{
    // Exponential decay contribution.
    const float decayTerm = p2 * expf(-p3 * theta);
    // Oscillating contribution.
    const float cosineTerm = p4 * cosf(p5 * theta + p6);
    return p1 + decayTerm + cosineTerm;
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
/**
 * Expresses a line-of-sight vector in the antenna axis frame and returns it in
 * spherical form (theta, phi, Rho).
 *
 * The input vector (RstX, RstY, RstZ) is negated and normalised, the three
 * antenna axes are normalised, and the coefficients (Xant, Yant, Zant) of the
 * line-of-sight along the X / Y / Z axes are obtained with Cramer's rule (the
 * axes are therefore not required to be orthogonal, only linearly independent).
 *
 *   theta : angle from the antenna Z axis, clamped to [0, PI] when the cosine
 *           falls outside [-1, 1].
 *   phi   : azimuth around Z measured from the X axis, built from atanf plus
 *           quadrant corrections.
 *   Rho   : length of (Xant, Yant, Zant).
 *
 * NOTE(review): when |Yant| is below PRECISIONTOLERANCE, phi is set to 0 even if
 * Xant is negative (a direction along -X). Confirm this is intended.
 * NOTE(review): zero-length inputs or a singular axis set divide by zero here;
 * nothing guards against that.
 *
 * AntDirectX/Y/Z are accepted but not used by this function.
 *
 * @return CUDAVectorEllipsoidal with theta / phi in radians and Rho as above.
 */
extern __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal_single(
    float RstX, float RstY, float RstZ,
    float AntXaxisX, float AntXaxisY, float AntXaxisZ,
    float AntYaxisX, float AntYaxisY, float AntYaxisZ,
    float AntZaxisX, float AntZaxisY, float AntZaxisZ,
    float AntDirectX, float AntDirectY, float AntDirectZ
) {
    CUDAVectorEllipsoidal result{ 0,0,-1 };

    // Reverse the incoming vector so it points the other way along the line of sight.
    float Xst = -1 * RstX;
    float Yst = -1 * RstY;
    float Zst = -1 * RstZ;

    // Lengths used for normalisation.
    float RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
    float AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
    float AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
    float AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);

    // Unit line-of-sight vector.
    float Rx = Xst / RstNorm;
    float Ry = Yst / RstNorm;
    float Rz = Zst / RstNorm;

    // Unit antenna axes.
    float Xx = AntXaxisX / AntXaxisNorm;
    float Xy = AntXaxisY / AntXaxisNorm;
    float Xz = AntXaxisZ / AntXaxisNorm;
    float Yx = AntYaxisX / AntYaxisNorm;
    float Yy = AntYaxisY / AntYaxisNorm;
    float Yz = AntYaxisZ / AntYaxisNorm;
    float Zx = AntZaxisX / AntZaxisNorm;
    float Zy = AntZaxisY / AntZaxisNorm;
    float Zz = AntZaxisZ / AntZaxisNorm;

    // Determinant of the axis matrix; common denominator of all three coefficients.
    float axisDet = (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);

    // Cramer's rule: coefficients of the line of sight along the antenna X, Y and Z axes.
    float Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / axisDet;
    float Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / axisDet;
    float Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / axisDet;

    // Radial length and cosine of the polar angle.
    float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant);
    float Zn = Zant / Norm;

    // Polar angle from the Z axis; the cosine is clamped to [-1, 1] before acos.
    float ThetaAnt;
    if (-1 > Zn) {
        ThetaAnt = PI;
    }
    else if (Zn > 1) {
        ThetaAnt = 0;
    }
    else {
        ThetaAnt = acos(Zn);
    }

    const bool xNearZero = abs(Xant) < PRECISIONTOLERANCE;
    const bool yNearZero = abs(Yant) < PRECISIONTOLERANCE;

    // Principal value in (-PI/2, PI/2); fixed up per quadrant below.
    float PhiAnt = xNearZero ? 0 : atanf(Yant / Xant);

    if (yNearZero) {
        // On the X axis (either sign of Xant).
        PhiAnt = 0;
    }
    else if (xNearZero) {
        // On the Y axis: +/- 90 degrees depending on the sign of Yant.
        PhiAnt = (Yant > 0) ? PI / 2 : -PI / 2;
    }
    else if (Xant < 0) {
        // Second / third quadrant: shift the principal value by +/- PI.
        PhiAnt = (Yant > 0) ? PI + PhiAnt : -PI + PhiAnt;
    }
    // Xant > 0: the principal value is already correct.

    if (isnan(PhiAnt)) {
        printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
    }

    result.theta = ThetaAnt;
    result.phi = PhiAnt;
    result.Rho = Norm;
    return result;
}
|
|||
|
|
|||
|
/**
 * Bilinear interpolation of a regularly sampled antenna pattern.
 *
 * The pattern is stored row-major as antpattern[thetaIndex * phipoints + phiIndex],
 * with sample (i, j) located at (starttheta + i * dtheta, startphi + j * dphi).
 *
 * @param antpattern            Pattern samples (thetapoints x phipoints values).
 * @param starttheta, startphi  Coordinates of sample (0, 0).
 * @param dtheta, dphi          Grid spacing along each axis.
 * @param thetapoints, phipoints Number of samples along each axis.
 * @param searththeta, searchphi Query coordinates (same unit as the grid;
 *                              presumably degrees, given the hard-coded 90 cut-off).
 * @return Interpolated value, or 0 when searththeta > 90 or when any of the four
 *         surrounding samples would fall outside the grid (this includes queries
 *         lying in or beyond the last row / column).
 */
extern __device__ float GPU_BillerInterpAntPattern_single(float* antpattern,
    float starttheta, float startphi, float dtheta, float dphi,
    long thetapoints, long phipoints,
    float searththeta, float searchphi) {
    // Queries beyond 90 get no gain at all.
    if (searththeta > 90) {
        return 0;
    }

    // Indices of the grid cell that contains the query point.
    const long thetaLo = floorf((searththeta - starttheta) / dtheta);
    const long thetaHi = thetaLo + 1;
    const long phiLo = floorf((searchphi - startphi) / dphi);
    const long phiHi = phiLo + 1;

    const bool cellInsideGrid =
        thetaLo >= 0 && thetaHi >= 0 && phiLo >= 0 && phiHi >= 0 &&
        thetaLo < thetapoints && thetaHi < thetapoints &&
        phiLo < phipoints && phiHi < phipoints;
    if (!cellInsideGrid) {
        return 0;
    }

    // Coordinates of the four cell corners.
    const float thetaAtLo = thetaLo * dtheta + starttheta;
    const float thetaAtHi = thetaHi * dtheta + starttheta;
    const float phiAtLo = phiLo * dphi + startphi;
    const float phiAtHi = phiHi * dphi + startphi;

    // Corner samples (values are used as stored; no dB -> linear conversion here).
    const float vLoLo = antpattern[thetaLo * phipoints + phiLo];
    const float vLoHi = antpattern[thetaLo * phipoints + phiHi];
    const float vHiLo = antpattern[thetaHi * phipoints + phiLo];
    const float vHiHi = antpattern[thetaHi * phipoints + phiHi];

    // Un-normalised distances of the query point to the opposite cell edges.
    const float toThetaHi = thetaAtHi - searththeta;
    const float toThetaLo = searththeta - thetaAtLo;
    const float toPhiHi = phiAtHi - searchphi;
    const float toPhiLo = searchphi - phiAtLo;

    float weightedSum = (vLoLo * toThetaHi * toPhiHi
        + vHiLo * toThetaLo * toPhiHi
        + vLoHi * toThetaHi * toPhiLo
        + vHiHi * toThetaLo * toPhiLo);

    // Normalise by the cell area.
    weightedSum = weightedSum / ((thetaAtHi - thetaAtLo) * (phiAtHi - phiAtLo));
    return weightedSum;
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
/* Kernel functions *******************************************************************************************************************/
|
|||
|
/**
 * Computes, for every (pulse, target) pair of the current target chunk, the
 * range offset and the echo amplitude that the echo kernel later accumulates.
 *
 * Launch layout: 1-D grid, one thread per output slot. With
 *     idx   = blockIdx.x * blockDim.x + threadIdx.x
 *     prfId = idx / SHAREMEMORY_FLOAT_HALF
 *     posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId
 * the grid must cover PRFCount * SHAREMEMORY_FLOAT_HALF threads, and
 * d_temp_R / d_temp_amps must hold at least that many floats.
 *
 * Every slot with prfId < PRFCount is written on every launch. Slots that do not
 * contribute (target index past pixelcount, range outside [NearR, FarR],
 * degenerate slope, local incidence angle outside [0, PI/2), non-positive Rho,
 * or a non-finite result) are written as zero so that no value from a previous
 * chunk can leak into the accumulation.
 *
 * @param antX..antZ            Antenna position per pulse (indexed by prfId).
 * @param antXaxis*..antDirect* Antenna frame vectors per pulse (indexed by prfId).
 * @param PRFCount              Number of pulses.
 * @param targetX..targetZ      Target positions (indexed by posId).
 * @param demCls                Class index per target, used to select sigma0 parameters.
 * @param demSlopeX..Z          Surface slope vector per target.
 * @param startPosId            First target index of this chunk.
 * @param pixelcount            Total number of targets.
 * @param sigma0Paramslist      Backscatter model parameters per class.
 * @param sigmaparamslistlen    Number of entries in sigma0Paramslist (assumed to be
 *                              an element count - confirm with the caller). Class
 *                              indices outside [0, sigmaparamslistlen) get sigma0 = 0.
 * @param Pt                    Transmit power factor applied to the amplitude.
 * @param refPhaseRange         Reference range subtracted from the slant range.
 * @param NearR, FarR           Accepted slant-range window.
 * @param d_temp_R              Output: slant range minus refPhaseRange per slot.
 * @param d_temp_amps           Output: amplitude per slot.
 *
 * The antenna pattern arguments (TransAntpattern, ReceiveAntpattern, their grid
 * descriptors and the two max*AntPatternValue values) are currently unused:
 * pattern weighting is disabled and the antenna gain is fixed to 1.
 */
__global__ void CUDA_Kernel_Computer_R_amp_single(
    float* antX, float* antY, float* antZ,
    float* antXaxisX, float* antXaxisY, float* antXaxisZ,
    float* antYaxisX, float* antYaxisY, float* antYaxisZ,
    float* antZaxisX, float* antZaxisY, float* antZaxisZ,
    float* antDirectX, float* antDirectY, float* antDirectZ,
    long PRFCount,
    float* targetX, float* targetY, float* targetZ, long* demCls,
    float* demSlopeX, float* demSlopeY, float* demSlopeZ ,
    long startPosId, long pixelcount,
    CUDASigmaParam_single* sigma0Paramslist, long sigmaparamslistlen,
    float Pt,
    float refPhaseRange,
    float* TransAntpattern,
    float Transtarttheta, float Transstartphi, float Transdtheta, float Transdphi, int Transthetapoints, int Transphipoints,
    float* ReceiveAntpattern,
    float Receivestarttheta, float Receivestartphi, float Receivedtheta, float Receivedphi, int Receivethetapoints, int Receivephipoints,
    float maxTransAntPatternValue, float maxReceiveAntPatternValue,
    float NearR, float FarR,
    float* d_temp_R, float* d_temp_amps
) {
    const long idx = blockIdx.x * blockDim.x + threadIdx.x;
    const long prfId = idx / SHAREMEMORY_FLOAT_HALF;                 // pulse index
    const long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId;    // target index

    // Threads past the last pulse have no slot in the temporary buffers.
    if (prfId >= PRFCount) {
        return;
    }

    // Clear the slot first: every early exit below then leaves a zero contribution,
    // including the tail of the last chunk (posId >= pixelcount), which previously
    // kept whatever the preceding chunk had stored there.
    d_temp_R[idx] = 0;
    d_temp_amps[idx] = 0;

    if (posId >= pixelcount) {
        return;
    }

    // Vector from the target to the antenna and its length (slant range).
    const float RstX = antX[prfId] - targetX[posId];
    const float RstY = antY[prfId] - targetY[posId];
    const float RstZ = antZ[prfId] - targetZ[posId];
    const float RstR = sqrtf(RstX * RstX + RstY * RstY + RstZ * RstZ);

    // Range gate.
    if (RstR < NearR || RstR > FarR) {
        return;
    }

    const float slopeX = demSlopeX[posId];
    const float slopeY = demSlopeY[posId];
    const float slopeZ = demSlopeZ[posId];
    const float slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ);

    // A (near) zero slope vector gives no usable local incidence angle.
    if (!(fabsf(slopR) > 1e-3f)) {
        return;
    }

    // Local incidence angle between the line of sight and the slope vector.
    const float dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
    const float localangle = acosf(dotAB / (RstR * slopR));
    if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
        return;
    }

    // Line of sight expressed in the antenna frame.
    CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal_single(
        RstX, RstY, RstZ,
        antXaxisX[prfId], antXaxisY[prfId], antXaxisZ[prfId],
        antYaxisX[prfId], antYaxisY[prfId], antYaxisZ[prfId],
        antZaxisX[prfId], antZaxisY[prfId], antZaxisZ[prfId],
        antDirectX[prfId], antDirectY[prfId], antDirectZ[prfId]
    );
    // Degrees, as the (currently disabled) antenna pattern lookup would expect.
    antVector.theta = antVector.theta * r2d;
    antVector.phi = antVector.phi * r2d;

    if (!(antVector.Rho > 0)) {
        return;
    }

    // Backscatter coefficient for the target's class. An all-zero parameter set,
    // or a class index outside the parameter list, yields sigma0 = 0.
    float sigma0 = 0;
    const long clsid = demCls[posId];
    if (clsid >= 0 && clsid < sigmaparamslistlen) {
        const CUDASigmaParam_single tempsigma = sigma0Paramslist[clsid];
        const bool allParamsZero =
            fabsf(tempsigma.p1) < PRECISIONTOLERANCE &&
            fabsf(tempsigma.p2) < PRECISIONTOLERANCE &&
            fabsf(tempsigma.p3) < PRECISIONTOLERANCE &&
            fabsf(tempsigma.p4) < PRECISIONTOLERANCE &&
            fabsf(tempsigma.p5) < PRECISIONTOLERANCE &&
            fabsf(tempsigma.p6) < PRECISIONTOLERANCE;
        if (!allParamsZero) {
            const float sigma = GPU_getSigma0dB_single(tempsigma, localangle);
            sigma0 = powf(10.0f, sigma / 10.0f);   // dB -> linear
        }
    }

    // Antenna pattern gain (transmit * receive) and the -3 dB beam cut are disabled;
    // the gain is fixed to 1 and only the spreading loss 1 / ((4*pi)^2 * R^4) remains.
    float ampGain = 1;
    ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4));

    const float temp_amp = float(ampGain * Pt * sigma0);
    const float temp_R = float(RstR - refPhaseRange);

    if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
        printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
        return;   // slot already holds zeros
    }

    d_temp_amps[idx] = temp_amp;
    d_temp_R[idx] = temp_R;
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
/**
 * Accumulates the frequency-domain echo of one target chunk.
 *
 * Launch layout: 1-D grid; thread idx maps to pulse prfId = idx / FreqPoints and
 * frequency bin fId = idx % FreqPoints. Static shared memory:
 * 2 * SHAREMEMORY_FLOAT_HALF floats per block.
 *
 * Phase 1 - each thread copies SHAREMEMORY_FLOAT_HALF_STEP consecutive entries
 * of row prfId of d_temp_R / d_temp_amps into shared memory (entries at or past
 * posNum are zero-filled). Phase 2 - after the block barrier, each thread with a
 * valid (prfId, fId) sums amp * exp(j * R * factor) over all
 * SHAREMEMORY_FLOAT_HALF shared entries and adds the result to the echo buffers.
 *
 * NOTE(review): the staging assumes blockDim.x * SHAREMEMORY_FLOAT_HALF_STEP ==
 * SHAREMEMORY_FLOAT_HALF (otherwise shared memory is partly uninitialised or
 * overrun) and that all threads of a block share the same prfId, i.e. FreqPoints
 * is a multiple of blockDim.x - confirm against the launch site.
 *
 * @param d_temp_R, d_temp_amps  Per-(pulse, target) range offsets and amplitudes,
 *                               laid out as [prfId * posNum + target].
 * @param posNum                 Number of valid targets per pulse row.
 * @param f0, dfreq              Start frequency and frequency step.
 * @param FreqPoints             Padded number of frequency bins used for indexing.
 * @param maxfreqnum             Actual number of frequency bins (row length of the echo buffers).
 * @param d_temp_echo_real/imag  Echo buffers, accumulated in place (+=).
 * @param temp_PRF_Count         Number of pulses.
 */
__global__ void CUDA_Kernel_Computer_echo_single(
    float* d_temp_R, float* d_temp_amps, long posNum,
    float f0, float dfreq,
    long FreqPoints,
    long maxfreqnum,
    float* d_temp_echo_real, float* d_temp_echo_imag,
    long temp_PRF_Count
) {
    // Block-wide staging buffers for one pulse's ranges and amplitudes.
    __shared__ float s_R[SHAREMEMORY_FLOAT_HALF];
    __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];

    const long lane = threadIdx.x;
    const long block = blockIdx.x;
    const long globalId = block * blockDim.x + lane;
    const long prfId = globalId / FreqPoints;   // pulse index
    const long fId = globalId % FreqPoints;     // frequency-bin index

    // Phase 1: cooperative load into shared memory.
    for (long step = 0; step < SHAREMEMORY_FLOAT_HALF_STEP; ++step) {
        const long slot = lane * SHAREMEMORY_FLOAT_HALF_STEP + step;
        const long srcId = prfId * posNum + slot;
        const bool hasData = slot < posNum;
        s_R[slot] = hasData ? d_temp_R[srcId] : 0;
        s_amp[slot] = hasData ? d_temp_amps[srcId] : 0;
    }

    // All shared entries must be in place before any thread starts summing.
    __syncthreads();

    // Padding threads (beyond the real bin / pulse counts) have nothing to write.
    if (fId >= maxfreqnum || prfId >= temp_PRF_Count) {
        return;
    }

    const long echo_ID = prfId * maxfreqnum + fId;                     // output position
    const float phasePerRange = RFPCPIDIVLIGHT * (f0 + fId * dfreq);   // phase factor for this bin

    float sumReal = 0;
    float sumImag = 0;

    // Phase 2: coherent sum over every staged target.
    for (long k = 0; k < SHAREMEMORY_FLOAT_HALF; ++k) {
        const float phase = s_R[k] * phasePerRange;
        const float amp = s_amp[k];
        sumReal += (amp * cosf(phase));
        sumImag += (amp * sinf(phase));

        // Debug aid: report the first signs of a non-finite accumulation.
        const bool anyNan = isnan(phase) || isnan(amp) || isnan(sumReal) || isnan(sumImag);
        const bool anyInf = isinf(phase) || isinf(amp) || isinf(sumReal) || isinf(sumImag);
        if (anyNan || anyInf) {
            printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n",amp,phase,sumReal,sumImag);
        }
    }

    // Add this chunk's contribution to the running echo.
    d_temp_echo_real[echo_ID] += sumReal;
    d_temp_echo_imag[echo_ID] += sumImag;
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
/**
 * Host driver of the single-precision RFPC echo simulation; processes the
 * targets in chunks of SHAREMEMORY_FLOAT_HALF.
 *
 * For each chunk it launches
 *   1. CUDA_Kernel_Computer_R_amp_single - fills d_temp_R / d_temp_amp with the
 *      range offset and amplitude of every (pulse, target-in-chunk) pair, and
 *   2. CUDA_Kernel_Computer_echo_single  - adds the chunk's contribution to
 *      out_echoReal / out_echoImag.
 * Both launches go to the default stream, so they run in order. The function
 * returns after cudaDeviceSynchronize().
 *
 * All pointer arguments are passed straight to the kernels and must therefore be
 * device-accessible. d_temp_R and d_temp_amp are indexed up to
 * PRFCount * SHAREMEMORY_FLOAT_HALF elements. The echo kernel accumulates with
 * "+=", so out_echoReal / out_echoImag must already contain the desired starting
 * values (for example zeros) when this function is called.
 *
 * @param PRFCount      Number of pulses.
 * @param FreqNum       Number of frequency bins per pulse.
 * @param f0, dfreq     Start frequency and frequency step.
 * @param TargetNumber  Total number of targets.
 * Remaining arguments are forwarded unchanged to the kernels.
 */
void CUDA_RFPC_MainProcess_single(
    float* antX, float* antY, float* antZ,
    float* antXaxisX, float* antXaxisY, float* antXaxisZ,
    float* antYaxisX, float* antYaxisY, float* antYaxisZ,
    float* antZaxisX, float* antZaxisY, float* antZaxisZ,
    float* antDirectX, float* antDirectY, float* antDirectZ,
    long PRFCount, long FreqNum,
    float f0, float dfreq,
    float Pt,
    float refPhaseRange,
    float* TransAntpattern,
    float Transtarttheta, float Transstartphi, float Transdtheta, float Transdphi, int Transthetapoints, int Transphipoints,
    float* ReceiveAntpattern,
    float Receivestarttheta, float Receivestartphi, float Receivedtheta, float Receivedphi, int Receivethetapoints, int Receivephipoints,
    float maxTransAntPatternValue, float maxReceiveAntPatternValue,
    float NearR, float FarR,
    float* targetX, float* targetY, float* targetZ, long* demCls, long TargetNumber,
    float* demSlopeX, float* demSlopeY, float* demSlopeZ,
    CUDASigmaParam_single* sigma0Paramslist, long sigmaparamslistlen,
    float* out_echoReal, float* out_echoImag,
    float* d_temp_R, float* d_temp_amp
)
{
    // Frequency-bin count padded by NextBlockPad with BLOCK_SIZE.
    long BLOCK_FREQNUM = NextBlockPad(FreqNum, BLOCK_SIZE);
    long freqpoints = BLOCK_FREQNUM;
    printf("freqpoints:%ld\n", freqpoints);   // %ld: the argument is a long

    // Grid sizes do not depend on the chunk, so compute them once.
    const long rampBlocknum = (PRFCount * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
    const long echoBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;

    long process = 0;   // last reported progress, in whole percent
    for (long sTi = 0; sTi < TargetNumber; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
        // Step 1: range / amplitude of every (pulse, target) pair in this chunk.
        CUDA_Kernel_Computer_R_amp_single<<<rampBlocknum, BLOCK_SIZE>>>(
            antX, antY, antZ,
            antXaxisX, antXaxisY, antXaxisZ,
            antYaxisX, antYaxisY, antYaxisZ,
            antZaxisX, antZaxisY, antZaxisZ,
            antDirectX, antDirectY, antDirectZ,
            PRFCount,
            targetX, targetY, targetZ, demCls,
            demSlopeX, demSlopeY, demSlopeZ,
            sTi, TargetNumber,
            sigma0Paramslist, sigmaparamslistlen,
            Pt,
            refPhaseRange,
            TransAntpattern,
            Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
            ReceiveAntpattern,
            Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
            maxTransAntPatternValue, maxReceiveAntPatternValue,
            NearR, FarR,
            d_temp_R, d_temp_amp
        );
        PrintLasterError("CUDA_Kernel_Computer_R_amp");

        // Step 2: accumulate this chunk into the echo buffers.
        CUDA_Kernel_Computer_echo_single<<<echoBlocknum, BLOCK_SIZE>>>(
            d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF,
            f0, dfreq,
            freqpoints, FreqNum,
            out_echoReal, out_echoImag,
            PRFCount
        );
        PrintLasterError("CUDA_Kernel_Computer_echo");

        // Report progress roughly once per percent.
        const double percentDone = sTi * 100.0 / TargetNumber;
        if (percentDone - process >= 1) {
            process = percentDone;
            PRINT("TargetID [%f]: %ld / %ld finished\n", percentDone, sTi, TargetNumber);
        }
    }

    // Wait for all queued kernels before handing the buffers back to the caller.
    cudaDeviceSynchronize();
}
|
|||
|
|
|||
|
|
|||
|
#endif
|
|||
|
|
|||
|
|