727 lines
22 KiB
Plaintext
727 lines
22 KiB
Plaintext
#include <cuda.h>
|
||
#include <device_launch_parameters.h>
|
||
#include <cuda_runtime.h>
|
||
#include <cublas_v2.h>
|
||
#include <cuComplex.h>
|
||
#include <math_functions.h>
|
||
#include "BaseConstVariable.h"
|
||
#include "GPURFPC.cuh"
|
||
|
||
|
||
#ifdef __CUDANVCC___
|
||
|
||
|
||
/* 机器函数 ****************************************************************************************************************************/
|
||
|
||
|
||
__device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta)
|
||
{
|
||
return param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
|
||
}
|
||
|
||
extern __device__ float GPU_getSigma0dB(CUDASigmaParam param, float theta) {//线性值
|
||
return param.p1 + param.p2 * expf(-param.p3 * theta) + param.p4 * cosf(param.p5 * theta + param.p6);;
|
||
}
|
||
|
||
|
||
__device__ double GPU_getSigma0dB_params(
|
||
const double p1, const double p2, const double p3, const double p4, const double p5, const double p6,
|
||
double theta) {//线性值
|
||
return p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6);
|
||
}
|
||
|
||
|
||
|
||
extern __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
|
||
double RstX, double RstY, double RstZ,
|
||
double AntXaxisX, double AntXaxisY, double AntXaxisZ,
|
||
double AntYaxisX, double AntYaxisY, double AntYaxisZ,
|
||
double AntZaxisX, double AntZaxisY, double AntZaxisZ,
|
||
double AntDirectX, double AntDirectY, double AntDirectZ
|
||
) {
|
||
CUDAVectorEllipsoidal result{ 0,0,-1 };
|
||
|
||
// 求解天线增益
|
||
double Xst = -1 * RstX; // 卫星 --> 地面
|
||
double Yst = -1 * RstY;
|
||
double Zst = -1 * RstZ;
|
||
|
||
// 归一化
|
||
double RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
|
||
double AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
|
||
double AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
|
||
double AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);
|
||
|
||
|
||
double Rx = Xst / RstNorm;
|
||
double Ry = Yst / RstNorm;
|
||
double Rz = Zst / RstNorm;
|
||
double Xx = AntXaxisX / AntXaxisNorm;
|
||
double Xy = AntXaxisY / AntXaxisNorm;
|
||
double Xz = AntXaxisZ / AntXaxisNorm;
|
||
double Yx = AntYaxisX / AntYaxisNorm;
|
||
double Yy = AntYaxisY / AntYaxisNorm;
|
||
double Yz = AntYaxisZ / AntYaxisNorm;
|
||
double Zx = AntZaxisX / AntZaxisNorm;
|
||
double Zy = AntZaxisY / AntZaxisNorm;
|
||
double Zz = AntZaxisZ / AntZaxisNorm;
|
||
|
||
double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
|
||
double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
|
||
double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
|
||
|
||
|
||
|
||
// 计算theta 与 phi
|
||
double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
|
||
double Zn = Zant / Norm;
|
||
double ThetaAnt = (-1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));// acosf(Zant / Norm); // theta 与 Z轴的夹角
|
||
double PhiAnt = abs(Xant) < PRECISIONTOLERANCE ? 0 : atanf(Yant / Xant); // -pi/2 ~pi/2
|
||
|
||
if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
|
||
PhiAnt = 0;
|
||
}
|
||
else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上,原点
|
||
if (Yant > 0) {
|
||
PhiAnt = PI / 2;
|
||
}
|
||
else {
|
||
PhiAnt = -PI / 2;
|
||
}
|
||
}
|
||
else if (Xant < 0) {
|
||
if (Yant > 0) {
|
||
PhiAnt = PI + PhiAnt;
|
||
}
|
||
else {
|
||
PhiAnt = -PI + PhiAnt;
|
||
}
|
||
}
|
||
else { // Xant>0 X 正轴
|
||
|
||
}
|
||
|
||
if (isnan(PhiAnt)) {
|
||
printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
|
||
}
|
||
|
||
result.theta = ThetaAnt;
|
||
result.phi = PhiAnt;
|
||
result.Rho = Norm;
|
||
return result;
|
||
}
|
||
|
||
extern __device__ double GPU_BillerInterpAntPattern(double* antpattern,
|
||
double starttheta, double startphi, double dtheta, double dphi,
|
||
long thetapoints, long phipoints,
|
||
double searththeta, double searchphi) {
|
||
double stheta = searththeta;
|
||
double sphi = searchphi;
|
||
if (stheta > 90) {
|
||
return 0;
|
||
}
|
||
else {}
|
||
|
||
|
||
double pthetaid = (stheta - starttheta) / dtheta;//
|
||
double pphiid = (sphi - startphi) / dphi;
|
||
|
||
long lasttheta = floorf(pthetaid);
|
||
long nextTheta = lasttheta + 1;
|
||
long lastphi = floorf(pphiid);
|
||
long nextPhi = lastphi + 1;
|
||
|
||
|
||
if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
|
||
lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
|
||
{
|
||
return 0;
|
||
}
|
||
else {
|
||
double x = stheta;
|
||
double y = sphi;
|
||
|
||
double x1 = lasttheta * dtheta + starttheta;
|
||
double x2 = nextTheta * dtheta + starttheta;
|
||
double y1 = lastphi * dphi + startphi;
|
||
double y2 = nextPhi * dphi + startphi;
|
||
|
||
double z11 = antpattern[lasttheta * phipoints + lastphi];
|
||
double z12 = antpattern[lasttheta * phipoints + nextPhi];
|
||
double z21 = antpattern[nextTheta * phipoints + lastphi];
|
||
double z22 = antpattern[nextTheta * phipoints + nextPhi];
|
||
|
||
|
||
//z11 = powf(10, z11 / 10); // dB-> 线性
|
||
//z12 = powf(10, z12 / 10);
|
||
//z21 = powf(10, z21 / 10);
|
||
//z22 = powf(10, z22 / 10);
|
||
|
||
double GainValue = (z11 * (x2 - x) * (y2 - y)
|
||
+ z21 * (x - x1) * (y2 - y)
|
||
+ z12 * (x2 - x) * (y - y1)
|
||
+ z22 * (x - x1) * (y - y1));
|
||
GainValue = GainValue / ((x2 - x1) * (y2 - y1));
|
||
return GainValue;
|
||
}
|
||
}
|
||
|
||
|
||
|
||
/* 核函数 ****************************************************************************************************************************/
|
||
// 计算每块
|
||
__global__ void CUDA_Kernel_Computer_R_amp(
|
||
double* antX, double* antY, double* antZ,
|
||
double* antXaxisX, double* antXaxisY, double* antXaxisZ,
|
||
double* antYaxisX, double* antYaxisY, double* antYaxisZ,
|
||
double* antZaxisX, double* antZaxisY, double* antZaxisZ,
|
||
double* antDirectX, double* antDirectY, double* antDirectZ,
|
||
long PRFCount, // 整体的脉冲数,
|
||
double* targetX, double* targetY, double* targetZ, long* demCls,
|
||
double* demSlopeX, double* demSlopeY, double* demSlopeZ,
|
||
long startPosId, long pixelcount,
|
||
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
|
||
double Pt,
|
||
double refPhaseRange,
|
||
double* TransAntpattern,
|
||
double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
|
||
double* ReceiveAntpattern,
|
||
double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
|
||
double maxTransAntPatternValue, double maxReceiveAntPatternValue,
|
||
double NearR, double FarR,
|
||
float* d_temp_R, float* d_temp_amps// 计算输出
|
||
) {
|
||
long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
|
||
long prfId = idx / SHAREMEMORY_FLOAT_HALF;
|
||
long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点
|
||
|
||
if (prfId < PRFCount && posId < pixelcount) {
|
||
double RstX = antX[prfId] - targetX[posId]; // 计算坐标矢量
|
||
double RstY = antY[prfId] - targetY[posId];
|
||
double RstZ = antZ[prfId] - targetZ[posId];
|
||
|
||
double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // 矢量距离
|
||
if (RstR<NearR || RstR>FarR) {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
else {
|
||
double slopeX = demSlopeX[posId];
|
||
double slopeY = demSlopeY[posId];
|
||
double slopeZ = demSlopeZ[posId];
|
||
|
||
double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
|
||
if (abs(slopR - 0) > 1e-3) {
|
||
double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
|
||
double localangle = acos(dotAB / (RstR * slopR));
|
||
|
||
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
else {}
|
||
|
||
|
||
double ampGain = 0;
|
||
// 求解天线方向图指向
|
||
CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
|
||
RstX, RstY, RstZ,
|
||
antXaxisX[prfId], antXaxisY[prfId], antXaxisZ[prfId],
|
||
antYaxisX[prfId], antYaxisY[prfId], antYaxisZ[prfId],
|
||
antZaxisX[prfId], antZaxisY[prfId], antZaxisZ[prfId],
|
||
antDirectX[prfId], antDirectY[prfId], antDirectZ[prfId]
|
||
);
|
||
antVector.theta = antVector.theta * r2d;
|
||
antVector.phi = antVector.phi * r2d;
|
||
//printf("theta: %f , phi: %f \n", antVector.theta, antVector.phi);
|
||
if (antVector.Rho > 0) {
|
||
//double TansantPatternGain = GPU_BillerInterpAntPattern(
|
||
// TransAntpattern,
|
||
// Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
|
||
// antVector.theta, antVector.phi);
|
||
//double antPatternGain = GPU_BillerInterpAntPattern(
|
||
// ReceiveAntpattern,
|
||
// Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
|
||
// antVector.theta, antVector.phi);
|
||
|
||
double sigma0 = 0;
|
||
{
|
||
long clsid = demCls[posId];
|
||
//printf("clsid=%d\n", clsid);
|
||
CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
|
||
|
||
|
||
if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
|
||
abs(tempsigma.p2) < PRECISIONTOLERANCE &&
|
||
abs(tempsigma.p3) < PRECISIONTOLERANCE &&
|
||
abs(tempsigma.p4) < PRECISIONTOLERANCE &&
|
||
abs(tempsigma.p5) < PRECISIONTOLERANCE &&
|
||
abs(tempsigma.p6) < PRECISIONTOLERANCE
|
||
) {
|
||
sigma0 = 0;
|
||
}
|
||
else {
|
||
double sigma = GPU_getSigma0dB(tempsigma, localangle);
|
||
sigma0 = powf(10.0, sigma / 10.0);
|
||
}
|
||
}
|
||
//ampGain = TansantPatternGain * antPatternGain;
|
||
ampGain = 1;
|
||
//if (10 * log10(ampGain / maxReceiveAntPatternValue / maxTransAntPatternValue) < -3) { // 小于-3dB
|
||
// d_temp_R[idx] = 0;
|
||
// d_temp_amps[idx] = 0;
|
||
// return;
|
||
//}
|
||
//else {}
|
||
|
||
|
||
|
||
|
||
ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
|
||
|
||
float temp_amp = float(ampGain * Pt * sigma0);
|
||
float temp_R = float(RstR - refPhaseRange);
|
||
|
||
if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
|
||
printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
else {}
|
||
|
||
|
||
d_temp_amps[idx] = temp_amp;
|
||
d_temp_R[idx] = temp_R;
|
||
return;
|
||
}
|
||
else {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
}
|
||
else {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
__global__ void CUDA_Kernel_Computer_echo(
|
||
float* d_temp_R, float* d_temp_amps, long posNum,
|
||
float f0, float dfreq,
|
||
long FreqPoints, // 当前频率的分块
|
||
long maxfreqnum, // 最大脉冲值
|
||
float* d_temp_echo_real, float* d_temp_echo_imag,
|
||
long temp_PRF_Count
|
||
) {
|
||
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // 注意一个完整的block_size 共享相同内存
|
||
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
|
||
|
||
long tid = threadIdx.x;
|
||
long bid = blockIdx.x;
|
||
long idx = bid * blockDim.x + tid;
|
||
long prfId = idx / FreqPoints; // 脉冲ID
|
||
long fId = idx % FreqPoints;//频率ID
|
||
|
||
long psid = 0;
|
||
long pixelId = 0;
|
||
for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) { // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE=SHAREMEMORY_FLOAT_HALF
|
||
psid = tid * SHAREMEMORY_FLOAT_HALF_STEP + ii;
|
||
pixelId = prfId * posNum + psid; //
|
||
if (psid < posNum) {
|
||
s_R[psid] = d_temp_R[pixelId];
|
||
s_amp[psid] = d_temp_amps[pixelId];
|
||
}
|
||
else {
|
||
s_R[psid] = 0;
|
||
s_amp[psid] = 0;
|
||
}
|
||
|
||
}
|
||
|
||
__syncthreads(); // 确定所有待处理数据都已经进入程序中
|
||
|
||
|
||
|
||
if (fId < maxfreqnum && prfId < temp_PRF_Count) {
|
||
|
||
long echo_ID = prfId * maxfreqnum + fId; // 计算对应的回波位置
|
||
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
|
||
float temp_real = 0;
|
||
float temp_imag = 0;
|
||
float temp_phi = 0;
|
||
float temp_amp = 0;
|
||
for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
|
||
|
||
temp_phi = s_R[dataid] * factorjTemp;
|
||
temp_amp = s_amp[dataid];
|
||
temp_real += (temp_amp * cosf(temp_phi));
|
||
temp_imag += (temp_amp * sinf(temp_phi));
|
||
//if (dataid > 5000) {
|
||
// printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
|
||
//}
|
||
if (isnan(temp_phi) || isnan(temp_amp) || isnan(temp_real) || isnan(temp_imag)
|
||
|| isinf(temp_phi) || isinf(temp_amp) || isinf(temp_real) || isinf(temp_imag)
|
||
) {
|
||
printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, temp_real, temp_imag);
|
||
}
|
||
|
||
}
|
||
//printf("echo_ID=%d; ehodata=(%f,%f)\n", echo_ID, temp_real, temp_imag);
|
||
//printf("(%f %f %f) ", factorjTemp, s_amp[0], s_R[0]);
|
||
d_temp_echo_real[echo_ID] += /*d_temp_echo_real[echo_ID] + */temp_real;
|
||
d_temp_echo_imag[echo_ID] += /*d_temp_echo_imag[echo_ID] +*/ temp_imag;
|
||
}
|
||
}
|
||
|
||
|
||
/**
|
||
* 分块计算主流程
|
||
*/
|
||
void CUDA_RFPC_MainProcess(
|
||
double* antX, double* antY, double* antZ,
|
||
double* antXaxisX, double* antXaxisY, double* antXaxisZ,
|
||
double* antYaxisX, double* antYaxisY, double* antYaxisZ,
|
||
double* antZaxisX, double* antZaxisY, double* antZaxisZ,
|
||
double* antDirectX, double* antDirectY, double* antDirectZ,
|
||
long PRFCount, long FreqNum,
|
||
float f0, float dfreq,
|
||
double Pt,
|
||
double refPhaseRange,
|
||
double* TransAntpattern,
|
||
double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
|
||
double* ReceiveAntpattern,
|
||
double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
|
||
double maxTransAntPatternValue, double maxReceiveAntPatternValue,
|
||
double NearR, double FarR,
|
||
double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber,
|
||
double* demSlopeX, double* demSlopeY, double* demSlopeZ,
|
||
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
|
||
float* out_echoReal, float* out_echoImag,
|
||
float* d_temp_R, float* d_temp_amp
|
||
)
|
||
{
|
||
long BLOCK_FREQNUM = NextBlockPad(FreqNum, BLOCK_SIZE); // 256*freqBlockID
|
||
long cudaBlocknum = 0;
|
||
long freqpoints = BLOCK_FREQNUM;
|
||
printf("freqpoints:%d\n", freqpoints);
|
||
long process = 0;
|
||
for (long sTi = 0; sTi < TargetNumber; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
|
||
cudaBlocknum = (PRFCount * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||
CUDA_Kernel_Computer_R_amp << <cudaBlocknum, BLOCK_SIZE >> > (
|
||
antX, antY, antZ,
|
||
antXaxisX, antXaxisY, antXaxisZ,
|
||
antYaxisX, antYaxisY, antYaxisZ,
|
||
antZaxisX, antZaxisY, antZaxisZ,
|
||
antDirectX, antDirectY, antDirectZ,
|
||
PRFCount,
|
||
targetX, targetY, targetZ, demCls,
|
||
demSlopeX, demSlopeY, demSlopeZ,
|
||
sTi, TargetNumber,
|
||
sigma0Paramslist, sigmaparamslistlen,
|
||
Pt,
|
||
refPhaseRange,
|
||
TransAntpattern,
|
||
Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
|
||
ReceiveAntpattern,
|
||
Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
|
||
maxTransAntPatternValue, maxReceiveAntPatternValue,
|
||
NearR, FarR,
|
||
d_temp_R, d_temp_amp// 计算输出
|
||
);
|
||
|
||
PrintLasterError("CUDA_Kernel_Computer_R_amp");
|
||
|
||
|
||
cudaBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||
CUDA_Kernel_Computer_echo << <cudaBlocknum, BLOCK_SIZE >> > (
|
||
d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF,
|
||
f0, dfreq,
|
||
freqpoints, FreqNum,
|
||
out_echoReal, out_echoImag,
|
||
PRFCount
|
||
);
|
||
PrintLasterError("CUDA_Kernel_Computer_echo");
|
||
|
||
if ((sTi * 100.0 / TargetNumber) - process >= 1) {
|
||
process = sTi * 100.0 / TargetNumber;
|
||
PRINT("TargetID [%f]: %d / %d finished\n", sTi * 100.0 / TargetNumber, sTi, TargetNumber);
|
||
}
|
||
}
|
||
|
||
|
||
cudaDeviceSynchronize();
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
/* 核函数 ****************************************************************************************************************************/
|
||
inline double SincTarg(double x) {
|
||
return 1 - (x * x / 6) + (x * x * x * x / 120) - (x * x * x * x * x * x / 5040);
|
||
}
|
||
|
||
|
||
__global__ void Kernel_Computer_R_amp_NoAntPattern(
|
||
SateState* antlist,
|
||
long PRFCount,
|
||
GoalState* goallist,
|
||
long demLen,
|
||
long startPosId, long pixelcount,
|
||
CUDASigmaParam sigma0Params,
|
||
double Pt,
|
||
double refPhaseRange,
|
||
double NearR, double FarR,
|
||
double maxGain, double GainWeight,
|
||
float* d_temp_R, float* d_temp_amps// 计算输出
|
||
) {
|
||
int tid = threadIdx.x;
|
||
int bid = blockIdx.x;
|
||
int dmx = blockDim.x;
|
||
int idx = bid*dmx+tid; // 获取当前的线程编码
|
||
int prfId = idx / SHAREMEMORY_FLOAT_HALF;
|
||
int posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点
|
||
//d_temp_R[idx] = pixelcount;
|
||
//d_temp_amps[idx] = posId;
|
||
|
||
if (prfId < PRFCount && posId < pixelcount) {
|
||
SateState antp = antlist[prfId];
|
||
GoalState gp = goallist[posId];
|
||
double RstX = antp.Px - gp.Tx; // 计算坐标矢量 T->S
|
||
double RstY = antp.Py - gp.Ty;
|
||
double RstZ = antp.Pz - gp.Tz;
|
||
double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // 矢量距离
|
||
|
||
if (RstR<NearR || RstR>FarR) {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
else {
|
||
|
||
RstX = RstX / RstR;
|
||
RstY = RstY / RstR;
|
||
RstZ = RstZ / RstR;
|
||
|
||
float slopeX = gp.TsX;
|
||
float slopeY = gp.TsY;
|
||
float slopeZ = gp.TsZ;
|
||
|
||
float slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
|
||
if (slopR > 1e-3) {
|
||
float dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
|
||
float localangle = acosf(dotAB / (slopR));
|
||
|
||
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
else {}
|
||
|
||
|
||
// 计算斜距衰减
|
||
|
||
float antDirectR = sqrtf(antp.antDirectX * antp.antDirectX
|
||
+ antp.antDirectY * antp.antDirectY
|
||
+ antp.antDirectZ * antp.antDirectZ);
|
||
|
||
float diectAngle = -1 * (RstX * antp.antDirectX +
|
||
RstY * antp.antDirectY +
|
||
RstZ * antp.antDirectZ) / (antDirectR);
|
||
|
||
diectAngle = acosf(diectAngle);// 弧度制
|
||
diectAngle = diectAngle * GainWeight;
|
||
|
||
float ampGain = 1;
|
||
ampGain = 2 * maxGain * (1 - (powf(diectAngle, 2) / 6)
|
||
+ (powf(diectAngle, 4) / 120)
|
||
- (powf(diectAngle, 6) / 5040)); //dB
|
||
|
||
ampGain = powf(10.0, ampGain / 10.0);
|
||
|
||
ampGain = ampGain / (PI4POW2 * powf(RstR, 4)); // 反射强度
|
||
float sigma = GPU_getSigma0dB(sigma0Params, localangle);
|
||
sigma = powf(10.0, sigma / 10.0);
|
||
|
||
float temp_amp = float(ampGain * Pt * sigma);
|
||
double temp_R = RstR - refPhaseRange;
|
||
if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] = 0;
|
||
return;
|
||
}
|
||
else {
|
||
d_temp_amps[idx] = temp_amp;
|
||
d_temp_R[idx] =static_cast<float>(temp_R);
|
||
return;
|
||
}
|
||
|
||
}
|
||
}
|
||
}
|
||
else {
|
||
d_temp_R[idx] = 0;
|
||
d_temp_amps[idx] =0;
|
||
return;
|
||
}
|
||
|
||
}
|
||
|
||
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
|
||
float* d_temp_R, float* d_temp_amps,
|
||
double f0, double dfreq,
|
||
long maxfreqnum, long nextfreqNum,// 最大脉冲值
|
||
cuComplex* echodata,
|
||
long temp_PRF_Count
|
||
) {
|
||
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // 注意一个完整的block_size 共享相同内存
|
||
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
|
||
|
||
long tid = threadIdx.x;
|
||
long bid = blockIdx.x;
|
||
long idx = bid * blockDim.x + tid;
|
||
long prfId = idx / nextfreqNum; // 脉冲ID
|
||
long fId = idx % nextfreqNum;//频率ID
|
||
|
||
long psid = 0;
|
||
long pixelId = 0;
|
||
for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) { // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE=SHAREMEMORY_FLOAT_HALF
|
||
psid = ii * BLOCK_SIZE + tid;
|
||
pixelId = prfId * SHAREMEMORY_FLOAT_HALF + psid;
|
||
s_R[psid] = d_temp_R[pixelId];
|
||
s_amp[psid] = d_temp_amps[pixelId];
|
||
|
||
}
|
||
|
||
__syncthreads(); // 确定所有待处理数据都已经进入程序中
|
||
|
||
|
||
|
||
|
||
if (fId < maxfreqnum && prfId < temp_PRF_Count) {
|
||
|
||
long echo_ID = prfId * maxfreqnum + fId; // 计算对应的回波位置
|
||
double factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
|
||
cuComplex echo = make_cuComplex(0, 0);
|
||
|
||
for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
|
||
|
||
float temp_real = 0;
|
||
float temp_imag = 0;
|
||
double R = s_R[dataid];
|
||
float temp_phi = fmod(R * factorjTemp, 2 * PI);
|
||
float temp_amp = s_amp[dataid];
|
||
sincosf(temp_phi, &temp_imag, &temp_real);
|
||
echo.x = echo.x + (temp_amp * temp_real);
|
||
echo.y = echo.y + (temp_amp * temp_imag);
|
||
//if (dataid > 5000) {
|
||
// printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
|
||
//}
|
||
//if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y)
|
||
// || isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y)
|
||
// ) {
|
||
// printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y);
|
||
//}
|
||
|
||
}
|
||
|
||
echodata[echo_ID] = cuCaddf(echodata[echo_ID], echo);
|
||
|
||
}
|
||
}
|
||
|
||
|
||
|
||
|
||
/** 分块处理 ****************************************************************************************************************/
|
||
extern "C" void ProcessRFPCTask(RFPCTask& task, long devid, float* h_R, float* h_amp)
|
||
{
|
||
size_t pixelcount = task.prfNum * task.freqNum;
|
||
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||
printf("computer pixelcount goalnum gridsize blocksize prfnum %zu,%zu ,%zu,%d ,%d \n", pixelcount, task.targetnum, grid_size, BLOCK_SIZE, task.prfNum);
|
||
printf("Dev:%d ,freqnum:%d , prfnum:%d ,Rref: %e ,Rnear : %e ,Rfar: %e , StartFreq: %e ,DeletFreq: %e \n",
|
||
devid, task.freqNum, task.prfNum, task.Rref, task.Rnear, task.Rfar, task.startFreq, task.stepFreq);
|
||
float* d_R = (float*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float), devid);
|
||
float* d_amps = (float*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float), devid);
|
||
|
||
|
||
long BLOCK_FREQNUM = NextBlockPad(task.freqNum, BLOCK_SIZE); // 256*freqBlockID
|
||
long cudaBlocknum = 0;
|
||
long freqpoints = BLOCK_SIZE;
|
||
|
||
printf("freqpoints:%d\n", task.freqNum);
|
||
long prfcount = task.prfNum;
|
||
long process = 0;
|
||
for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
|
||
|
||
cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||
Kernel_Computer_R_amp_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
|
||
task.antlist,
|
||
prfcount,
|
||
task.goallist,
|
||
task.targetnum,
|
||
sTi, task.targetnum,
|
||
task.sigma0_cls,
|
||
task.Pt,
|
||
task.Rref,
|
||
task.Rnear, task.Rfar,
|
||
task.maxGain, task.GainWeight,
|
||
d_R, d_amps// 计算输出
|
||
);
|
||
PrintLasterError("CUDA_Kernel_Computer_R_amp");
|
||
cudaDeviceSynchronize();
|
||
|
||
//DeviceToHost(h_R, d_R, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float));
|
||
//DeviceToHost(h_amp, d_amps, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float));
|
||
//break;
|
||
|
||
cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE- 1) / BLOCK_SIZE;
|
||
CUDA_Kernel_Computer_echo_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
|
||
d_R, d_amps,
|
||
task.startFreq / 1e9, task.stepFreq / 1e9,
|
||
task.freqNum, BLOCK_FREQNUM,
|
||
task.d_echoData,
|
||
task.prfNum
|
||
);
|
||
PrintLasterError("CUDA_Kernel_Computer_echo");
|
||
cudaDeviceSynchronize();
|
||
if ((sTi * 100.0 / task.targetnum) - process >= 10) {
|
||
process = sTi * 100.0 / task.targetnum;
|
||
PRINT("device ID : %d , TargetID [%f]: %d / %d finished %d\n", devid, sTi * 100.0 / task.targetnum, sTi, task.targetnum, devid);
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
}
|
||
|
||
|
||
cudaDeviceSynchronize();
|
||
|
||
FreeCUDADevice(d_R);
|
||
FreeCUDADevice(d_amps);
|
||
}
|
||
|
||
|
||
|
||
#endif
|
||
|
||
|