// NOTE(review): the six system header names were missing from the original
// `#include` lines (only the bare directive survived). The list below is
// reconstructed from the identifiers this file uses (size_t, printf, math
// functions, the CUDA runtime API, blockIdx/threadIdx, cuComplex) — confirm
// against the project's intended include set.
#include <cstddef>
#include <cstdio>
#include <cmath>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cuComplex.h>

#include "BaseConstVariable.h"
#include "GPURFPC.cuh"

#ifdef __CUDANVCC___

/* Device functions *********************************************************************************************************************/

/**
 * Evaluates the backscatter model
 *     p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6)
 * for the given parameter set.
 *
 * NOTE(review): the original comment labels the result "linear value", but the
 * function name says dB and every caller in this file converts the result with
 * 10^(x / 10) — confirm which unit is intended.
 *
 * @param param  model coefficients p1..p6
 * @param theta  angle passed to the model (callers pass the acos() result, i.e. radians)
 * @return       model value
 */
extern __device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta)
{
    return param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
}

/**
 * Same model as GPU_getSigma0dB, with the six coefficients passed individually.
 *
 * @return p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6)
 */
__device__ double GPU_getSigma0dB_params(
    const double p1, const double p2, const double p3,
    const double p4, const double p5, const double p6,
    double theta)
{
    return p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6);
}

/**
 * Expresses the satellite->ground direction in the antenna coordinate frame and
 * returns it as (theta, phi, Rho).
 *
 * The input vector (RstX, RstY, RstZ) is negated (satellite --> ground), it and
 * the three antenna axes are normalised, and the coefficients (Xant, Yant, Zant)
 * of R = Xant * Xaxis + Yant * Yaxis + Zant * Zaxis are obtained with Cramer's rule.
 *
 * @param RstX,RstY,RstZ       vector that is negated to give the look direction
 * @param AntXaxis*,AntYaxis*,AntZaxis*  antenna frame axes (need not be unit length)
 * @param AntDirectX,AntDirectY,AntDirectZ  unused by this function
 * @return theta = angle from the antenna Z axis (radians, clamped to [0, PI]),
 *         phi   = azimuth in the antenna XY plane (radians),
 *         Rho   = length of (Xant, Yant, Zant)
 */
extern __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
    double RstX, double RstY, double RstZ,
    double AntXaxisX, double AntXaxisY, double AntXaxisZ,
    double AntYaxisX, double AntYaxisY, double AntYaxisZ,
    double AntZaxisX, double AntZaxisY, double AntZaxisZ,
    double AntDirectX, double AntDirectY, double AntDirectZ
)
{
    CUDAVectorEllipsoidal result{ 0,0,-1 };

    // Look direction: satellite --> ground.
    double Xst = -1 * RstX;
    double Yst = -1 * RstY;
    double Zst = -1 * RstZ;

    // Normalisation. All operands are double, so use the double-precision sqrt
    // (the float variant silently truncated the norms to single precision).
    double RstNorm = sqrt(Xst * Xst + Yst * Yst + Zst * Zst);
    double AntXaxisNorm = sqrt(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
    double AntYaxisNorm = sqrt(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
    double AntZaxisNorm = sqrt(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);

    double Rx = Xst / RstNorm;
    double Ry = Yst / RstNorm;
    double Rz = Zst / RstNorm;
    double Xx = AntXaxisX / AntXaxisNorm;
    double Xy = AntXaxisY / AntXaxisNorm;
    double Xz = AntXaxisZ / AntXaxisNorm;
    double Yx = AntYaxisX / AntYaxisNorm;
    double Yy = AntYaxisY / AntYaxisNorm;
    double Yz = AntYaxisZ / AntYaxisNorm;
    double Zx = AntZaxisX / AntZaxisNorm;
    double Zy = AntZaxisY / AntZaxisNorm;
    double Zz = AntZaxisZ / AntZaxisNorm;

    // Determinant of the antenna axis matrix; shared by all three Cramer quotients.
    const double det = (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);

    double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / det;
    double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / det;
    double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / det;

    // Theta / phi / rho of the direction in the antenna frame.
    double Norm = sqrt(Xant * Xant + Yant * Yant + Zant * Zant);
    double Zn = Zant / Norm;

    // Angle from the Z axis; the cosine is clamped so rounding cannot push acos out of domain.
    double ThetaAnt = (-1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));

    // atan() yields -pi/2 .. pi/2; the branches below extend it to the full circle.
    double PhiAnt = fabs(Xant) < PRECISIONTOLERANCE ? 0 : atan(Yant / Xant);

    if (fabs(Yant) < PRECISIONTOLERANCE) {
        // On the X axis.
        // NOTE(review): this also returns 0 for the negative X axis (Xant < 0),
        // where PI might be expected — behaviour kept as in the original.
        PhiAnt = 0;
    }
    else if (fabs(Xant) < PRECISIONTOLERANCE) {
        // On the Y axis.
        if (Yant > 0) {
            PhiAnt = PI / 2;
        }
        else {
            PhiAnt = -PI / 2;
        }
    }
    else if (Xant < 0) {
        if (Yant > 0) {
            PhiAnt = PI + PhiAnt;
        }
        else {
            PhiAnt = -PI + PhiAnt;
        }
    }
    else {
        // Xant > 0: atan() already gives the right quadrant.
    }

    if (isnan(PhiAnt)) {
        printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
    }

    result.theta = ThetaAnt;
    result.phi = PhiAnt;
    result.Rho = Norm;
    return result;
}

/**
 * Bilinear interpolation of a regularly sampled antenna pattern.
 *
 * The pattern is indexed as antpattern[thetaIndex * phipoints + phiIndex], with
 * sample coordinates start + index * step on each axis.
 *
 * @param antpattern   pattern samples, thetapoints * phipoints values
 * @param starttheta,startphi  coordinate of sample index 0 on each axis
 * @param dtheta,dphi  sample spacing on each axis
 * @param thetapoints,phipoints  number of samples on each axis
 * @param searththeta,searchphi  query coordinate
 * @return interpolated value; 0 when searththeta > 90 or when any of the four
 *         surrounding samples lies outside the grid
 */
extern __device__ double GPU_BillerInterpAntPattern(double* antpattern,
    double starttheta, double startphi, double dtheta, double dphi,
    long thetapoints, long phipoints,
    double searththeta, double searchphi)
{
    double stheta = searththeta;
    double sphi = searchphi;
    if (stheta > 90) {
        return 0;
    }

    // Fractional sample indices of the query point.
    double pthetaid = (stheta - starttheta) / dtheta;
    double pphiid = (sphi - startphi) / dphi;

    long lasttheta = (long)floor(pthetaid);
    long nextTheta = lasttheta + 1;
    long lastphi = (long)floor(pphiid);
    long nextPhi = lastphi + 1;

    if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0
        || lasttheta >= thetapoints || nextTheta >= thetapoints
        || lastphi >= phipoints || nextPhi >= phipoints) {
        return 0;
    }

    double x = stheta;
    double y = sphi;

    double x1 = lasttheta * dtheta + starttheta;
    double x2 = nextTheta * dtheta + starttheta;
    double y1 = lastphi * dphi + startphi;
    double y2 = nextPhi * dphi + startphi;

    double z11 = antpattern[lasttheta * phipoints + lastphi];
    double z12 = antpattern[lasttheta * phipoints + nextPhi];
    double z21 = antpattern[nextTheta * phipoints + lastphi];
    double z22 = antpattern[nextTheta * phipoints + nextPhi];

    // Disabled in the original: dB -> linear conversion of the four samples.
    //z11 = powf(10, z11 / 10);
    //z12 = powf(10, z12 / 10);
    //z21 = powf(10, z21 / 10);
    //z22 = powf(10, z22 / 10);

    double GainValue = (z11 * (x2 - x) * (y2 - y)
        + z21 * (x - x1) * (y2 - y)
        + z12 * (x2 - x) * (y - y1)
        + z22 * (x - x1) * (y - y1));
    GainValue = GainValue / ((x2 - x1) * (y2 - y1));
    return GainValue;
}

/* Kernels ******************************************************************************************************************************/

/**
 * Per (pulse, target) range and amplitude for one chunk of targets.
 *
 * Layout: 1-D grid; thread idx maps to
 *     prfId = idx / SHAREMEMORY_FLOAT_HALF,
 *     posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId,
 * and writes d_temp_R[idx] / d_temp_amps[idx]. The output buffers must hold at
 * least PRFCount * SHAREMEMORY_FLOAT_HALF floats.
 *
 * Every slot with prfId < PRFCount is written on every launch. Slots whose
 * target is past the end of the list, out of [NearR, FarR], has a (near) zero
 * slope vector, a local angle outside [0, pi/2), an invalid class index, or a
 * non-finite result are set to 0. (The original left the past-the-end slots
 * untouched, so the last chunk re-used values of the previous chunk.)
 *
 * The antenna pattern arguments are accepted but the pattern lookup is disabled
 * in this version; the antenna gain is fixed to 1.
 *
 * @param PRFCount          total number of pulses
 * @param startPosId        index of the first target of this chunk
 * @param pixelcount        total number of targets
 * @param sigma0Paramslist  per-class model coefficients, indexed by demCls[posId]
 * @param sigmaparamslistlen number of entries in sigma0Paramslist
 * @param d_temp_R          output: RstR - refPhaseRange
 * @param d_temp_amps       output: Pt * sigma0 / ((4*pi)^2 * RstR^4)
 */
__global__ void CUDA_Kernel_Computer_R_amp(
    double* antX, double* antY, double* antZ,
    double* antXaxisX, double* antXaxisY, double* antXaxisZ,
    double* antYaxisX, double* antYaxisY, double* antYaxisZ,
    double* antZaxisX, double* antZaxisY, double* antZaxisZ,
    double* antDirectX, double* antDirectY, double* antDirectZ,
    long PRFCount, // total number of pulses
    double* targetX, double* targetY, double* targetZ, long* demCls,
    double* demSlopeX, double* demSlopeY, double* demSlopeZ,
    long startPosId, long pixelcount,
    CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
    double Pt, double refPhaseRange,
    double* TransAntpattern,
    double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
    double* ReceiveAntpattern,
    double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
    double maxTransAntPatternValue, double maxReceiveAntPatternValue,
    double NearR, double FarR,
    float* d_temp_R, float* d_temp_amps // outputs
)
{
    long idx = (long)blockIdx.x * blockDim.x + threadIdx.x;   // global thread id
    long prfId = idx / SHAREMEMORY_FLOAT_HALF;
    long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId;   // target handled by this thread

    if (prfId >= PRFCount) {
        return;   // idx lies outside the scratch buffers
    }

    // Default: this (pulse, target) slot contributes nothing.
    d_temp_R[idx] = 0;
    d_temp_amps[idx] = 0;

    if (posId >= pixelcount) {
        return;   // tail of the last chunk
    }

    // Target -> antenna vector and its length.
    double RstX = antX[prfId] - targetX[posId];
    double RstY = antY[prfId] - targetY[posId];
    double RstZ = antZ[prfId] - targetZ[posId];
    double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ);

    if (RstR < NearR || RstR > FarR) {
        return;
    }

    double slopeX = demSlopeX[posId];
    double slopeY = demSlopeY[posId];
    double slopeZ = demSlopeZ[posId];
    double slopR = sqrt(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ);

    if (!(fabs(slopR - 0) > 1e-3)) {
        return;   // no usable slope vector
    }

    // Angle between the slope vector and the target -> antenna vector.
    double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
    double localangle = acos(dotAB / (RstR * slopR));

    if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
        return;
    }

    // Direction of the target in the antenna frame.
    CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
        RstX, RstY, RstZ,
        antXaxisX[prfId], antXaxisY[prfId], antXaxisZ[prfId],
        antYaxisX[prfId], antYaxisY[prfId], antYaxisZ[prfId],
        antZaxisX[prfId], antZaxisY[prfId], antZaxisZ[prfId],
        antDirectX[prfId], antDirectY[prfId], antDirectZ[prfId]
    );
    antVector.theta = antVector.theta * r2d;
    antVector.phi = antVector.phi * r2d;
    //printf("theta: %f , phi: %f \n", antVector.theta, antVector.phi);

    if (!(antVector.Rho > 0)) {
        return;
    }

    // Disabled in the original: transmit / receive pattern lookup.
    //double TansantPatternGain = GPU_BillerInterpAntPattern(
    //    TransAntpattern,
    //    Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
    //    antVector.theta, antVector.phi);
    //double antPatternGain = GPU_BillerInterpAntPattern(
    //    ReceiveAntpattern,
    //    Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
    //    antVector.theta, antVector.phi);

    double sigma0 = 0;
    {
        long clsid = demCls[posId];
        //printf("clsid=%d\n", clsid);
        // An out-of-range class index would read past the table; treat it as "no backscatter".
        if (clsid >= 0 && clsid < sigmaparamslistlen) {
            CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
            bool allZero =
                fabs(tempsigma.p1) < PRECISIONTOLERANCE &&
                fabs(tempsigma.p2) < PRECISIONTOLERANCE &&
                fabs(tempsigma.p3) < PRECISIONTOLERANCE &&
                fabs(tempsigma.p4) < PRECISIONTOLERANCE &&
                fabs(tempsigma.p5) < PRECISIONTOLERANCE &&
                fabs(tempsigma.p6) < PRECISIONTOLERANCE;
            if (!allZero) {
                double sigma = GPU_getSigma0dB(tempsigma, localangle);
                sigma0 = pow(10.0, sigma / 10.0);   // dB -> linear
            }
        }
    }

    //ampGain = TansantPatternGain * antPatternGain;
    double ampGain = 1;
    // Disabled in the original: reject directions more than 3 dB below the pattern maximum.
    //if (10 * log10(ampGain / maxReceiveAntPatternValue / maxTransAntPatternValue) < -3) {
    //    d_temp_R[idx] = 0;
    //    d_temp_amps[idx] = 0;
    //    return;
    //}
    //else {}

    // Spreading loss 1 / ((4*pi)^2 * R^4), evaluated in double precision.
    double fourPi = 4 * LAMP_CUDA_PI;
    double RstR2 = RstR * RstR;
    ampGain = ampGain / (fourPi * fourPi * RstR2 * RstR2);

    float temp_amp = float(ampGain * Pt * sigma0);
    float temp_R = float(RstR - refPhaseRange);

    if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
        printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
        return;   // slot stays 0
    }

    d_temp_amps[idx] = temp_amp;
    d_temp_R[idx] = temp_R;
}

/**
 * Accumulates the echo of one target chunk into the per (pulse, frequency) output.
 *
 * Layout: 1-D grid; thread idx maps to prfId = idx / FreqPoints and
 * fId = idx % FreqPoints. All threads of a block must share the same prfId,
 * i.e. FreqPoints must be a multiple of blockDim.x (the host code pads it to
 * BLOCK_SIZE). Static shared memory: 2 * SHAREMEMORY_FLOAT_HALF floats.
 *
 * The block first stages the pulse's range/amplitude row in shared memory
 * (entries at or beyond posNum are zero), then every thread with
 * fId < maxfreqnum sums amp * exp(j * R * RFPCPIDIVLIGHT * (f0 + fId * dfreq))
 * over the staged entries and adds the sum to the output arrays.
 *
 * @param d_temp_R,d_temp_amps  scratch rows, posNum values per pulse
 * @param posNum          number of valid entries per pulse row
 * @param FreqPoints      padded number of frequency slots per pulse
 * @param maxfreqnum      number of real frequency points
 * @param d_temp_echo_real,d_temp_echo_imag  accumulators, temp_PRF_Count * maxfreqnum each
 * @param temp_PRF_Count  number of pulses
 */
__global__ void CUDA_Kernel_Computer_echo(
    float* d_temp_R, float* d_temp_amps, long posNum,
    float f0, float dfreq,
    long FreqPoints,   // padded frequency slots per pulse
    long maxfreqnum,   // number of real frequency points
    float* d_temp_echo_real, float* d_temp_echo_imag,
    long temp_PRF_Count
)
{
    __shared__ float s_R[SHAREMEMORY_FLOAT_HALF];     // shared by the whole block
    __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];

    long tid = threadIdx.x;
    long idx = (long)blockIdx.x * blockDim.x + tid;
    long prfId = idx / FreqPoints;   // pulse id
    long fId = idx % FreqPoints;     // frequency id

    // Block-stride staging: fills every shared slot for any block size, keeps
    // neighbouring threads on neighbouring addresses, and never reads a row of
    // a pulse that does not exist.
    const bool prfValid = prfId < temp_PRF_Count;
    for (long psid = tid; psid < SHAREMEMORY_FLOAT_HALF; psid += blockDim.x) {
        if (prfValid && psid < posNum) {
            long pixelId = prfId * posNum + psid;
            s_R[psid] = d_temp_R[pixelId];
            s_amp[psid] = d_temp_amps[pixelId];
        }
        else {
            s_R[psid] = 0;
            s_amp[psid] = 0;
        }
    }

    __syncthreads();   // all staged data must be visible before it is consumed

    if (fId < maxfreqnum && prfId < temp_PRF_Count) {
        long echo_ID = prfId * maxfreqnum + fId;   // output slot
        float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
        float temp_real = 0;
        float temp_imag = 0;
        float temp_phi = 0;
        float temp_amp = 0;

        for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
            temp_phi = s_R[dataid] * factorjTemp;
            temp_amp = s_amp[dataid];
            temp_real += (temp_amp * cosf(temp_phi));
            temp_imag += (temp_amp * sinf(temp_phi));
            //if (dataid > 5000) {
            //    printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
            //}
            if (isnan(temp_phi) || isnan(temp_amp) || isnan(temp_real) || isnan(temp_imag)
                || isinf(temp_phi) || isinf(temp_amp) || isinf(temp_real) || isinf(temp_imag)) {
                printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, temp_real, temp_imag);
            }
        }
        //printf("echo_ID=%d; ehodata=(%f,%f)\n", echo_ID, temp_real, temp_imag);
        //printf("(%f %f %f) ", factorjTemp, s_amp[0], s_R[0]);
        d_temp_echo_real[echo_ID] += temp_real;
        d_temp_echo_imag[echo_ID] += temp_imag;
    }
}

/**
 * Chunked main flow (antenna-pattern signature).
 *
 * Walks the target list in chunks of SHAREMEMORY_FLOAT_HALF. For each chunk it
 * launches CUDA_Kernel_Computer_R_amp to fill d_temp_R / d_temp_amp and then
 * CUDA_Kernel_Computer_echo to accumulate into out_echoReal / out_echoImag.
 * Both launches go to the default stream; the function synchronises the device
 * once after the loop.
 *
 * All pointer arguments are passed straight to the kernels and must therefore
 * be device-accessible. d_temp_R / d_temp_amp need PRFCount * SHAREMEMORY_FLOAT_HALF
 * floats each.
 */
void CUDA_RFPC_MainProcess(
    double* antX, double* antY, double* antZ,
    double* antXaxisX, double* antXaxisY, double* antXaxisZ,
    double* antYaxisX, double* antYaxisY, double* antYaxisZ,
    double* antZaxisX, double* antZaxisY, double* antZaxisZ,
    double* antDirectX, double* antDirectY, double* antDirectZ,
    long PRFCount, long FreqNum,
    float f0, float dfreq,
    double Pt,
    double refPhaseRange,
    double* TransAntpattern,
    double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints,
    double* ReceiveAntpattern,
    double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,
    double maxTransAntPatternValue, double maxReceiveAntPatternValue,
    double NearR, double FarR,
    double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber,
    double* demSlopeX, double* demSlopeY, double* demSlopeZ,
    CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
    float* out_echoReal, float* out_echoImag,
    float* d_temp_R, float* d_temp_amp
)
{
    // Frequency count padded so that every block of the echo kernel belongs to one pulse.
    long BLOCK_FREQNUM = NextBlockPad(FreqNum, BLOCK_SIZE);
    long cudaBlocknum = 0;
    long freqpoints = BLOCK_FREQNUM;

    printf("freqpoints:%ld\n", freqpoints);
    long process = 0;

    for (long sTi = 0; sTi < TargetNumber; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
        // Stage 1: range / amplitude of every (pulse, target-in-chunk) pair.
        cudaBlocknum = (PRFCount * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDA_Kernel_Computer_R_amp<<<cudaBlocknum, BLOCK_SIZE>>>(
            antX, antY, antZ,
            antXaxisX, antXaxisY, antXaxisZ,
            antYaxisX, antYaxisY, antYaxisZ,
            antZaxisX, antZaxisY, antZaxisZ,
            antDirectX, antDirectY, antDirectZ,
            PRFCount,
            targetX, targetY, targetZ, demCls,
            demSlopeX, demSlopeY, demSlopeZ,
            sTi, TargetNumber,
            sigma0Paramslist, sigmaparamslistlen,
            Pt, refPhaseRange,
            TransAntpattern,
            Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
            ReceiveAntpattern,
            Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
            maxTransAntPatternValue, maxReceiveAntPatternValue,
            NearR, FarR,
            d_temp_R, d_temp_amp
        );
        PrintLasterError("CUDA_Kernel_Computer_R_amp");

        // Stage 2: accumulate the chunk into the echo.
        cudaBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDA_Kernel_Computer_echo<<<cudaBlocknum, BLOCK_SIZE>>>(
            d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF,
            f0, dfreq,
            freqpoints, FreqNum,
            out_echoReal, out_echoImag,
            PRFCount
        );
        PrintLasterError("CUDA_Kernel_Computer_echo");

        // Progress report, at most once per percent.
        if ((sTi * 100.0 / TargetNumber) - process >= 1) {
            process = sTi * 100.0 / TargetNumber;
            PRINT("TargetID [%f]: %ld / %ld finished\n", sTi * 100.0 / TargetNumber, sTi, TargetNumber);
        }
    }

    cudaDeviceSynchronize();
}

/* Kernels ******************************************************************************************************************************/

/**
 * Per (pulse, target) range and amplitude for one chunk of targets, without any
 * antenna pattern and with a single backscatter parameter set.
 *
 * Layout and indexing are the same as CUDA_Kernel_Computer_R_amp:
 *     prfId = idx / SHAREMEMORY_FLOAT_HALF,
 *     posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId.
 * The output buffers must hold PRFCount * SHAREMEMORY_FLOAT_HALF doubles.
 *
 * Every slot with prfId < PRFCount is written on every launch; rejected or
 * non-finite results are stored as 0. (The original skipped the write for
 * past-the-end targets and for zero slope vectors, leaving stale data, and
 * masked non-finite values with `value * bool`, which still yields NaN.)
 *
 * @param demLen  unused by this kernel
 */
__global__ void Kernel_Computer_R_amp_NoAntPattern(
    SateState* antlist,
    long PRFCount,
    GoalState* goallist,
    long demLen,
    long startPosId, long pixelcount,
    CUDASigmaParam sigma0Params,
    double Pt,
    double refPhaseRange,
    double NearR, double FarR,
    double* d_temp_R, double* d_temp_amps // outputs
)
{
    long idx = (long)blockIdx.x * blockDim.x + threadIdx.x;   // global thread id
    long prfId = idx / SHAREMEMORY_FLOAT_HALF;
    long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId;   // target handled by this thread

    if (prfId >= PRFCount) {
        return;   // idx lies outside the scratch buffers
    }

    // Default: this (pulse, target) slot contributes nothing.
    d_temp_R[idx] = 0;
    d_temp_amps[idx] = 0;

    if (posId >= pixelcount) {
        return;   // tail of the last chunk
    }

    // Target -> antenna vector and its length.
    double RstX = antlist[prfId].Px - goallist[posId].Tx;
    double RstY = antlist[prfId].Py - goallist[posId].Ty;
    double RstZ = antlist[prfId].Pz - goallist[posId].Tz;
    double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ);

    if (RstR < NearR || RstR > FarR) {
        return;
    }

    double slopeX = goallist[posId].TsX;
    double slopeY = goallist[posId].TsY;
    double slopeZ = goallist[posId].TsZ;
    double slopR = sqrt(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ);

    if (!(fabs(slopR - 0) > 1e-3)) {
        return;   // no usable slope vector
    }

    double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
    double localangle = acos(dotAB / (RstR * slopR));

    if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
        return;
    }

    // Spreading loss 1 / ((4*pi)^2 * R^4), evaluated in double precision.
    double fourPi = 4 * LAMP_CUDA_PI;
    double RstR2 = RstR * RstR;
    double ampGain = 1.0 / (fourPi * fourPi * RstR2 * RstR2);

    double sigma = GPU_getSigma0dB(sigma0Params, localangle);
    sigma = pow(10.0, sigma / 10.0);   // dB -> linear

    double temp_amp = double(ampGain * Pt * sigma);
    double temp_R = double(RstR - refPhaseRange);

    bool finite = !(isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R));
    if (finite) {
        d_temp_amps[idx] = temp_amp;
        d_temp_R[idx] = temp_R;
    }
}

/**
 * Accumulates the echo of one target chunk into a complex per (pulse, frequency)
 * output. Double-precision scratch input, single-precision accumulation.
 *
 * Layout, shared-memory use and preconditions are the same as
 * CUDA_Kernel_Computer_echo: prfId = idx / FreqPoints, fId = idx % FreqPoints,
 * FreqPoints a multiple of blockDim.x, 2 * SHAREMEMORY_FLOAT_HALF floats of
 * static shared memory.
 *
 * NOTE(review): range and phase are narrowed to float before the phase
 * R * factor is formed — confirm this precision is sufficient for the ranges
 * and frequencies used.
 *
 * @param echodata  accumulator, temp_PRF_Count * maxfreqnum complex values
 */
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
    double* d_temp_R, double* d_temp_amps, long posNum,
    double f0, double dfreq,
    long FreqPoints,   // padded frequency slots per pulse
    long maxfreqnum,   // number of real frequency points
    cuComplex* echodata,
    long temp_PRF_Count
)
{
    __shared__ float s_R[SHAREMEMORY_FLOAT_HALF];     // shared by the whole block
    __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];

    long tid = threadIdx.x;
    long idx = (long)blockIdx.x * blockDim.x + tid;
    long prfId = idx / FreqPoints;   // pulse id
    long fId = idx % FreqPoints;     // frequency id

    // Block-stride staging; see CUDA_Kernel_Computer_echo.
    const bool prfValid = prfId < temp_PRF_Count;
    for (long psid = tid; psid < SHAREMEMORY_FLOAT_HALF; psid += blockDim.x) {
        if (prfValid && psid < posNum) {
            long pixelId = prfId * posNum + psid;
            s_R[psid] = (float)d_temp_R[pixelId];
            s_amp[psid] = (float)d_temp_amps[pixelId];
        }
        else {
            s_R[psid] = 0;
            s_amp[psid] = 0;
        }
    }

    __syncthreads();   // all staged data must be visible before it is consumed

    if (fId < maxfreqnum && prfId < temp_PRF_Count) {
        long echo_ID = prfId * maxfreqnum + fId;   // output slot
        float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
        cuComplex echo = make_cuComplex(0, 0);

        float temp_phi = 0;
        float temp_amp = 0;

        for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
            temp_phi = s_R[dataid] * factorjTemp;
            temp_amp = s_amp[dataid];
            echo.x += (temp_amp * cosf(temp_phi));
            echo.y += (temp_amp * sinf(temp_phi));
            //if (dataid > 5000) {
            //    printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
            //}
            if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y)
                || isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y)) {
                printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y);
            }
        }

        echodata[echo_ID] = cuCaddf(echodata[echo_ID], echo);
    }
}

/**
 * Single-pass echo kernel: one thread per (pulse, frequency) pair loops over
 * all demLen targets and adds the accumulated sum to echodata[idx].
 *
 * Layout: 1-D grid; prfid = idx / Freqcount, freqid = idx % Freqcount.
 * echodata must hold PRFCount * Freqcount complex values.
 *
 * NOTE(review): the mask `isNearFar` is true only for targets that are OUTSIDE
 * [NearR, FarR] AND have a slope component larger than 1000, and only those
 * targets contribute. This looks inverted relative to the other kernels in this
 * file; the predicate is kept exactly as in the original — confirm the intent.
 */
__global__ void CUDA_Kernel_RFPC(
    SateState* antlist,
    long PRFCount, long Freqcount,   // pulse count, frequency count
    GoalState* goallist,
    long demLen,
    double StartFreqGHz, double FreqStep,
    double refPhaseRange, double NearR, double FarR,
    CUDASigmaParam clsSigma0,
    cuComplex* echodata
)
{
    size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;   // global thread id
    // Plain integer division; routing the quotient through float loses
    // precision once idx / Freqcount exceeds 2^24.
    size_t prfid = idx / (size_t)Freqcount;
    size_t freqid = idx % (size_t)Freqcount;

    if (prfid < PRFCount && freqid < Freqcount) {
        SateState antPos = antlist[prfid];
        double factorjTemp = RFPCPIDIVLIGHT * (StartFreqGHz + freqid * FreqStep);
        const double fourPi = 4 * LAMP_CUDA_PI;

        cuComplex echo = make_cuComplex(0, 0);

        for (long tid = 0; tid < demLen; tid++) {
            GoalState p = goallist[tid];

            // Target -> antenna vector and its length.
            double Tx = antPos.Px - p.Tx;
            double Ty = antPos.Py - p.Ty;
            double Tz = antPos.Pz - p.Tz;
            double R = sqrt(Tx * Tx + Ty * Ty + Tz * Tz);

            bool isNearFar = (R < NearR || R > FarR)
                && ((fabs(p.TsX) > 1000) || (fabs(p.TsY) > 1000) || (fabs(p.TsZ) > 1000));

            // Angle between the slope vector and the target -> antenna vector.
            double slopeNorm = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ);
            double incAngle = acos((Tx * p.TsX + Ty * p.TsY + Tz * p.TsZ) / (R * slopeNorm));

            // Backscatter (dB -> linear) and spreading loss 1 / ((4*pi)^2 * R^4).
            double sigma = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3,
                clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle);
            sigma = pow(10.0, sigma / 10.0);
            double R2 = R * R;
            double amp = sigma / (fourPi * fourPi * R2 * R2);

            double phase = factorjTemp * (R - refPhaseRange);

            // Select instead of multiplying by the mask: NaN * 0 is still NaN
            // and would poison the whole sum.
            double echo_real = isNearFar ? amp * cos(phase) : 0.0;
            double echo_imag = isNearFar ? amp * sin(phase) : 0.0;

            echo.x = echo.x + echo_real;
            echo.y = echo.y + echo_imag;

            if (idx == 0 && tid % (10 * SHAREMEMORY_DEM_STEP) == 0) {
                printf("Idx:%llu , TsID: %lld, TSCOUNT: %lld \n",
                    (unsigned long long)idx, (long long)tid, (long long)demLen);
            }
        }

        echodata[idx] = cuCaddf(echodata[idx], echo);
    }
}

/** Chunked processing ******************************************************************************************************************/

/**
 * Runs the no-antenna-pattern echo simulation for one task.
 *
 * Allocates two scratch buffers of task.prfNum * SHAREMEMORY_FLOAT_HALF doubles,
 * walks the target list in chunks of SHAREMEMORY_FLOAT_HALF (range/amplitude
 * kernel followed by echo accumulation into task.d_echoData), synchronises the
 * device and frees the scratch buffers. The transmit power passed to the
 * kernel is fixed to 1.
 *
 * @param task   task description; its pointers are handed to the kernels as-is
 * @param devid  device id forwarded to mallocCUDADevice
 */
extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
{
    size_t pixelcount = task.prfNum * task.freqNum;
    size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
    printf("start %lld,%lld ,%lld,%d\n",
        (long long)pixelcount, (long long)task.targetnum, (long long)grid_size, (int)BLOCK_SIZE);

    double* d_R = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
    double* d_amps = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
    if (d_R == nullptr || d_amps == nullptr) {
        // Launching with a null scratch buffer would fault inside the kernels.
        printf("ProcessRFPCTask: scratch buffer allocation failed\n");
        if (d_R != nullptr) { FreeCUDADevice(d_R); }
        if (d_amps != nullptr) { FreeCUDADevice(d_amps); }
        return;
    }

    // Frequency count padded so that every block of the echo kernel belongs to one pulse.
    long BLOCK_FREQNUM = NextBlockPad(task.freqNum, BLOCK_SIZE);
    long cudaBlocknum = 0;
    long freqpoints = BLOCK_FREQNUM;

    printf("freqpoints:%ld\n", freqpoints);
    long process = 0;

    for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
        // Stage 1: range / amplitude of every (pulse, target-in-chunk) pair.
        cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
        Kernel_Computer_R_amp_NoAntPattern<<<cudaBlocknum, BLOCK_SIZE>>>(
            task.antlist,
            task.prfNum,
            task.goallist,
            task.targetnum,
            sTi, task.targetnum,
            task.sigma0_cls,
            1,
            task.Rref,
            task.Rnear, task.Rfar,
            d_R, d_amps
        );
        PrintLasterError("CUDA_Kernel_Computer_R_amp");

        // Stage 2: accumulate the chunk into the echo.
        cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDA_Kernel_Computer_echo_NoAntPattern<<<cudaBlocknum, BLOCK_SIZE>>>(
            d_R, d_amps, SHAREMEMORY_FLOAT_HALF,
            task.startFreq, task.stepFreq,
            freqpoints, task.freqNum,
            task.d_echoData,
            task.prfNum
        );
        PrintLasterError("CUDA_Kernel_Computer_echo");

        // Progress report, at most once per percent.
        if ((sTi * 100.0 / task.targetnum) - process >= 1) {
            process = sTi * 100.0 / task.targetnum;
            PRINT("TargetID [%f]: %ld / %ld finished\n", sTi * 100.0 / task.targetnum, (long)sTi, (long)task.targetnum);
        }
    }

    cudaDeviceSynchronize();

    FreeCUDADevice(d_R);
    FreeCUDADevice(d_amps);
}

#endif