#include #include #include #include #include #include #include #include #include "BaseConstVariable.h" #include "GPURFPC.cuh" #ifdef __CUDANVCC___ __device__ float GPU_getSigma0dB(CUDASigmaParam param, float theta) {//线性值 float sigma= param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6); return sigma; } __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal( float RstX, float RstY, float RstZ, float antXaxisX, float antXaxisY, float antXaxisZ, float antYaxisX, float antYaxisY, float antYaxisZ, float antZaxisX, float antZaxisY, float antZaxisZ, float antDirectX, float antDirectY, float antDirectZ ) { CUDAVectorEllipsoidal result{ 0,0,-1 }; // 求解天线增益 float Xst = -1 * RstX; // 卫星 --> 地面 float Yst = -1 * RstY; float Zst = -1 * RstZ; float AntXaxisX = antXaxisX; float AntXaxisY = antXaxisY; float AntXaxisZ = antXaxisZ; float AntYaxisX = antYaxisX; float AntYaxisY = antYaxisY; float AntYaxisZ = antYaxisZ; float AntZaxisX = antZaxisX; float AntZaxisY = antZaxisY; float AntZaxisZ = antZaxisZ; // 归一化 float RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst); float AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ); float AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ); float AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ); float Rx = Xst / RstNorm; float Ry = Yst / RstNorm; float Rz = Zst / RstNorm; float Xx = AntXaxisX / AntXaxisNorm; float Xy = AntXaxisY / AntXaxisNorm; float Xz = AntXaxisZ / AntXaxisNorm; float Yx = AntYaxisX / AntYaxisNorm; float Yy = AntYaxisY / AntYaxisNorm; float Yz = AntYaxisZ / AntYaxisNorm; float Zx = AntZaxisX / AntZaxisNorm; float Zy = AntZaxisY / AntZaxisNorm; float Zz = AntZaxisZ / AntZaxisNorm; float Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx); float Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx); float Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx); // 计算theta 与 phi float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho float ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角 float PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2 if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上 PhiAnt = 0; } else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上，原点 if (Yant > 0) { PhiAnt = PI / 2; } else { PhiAnt = -PI / 2; } } else if (Xant < 0) { if (Yant > 0) { PhiAnt = PI + PhiAnt; } else { PhiAnt = -PI + PhiAnt; } } else { // Xant>0 X 正轴 } if (isnan(PhiAnt)) { printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt); } result.theta = ThetaAnt; result.phi = PhiAnt; result.Rho = Norm; return result; } __device__ float GPU_BillerInterpAntPattern(float* antpattern, float starttheta, float startphi, float dtheta, float dphi, long thetapoints, long phipoints, float searththeta, float searchphi) { float stheta = searththeta; float sphi = searchphi; if (stheta > 90) { return 0; } else {} float pthetaid = (stheta - starttheta) / dtheta;// float pphiid = (sphi - startphi) / dphi; long lasttheta = floorf(pthetaid); long nextTheta = lasttheta + 1; long lastphi = floorf(pphiid); long nextPhi = lastphi + 1; if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 || lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints) { return 0; } else { float x = stheta; float y = sphi; float x1 = lasttheta * dtheta + starttheta; float x2 = nextTheta * dtheta + starttheta; float y1 = lastphi * dphi + startphi; float y2 = nextPhi * dphi + startphi; float z11 = antpattern[lasttheta * phipoints + lastphi]; float z12 = antpattern[lasttheta * phipoints + nextPhi]; float z21 = antpattern[nextTheta * phipoints + lastphi]; float z22 = antpattern[nextTheta * phipoints + nextPhi]; //z11 = powf(10, z11 / 10); // dB-> 线性 //z12 = powf(10, z12 / 10); //z21 = powf(10, z21 / 10); //z22 = powf(10, z22 / 10); float GainValue = (z11 * (x2 - x) * (y2 - y) + z21 * (x - x1) * (y2 - y) + z12 * (x2 - x) * (y - y1) + z22 * (x - x1) * (y - y1)); GainValue = GainValue / ((x2 - x1) * (y2 - y1)); return GainValue; } } __device__ cuComplex GPU_calculationEcho(float sigma0, float TransAnt, float ReciveAnt, float localangle, float R, float slopeangle, float Pt, float lamda) { float amp = Pt * TransAnt * ReciveAnt; amp = amp * sigma0; amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); // 反射强度 float phi = (-4 * LAMP_CUDA_PI / lamda) * R; cuComplex echophi = make_cuComplex(0, phi); cuComplex echophiexp = cuCexpf(echophi); cuComplex echo=make_cuComplex(echophiexp.x * amp, echophiexp.y * amp); return echo; } __global__ void CUDA_SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ, float antXaxisX, float antXaxisY, float antXaxisZ, float antYaxisX, float antYaxisY, float antYaxisZ, float antZaxisX, float antZaxisY, float antZaxisZ, float antDirectX, float antDirectY, float antDirectZ, float* thetaAnt, float* phiAnt , long len) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float Xst = -1 * RstX[idx]; // 卫星 --> 地面 float Yst = -1 * RstY[idx]; float Zst = -1 * RstZ[idx]; float AntXaxisX = antXaxisX; float AntXaxisY = antXaxisY; float AntXaxisZ = antXaxisZ; float AntYaxisX = antYaxisX; float AntYaxisY = antYaxisY; float AntYaxisZ = antYaxisZ; float AntZaxisX = antZaxisX; float AntZaxisY = antZaxisY; float AntZaxisZ = antZaxisZ; // 归一化 float RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst); float AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ); float AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ); float AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ); float Rx = Xst / RstNorm; float Ry = Yst / RstNorm; float Rz = Zst / RstNorm; float Xx = AntXaxisX / AntXaxisNorm; float Xy = AntXaxisY / AntXaxisNorm; float Xz = AntXaxisZ / AntXaxisNorm; float Yx = AntYaxisX / AntYaxisNorm; float Yy = AntYaxisY / AntYaxisNorm; float Yz = AntYaxisZ / AntYaxisNorm; float Zx = AntZaxisX / AntZaxisNorm; float Zy = AntZaxisY / AntZaxisNorm; float Zz = AntZaxisZ / AntZaxisNorm; float Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx); float Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx); float Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx); // 计算theta 与 phi float Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho float ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角 float PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2 if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上 PhiAnt = 0; } else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上，原点 if (Yant > 0) { PhiAnt = PI / 2; } else { PhiAnt = -PI / 2; } } else if (Xant < 0) { if (Yant > 0) { PhiAnt = PI + PhiAnt; } else { PhiAnt = -PI+PhiAnt ; } } else { // Xant>0 X 正轴 } if (isnan(PhiAnt)) { printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant,Norm, ThetaAnt, PhiAnt); } //if (abs(ThetaAnt - 0) < PRECISIONTOLERANCE) { // PhiAnt = 0; //} //else {} thetaAnt[idx] = ThetaAnt*r2d; phiAnt[idx] = PhiAnt*r2d; //printf("Rst=[%f,%f,%f];AntXaxis = [%f, %f, %f];AntYaxis=[%f,%f,%f];AntZaxis=[%f,%f,%f];phiAnt=%f;thetaAnt=%f;\n", Xst, Yst, Zst // , AntXaxisX, AntXaxisY, AntXaxisZ // , AntYaxisX, AntYaxisY, AntYaxisZ // , AntZaxisX, AntZaxisY, AntZaxisZ // , phiAnt[idx] // , thetaAnt[idx] //); } } __global__ void CUDA_BillerInterpAntPattern(float* antpattern, float starttheta, float startphi, float dtheta, float dphi, long thetapoints, long phipoints, float* searththeta, float* searchphi, float* searchantpattern, long len) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float stheta = searththeta[idx]; float sphi = searchphi[idx]; float pthetaid = (stheta - starttheta) / dtheta;// float pphiid = (sphi - startphi) / dphi; long lasttheta = floorf(pthetaid); long nextTheta = lasttheta + 1; long lastphi = floorf(pphiid); long nextPhi = lastphi + 1; if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 || lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints) { searchantpattern[idx] = 0; } else { float x = stheta; float y = sphi; float x1 = lasttheta * dtheta + starttheta; float x2 = nextTheta * dtheta + starttheta; float y1 = lastphi * dphi + startphi; float y2 = nextPhi * dphi + startphi; float z11 = antpattern[lasttheta * phipoints + lastphi]; float z12 = antpattern[lasttheta * phipoints + nextPhi]; float z21 = antpattern[nextTheta * phipoints + lastphi]; float z22 = antpattern[nextTheta * phipoints + nextPhi]; z11 = powf(10, z11 / 10); z12 = powf(10, z12 / 10); z21 = powf(10, z21 / 10); z22 = powf(10, z22 / 10); float GainValue = (z11 * (x2 - x) * (y2 - y) + z21 * (x - x1) * (y2 - y) + z12 * (x2 - x) * (y - y1) + z22 * (x - x1) * (y - y1)); GainValue = GainValue / ((x2 - x1) * (y2 - y1)); searchantpattern[idx] = GainValue; } } } __global__ void CUDA_calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt, float* localangle, float* R, float* slopeangle, float nearRange, float Fs, float Pt, float lamda, long FreqIDmax, cuComplex* echoArr, long* FreqID, long len) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float r = R[idx]; float amp = Pt * TransAnt[idx] * ReciveAnt[idx]; amp = amp * sigma0[idx]; amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4)); // 反射强度 // 处理相位 float phi = (-4 * LAMP_CUDA_PI / lamda) * r; cuComplex echophi = make_cuComplex(0, phi); cuComplex echophiexp = cuCexpf(echophi); float timeR = 2 * (r - nearRange) / LIGHTSPEED * Fs; long timeID = floorf(timeR); //if (timeID < 0 || timeID >= FreqIDmax) { // timeID = 0; // amp = 0; //} cuComplex echo = make_cuComplex(echophiexp.x , echophiexp.y); echoArr[idx] = echo; FreqID[idx] = timeID; } } __global__ void CUDA_AntPatternInterpGain(float* anttheta, float* antphi, float* gain, float* antpattern, float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints, long len) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float temptheta = anttheta[idx]; float tempphi = antphi[idx]; float antPatternGain = GPU_BillerInterpAntPattern(antpattern, starttheta, startphi, dtheta, dphi, thetapoints, phipoints, temptheta, tempphi) ; gain[idx] = antPatternGain; } } __global__ void CUDA_InterpSigma( long* demcls, float* sigmaAmp, float* localanglearr, long len, CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { long clsid = demcls[idx]; float localangle = localanglearr[idx]; CUDASigmaParam tempsigma = sigma0Paramslist[clsid]; if (localangle < 0 || localangle >= LAMP_CUDA_PI/2) { sigmaAmp[idx] = 0; } else {} if (abs(tempsigma.p1)< PRECISIONTOLERANCE&& abs(tempsigma.p2) < PRECISIONTOLERANCE && abs(tempsigma.p3) < PRECISIONTOLERANCE && abs(tempsigma.p4) < PRECISIONTOLERANCE&& abs(tempsigma.p5) < PRECISIONTOLERANCE&& abs(tempsigma.p6) < PRECISIONTOLERANCE ) { sigmaAmp[idx] = 0; } else { float sigma = GPU_getSigma0dB(tempsigma, localangle); sigma = powf(10.0, sigma / 10.0);// 后向散射系数 //printf("cls:%d;localangle=%f;sigma0=%f;\n", clsid, localangle, sigma); sigmaAmp[idx] = sigma; } } } __global__ void CUDA_CalculationEchoAmp(float* sigma0, float* TransAnt, float* ReciveAnt, float* R, float Pt, float* ampArr, long len) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float r = R[idx]; float amptemp = Pt * TransAnt[idx] * ReciveAnt[idx] * sigma0[idx]; amptemp = amptemp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4)); // 反射强度 ampArr[idx] = amptemp; } } __global__ void CUDA_CalculationEchoPhase(float* R, float lamda, float* phaseArr, long len) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float r = R[idx]; // 处理相位 float phi = (-4 * LAMP_CUDA_PI / lamda) * r; phaseArr[idx] = phi; } } __global__ void CUDA_CombinationEchoAmpAndPhase(float* R, float* echoAmp,float* echoPhase, float nearRange, float Fs, long plusepoints, cuComplex* echo, long* FreqID, long len) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float r = R[idx]; float phase = echoPhase[idx]; float amp = echoAmp[idx]; cuComplex echophi = make_cuComplex(0, phase); cuComplex echophiexp = cuCexpf(echophi); float timeR = 2 * (r - nearRange) / LIGHTSPEED * Fs; long timeID = floorf(timeR); if (timeID < 0 || timeID >= plusepoints) { timeID = 0; amp = 0; } cuComplex echotemp = make_cuComplex(echophiexp.x*amp, echophiexp.y*amp); echo[idx] = echotemp; FreqID[idx] = timeID; } } __global__ void CUDAKernel_RFPC_Caluation_R_Gain( float antX, float antY, float antZ, float* targetX, float* targetY, float* targetZ, long len, float* demSlopeX, float* demSlopeY, float* demSlopeZ, float antXaxisX, float antXaxisY, float antXaxisZ, float antYaxisX, float antYaxisY, float antYaxisZ, float antZaxisX, float antZaxisY, float antZaxisZ, float antDirectX, float antDirectY, float antDirectZ, float* TransAntpattern, float Transtarttheta, float Transstartphi, float Transdtheta, float Transdphi, int Transthetapoints, int Transphipoints, float* ReceiveAntpattern, float Receivestarttheta, float Receivestartphi, float Receivedtheta, float Receivedphi, int Receivethetapoints, int Receivephipoints, float NearR, float FarR, float* outR, float* outLocalAngle, float* AmpGain) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float RstX = antX - targetX[idx]; // 计算坐标矢量 float RstY = antY - targetY[idx]; float RstZ = antZ - targetZ[idx]; float slopeX = demSlopeX[idx]; float slopeY = demSlopeY[idx]; float slopeZ = demSlopeZ[idx]; float RstR= sqrtf(RstX* RstX + RstY* RstY + RstZ* RstZ); // 矢量距离 if (RstRFarR) { outLocalAngle[idx] = 0; outR[idx] = 0; AmpGain[idx] = 0; } else { // 求解坡度 float slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); // float dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ; outLocalAngle[idx] = acosf(dotAB / (RstR * slopR)); // 局地入射角 float ampGain = 0; // 求解天线方向图指向 CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal( RstX, RstY, RstZ, antXaxisX, antXaxisY, antXaxisZ, antYaxisX, antYaxisY, antYaxisZ, antZaxisX, antZaxisY, antZaxisZ, antDirectX, antDirectY, antDirectZ ); if (antVector.Rho > 0) { // 发射方向图 float temptheta = antVector.theta * r2d; float tempphi = antVector.phi * r2d; float TansantPatternGain = GPU_BillerInterpAntPattern( TransAntpattern, Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints, temptheta, tempphi); // 接收方向图 float antPatternGain = GPU_BillerInterpAntPattern( ReceiveAntpattern, Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints, temptheta, tempphi); ampGain = TansantPatternGain * antPatternGain; ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度 AmpGain[idx] = ampGain; outR[idx] = RstR; } else { outR[idx] = 0; AmpGain[idx] = 0; } } } } __global__ void CUDARFPCKernel_Target_Freq_EchoData( float* InR, float* InlocalAngle, float* InampGain, long* Indemcls, long len, float Pt, float freq, CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen, cuComplex* OutechoArr) { long idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < len) { float sigma0 = 0; { long clsid = Indemcls[idx]; float localangle = InlocalAngle[idx]; CUDASigmaParam tempsigma = sigma0Paramslist[clsid]; if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2) { sigma0 = 0; } else {} if (abs(tempsigma.p1) < PRECISIONTOLERANCE && abs(tempsigma.p2) < PRECISIONTOLERANCE && abs(tempsigma.p3) < PRECISIONTOLERANCE && abs(tempsigma.p4) < PRECISIONTOLERANCE && abs(tempsigma.p5) < PRECISIONTOLERANCE && abs(tempsigma.p6) < PRECISIONTOLERANCE ) { sigma0 = 0; } else { float sigma = GPU_getSigma0dB(tempsigma, localangle); sigma0 = powf(10.0, sigma / 10.0);// 后向散射系数 } } float amp = Pt * InampGain[idx] * sigma0; float phi = 4 * PI * freq / LIGHTSPEED * InR[idx]; // 欧拉公式 exp(ix)=cos(x)+isin(x) // echo=Aexp(ix)=A*cos(x)+i*A*sin(x) cuComplex echotemp = make_cuComplex(amp * cosf(phi), amp * sinf(phi)); OutechoArr[idx] = echotemp; } } __global__ void CUDACkernel_Complex_SUM_reduce_dynamicshared(cuComplex* d_x, cuComplex* d_y, long N) { const int tid = threadIdx.x; // 某个block内的线程标号 index const int bid = blockIdx.x; // 某个block在网格grid内的标号 index const int n = bid * blockDim.x + tid; // n 是某个线程的标号 index __shared__ cuComplex s_y[128]; // 分配共享内存空间，不同的block都有共享内存变量的副本 s_y[tid] = (n < N) ? d_x[n] : make_cuComplex(0.0,0.0); // 每个block的共享内存变量副本，都用全局内存数组d_x来赋值，最后一个多出来的用0 __syncthreads(); // 线程块内部直接同步 for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) // 折半 { if (tid < offset) // 线程标号的index 不越界折半 { s_y[tid] = cuCaddf(s_y[tid], s_y[tid + offset]); // 某个block内的线程做折半规约 } __syncthreads(); // 同步block内部的线程 } if (tid == 0) // 某个block只做一次操作 { d_y[bid] = s_y[0]; // 复制共享内存变量累加的结果到全局内存 } } /** 对外封装接口 *******************************************************************************************************/ extern "C" void SatelliteAntDirectNormal(float* RstX, float* RstY, float* RstZ, float antXaxisX, float antXaxisY, float antXaxisZ, float antYaxisX, float antYaxisY, float antYaxisZ, float antZaxisX, float antZaxisY, float antZaxisZ, float antDirectX, float antDirectY, float antDirectZ, float* thetaAnt, float* phiAnt , long len) { int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDA_SatelliteAntDirectNormal << > > (RstX, RstY, RstZ, antXaxisX, antXaxisY, antXaxisZ, antYaxisX, antYaxisY, antYaxisZ, antZaxisX, antZaxisY, antZaxisZ, antDirectX, antDirectY, antDirectZ, thetaAnt, phiAnt , len); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void AntPatternInterpGain(float* anttheta, float* antphi, float* gain, float* antpattern, float starttheta, float startphi, float dtheta, float dphi, int thetapoints, int phipoints, long len) { int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 //printf("\nCUDA_RFPC_SiglePRF blockSize:%d ,numBlock:%d\n", blockSize, numBlocks); CUDA_AntPatternInterpGain << > > ( anttheta,antphi, gain, antpattern, starttheta, startphi, dtheta, dphi, thetapoints, phipoints, len); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void calculationEcho(float* sigma0, float* TransAnt, float* ReciveAnt, float* localangle, float* R, float* slopeangle, float nearRange, float Fs, float pt, float lamda, long FreqIDmax, cuComplex* echoAmp, long* FreqID, long len) { int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDA_calculationEcho << > > (sigma0, TransAnt, ReciveAnt, localangle, R, slopeangle, nearRange, Fs, pt, lamda, FreqIDmax, echoAmp, FreqID, len); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void CUDACalculationEchoAmp(float* sigma0, float* TransAnt, float* ReciveAnt, float* R, float Pt, float* ampArr, long len) { int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDA_CalculationEchoAmp << > > ( sigma0, TransAnt, ReciveAnt, R, Pt, ampArr, len); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void CUDACalculationEchoPhase(float* R, float lamda, float* phaseArr, long len) { int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDA_CalculationEchoPhase << > > ( R, lamda, phaseArr, len); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void CUDACombinationEchoAmpAndPhase(float* R, float* echoAmp, float* echoPhase, float nearRange, float Fs, long plusepoints, cuComplex* echo, long* FreqID, long len) { int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDA_CombinationEchoAmpAndPhase << > > ( R, echoAmp, echoPhase, nearRange, Fs, plusepoints, echo, FreqID, len ); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void CUDAInterpSigma( long* demcls,float* sigmaAmp, float* localanglearr,long len, CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {// 地表覆盖类型-sigma插值对应函数-ulaby int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDA_InterpSigma << > > ( demcls, sigmaAmp, localanglearr, len, sigma0Paramslist, sigmaparamslistlen ); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void CUDARFPC_Caluation_R_Gain(float antX, float antY, float antZ, float* targetX, float* targetY, float* targetZ, long TargetPixelNumber, float* demSlopeX, float* demSlopeY, float* demSlopeZ, float antXaxisX, float antXaxisY, float antXaxisZ, float antYaxisX, float antYaxisY, float antYaxisZ, float antZaxisX, float antZaxisY, float antZaxisZ, float antDirectX, float antDirectY, float antDirectZ, float* TransAntpattern, float Transtarttheta, float Transstartphi, float Transdtheta, float Transdphi, int Transthetapoints, int Transphipoints, float* ReceiveAntpattern, float Receivestarttheta, float Receivestartphi, float Receivedtheta, float Receivedphi, int Receivethetapoints, int Receivephipoints, float NearR, float FarR, float* outR, float* outLocalAngle, float* AmpGain) { int blockSize = 256; // 每个块的线程数 int numBlocks = (TargetPixelNumber + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDAKernel_RFPC_Caluation_R_Gain << > > ( antX, antY, antZ, targetX,targetY, targetZ, TargetPixelNumber, demSlopeX, demSlopeY, demSlopeZ, antXaxisX, antXaxisY, antXaxisZ, antYaxisX, antYaxisY, antYaxisZ, antZaxisX, antZaxisY, antZaxisZ, antDirectX, antDirectY, antDirectZ, TransAntpattern, Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints, ReceiveAntpattern, Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints, NearR, FarR, outR, outLocalAngle, AmpGain ); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDARFPC_Caluation_R_Gain CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void CUDARFPC_Target_Freq_EchoData(float* InR, float* InlocalAngle, float* InampGain, long* Indemcls, long len, float Pt, float freq, CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen, cuComplex* OutechoArr ) { int blockSize = 256; // 每个块的线程数 int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 // 调用 CUDA 核函数 CUDARFPCKernel_Target_Freq_EchoData << > > ( InR, InlocalAngle, InampGain, Indemcls, len, Pt, freq, sigma0Paramslist, sigmaparamslistlen, OutechoArr ); #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDARFPC_Target_Freq_EchoData CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); } extern "C" void CUDA_DemEchoSUM_NoMalloc(cuComplex* d_dem_echo,long N, cuComplex* d_echosum_temp, int grid_size, cuComplex* d_echo,long ehcoid ) { long NUM_REPEATS = 100; const int smem = sizeof(float) * BLOCK_SIZE; for (long i = 0; i < grid_size; i++) { // 初始化 d_echosum_temp[i] = make_cuComplex(0,0); } CUDACkernel_Complex_SUM_reduce_dynamicshared << > > (d_dem_echo, d_echosum_temp,N); //归约求和 #ifdef __CUDADEBUG__ cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("CUDALinearInterp1 CUDA Error: %s\n", cudaGetErrorString(err)); // Possibly: exit(-1) if program cannot continue.... } #endif // __CUDADEBUG__ cudaDeviceSynchronize(); for (int n = 0; n < grid_size; ++n) { d_echo[ehcoid] =cuCaddf(d_echo[ehcoid],d_echosum_temp[n]); } } #endif