RasterProcessTool/GPUTool/GPURFPC.cu


#include <time.h>
#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>

#include "BaseConstVariable.h"
#include "GPURFPC.cuh"


#ifdef __CUDANVCC___


__device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta) {//线性值
	double sigma = param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
	return  sigma;
}


__device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
	double RstX, double RstY, double RstZ,
	double  antXaxisX, double  antXaxisY, double  antXaxisZ,
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,
	double  antDirectX, double  antDirectY, double  antDirectZ
) {
	CUDAVectorEllipsoidal result{ 0,0,-1 };

	// 求解天线增益
	double Xst = -1 * RstX; // 卫星 -->  地面
	double Yst = -1 * RstY;
	double Zst = -1 * RstZ;
	double AntXaxisX = antXaxisX;
	double AntXaxisY = antXaxisY;
	double AntXaxisZ = antXaxisZ;
	double AntYaxisX = antYaxisX;
	double AntYaxisY = antYaxisY;
	double AntYaxisZ = antYaxisZ;
	double AntZaxisX = antZaxisX;
	double AntZaxisY = antZaxisY;
	double AntZaxisZ = antZaxisZ;

	// 归一化
	double RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
	double AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
	double AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
	double AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);


	double Rx = Xst / RstNorm;
	double Ry = Yst / RstNorm;
	double Rz = Zst / RstNorm;
	double Xx = AntXaxisX / AntXaxisNorm;
	double Xy = AntXaxisY / AntXaxisNorm;
	double Xz = AntXaxisZ / AntXaxisNorm;
	double Yx = AntYaxisX / AntYaxisNorm;
	double Yy = AntYaxisY / AntYaxisNorm;
	double Yz = AntYaxisZ / AntYaxisNorm;
	double Zx = AntZaxisX / AntZaxisNorm;
	double Zy = AntZaxisY / AntZaxisNorm;
	double Zz = AntZaxisZ / AntZaxisNorm;

	double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
	double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
	double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);


	// 计算theta 与 phi
	double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
	double ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
	double PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2


	if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
		PhiAnt = 0;
	}
	else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上，原点
		if (Yant > 0) {
			PhiAnt = PI / 2;
		}
		else {
			PhiAnt = -PI / 2;
		}
	}
	else if (Xant < 0) {
		if (Yant > 0) {
			PhiAnt = PI + PhiAnt;
		}
		else {
			PhiAnt = -PI + PhiAnt;
		}
	}
	else {  // Xant>0  X 正轴

	}

	if (isnan(PhiAnt)) {
		printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
	}


	result.theta = ThetaAnt;
	result.phi = PhiAnt;
	result.Rho = Norm;
	return result;
}

__device__ double GPU_BillerInterpAntPattern(double* antpattern,
	double starttheta, double startphi, double dtheta, double dphi,
	long thetapoints, long phipoints,
	double searththeta, double searchphi) {
	double stheta = searththeta;
	double sphi = searchphi;
	if (stheta > 90) {
		return 0;
	}
	else {}


	double pthetaid = (stheta - starttheta) / dtheta;//
	double pphiid = (sphi - startphi) / dphi;

	long lasttheta = floorf(pthetaid);
	long nextTheta = lasttheta + 1;
	long lastphi = floorf(pphiid);
	long nextPhi = lastphi + 1;


	if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
		lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
	{
		return 0;
	}
	else {
		double x = stheta;
		double y = sphi;

		double x1 = lasttheta * dtheta + starttheta;
		double x2 = nextTheta * dtheta + starttheta;
		double y1 = lastphi * dphi + startphi;
		double y2 = nextPhi * dphi + startphi;

		double z11 = antpattern[lasttheta * phipoints + lastphi];
		double z12 = antpattern[lasttheta * phipoints + nextPhi];
		double z21 = antpattern[nextTheta * phipoints + lastphi];
		double z22 = antpattern[nextTheta * phipoints + nextPhi];


		//z11 = powf(10, z11 / 10); // dB-> 线性
		//z12 = powf(10, z12 / 10);
		//z21 = powf(10, z21 / 10);
		//z22 = powf(10, z22 / 10);

		double GainValue = (z11 * (x2 - x) * (y2 - y)
			+ z21 * (x - x1) * (y2 - y)
			+ z12 * (x2 - x) * (y - y1)
			+ z22 * (x - x1) * (y - y1));
		GainValue = GainValue / ((x2 - x1) * (y2 - y1));
		return GainValue;
	}
}

__device__ cuComplex  GPU_calculationEcho(double sigma0, double TransAnt, double ReciveAnt,
	double localangle, double R, double slopeangle, double Pt, double lamda) {
	double amp = Pt * TransAnt * ReciveAnt;
	amp = amp * sigma0;
	amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); // 反射强度
	double phi = (-4 * LAMP_CUDA_PI / lamda) * R;
	cuComplex echophi = make_cuComplex(0, phi);
	cuComplex echophiexp = cuCexpf(echophi);
	cuComplex echo = make_cuComplex(echophiexp.x * amp, echophiexp.y * amp);
	return echo;
}


__global__ void CUDA_SatelliteAntDirectNormal(double* RstX, double* RstY, double* RstZ,
	double  antXaxisX, double  antXaxisY, double  antXaxisZ,
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,
	double  antDirectX, double  antDirectY, double  antDirectZ,
	double* thetaAnt, double* phiAnt
	, long len) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double Xst = -1 * RstX[idx]; // 卫星 -->  地面
		double Yst = -1 * RstY[idx];
		double Zst = -1 * RstZ[idx];
		double AntXaxisX = antXaxisX;
		double AntXaxisY = antXaxisY;
		double AntXaxisZ = antXaxisZ;
		double AntYaxisX = antYaxisX;
		double AntYaxisY = antYaxisY;
		double AntYaxisZ = antYaxisZ;
		double AntZaxisX = antZaxisX;
		double AntZaxisY = antZaxisY;
		double AntZaxisZ = antZaxisZ;

		// 归一化
		double RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
		double AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
		double AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
		double AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);


		double Rx = Xst / RstNorm;
		double Ry = Yst / RstNorm;
		double Rz = Zst / RstNorm;
		double Xx = AntXaxisX / AntXaxisNorm;
		double Xy = AntXaxisY / AntXaxisNorm;
		double Xz = AntXaxisZ / AntXaxisNorm;
		double Yx = AntYaxisX / AntYaxisNorm;
		double Yy = AntYaxisY / AntYaxisNorm;
		double Yz = AntYaxisZ / AntYaxisNorm;
		double Zx = AntZaxisX / AntZaxisNorm;
		double Zy = AntZaxisY / AntZaxisNorm;
		double Zz = AntZaxisZ / AntZaxisNorm;

		double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
		double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
		double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);


		// 计算theta 与 phi
		double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
		double ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
		double PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2


		if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
			PhiAnt = 0;
		}
		else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上，原点
			if (Yant > 0) {
				PhiAnt = PI / 2;
			}
			else {
				PhiAnt = -PI / 2;
			}
		}
		else if (Xant < 0) {
			if (Yant > 0) {
				PhiAnt = PI + PhiAnt;
			}
			else {
				PhiAnt = -PI + PhiAnt;
			}
		}
		else {  // Xant>0  X 正轴

		}

		if (isnan(PhiAnt)) {
			printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
		}

		//if (abs(ThetaAnt - 0) < PRECISIONTOLERANCE) {
		//	PhiAnt = 0;
		//}
		//else {}


		thetaAnt[idx] = ThetaAnt * r2d;
		phiAnt[idx] = PhiAnt * r2d;
		//printf("Rst=[%f,%f,%f];AntXaxis = [%f, %f, %f];AntYaxis=[%f,%f,%f];AntZaxis=[%f,%f,%f];phiAnt=%f;thetaAnt=%f;\n", Xst, Yst, Zst
		//	, AntXaxisX, AntXaxisY, AntXaxisZ
		//	, AntYaxisX, AntYaxisY, AntYaxisZ
		//	, AntZaxisX, AntZaxisY, AntZaxisZ
		//	, phiAnt[idx]
		//	, thetaAnt[idx]
		//);
	}
}

__global__ void CUDA_BillerInterpAntPattern(double* antpattern,
	double starttheta, double startphi, double dtheta, double dphi,
	long thetapoints, long phipoints,
	double* searththeta, double* searchphi, double* searchantpattern,
	long len) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double stheta = searththeta[idx];
		double sphi = searchphi[idx];
		double pthetaid = (stheta - starttheta) / dtheta;//
		double pphiid = (sphi - startphi) / dphi;

		long lasttheta = floorf(pthetaid);
		long nextTheta = lasttheta + 1;
		long lastphi = floorf(pphiid);
		long nextPhi = lastphi + 1;

		if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
			lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
		{
			searchantpattern[idx] = 0;
		}
		else {
			double x = stheta;
			double y = sphi;

			double x1 = lasttheta * dtheta + starttheta;
			double x2 = nextTheta * dtheta + starttheta;
			double y1 = lastphi * dphi + startphi;
			double y2 = nextPhi * dphi + startphi;

			double z11 = antpattern[lasttheta * phipoints + lastphi];
			double z12 = antpattern[lasttheta * phipoints + nextPhi];
			double z21 = antpattern[nextTheta * phipoints + lastphi];
			double z22 = antpattern[nextTheta * phipoints + nextPhi];


			z11 = powf(10, z11 / 10);
			z12 = powf(10, z12 / 10);
			z21 = powf(10, z21 / 10);
			z22 = powf(10, z22 / 10);

			double GainValue = (z11 * (x2 - x) * (y2 - y)
				+ z21 * (x - x1) * (y2 - y)
				+ z12 * (x2 - x) * (y - y1)
				+ z22 * (x - x1) * (y - y1));
			GainValue = GainValue / ((x2 - x1) * (y2 - y1));
			searchantpattern[idx] = GainValue;
		}
	}
}


__global__ void CUDA_AntPatternInterpGain(double* anttheta, double* antphi, double* gain,
	double* antpattern, double starttheta, double startphi, double dtheta, double dphi, int thetapoints, int phipoints, long len) {
	int  idx = blockIdx.x * blockDim.x + threadIdx.x;

	if (idx < len) {

		double temptheta = anttheta[idx];
		double tempphi = antphi[idx];
		double antPatternGain = GPU_BillerInterpAntPattern(antpattern,
			starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
			temptheta, tempphi);
		gain[idx] = antPatternGain;
	}
}


__global__ void CUDA_InterpSigma(
	long* demcls, double* sigmaAmp, double* localanglearr, long len,
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		long clsid = demcls[idx];
		double localangle = localanglearr[idx];
		CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
		if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2) {
			sigmaAmp[idx] = 0;
		}
		else {}

		if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
			abs(tempsigma.p2) < PRECISIONTOLERANCE &&
			abs(tempsigma.p3) < PRECISIONTOLERANCE &&
			abs(tempsigma.p4) < PRECISIONTOLERANCE &&
			abs(tempsigma.p5) < PRECISIONTOLERANCE &&
			abs(tempsigma.p6) < PRECISIONTOLERANCE
			) {
			sigmaAmp[idx] = 0;
		}
		else {
			double sigma = GPU_getSigma0dB(tempsigma, localangle);
			sigma = powf(10.0, sigma / 10.0);// 后向散射系数
			//printf("cls:%d;localangle=%f;sigma0=%f;\n", clsid, localangle, sigma);
			sigmaAmp[idx] = sigma;
		}
	}
}


__global__ void CUDAKernel_RFPC_Computer_R_Gain(
	double antX, double antY, double antZ, // 天线的坐标
	double* targetX, double* targetY, double* targetZ, long len, // 地面坐标
	long* demCls,
	double* demSlopeX, double* demSlopeY, double* demSlopeZ, // 地表坡度矢量
	double  antXaxisX, double  antXaxisY, double  antXaxisZ, // 天线坐标系的X轴
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,// 天线坐标系的Y轴
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,// 天线坐标系的Z轴
	double  antDirectX, double  antDirectY, double  antDirectZ,// 天线的指向
	double Pt,// 发射能量
	double refPhaseRange,
	double* TransAntpattern, PatternImageDesc TransAntpatternDesc,// double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints, // 发射天线方向图
	double* ReceiveAntpattern, PatternImageDesc ReceiveAntpatternDesc,// double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,//接收天线方向图
	double NearR, double FarR, // 距离范围
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 插值图
	float* outR,  // 输出距离
	float* outAmp
) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double tx = targetX[idx];
		double ty = targetY[idx];
		double tz = targetZ[idx];
		double RstX = antX - tx; // 计算坐标矢量
		double RstY = antY - ty;
		double RstZ = antZ - tz;

		double slopeX = demSlopeX[idx];
		double slopeY = demSlopeY[idx];
		double slopeZ = demSlopeZ[idx];

		double RstR2 = RstX * RstX + RstY * RstY + RstZ * RstZ;
		double RstR = sqrt(RstR2); // 矢量距离

		//printf("antX=%f;antY=%f;antZ=%f;targetX=%f;targetY=%f;targetZ=%f;RstR=%.6f;diffR=%.6f;\n",antX,antY,antZ,targetX,targetY,targetZ,RstR, RstR - 9.010858499003178e+05);

		if (RstR<NearR || RstR>FarR) {
			outAmp[idx] = 0;
			outR[idx] = 0;
		}
		else {
			// 求解坡度
			double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
			double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
			double localangle = acosf(dotAB / (RstR * slopR)); // 局地入射角
			double ampGain = 0;
			// 求解天线方向图指向
			CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
				RstX, RstY, RstZ,
				antXaxisX, antXaxisY, antXaxisZ,
				antYaxisX, antYaxisY, antYaxisZ,
				antZaxisX, antZaxisY, antZaxisZ,
				antDirectX, antDirectY, antDirectZ
			);
			if (antVector.Rho > 0) {
				// 发射方向图
				double temptheta = antVector.theta * r2d;
				double tempphi = antVector.phi * r2d;
				double TansantPatternGain =
					GPU_BillerInterpAntPattern(
						TransAntpattern,
						TransAntpatternDesc.startTheta, TransAntpatternDesc.startPhi,
						TransAntpatternDesc.dtheta, TransAntpatternDesc.dphi, TransAntpatternDesc.thetanum, TransAntpatternDesc.phinum,
						//Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
						temptheta, tempphi);

				// 接收方向图
				double antPatternGain = GPU_BillerInterpAntPattern(
					ReceiveAntpattern,
					ReceiveAntpatternDesc.startTheta, ReceiveAntpatternDesc.startPhi,
					ReceiveAntpatternDesc.dtheta, ReceiveAntpatternDesc.dphi, ReceiveAntpatternDesc.thetanum, ReceiveAntpatternDesc.phinum,
					//Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
					temptheta, tempphi);

				// 计算
				double sigma0 = 0;
				{
					long clsid = demCls[idx];
					//printf("clsid=%d\n", clsid);
					CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
					if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2) {
						sigma0 = 0;
					}
					else {}

					if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
						abs(tempsigma.p2) < PRECISIONTOLERANCE &&
						abs(tempsigma.p3) < PRECISIONTOLERANCE &&
						abs(tempsigma.p4) < PRECISIONTOLERANCE &&
						abs(tempsigma.p5) < PRECISIONTOLERANCE &&
						abs(tempsigma.p6) < PRECISIONTOLERANCE
						) {
						sigma0 = 0;
					}
					else {
						double sigma = GPU_getSigma0dB(tempsigma, localangle);
						sigma0 = powf(10.0, sigma / 10.0);// 后向散射系数
					}
				}

				ampGain = TansantPatternGain * antPatternGain;
				ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
				outAmp[idx] = float(ampGain * Pt * sigma0);
				outR[idx] = float(RstR - refPhaseRange);
			}
			else {
			}
		}
	}
}


__global__ void CUDAKernel_PRF_GeneratorEcho(float* Rarr, float* ampArr,
	long pixelcount,
	float f0, float dfreq,long freqnum,
	double* echo_real,double* echo_imag, long prfid)
{
	//// 假定共享内存大小为49152 byte
	//// 假定每个Block 线程数大小为 32
	__shared__ float s_R[GPU_SHARE_MEMORY];   // 距离  32*12 * 8= 49.2kb
	__shared__ float s_Amp[GPU_SHARE_MEMORY]; // 振幅  3072 * 8= 49.2kb  49.2*2 = 98.4 < 100 KB

	int idx = blockIdx.x * blockDim.x + threadIdx.x;; // 获取当前的线程编码
	int tid = threadIdx.x;// 获取 单个 block 中的线程ID

	const long startPIX = idx * GPU_SHARE_STEP; // 计算偏移
	int curthreadidx = 0;
	for (long i = 0; i < GPU_SHARE_STEP; i++) {
		curthreadidx = i * BLOCK_SIZE + tid; // 计算分块
		s_R[curthreadidx] = (startPIX + i) < pixelcount ? Rarr[startPIX + i] : 0.0;
		s_Amp[curthreadidx] = (startPIX + i) < pixelcount ? ampArr[startPIX + i] : 0.0;
	}


	//__syncthreads(); // 确定所有待处理数据都已经进入程序中

	if (startPIX < pixelcount) { // 存在可能处理的计算
		float temp_real = 0;
		float temp_imag = 0;
		float factorjTemp = 0;
		float temp_phi = 0;
		float temp_amp = 0;
		long dataid = 0;
		curthreadidx = 0;
		for (long fid = 0; fid < freqnum; fid++) {
			factorjTemp = RFPCPIDIVLIGHT *(f0+ fid* dfreq);
			for (long j = 0; j < GPU_SHARE_STEP; j++) {
				dataid = j * BLOCK_SIZE + tid; //
				temp_phi = s_R[dataid] * factorjTemp;
				temp_amp = s_Amp[dataid];

				temp_real += temp_amp * cosf(temp_phi);
				temp_imag += temp_amp * sinf(temp_phi);
			}
			atomicAdd(&echo_real[prfid * freqnum + fid], temp_real); // 更新实部
			atomicAdd(&echo_imag[prfid * freqnum + fid], temp_imag); // 更新虚部
		}
	}
}


/** 对外封装接口 *******************************************************************************************************/

extern "C" void CUDA_RFPC_MainBlock(
	double* antX, double* antY, double* antZ, // 天线的坐标
	double* antXaxisX, double* antXaxisY, double* antXaxisZ, // 天线坐标系的X轴
	double* antYaxisX, double* antYaxisY, double* antYaxisZ,// 天线坐标系的Y轴
	double* antZaxisX, double* antZaxisY, double* antZaxisZ,// 天线坐标系的Z轴
	double* antDirectX, double* antDirectY, double* antDirectZ,// 天线的指向
	long PRFCount, // 脉冲数
	float f0, float dfreq, long freqnum, // 频率数
	double* targetX, double* targetY, double* targetZ, long TargetPixelNumber, // 地面坐标
	long* demCls, // 地表类别
	double* demSlopeX, double* demSlopeY, double* demSlopeZ, // 地表坡度矢量
	double Pt,// 发射能量
	double refPhaseRange,
	double* TransAntpattern, PatternImageDesc TransAntpatternDesc,// double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints, // 发射天线方向图
	double* ReceiveAntpattern, PatternImageDesc ReceiveAntpatternDesc, //double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,//接收天线方向图
	double NearR, double FarR, // 距离范围
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 插值图
	double* out_echoReal, double* out_echoImag,// 输出回波
	float* temp_R, float* temp_amp
	//, double* temp_phi, double* temp_real, double* temp_imag// 临时变量
) {

	long blocknum = 0;
	long pixelcount=TargetPixelNumber;
	int numBlocks = 0;
	for(long pid=0;pid<PRFCount;pid++){
		numBlocks = (TargetPixelNumber + BLOCK_SIZE - 1) / BLOCK_SIZE; // 根据 pixelcount 计算网格大小


		CUDAKernel_RFPC_Computer_R_Gain<<<numBlocks , BLOCK_SIZE >>>(
			antX[pid], antY[pid], antZ[pid],
			targetX, targetY, targetZ, TargetPixelNumber,
			demCls,
			demSlopeX, demSlopeY, demSlopeZ,
			antXaxisX[pid], antXaxisY[pid], antXaxisZ[pid],
			antYaxisX[pid], antYaxisY[pid], antYaxisZ[pid],
			antZaxisX[pid], antZaxisY[pid], antZaxisZ[pid],
			antDirectX[pid], antDirectY[pid], antDirectZ[pid],
			Pt,// 增益后发射能量
			refPhaseRange,
			TransAntpattern,
			TransAntpatternDesc,
			//Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
			ReceiveAntpattern,
			ReceiveAntpatternDesc,
			//Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
			NearR, FarR,
			sigma0Paramslist, sigmaparamslistlen,
			//factorj, freqnum,
			temp_R,  // 输出距离
			temp_amp
			//out_echoReal, out_echoImag, pid // 输出振幅
		);

		blocknum = (pixelcount + GPU_SHARE_STEP - 1) / GPU_SHARE_STEP;
		numBlocks = (blocknum + BLOCK_SIZE - 1) / BLOCK_SIZE; // 网格数量
		CUDAKernel_PRF_GeneratorEcho << <numBlocks, BLOCK_SIZE >> >
			(temp_R, temp_amp, pixelcount,
			f0,dfreq,freqnum,
			out_echoReal, out_echoImag, pid);

	}
	cudaDeviceSynchronize();

}


#endif