RasterProcessTool/GPUTool/GPURFPC.cu


#include <time.h>
#include <iostream>
#include <memory>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>

#include "BaseConstVariable.h"
#include "GPURFPC.cuh"


#ifdef __CUDANVCC___


__device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta) {//线性值
	double sigma = param.p1 + param.p2 * exp(-param.p3 * theta) + param.p4 * cos(param.p5 * theta + param.p6);
	return  sigma;
}


__device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
	double RstX, double RstY, double RstZ,
	double  antXaxisX, double  antXaxisY, double  antXaxisZ,
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,
	double  antDirectX, double  antDirectY, double  antDirectZ
) {
	CUDAVectorEllipsoidal result{ 0,0,-1 };

	// 求解天线增益
	double Xst = -1 * RstX; // 卫星 -->  地面
	double Yst = -1 * RstY;
	double Zst = -1 * RstZ;
	double AntXaxisX = antXaxisX;
	double AntXaxisY = antXaxisY;
	double AntXaxisZ = antXaxisZ;
	double AntYaxisX = antYaxisX;
	double AntYaxisY = antYaxisY;
	double AntYaxisZ = antYaxisZ;
	double AntZaxisX = antZaxisX;
	double AntZaxisY = antZaxisY;
	double AntZaxisZ = antZaxisZ;

	// 归一化
	double RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
	double AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
	double AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
	double AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);


	double Rx = Xst / RstNorm;
	double Ry = Yst / RstNorm;
	double Rz = Zst / RstNorm;
	double Xx = AntXaxisX / AntXaxisNorm;
	double Xy = AntXaxisY / AntXaxisNorm;
	double Xz = AntXaxisZ / AntXaxisNorm;
	double Yx = AntYaxisX / AntYaxisNorm;
	double Yy = AntYaxisY / AntYaxisNorm;
	double Yz = AntYaxisZ / AntYaxisNorm;
	double Zx = AntZaxisX / AntZaxisNorm;
	double Zy = AntZaxisY / AntZaxisNorm;
	double Zz = AntZaxisZ / AntZaxisNorm;

	double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
	double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
	double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);


	// 计算theta 与 phi
	double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
	double ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
	double PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2


	if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
		PhiAnt = 0;
	}
	else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上，原点
		if (Yant > 0) {
			PhiAnt = PI / 2;
		}
		else {
			PhiAnt = -PI / 2;
		}
	}
	else if (Xant < 0) {
		if (Yant > 0) {
			PhiAnt = PI + PhiAnt;
		}
		else {
			PhiAnt = -PI + PhiAnt;
		}
	}
	else {  // Xant>0  X 正轴

	}

	if (isnan(PhiAnt)) {
		printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
	}


	result.theta = ThetaAnt;
	result.phi = PhiAnt;
	result.Rho = Norm;
	return result;
}

__device__ double GPU_BillerInterpAntPattern(double* antpattern,
	double starttheta, double startphi, double dtheta, double dphi,
	long thetapoints, long phipoints,
	double searththeta, double searchphi) {
	double stheta = searththeta;
	double sphi = searchphi;
	if (stheta > 90) {
		return 0;
	}
	else {}


	double pthetaid = (stheta - starttheta) / dtheta;//
	double pphiid = (sphi - startphi) / dphi;

	long lasttheta = floorf(pthetaid);
	long nextTheta = lasttheta + 1;
	long lastphi = floorf(pphiid);
	long nextPhi = lastphi + 1;


	if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
		lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
	{
		return 0;
	}
	else {
		double x = stheta;
		double y = sphi;

		double x1 = lasttheta * dtheta + starttheta;
		double x2 = nextTheta * dtheta + starttheta;
		double y1 = lastphi * dphi + startphi;
		double y2 = nextPhi * dphi + startphi;

		double z11 = antpattern[lasttheta * phipoints + lastphi];
		double z12 = antpattern[lasttheta * phipoints + nextPhi];
		double z21 = antpattern[nextTheta * phipoints + lastphi];
		double z22 = antpattern[nextTheta * phipoints + nextPhi];


		//z11 = powf(10, z11 / 10); // dB-> 线性
		//z12 = powf(10, z12 / 10);
		//z21 = powf(10, z21 / 10);
		//z22 = powf(10, z22 / 10);

		double GainValue = (z11 * (x2 - x) * (y2 - y)
			+ z21 * (x - x1) * (y2 - y)
			+ z12 * (x2 - x) * (y - y1)
			+ z22 * (x - x1) * (y - y1));
		GainValue = GainValue / ((x2 - x1) * (y2 - y1));
		return GainValue;
	}
}

__device__ cuComplex  GPU_calculationEcho(double sigma0, double TransAnt, double ReciveAnt,
	double localangle, double R, double slopeangle, double Pt, double lamda) {
	double amp = Pt * TransAnt * ReciveAnt;
	amp = amp * sigma0;
	amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); // 反射强度
	double phi = (-4 * LAMP_CUDA_PI / lamda) * R;
	cuComplex echophi = make_cuComplex(0, phi);
	cuComplex echophiexp = cuCexpf(echophi);
	cuComplex echo = make_cuComplex(echophiexp.x * amp, echophiexp.y * amp);
	return echo;
}


__global__ void CUDA_SatelliteAntDirectNormal(double* RstX, double* RstY, double* RstZ,
	double  antXaxisX, double  antXaxisY, double  antXaxisZ,
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,
	double  antDirectX, double  antDirectY, double  antDirectZ,
	double* thetaAnt, double* phiAnt
	, long len) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double Xst = -1 * RstX[idx]; // 卫星 -->  地面
		double Yst = -1 * RstY[idx];
		double Zst = -1 * RstZ[idx];
		double AntXaxisX = antXaxisX;
		double AntXaxisY = antXaxisY;
		double AntXaxisZ = antXaxisZ;
		double AntYaxisX = antYaxisX;
		double AntYaxisY = antYaxisY;
		double AntYaxisZ = antYaxisZ;
		double AntZaxisX = antZaxisX;
		double AntZaxisY = antZaxisY;
		double AntZaxisZ = antZaxisZ;

		// 归一化
		double RstNorm = sqrtf(Xst * Xst + Yst * Yst + Zst * Zst);
		double AntXaxisNorm = sqrtf(AntXaxisX * AntXaxisX + AntXaxisY * AntXaxisY + AntXaxisZ * AntXaxisZ);
		double AntYaxisNorm = sqrtf(AntYaxisX * AntYaxisX + AntYaxisY * AntYaxisY + AntYaxisZ * AntYaxisZ);
		double AntZaxisNorm = sqrtf(AntZaxisX * AntZaxisX + AntZaxisY * AntZaxisY + AntZaxisZ * AntZaxisZ);


		double Rx = Xst / RstNorm;
		double Ry = Yst / RstNorm;
		double Rz = Zst / RstNorm;
		double Xx = AntXaxisX / AntXaxisNorm;
		double Xy = AntXaxisY / AntXaxisNorm;
		double Xz = AntXaxisZ / AntXaxisNorm;
		double Yx = AntYaxisX / AntYaxisNorm;
		double Yy = AntYaxisY / AntYaxisNorm;
		double Yz = AntYaxisZ / AntYaxisNorm;
		double Zx = AntZaxisX / AntZaxisNorm;
		double Zy = AntZaxisY / AntZaxisNorm;
		double Zz = AntZaxisZ / AntZaxisNorm;

		double Xant = (Rx * Yy * Zz - Rx * Yz * Zy - Ry * Yx * Zz + Ry * Yz * Zx + Rz * Yx * Zy - Rz * Yy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
		double Yant = -(Rx * Xy * Zz - Rx * Xz * Zy - Ry * Xx * Zz + Ry * Xz * Zx + Rz * Xx * Zy - Rz * Xy * Zx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);
		double Zant = (Rx * Xy * Yz - Rx * Xz * Yy - Ry * Xx * Yz + Ry * Xz * Yx + Rz * Xx * Yy - Rz * Xy * Yx) / (Xx * Yy * Zz - Xx * Yz * Zy - Xy * Yx * Zz + Xy * Yz * Zx + Xz * Yx * Zy - Xz * Yy * Zx);


		// 计算theta 与 phi
		double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
		double ThetaAnt = acosf(Zant / Norm); // theta 与 Z轴的夹角
		double PhiAnt = atanf(Yant / Xant); // -pi/2 ~pi/2


		if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
			PhiAnt = 0;
		}
		else if (abs(Xant) < PRECISIONTOLERANCE) { // Y轴上，原点
			if (Yant > 0) {
				PhiAnt = PI / 2;
			}
			else {
				PhiAnt = -PI / 2;
			}
		}
		else if (Xant < 0) {
			if (Yant > 0) {
				PhiAnt = PI + PhiAnt;
			}
			else {
				PhiAnt = -PI + PhiAnt;
			}
		}
		else {  // Xant>0  X 正轴

		}

		if (isnan(PhiAnt)) {
			printf("V=[%f,%f,%f];norm=%f;thetaAnt=%f;phiAnt=%f;\n", Xant, Yant, Zant, Norm, ThetaAnt, PhiAnt);
		}

		//if (abs(ThetaAnt - 0) < PRECISIONTOLERANCE) {
		//	PhiAnt = 0;
		//}
		//else {}


		thetaAnt[idx] = ThetaAnt * r2d;
		phiAnt[idx] = PhiAnt * r2d;
		//printf("Rst=[%f,%f,%f];AntXaxis = [%f, %f, %f];AntYaxis=[%f,%f,%f];AntZaxis=[%f,%f,%f];phiAnt=%f;thetaAnt=%f;\n", Xst, Yst, Zst
		//	, AntXaxisX, AntXaxisY, AntXaxisZ
		//	, AntYaxisX, AntYaxisY, AntYaxisZ
		//	, AntZaxisX, AntZaxisY, AntZaxisZ
		//	, phiAnt[idx]
		//	, thetaAnt[idx]
		//);
	}
}

__global__ void CUDA_BillerInterpAntPattern(double* antpattern,
	double starttheta, double startphi, double dtheta, double dphi,
	long thetapoints, long phipoints,
	double* searththeta, double* searchphi, double* searchantpattern,
	long len) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double stheta = searththeta[idx];
		double sphi = searchphi[idx];
		double pthetaid = (stheta - starttheta) / dtheta;//
		double pphiid = (sphi - startphi) / dphi;

		long lasttheta = floorf(pthetaid);
		long nextTheta = lasttheta + 1;
		long lastphi = floorf(pphiid);
		long nextPhi = lastphi + 1;

		if (lasttheta < 0 || nextTheta < 0 || lastphi < 0 || nextPhi < 0 ||
			lasttheta >= thetapoints || nextTheta >= thetapoints || lastphi >= phipoints || nextPhi >= phipoints)
		{
			searchantpattern[idx] = 0;
		}
		else {
			double x = stheta;
			double y = sphi;

			double x1 = lasttheta * dtheta + starttheta;
			double x2 = nextTheta * dtheta + starttheta;
			double y1 = lastphi * dphi + startphi;
			double y2 = nextPhi * dphi + startphi;

			double z11 = antpattern[lasttheta * phipoints + lastphi];
			double z12 = antpattern[lasttheta * phipoints + nextPhi];
			double z21 = antpattern[nextTheta * phipoints + lastphi];
			double z22 = antpattern[nextTheta * phipoints + nextPhi];


			z11 = powf(10, z11 / 10);
			z12 = powf(10, z12 / 10);
			z21 = powf(10, z21 / 10);
			z22 = powf(10, z22 / 10);

			double GainValue = (z11 * (x2 - x) * (y2 - y)
				+ z21 * (x - x1) * (y2 - y)
				+ z12 * (x2 - x) * (y - y1)
				+ z22 * (x - x1) * (y - y1));
			GainValue = GainValue / ((x2 - x1) * (y2 - y1));
			searchantpattern[idx] = GainValue;
		}
	}
}

__global__ void CUDA_calculationEcho(double* sigma0, double* TransAnt, double* ReciveAnt,
	double* localangle, double* R, double* slopeangle,
	double nearRange, double Fs, double Pt, double lamda, long FreqIDmax,
	cuComplex* echoArr, long* FreqID,
	long len) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double r = R[idx];
		double amp = Pt * TransAnt[idx] * ReciveAnt[idx];
		amp = amp * sigma0[idx];
		amp = amp / (powf(4 * LAMP_CUDA_PI, 2) * powf(r, 4)); // 反射强度

		// 处理相位
		double phi = (-4 * LAMP_CUDA_PI / lamda) * r;
		cuComplex echophi = make_cuComplex(0, phi);
		cuComplex echophiexp = cuCexpf(echophi);

		double timeR = 2 * (r - nearRange) / LIGHTSPEED * Fs;
		long timeID = floorf(timeR);
		//if (timeID < 0 || timeID >= FreqIDmax) {
		//	timeID = 0;
		//	amp = 0;
		//}

		cuComplex echo = make_cuComplex(echophiexp.x, echophiexp.y);
		echoArr[idx] = echo;
		FreqID[idx] = timeID;
	}
}


__global__ void CUDA_AntPatternInterpGain(double* anttheta, double* antphi, double* gain,
	double* antpattern, double starttheta, double startphi, double dtheta, double dphi, int thetapoints, int phipoints, long len) {
	int  idx = blockIdx.x * blockDim.x + threadIdx.x;

	if (idx < len) {

		double temptheta = anttheta[idx];
		double tempphi = antphi[idx];
		double antPatternGain = GPU_BillerInterpAntPattern(antpattern,
			starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
			temptheta, tempphi);
		gain[idx] = antPatternGain;
	}
}


__global__ void CUDA_InterpSigma(
	long* demcls, double* sigmaAmp, double* localanglearr, long len,
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		long clsid = demcls[idx];
		double localangle = localanglearr[idx];
		CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
		if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2) {
			sigmaAmp[idx] = 0;
		}
		else {}

		if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
			abs(tempsigma.p2) < PRECISIONTOLERANCE &&
			abs(tempsigma.p3) < PRECISIONTOLERANCE &&
			abs(tempsigma.p4) < PRECISIONTOLERANCE &&
			abs(tempsigma.p5) < PRECISIONTOLERANCE &&
			abs(tempsigma.p6) < PRECISIONTOLERANCE
			) {
			sigmaAmp[idx] = 0;
		}
		else {
			double sigma = GPU_getSigma0dB(tempsigma, localangle);
			sigma = powf(10.0, sigma / 10.0);// 后向散射系数
			//printf("cls:%d;localangle=%f;sigma0=%f;\n", clsid, localangle, sigma);
			sigmaAmp[idx] = sigma;
		}
	}
}


__global__ void CUDAKernel_RFPC_Caluation_R_Gain(
	double antX, double antY, double antZ, // 天线的坐标
	double* targetX, double* targetY, double* targetZ, long len, // 地面坐标
	long* demCls,
	double* demSlopeX, double* demSlopeY, double* demSlopeZ, // 地表坡度矢量
	double  antXaxisX, double  antXaxisY, double  antXaxisZ, // 天线坐标系的X轴
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,// 天线坐标系的Y轴
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,// 天线坐标系的Z轴
	double  antDirectX, double  antDirectY, double  antDirectZ,// 天线的指向
	double Pt,// 发射能量
	double refPhaseRange,
	double* TransAntpattern, double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints, // 发射天线方向图
	double* ReceiveAntpattern, double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,//接收天线方向图
	double NearR, double FarR, // 距离范围
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 插值图
	double* factorj, long freqnum,
	double* outR,  // 输出距离
	//double* outAmp // 输出增益
	double* PRFEcho_real, double* PRFEcho_imag, long prfid
) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double tx = targetX[idx];
		double ty = targetY[idx];
		double tz = targetZ[idx];
		double RstX = antX - tx; // 计算坐标矢量
		double RstY = antY - ty;
		double RstZ = antZ - tz;

		double slopeX = demSlopeX[idx];
		double slopeY = demSlopeY[idx];
		double slopeZ = demSlopeZ[idx];

		double RstR2 = RstX * RstX + RstY * RstY + RstZ * RstZ;
		double RstR = sqrt(RstR2); // 矢量距离

		//printf("antX=%f;antY=%f;antZ=%f;targetX=%f;targetY=%f;targetZ=%f;RstR=%.6f;diffR=%.6f;\n",antX,antY,antZ,targetX,targetY,targetZ,RstR, RstR - 9.010858499003178e+05);

		if (RstR<NearR || RstR>FarR) {
		}
		else {
			// 求解坡度
			double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
			double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
			double localangle = acosf(dotAB / (RstR * slopR)); // 局地入射角
			double ampGain = 0;
			// 求解天线方向图指向
			CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
				RstX, RstY, RstZ,
				antXaxisX, antXaxisY, antXaxisZ,
				antYaxisX, antYaxisY, antYaxisZ,
				antZaxisX, antZaxisY, antZaxisZ,
				antDirectX, antDirectY, antDirectZ
			);
			if (antVector.Rho > 0) {
				// 发射方向图
				double temptheta = antVector.theta * r2d;
				double tempphi = antVector.phi * r2d;
				double TansantPatternGain =
					GPU_BillerInterpAntPattern(
						TransAntpattern,
						Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
						temptheta, tempphi);

				// 接收方向图
				double antPatternGain = GPU_BillerInterpAntPattern(
					ReceiveAntpattern,
					Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
					temptheta, tempphi);

				// 计算
				double sigma0 = 0;
				{
					long clsid = demCls[idx];
					//printf("clsid=%d\n", clsid);
					CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
					if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2) {
						sigma0 = 0;
					}
					else {}

					if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
						abs(tempsigma.p2) < PRECISIONTOLERANCE &&
						abs(tempsigma.p3) < PRECISIONTOLERANCE &&
						abs(tempsigma.p4) < PRECISIONTOLERANCE &&
						abs(tempsigma.p5) < PRECISIONTOLERANCE &&
						abs(tempsigma.p6) < PRECISIONTOLERANCE
						) {
						sigma0 = 0;
					}
					else {
						double sigma = GPU_getSigma0dB(tempsigma, localangle);
						sigma0 = powf(10.0, sigma / 10.0);// 后向散射系数
					}
				}

				ampGain = TansantPatternGain * antPatternGain;
				ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
				double outAmp_temp = ampGain * Pt * sigma0;
				double tempR =  RstR- refPhaseRange;
				outR[idx] = RstR ;

				for (long ii = 0; ii < freqnum; ii++) {
					double phi= tempR * factorj[ii]; // 相位
					 // Eular; exp(ix)=cos(x)+isin(x)
					double real = outAmp_temp * cos(phi); // 实部
					double imag = outAmp_temp * sin(phi); // 虚部
					atomicAdd(&PRFEcho_real[prfid * freqnum+ ii], real);
					atomicAdd(&PRFEcho_imag[prfid * freqnum+ ii], imag);
				}
			}
			else {
			}
		}
	}
}


__global__ void CUDAKernel_RFPC_Computer_R_Gain(
	double antX, double antY, double antZ, // 天线的坐标
	double* targetX, double* targetY, double* targetZ, long len, // 地面坐标
	long* demCls,
	double* demSlopeX, double* demSlopeY, double* demSlopeZ, // 地表坡度矢量
	double  antXaxisX, double  antXaxisY, double  antXaxisZ, // 天线坐标系的X轴
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,// 天线坐标系的Y轴
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,// 天线坐标系的Z轴
	double  antDirectX, double  antDirectY, double  antDirectZ,// 天线的指向
	double Pt,// 发射能量
	double refPhaseRange,
	double* TransAntpattern, double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints, // 发射天线方向图
	double* ReceiveAntpattern, double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,//接收天线方向图
	double NearR, double FarR, // 距离范围
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 插值图
	float* outR,  // 输出距离
	float* outAmp
) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double tx = targetX[idx];
		double ty = targetY[idx];
		double tz = targetZ[idx];
		double RstX = antX - tx; // 计算坐标矢量
		double RstY = antY - ty;
		double RstZ = antZ - tz;

		double slopeX = demSlopeX[idx];
		double slopeY = demSlopeY[idx];
		double slopeZ = demSlopeZ[idx];

		double RstR2 = RstX * RstX + RstY * RstY + RstZ * RstZ;
		double RstR = sqrt(RstR2); // 矢量距离

		//printf("antX=%f;antY=%f;antZ=%f;targetX=%f;targetY=%f;targetZ=%f;RstR=%.6f;diffR=%.6f;\n",antX,antY,antZ,targetX,targetY,targetZ,RstR, RstR - 9.010858499003178e+05);

		if (RstR<NearR || RstR>FarR) {
		}
		else {
			// 求解坡度
			double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
			double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
			double localangle = acosf(dotAB / (RstR * slopR)); // 局地入射角
			double ampGain = 0;
			// 求解天线方向图指向
			CUDAVectorEllipsoidal antVector = GPU_SatelliteAntDirectNormal(
				RstX, RstY, RstZ,
				antXaxisX, antXaxisY, antXaxisZ,
				antYaxisX, antYaxisY, antYaxisZ,
				antZaxisX, antZaxisY, antZaxisZ,
				antDirectX, antDirectY, antDirectZ
			);
			if (antVector.Rho > 0) {
				// 发射方向图
				double temptheta = antVector.theta * r2d;
				double tempphi = antVector.phi * r2d;
				double TansantPatternGain =
					GPU_BillerInterpAntPattern(
						TransAntpattern,
						Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
						temptheta, tempphi);

				// 接收方向图
				double antPatternGain = GPU_BillerInterpAntPattern(
					ReceiveAntpattern,
					Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
					temptheta, tempphi);

				// 计算
				double sigma0 = 0;
				{
					long clsid = demCls[idx];
					//printf("clsid=%d\n", clsid);
					CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
					if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2) {
						sigma0 = 0;
					}
					else {}

					if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
						abs(tempsigma.p2) < PRECISIONTOLERANCE &&
						abs(tempsigma.p3) < PRECISIONTOLERANCE &&
						abs(tempsigma.p4) < PRECISIONTOLERANCE &&
						abs(tempsigma.p5) < PRECISIONTOLERANCE &&
						abs(tempsigma.p6) < PRECISIONTOLERANCE
						) {
						sigma0 = 0;
					}
					else {
						double sigma = GPU_getSigma0dB(tempsigma, localangle);
						sigma0 = powf(10.0, sigma / 10.0);// 后向散射系数
					}
				}

				ampGain = TansantPatternGain * antPatternGain;
				ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
				outAmp[idx] = float(ampGain * Pt * sigma0);
				outR[idx] = float(RstR - refPhaseRange);
			}
			else {
			}
		}
	}
}


__global__ void CUDAKernel_PRF_FreqEcho(double* temp_R,double factorj,double* temp_real,double* temp_imag,long len) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < len) {
		double phi = factorj * temp_R[idx];
		temp_real[idx] = cos(phi);
		temp_imag[idx] = sin(phi);
	}
}


__global__ void CUDAKernel_PRF_CalFreqEcho(
	double* Rarr, double* ampArr, long pixelcount,
	double* factorj, long freqnum,
	double dx, double nearR,
	cuComplex* PRFEcho, long prfid) {
	long  idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx < freqnum) {
		double fatorj = factorj[idx];
		double phi = 0;
		double amptemp = 0;
		cuComplex tempfreqEcho = PRFEcho[prfid * freqnum + idx];
		for (long i = 0; i < pixelcount; i++) { // 区域积分
			//phi = (R = R - (floor(R / lamda) - 1) * lamda)* fatorj; // 相位
			double phi = Rarr[i] * factorj[idx]; // 相位
			amptemp =  ampArr[i];
			//printf("amp=%f\n", amptemp);
			// Eular; exp(ix)=cos(x)+isin(x)
			tempfreqEcho.x = tempfreqEcho.x + amptemp * cos(phi); // 实部
			tempfreqEcho.y = tempfreqEcho.y + amptemp * sin(phi); // 虚部
			//printf("freqid=%d;fatorj=%.12f;d_R=%.10f;phi=%.10f;echo=complex(%.5f,%.5f)\n", idx,  fatorj, Rarr[i], phi, tempfreqEcho.x, tempfreqEcho.y);
		}
		PRFEcho[prfid*freqnum+idx] = tempfreqEcho;
	}
}


__global__ void CUDAKernel_PRF_GeneratorEcho(float* Rarr, float* ampArr, long blocknum, long pixelcount, double* factorj, long freqnum,
	double nearR, double farR, double* echo_real, double* echo_imag, long prfid) //11
{
	//// 假定共享内存大小为49152 byte
	//// 假定每个Block 线程数大小为 32
	__shared__ float s_R[GPU_SHARE_MEMORY];   // 距离  32*12 * 8= 49.2kb
	__shared__ float s_Amp[GPU_SHARE_MEMORY]; // 振幅  3072 * 8= 49.2kb  49.2*2 = 98.4 < 100 KB

	const int bid = blockIdx.x; // 获取 grid网格编号ID
	const int tid = threadIdx.x;// 获取 单个 block 中的线程ID

	const int startPIX = bid * GPU_SHARE_STEP;
	int curthreadidx = 0;
	for (long i = 0; i < GPU_SHARE_STEP; i++) {
		curthreadidx = i * blockDim.x + tid; // 计算分块
		s_R[curthreadidx] = (startPIX + i) < pixelcount ? Rarr[startPIX + i] : 0.0;
		s_Amp[curthreadidx] = (startPIX + i) < pixelcount ? ampArr[startPIX + i] : 0.0;
	}

	//__syncthreads(); // 确定所有待处理数据都已经进入程序中

	long freqnumblock = (freqnum + 32 - 1) / 32; //16
	if (startPIX < pixelcount) { // 存在可能处理的计算
		double temp_real = 0;
		double temp_imag = 0;
		double factorjTemp = 0;
		double temp_phi = 0;
		double temp_amp = 0;
		long dataid = 0;
		curthreadidx = 0;
		for (long i = 0; i < freqnumblock; i++) {
			curthreadidx = tid * freqnumblock + i; // 获取当前频率
			if (curthreadidx < freqnum) { // 存在频率
				factorjTemp = factorj[curthreadidx];
				for (long j = 0; j < GPU_SHARE_STEP; j++) {
					dataid = j * blockDim.x + tid; // 数据编辑
					temp_phi = s_R[dataid] * factorjTemp;
					temp_amp = s_Amp[dataid];

					temp_real = temp_real + temp_amp * cos(temp_phi);
					temp_imag = temp_imag + temp_amp * sin(temp_phi);
				}
				atomicAdd(&echo_real[prfid * freqnum + curthreadidx], temp_real); // 更新实部
				atomicAdd(&echo_imag[prfid * freqnum + curthreadidx], temp_imag); // 更新虚部
			}
		}
	}
}


/** 对外封装接口 *******************************************************************************************************/

extern "C" void SatelliteAntDirectNormal(double* RstX, double* RstY, double* RstZ,
	double  antXaxisX, double  antXaxisY, double  antXaxisZ,
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,
	double  antDirectX, double  antDirectY, double  antDirectZ,
	double* thetaAnt, double* phiAnt,
	long len) {

	int blockSize = 256; // 每个块的线程数
	int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
	// 调用 CUDA 核函数
	CUDA_SatelliteAntDirectNormal << <numBlocks, blockSize >> > (RstX, RstY, RstZ,
		antXaxisX, antXaxisY, antXaxisZ,
		antYaxisX, antYaxisY, antYaxisZ,
		antZaxisX, antZaxisY, antZaxisZ,
		antDirectX, antDirectY, antDirectZ,
		thetaAnt, phiAnt
		, len);

#ifdef __CUDADEBUG__
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
		// Possibly: exit(-1) if program cannot continue....
	}
#endif // __CUDADEBUG__
	cudaDeviceSynchronize();

}

extern "C" void   AntPatternInterpGain(double* anttheta, double* antphi, double* gain,
	double* antpattern, double starttheta, double startphi, double dtheta, double dphi, int thetapoints, int phipoints, long len) {
	int blockSize = 256; // 每个块的线程数
	int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
	//printf("\nCUDA_RFPC_SiglePRF blockSize:%d ,numBlock:%d\n", blockSize, numBlocks);

	CUDA_AntPatternInterpGain << <numBlocks, blockSize >> > (anttheta, antphi, gain,
		antpattern,
		starttheta, startphi, dtheta, dphi, thetapoints, phipoints,
		len);
#ifdef __CUDADEBUG__
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
		// Possibly: exit(-1) if program cannot continue....
	}
#endif // __CUDADEBUG__
	cudaDeviceSynchronize();
}


extern "C" void calculationEcho(double* sigma0, double* TransAnt, double* ReciveAnt,
	double* localangle, double* R, double* slopeangle,
	double nearRange, double Fs, double pt, double lamda, long FreqIDmax,
	cuComplex* echoAmp, long* FreqID,
	long len)
{
	int blockSize = 256; // 每个块的线程数
	int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
	// 调用 CUDA 核函数
	CUDA_calculationEcho << <numBlocks, blockSize >> > (sigma0, TransAnt, ReciveAnt,
		localangle, R, slopeangle,
		nearRange, Fs, pt, lamda, FreqIDmax,
		echoAmp, FreqID,
		len);
#ifdef __CUDADEBUG__
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
		// Possibly: exit(-1) if program cannot continue....
	}
#endif // __CUDADEBUG__
	cudaDeviceSynchronize();
}


extern "C" void CUDAInterpSigma(
	long* demcls, double* sigmaAmp, double* localanglearr, long len,
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen) {// 地表覆盖类型-sigma插值对应函数-ulaby
	int blockSize = 256; // 每个块的线程数
	int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
	// 调用 CUDA 核函数
	CUDA_InterpSigma << <numBlocks, blockSize >> > (
		demcls, sigmaAmp, localanglearr, len,
		sigma0Paramslist, sigmaparamslistlen
		);
#ifdef __CUDADEBUG__
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		printf("CUDA_RFPC_SiglePRF CUDA Error: %s\n", cudaGetErrorString(err));
		// Possibly: exit(-1) if program cannot continue....
	}
#endif // __CUDADEBUG__
	cudaDeviceSynchronize();
}

extern "C" void CUDARFPC_Caluation_R_Gain(
	double antX, double antY, double antZ, // 天线的坐标
	double* targetX, double* targetY, double* targetZ, long TargetPixelNumber, // 地面坐标
	long* demCls,
	double* demSlopeX, double* demSlopeY, double* demSlopeZ, // 地表坡度矢量
	double  antXaxisX, double  antXaxisY, double  antXaxisZ, // 天线坐标系的X轴
	double  antYaxisX, double  antYaxisY, double  antYaxisZ,// 天线坐标系的Y轴
	double  antZaxisX, double  antZaxisY, double  antZaxisZ,// 天线坐标系的Z轴
	double  antDirectX, double  antDirectY, double  antDirectZ,// 天线的指向
	double Pt,// 发射能量
	double refPhaseRange,
	double* TransAntpattern, double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints, // 发射天线方向图
	double* ReceiveAntpattern, double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,//接收天线方向图
	double NearR, double FarR, // 距离范围
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 插值图
	double* factorj, long freqnum,
	double* outR,  // 输出距离
	//double* outAmp // 输出增益
	double* PRFEcho_real, double* PRFEcho_imag, long prfid
)
{

	int blockSize = 256; // 每个块的线程数
	int numBlocks = (TargetPixelNumber + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
	// 调用 CUDA 核函数
	CUDAKernel_RFPC_Caluation_R_Gain << <numBlocks, blockSize >> > (
		antX, antY, antZ,
		targetX, targetY, targetZ, TargetPixelNumber,
		demCls,
		demSlopeX, demSlopeY, demSlopeZ,
		antXaxisX, antXaxisY, antXaxisZ,
		antYaxisX, antYaxisY, antYaxisZ,
		antZaxisX, antZaxisY, antZaxisZ,
		antDirectX, antDirectY, antDirectZ,
		Pt,
		refPhaseRange,
		TransAntpattern,
		Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
		ReceiveAntpattern,
		Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
		NearR, FarR,
		sigma0Paramslist, sigmaparamslistlen,
		factorj, freqnum,
		outR,
		//outAmp
		PRFEcho_real, PRFEcho_imag, prfid
		);


#ifdef __CUDADEBUG__
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		printf("CUDARFPC_Caluation_R_Gain CUDA Error: %s\n", cudaGetErrorString(err));
		// Possibly: exit(-1) if program cannot continue....
	}
#endif // __CUDADEBUG__
	cudaDeviceSynchronize();
}

extern "C"  void CUDA_PRF_CalFreqEcho(
	double* Rarr, double* ampArr, long pixelcount,
	double* factorj, long freqnum,
	double dx, double nearR,
	cuComplex* PRFEcho, long prfid)
{
	int blockSize = 256; // 每个块的线程数
	int numBlocks = (freqnum + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小

	CUDAKernel_PRF_CalFreqEcho << <numBlocks, blockSize >> > (
		Rarr, ampArr, pixelcount,
		factorj, freqnum,
		dx,nearR,
		PRFEcho, prfid
		);
#ifdef __CUDADEBUG__
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		printf("CUDA_PRF_CalFreqEcho CUDA Error: %s\n", cudaGetErrorString(err));
		// Possibly: exit(-1) if program cannot continue....
	}
#endif // __CUDADEBUG__
	cudaDeviceSynchronize();
}

extern "C"  void CUDA_PRF_GeneratorEcho(cublasHandle_t handle,double* Rarr, double* ampArr, long pixelcount, double* factorj, long freqnum, double nearR, double farR, double* echo_real, double* echo_imag, long prfid)
{
	//cublasHandle_t handle;
	//cublasStatus_t status = cublasCreate(&handle);
	long blocknum = pixelcount / GPU_SHARE_MEMORY + 1;

	int blockSize = 256; // 每个块的线程数
	int numBlocks = (pixelcount + GPU_SHARE_MEMORY - 1) / GPU_SHARE_MEMORY; // 网格数量

	CUDAKernel_PRF_GeneratorEcho << <numBlocks, blockSize >> > (Rarr, ampArr, blocknum, pixelcount,
		factorj, freqnum,
		nearR, farR,
		echo_real, echo_imag, prfid);

#ifdef __CUDADEBUG__
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		printf("CUDA_PRF_GeneratorEcho CUDA Error: %s\n", cudaGetErrorString(err));
		// Possibly: exit(-1) if program cannot continue....
	}
#endif // __CUDADEBUG__
	cudaDeviceSynchronize();
	//cublasDestroy(handle);
}

extern "C" void CUDA_RFPC_MainBlock(
	double* antX, double* antY, double* antZ, // 天线的坐标
	double* antXaxisX, double* antXaxisY, double* antXaxisZ, // 天线坐标系的X轴
	double* antYaxisX, double* antYaxisY, double* antYaxisZ,// 天线坐标系的Y轴
	double* antZaxisX, double* antZaxisY, double* antZaxisZ,// 天线坐标系的Z轴
	double* antDirectX, double* antDirectY, double* antDirectZ,// 天线的指向
	long PRFCount, // 脉冲数
	double* freqpoints,double* factorj ,long freqnum,// 频率数
	double* targetX, double* targetY, double* targetZ, long TargetPixelNumber, // 地面坐标
	long* demCls, // 地表类别
	double* demSlopeX, double* demSlopeY, double* demSlopeZ, // 地表坡度矢量
	double Pt,// 发射能量
	double refPhaseRange,
	double* TransAntpattern, double Transtarttheta, double Transstartphi, double Transdtheta, double Transdphi, int Transthetapoints, int Transphipoints, // 发射天线方向图
	double* ReceiveAntpattern, double Receivestarttheta, double Receivestartphi, double Receivedtheta, double Receivedphi, int Receivethetapoints, int Receivephipoints,//接收天线方向图
	double NearR, double FarR, // 距离范围
	CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 插值图
	double* out_echoReal, double* out_echoImag,// 输出回波
	float* temp_R, float* temp_amp
	//, double* temp_phi, double* temp_real, double* temp_imag// 临时变量
) {

	long blocknum = 0;
	long pixelcount=TargetPixelNumber;
	for(long pid=0;pid<PRFCount;pid++){

		int blockSize = 256; // 每个块的线程数
		int numBlocks = (TargetPixelNumber + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小


		CUDAKernel_RFPC_Computer_R_Gain<<<numBlocks , blockSize >>>(
			antX[pid], antY[pid], antZ[pid],
			targetX, targetY, targetZ, TargetPixelNumber,
			demCls,
			demSlopeX, demSlopeY, demSlopeZ,
			antXaxisX[pid], antXaxisY[pid], antXaxisZ[pid],
			antYaxisX[pid], antYaxisY[pid], antYaxisZ[pid],
			antZaxisX[pid], antZaxisY[pid], antZaxisZ[pid],
			antDirectX[pid], antDirectY[pid], antDirectZ[pid],
			Pt,// 增益后发射能量
			refPhaseRange,
			TransAntpattern,
			Transtarttheta, Transstartphi, Transdtheta, Transdphi, Transthetapoints, Transphipoints,
			ReceiveAntpattern,
			Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
			NearR, FarR,
			sigma0Paramslist, sigmaparamslistlen,
			//factorj, freqnum,
			temp_R,  // 输出距离
			temp_amp
			//out_echoReal, out_echoImag, pid // 输出振幅
		);
		cudaDeviceSynchronize();

		  blocknum = (pixelcount+ GPU_SHARE_STEP-1) / GPU_SHARE_STEP ;
		  blockSize = 32; // 每个块的线程数
		  numBlocks = (pixelcount + GPU_SHARE_STEP - 1) / GPU_SHARE_STEP; // 网格数量

		CUDAKernel_PRF_GeneratorEcho << <numBlocks, blockSize >> > (temp_R, temp_amp, blocknum, pixelcount,
			factorj, freqnum,
			NearR, FarR,
			out_echoReal, out_echoImag, pid);
		cudaDeviceSynchronize();
	}


}


#endif