RasterProcessTool/RasterProcessToolWidget/README.md

28 KiB

RasterProcessTool

extern "C" void AddCUDA(void* aPtr, void* bptr, void* cptr, long member, LAMPGPUDATETYPE datetype) { int blockSize = 256; // 每个块的线程数 int numBlocks = (member + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小

}

// CUDA 核函数 global void computeDistanceAndEchoID(float* antPx, float* antPy, float* antPz, float* img_x, float* img_y, float* img_z, complexfloat* echopluse, complexfloat* imgarr, long rowcount, long colcount, long prfid, float Rnear, float fs, float factorj) {

long  idx = blockIdx.x * blockDim.x + threadIdx.x;
// 确保线程索引有效
if (idx < rowcount * colcount) {

	// 计算距离
	float dx = antPx[prfid] - img_x[idx];
	float dy = antPy[prfid] - img_y[idx];
	float dz = antPz[prfid] - img_z[idx];
	float imgR = sqrt(dx * dx + dy * dy + dz * dz);

	// 计算 EchoID
	long echoID = floor(((imgR - Rnear) * 2 / LIGHTSPEED) * fs);//回波坐标
	float Rftj = imgR * factorj; // 校正
	//printf("%d:(%f,%f),%f,%f  |||||, %f, %f, %f, %f, %f, %f, %f\n", idx, echopluse[echoID].real,  Rftj, imgR, fs,
	//	antPx[prfid], antPy[prfid], antPz[prfid], img_x[idx], img_y[idx], img_z[idx], imgR);
	if (echoID < 0 || echoID >= colcount) {

	}
	else {
		complexfloat Rphi{ 0,Rftj };
		Rphi = expComplex(Rphi);
		imgarr[idx] = addComplex(imgarr[idx], mulComplex(echopluse[echoID], Rphi));
	}
}

}

void TBPImageGPUBlock(float* antPx, float* antPy, float* antPz, float* img_x, float* img_y, float* img_z, std::shared_ptr<std::complex> echoArr, long prfcount, long plusecount, std::shared_ptr<std::complex> imageArr, float freq, float fs, float Rnear, float Rfar, float factorj, long startline, long stepline, long rowcount, long colcount) {

long pixelcount = rowcount * colcount;
complexfloat* h_echopluse;
complexfloat* h_imgarr;

cudaMallocHost(&h_echopluse, sizeof(float) * 2 * plusecount);  // 单个传感器的位置
cudaMallocHost(&h_imgarr, sizeof(float) * 2 * stepline * colcount);

for (long i = 0; i < stepline; i++) {
	long rid = startline + i;
	for (long j = 0; j < colcount; j++) {
		h_imgarr[i * colcount + j].real = imageArr.get()[rid * colcount + j].real();
		h_imgarr[i * colcount + j].imag = imageArr.get()[rid * colcount + j].imag();
	}
}
std::cout << "h_imgarr init finished!!" << std::endl;


float* h_antPx, * h_antPy, * h_antPz;
cudaMallocHost(&h_antPx, sizeof(float) * prfcount);  // 单个传感器的位置
cudaMallocHost(&h_antPy, sizeof(float) * prfcount);
cudaMallocHost(&h_antPz, sizeof(float) * prfcount);

// 初始化
for (long i = 0; i < prfcount; i++) {
	h_antPx[i] = antPx[i];
	h_antPy[i] = antPy[i];
	h_antPz[i] = antPz[i];
}



float* h_img_x, * h_img_y, * h_img_z;
cudaMallocHost(&h_img_x, sizeof(float) * stepline * colcount);
cudaMallocHost(&h_img_y, sizeof(float) * stepline * colcount);
cudaMallocHost(&h_img_z, sizeof(float) * stepline * colcount);



// 初始化
long rid = 0;
for (long i = 0; i < stepline; i++) {
	rid = startline + i;
	for (long j = 0; j < colcount; j++) {
		h_img_x[i * colcount + j] = img_x[rid * colcount + j];
		h_img_y[i * colcount + j] = img_y[rid * colcount + j];
		h_img_z[i * colcount + j] = img_z[rid * colcount + j];
	}
}
std::cout << "h_img_x init finished!!" << std::endl;

// 分配设备内存
float* d_antPx, * d_antPy, * d_antPz, * d_img_x, * d_img_y, * d_img_z;

complexfloat* d_echopluse;
complexfloat* d_imgarr;

cudaMalloc(&d_echopluse, sizeof(float) * 2 * plusecount);
cudaMalloc(&d_imgarr, sizeof(float) * 2 * stepline * colcount);
cudaMalloc(&d_antPx, sizeof(float) * prfcount);
cudaMalloc(&d_antPy, sizeof(float) * prfcount);
cudaMalloc(&d_antPz, sizeof(float) * prfcount);
cudaMalloc(&d_img_x, sizeof(float) * rowcount * colcount);
cudaMalloc(&d_img_y, sizeof(float) * rowcount * colcount);
cudaMalloc(&d_img_z, sizeof(float) * rowcount * colcount);


// 将数据从主机拷贝到设备
cudaMemcpy(d_antPx, h_antPx, sizeof(float) * prfcount, cudaMemcpyHostToDevice);
cudaMemcpy(d_antPx, h_antPx, sizeof(float) * prfcount, cudaMemcpyHostToDevice);
cudaMemcpy(d_antPy, h_antPy, sizeof(float) * prfcount, cudaMemcpyHostToDevice);
cudaMemcpy(d_antPz, h_antPz, sizeof(float) * prfcount, cudaMemcpyHostToDevice);
cudaMemcpy(d_img_x, h_img_x, sizeof(float) * rowcount * colcount, cudaMemcpyHostToDevice);
cudaMemcpy(d_img_y, h_img_y, sizeof(float) * rowcount * colcount, cudaMemcpyHostToDevice);
cudaMemcpy(d_img_z, h_img_z, sizeof(float) * rowcount * colcount, cudaMemcpyHostToDevice);
cudaMemcpy(d_imgarr, h_imgarr, sizeof(float) * 2 * stepline * colcount, cudaMemcpyHostToDevice);




int blockSize = 256; // 每个块的线程数
int numBlocks = (pixelcount + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
long eid = 0;
std::complex<double> Rphi;
for (long prfid = 0; prfid < prfcount; prfid++) {

	for (long i = 0; i < plusecount; i++) {
		h_echopluse[i].real = echoArr.get()[prfid * plusecount + i].real();
		h_echopluse[i].imag = echoArr.get()[prfid * plusecount + i].imag();
	}

	cudaMemcpy(d_echopluse, h_echopluse, sizeof(float) * 2 * plusecount, cudaMemcpyHostToDevice);

	computeDistanceAndEchoID << < numBlocks, blockSize >> > (d_antPx, d_antPy, d_antPz,
		d_img_x, d_img_y, d_img_z,
		d_echopluse, d_imgarr,
		rowcount, colcount, prfid,
		Rnear, fs, factorj);
	cudaDeviceSynchronize();// 等待所有设备任务完成
	if (prfid % 100 == 0) {
		//std::cout << "\rprf " << prfid <<"/"<< prfcount << "\t\t\t";
	}
	//cudaMemcpy(h_echopluse, d_echopluse, sizeof(float) * 2 * stepline * colcount, cudaMemcpyDeviceToHost);

}
std::cout << std::endl;
// GPU -> CPU
cudaMemcpy(h_imgarr, d_imgarr, sizeof(float) * 2 * stepline * colcount, cudaMemcpyDeviceToHost);

for (long i = 0; i < stepline; i++) {
	long rid = startline + i;
	for (long j = 0; j < colcount; j++) {
		imageArr.get()[rid * colcount + j] = std::complex<double>(h_imgarr[i * colcount + j].real, h_imgarr[i * colcount + j].imag);
	}
}



// 清理资源
cudaFree(d_antPx);
cudaFree(d_antPy);
cudaFree(d_antPz);
cudaFree(d_img_x);
cudaFree(d_img_y);
cudaFree(d_img_z);

cudaFree(d_echopluse);
cudaFree(d_imgarr);

cudaFreeHost(h_antPx);
cudaFreeHost(h_antPy);
cudaFreeHost(h_antPz);
cudaFreeHost(h_img_x);
cudaFreeHost(h_img_y);
cudaFreeHost(h_img_z);
cudaFreeHost(h_echopluse);
cudaFreeHost(h_imgarr);
std::cout << "end GPU" << std::endl;

}

void RTPC(float* antx, float* anty, float* antz, float* demx, float* demy, float* demz, float* demslopex, float* demslopey, float* demslopez ) {}

ErrorCode RTPCProcessCls::RTPCMainProcess(long num_thread) { omp_set_num_threads(num_thread);// 设置openmp 线程数量 double widthSpace = LIGHTSPEED / 2 / this->TaskSetting->getFs();

double prf_time = 0;
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
bool antflag = true; // 计算天线方向图
Landpoint LandP{ 0,0,0 };
Point3 GERpoint{ 0,0,0 };
double R = 0;
double dem_row = 0, dem_col = 0, dem_alt = 0;

long double imageStarttime = 0;
imageStarttime = this->TaskSetting->getSARImageStartTime();
//std::vector<SatelliteOribtNode> sateOirbtNodes(this->PluseCount);
std::shared_ptr<SatelliteOribtNode[]> sateOirbtNodes(new SatelliteOribtNode[this->PluseCount], delArrPtr);
{ // 姿态计算不同
	// 计算姿态
	std::shared_ptr<double> antpos = this->EchoSimulationData->getAntPos();
	double dAt = 1e-6;
	double prf_time_dt = 0;
	Landpoint InP{ 0,0,0 }, outP{ 0,0,0 };
	for (long prf_id = 0; prf_id < this->PluseCount; prf_id++) {
		prf_time = dt * prf_id;
		prf_time_dt = prf_time + dAt;
		SatelliteOribtNode sateOirbtNode;
		SatelliteOribtNode sateOirbtNode_dAt;
		this->TaskSetting->getSatelliteOribtNode(prf_time, sateOirbtNode, antflag);
		this->TaskSetting->getSatelliteOribtNode(prf_time_dt, sateOirbtNode_dAt, antflag);

		sateOirbtNode.AVx = (sateOirbtNode_dAt.Vx - sateOirbtNode.Vx) / dAt; // 加速度
		sateOirbtNode.AVy = (sateOirbtNode_dAt.Vy - sateOirbtNode.Vy) / dAt;
		sateOirbtNode.AVz = (sateOirbtNode_dAt.Vz - sateOirbtNode.Vz) / dAt;

		InP.lon = sateOirbtNode.Px;
		InP.lat = sateOirbtNode.Py;
		InP.ati = sateOirbtNode.Pz;

		outP = XYZ2LLA(InP);

		antpos.get()[prf_id * 19 + 0] = prf_time + imageStarttime;
		antpos.get()[prf_id * 19 + 1] = sateOirbtNode.Px;
		antpos.get()[prf_id * 19 + 2] = sateOirbtNode.Py;
		antpos.get()[prf_id * 19 + 3] = sateOirbtNode.Pz;
		antpos.get()[prf_id * 19 + 4] = sateOirbtNode.Vx;
		antpos.get()[prf_id * 19 + 5] = sateOirbtNode.Vy;
		antpos.get()[prf_id * 19 + 6] = sateOirbtNode.Vz;
		antpos.get()[prf_id * 19 + 7] = sateOirbtNode.AntDirecX;
		antpos.get()[prf_id * 19 + 8] = sateOirbtNode.AntDirecY;
		antpos.get()[prf_id * 19 + 9] = sateOirbtNode.AntDirecZ;
		antpos.get()[prf_id * 19 + 10] = sateOirbtNode.AVx;
		antpos.get()[prf_id * 19 + 11] = sateOirbtNode.AVy;
		antpos.get()[prf_id * 19 + 12] = sateOirbtNode.AVz;
		antpos.get()[prf_id * 19 + 13] = sateOirbtNode.zeroDopplerDirectX;
		antpos.get()[prf_id * 19 + 14] = sateOirbtNode.zeroDopplerDirectY;
		antpos.get()[prf_id * 19 + 15] = sateOirbtNode.zeroDopplerDirectZ;
		antpos.get()[prf_id * 19 + 16] = outP.lon;
		antpos.get()[prf_id * 19 + 17] = outP.lat;
		antpos.get()[prf_id * 19 + 18] = outP.ati;
		sateOirbtNodes[prf_id] = sateOirbtNode;
	}
	this->EchoSimulationData->saveAntPos(antpos);
	antpos.reset();
	qDebug() << "Ant position finished sucessfully !!!";
}

// 回波
long echoIdx = 0;



double NearRange = this->EchoSimulationData->getNearRange(); // 近斜据
double FarRange = this->EchoSimulationData->getFarRange();

double TimgNearRange = 2 * NearRange / LIGHTSPEED;
double TimgFarRange = 2 * FarRange / LIGHTSPEED;

double Fs = this->TaskSetting->getFs(); // 距离向采样率
double Pt = this->TaskSetting->getPt() * this->TaskSetting->getGri();// 发射电压 1v
//double GainAntLen = -3;// -3dB 为天线半径
long pluseCount = this->PluseCount;
double lamda = this->TaskSetting->getCenterLamda(); // 波长

// 天线方向图
std::shared_ptr<AbstractRadiationPattern> TransformPattern = this->TaskSetting->getTransformRadiationPattern(); // 发射天线方向图
std::shared_ptr<AbstractRadiationPattern> ReceivePattern = this->TaskSetting->getReceiveRadiationPattern();   // 接收天线方向图

long PlusePoint = this->EchoSimulationData->getPlusePoints();

long echoline = Memory1GB * 4 / 16 / PlusePoint;
echoline = echoline < 1000 ? 1000 : echoline;
long startecholine = 0;
for (startecholine = 0; startecholine < pluseCount; startecholine = startecholine + echoline) {

	long tempecholine = echoline;
	if (startecholine + tempecholine >= pluseCount) {
		tempecholine = pluseCount - startecholine;
	}
	std::shared_ptr<std::complex<double>> echo = this->EchoSimulationData->getEchoArr(startecholine, tempecholine);
	for (long i = 0; i < tempecholine * PlusePoint; i++) {
		echo.get()[i] = std::complex<double>(0, 0);
}
	this->EchoSimulationData->saveEchoArr(echo, startecholine, tempecholine);

}

POLARTYPEENUM polartype = this->TaskSetting->getPolarType();

#ifndef CUDANVCC_ QMessageBox::information(this, u8"程序提示", u8"请确定安装了CUDA库"); #else

// RTPC CUDA版本 
if (pluseCount * 4 * 18 > Memory1MB * 100) {
	long max = Memory1MB * 100 / 4 / 20 / PluseCount;
	QMessageBox::warning(nullptr, u8"仿真场景太大了", u8"当前频点数下,脉冲数量最多为:" + QString::number(max));
}



gdalImage demxyz(this->demxyzPath);// 地面点坐标
gdalImage demlandcls(this->LandCoverPath);// 地表覆盖类型 
gdalImage demsloperxyz(this->demsloperPath);// 地面坡向
// 参数与分块计算
long demRow = demxyz.height;
long demCol = demxyz.width;

long blokline = 100;

// 每块 250MB*16 = 4GB

blokline = Memory1MB * 500 / 8 / demCol;
blokline = blokline < 1 ? 1 : blokline;
bool bloklineflag = false;

// 处理发射天线方向图
double Tminphi = TransformPattern->getMinPhi();
double Tmaxphi = TransformPattern->getMaxPhi();
double Tmintheta = TransformPattern->getMinTheta();
double Tmaxtheta = TransformPattern->getMaxTheta();

long Tphinum = TransformPattern->getPhis().size();
long Tthetanum = TransformPattern->getThetas().size();

double TstartTheta = Tmintheta;
double TstartPhi = Tminphi;

double Tdtheta = (Tmaxtheta - Tmintheta) / (Tthetanum - 1);
double Tdphi = (Tmaxphi - Tminphi) / (Tphinum - 1);

float* h_TantPattern = (float*)mallocCUDAHost(sizeof(float) * Tthetanum * Tphinum);
float* d_TantPattern = (float*)mallocCUDADevice(sizeof(float) * Tthetanum * Tphinum);

for (long i = 0; i < Tthetanum; i++) {
	for (long j = 0; j < Tphinum; j++) {
		h_TantPattern[i * Tphinum + j] = TransformPattern->getGainLearThetaPhi(TstartTheta + i * Tdtheta, TstartPhi + j * Tdphi);
	}
}

HostToDevice(h_TantPattern, d_TantPattern, sizeof(float) * Tthetanum * Tphinum);
// 处理接收天线方向图
double Rminphi = ReceivePattern->getMinPhi();
double Rmaxphi = ReceivePattern->getMaxPhi();
double Rmintheta = ReceivePattern->getMinTheta();
double Rmaxtheta = ReceivePattern->getMaxTheta();

long Rphinum = ReceivePattern->getPhis().size();
long Rthetanum = ReceivePattern->getThetas().size();

double RstartTheta = Rmintheta;
double RstartPhi = Rminphi;

double Rdtheta = (Rmaxtheta - Rmintheta) / (Rthetanum - 1);
double Rdphi = (Rmaxphi - Rminphi) / (Rphinum - 1);

float* h_RantPattern = (float*)mallocCUDAHost(sizeof(float) * Rthetanum * Rphinum);
float* d_RantPattern = (float*)mallocCUDADevice(sizeof(float) * Rthetanum * Rphinum);

for (long i = 0; i < Rthetanum; i++) {
	for (long j = 0; j < Rphinum; j++) {
		h_RantPattern[i * Rphinum + j] = ReceivePattern->getGainLearThetaPhi(RstartTheta + i * Rdtheta, RstartPhi + j * Rdphi);
	}
}
HostToDevice(h_RantPattern, d_RantPattern, sizeof(float) * Rthetanum * Rphinum);
//处理地表覆盖
QMap<long, long> clamap;
long clamapid = 0;
long startline = 0;

for (startline = 0; startline < demRow; startline = startline + blokline) {
	Eigen::MatrixXd clsland = demlandcls.getData(startline, 0, blokline, demlandcls.width, 1);
	long clsrows = clsland.rows();
	long clscols = clsland.cols();
	long clsid = 0;
	for (long ii = 0; ii < clsrows; ii++) {
		for (long jj = 0; jj < clscols; jj++) {
			clsid = clsland(ii, jj);
			if (clamap.contains(clsid)) {}
			else {
				clamap.insert(clsid, clamapid);
				clamapid = clamapid + 1;
			}
		}
	}
}

CUDASigmaParam* h_clsSigmaParam = (CUDASigmaParam*)mallocCUDAHost(sizeof(CUDASigmaParam) * clamapid);
CUDASigmaParam* d_clsSigmaParam = (CUDASigmaParam*)mallocCUDADevice(sizeof(CUDASigmaParam) * clamapid);
{
	std::map<long, SigmaParam> tempSigmaParam = this->SigmaDatabasePtr->getsigmaParams(polartype);
	for (long id : clamap.keys()) {
		SigmaParam tempp = tempSigmaParam[id];
		h_clsSigmaParam[clamap[id]].p1 = tempp.p1;
		h_clsSigmaParam[clamap[id]].p2 = tempp.p2;
		h_clsSigmaParam[clamap[id]].p3 = tempp.p3;
		h_clsSigmaParam[clamap[id]].p4 = tempp.p4;
		h_clsSigmaParam[clamap[id]].p5 = tempp.p5;
		h_clsSigmaParam[clamap[id]].p6 = tempp.p6;
	}
}
HostToDevice(h_clsSigmaParam, d_clsSigmaParam, sizeof(CUDASigmaParam) * clamapid);

// 临时变量声明
Eigen::MatrixXd dem_x = demxyz.getData(0, 0, blokline, demxyz.width, 1);  // 地面坐标
long tempDemRows = dem_x.rows();
long tempDemCols = dem_x.cols();

Eigen::MatrixXd dem_y = Eigen::MatrixXd::Zero(tempDemRows, tempDemCols);
Eigen::MatrixXd dem_z = Eigen::MatrixXd::Zero(tempDemRows, tempDemCols);
Eigen::MatrixXd demsloper_x = Eigen::MatrixXd::Zero(tempDemRows, tempDemCols);
Eigen::MatrixXd demsloper_y = Eigen::MatrixXd::Zero(tempDemRows, tempDemCols);
Eigen::MatrixXd demsloper_z = Eigen::MatrixXd::Zero(tempDemRows, tempDemCols);
Eigen::MatrixXd sloperAngle = Eigen::MatrixXd::Zero(tempDemRows, tempDemCols);

float* h_dem_x = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
float* h_dem_y = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
float* h_dem_z = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
float* h_demsloper_x = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
float* h_demsloper_y = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
float* h_demsloper_z = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
float* h_demsloper_angle = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
long* h_demcls = (long*)mallocCUDAHost(sizeof(long) * blokline * tempDemCols);

float* d_dem_x = (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols); // 7
float* d_dem_y = (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
float* d_dem_z = (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
float* d_demsloper_x = (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
float* d_demsloper_y = (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
float* d_demsloper_z = (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
float* d_demsloper_angle = (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
long* d_demcls = (long*)mallocCUDADevice(sizeof(long) * blokline * tempDemCols);


// 回波
cuComplex* h_echoAmp = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * blokline * tempDemCols);
cuComplex* d_echoAmp = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * blokline * tempDemCols);

int* h_echoAmpFID = (int*)mallocCUDAHost(sizeof(int) * blokline * tempDemCols);
int* d_echoAmpFID = (int*)mallocCUDADevice(sizeof(int) * blokline * tempDemCols);

Eigen::MatrixXd landcover = Eigen::MatrixXd::Zero(blokline, tempDemCols);// 地面覆盖类型

for (startline = 0; startline < demRow; startline = startline + blokline) {
	long newblokline = blokline;
	if ((startline + blokline) >= demRow) {
		newblokline = demRow - startline;
		bloklineflag = true;
	}

	dem_x = demxyz.getData(startline, 0, newblokline, demxyz.width, 1);  // 地面坐标
	dem_y = demxyz.getData(startline, 0, newblokline, demxyz.width, 2);
	dem_z = demxyz.getData(startline, 0, newblokline, demxyz.width, 3);
	demsloper_x = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 1);
	demsloper_y = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 2);
	demsloper_z = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 3);
	sloperAngle = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 4);
	landcover = demlandcls.getData(startline, 0, newblokline, demlandcls.width, 1);


	if (bloklineflag) {
		FreeCUDAHost(h_dem_x);						FreeCUDADevice(d_dem_x);
		FreeCUDAHost(h_dem_y);						FreeCUDADevice(d_dem_y);
		FreeCUDAHost(h_dem_z);						FreeCUDADevice(d_dem_z);
		FreeCUDAHost(h_demsloper_x);				FreeCUDADevice(d_demsloper_x);
		FreeCUDAHost(h_demsloper_y);				FreeCUDADevice(d_demsloper_y);
		FreeCUDAHost(h_demsloper_z);				FreeCUDADevice(d_demsloper_z); //6
		FreeCUDAHost(h_demsloper_angle);			FreeCUDADevice(d_demsloper_angle);//7
		FreeCUDAHost(h_demcls);						FreeCUDADevice(d_demcls);//7
		FreeCUDAHost(h_echoAmp);					FreeCUDADevice(d_echoAmp);//19
		FreeCUDAHost(h_echoAmpFID);					FreeCUDADevice(d_echoAmpFID);//19

		h_dem_x = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
		h_dem_y = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
		h_dem_z = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
		h_demsloper_x = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
		h_demsloper_y = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
		h_demsloper_z = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
		h_demsloper_angle = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
		h_demcls = (long*)mallocCUDAHost(sizeof(long) * newblokline * tempDemCols);

		d_dem_x = (float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols); // 7
		d_dem_y = (float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
		d_dem_z = (float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
		d_demsloper_x = (float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
		d_demsloper_y = (float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
		d_demsloper_z = (float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
		d_demsloper_angle = (float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
		d_demcls = (long*)mallocCUDADevice(sizeof(long) * newblokline * tempDemCols);

		h_echoAmp = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * newblokline * tempDemCols);;
		d_echoAmp = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * newblokline * tempDemCols);;

		h_echoAmpFID = (int*)mallocCUDAHost(sizeof(int) * newblokline * tempDemCols);
		d_echoAmpFID = (int*)mallocCUDADevice(sizeof(int) * newblokline * tempDemCols);
	}

	{ // 处理 dem -> 数量

		float temp_dem_x;
		float temp_dem_y;
		float temp_dem_z;
		float temp_demsloper_x;
		float temp_demsloper_y;
		float temp_demsloper_z;
		float temp_sloperAngle;
		long temp_demclsid;

		for (long i = 0; i < newblokline; i++) {
			for (long j = 0; j < demxyz.width; j++) {
				temp_dem_x = float(dem_x(i, j));
				temp_dem_y = float(dem_y(i, j));
				temp_dem_z = float(dem_z(i, j));
				temp_demsloper_x = float(demsloper_x(i, j));
				temp_demsloper_y = float(demsloper_y(i, j));
				temp_demsloper_z = float(demsloper_z(i, j));
				temp_sloperAngle = float(sloperAngle(i, j));
				temp_demclsid = long(landcover(i, j));
				h_dem_x[i * demxyz.width + j] = temp_dem_x;
				h_dem_y[i * demxyz.width + j] = temp_dem_y;
				h_dem_z[i * demxyz.width + j] = temp_dem_z;
				h_demsloper_x[i * demxyz.width + j] = temp_demsloper_x;
				h_demsloper_y[i * demxyz.width + j] = temp_demsloper_y;
				h_demsloper_z[i * demxyz.width + j] = temp_demsloper_z;
				h_demsloper_angle[i * demxyz.width + j] = temp_sloperAngle;
				h_demcls[i * demxyz.width + j] = clamap[temp_demclsid];
			}
		}

	}
	HostToDevice((void*)h_dem_x, (void*)d_dem_x, sizeof(float) * newblokline * tempDemCols); // 复制 机器 -> GPU
	HostToDevice((void*)h_dem_y, (void*)d_dem_y, sizeof(float) * newblokline * tempDemCols);
	HostToDevice((void*)h_dem_z, (void*)d_dem_z, sizeof(float) * newblokline * tempDemCols);
	HostToDevice((void*)h_demsloper_x, (void*)d_demsloper_x, sizeof(float) * newblokline * tempDemCols);
	HostToDevice((void*)h_demsloper_y, (void*)d_demsloper_y, sizeof(float) * newblokline * tempDemCols);
	HostToDevice((void*)h_demsloper_z, (void*)d_demsloper_z, sizeof(float) * newblokline * tempDemCols);
	HostToDevice((void*)h_demsloper_angle, (void*)d_demsloper_angle, sizeof(float) * newblokline * tempDemCols);
	HostToDevice((void*)h_demcls, (void*)d_demcls, sizeof(float) * newblokline * tempDemCols);//地表覆盖

	// 临时文件声明
	float antpx = 0;
	float antpy = 0;
	float antpz = 0;
	float antvx = 0;
	float antvy = 0;
	float antvz = 0;
	float antdirectx = 0;
	float antdirecty = 0;
	float antdirectz = 0;
	float antXaxisX = 0;
	float antXaxisY = 0;
	float antXaxisZ = 0;
	float antYaxisX = 0;
	float antYaxisY = 0;
	float antYaxisZ = 0;
	float antZaxisX = 0;
	float antZaxisY = 0;
	float antZaxisZ = 0;




	int pixelcount = newblokline * tempDemCols;
	std::cout << " GPU Memory init finished!!!!" << std::endl;

	long echoline = Memory1GB * 4 / 16 / PlusePoint;
	echoline = echoline < 1000 ? 1000 : echoline;
	long startecholine = 0;
	for (startecholine = 0; startecholine < pluseCount; startecholine = startecholine + echoline) {

		long tempecholine = echoline;
		if (startecholine + tempecholine >= pluseCount) {
			tempecholine = pluseCount - startecholine;
		}
		std::shared_ptr<std::complex<double>> echo = this->EchoSimulationData->getEchoArr(startecholine, tempecholine);
		long prfid = 0;
		for (long tempprfid = 0; tempprfid < tempecholine; tempprfid++) {
			{// 计算
				prfid = tempprfid + startecholine;
				// 天线位置
				antpx = sateOirbtNodes[prfid].Px;
				antpy = sateOirbtNodes[prfid].Py;
				antpz = sateOirbtNodes[prfid].Pz;
				antvx = sateOirbtNodes[prfid].Vx;
				antvy = sateOirbtNodes[prfid].Vy;
				antvz = sateOirbtNodes[prfid].Vz; //6
				antdirectx = sateOirbtNodes[prfid].AntDirecX;
				antdirecty = sateOirbtNodes[prfid].AntDirecY;
				antdirectz = sateOirbtNodes[prfid].AntDirecZ; // 9 天线指向
				antXaxisX = sateOirbtNodes[prfid].AntXaxisX;
				antXaxisY = sateOirbtNodes[prfid].AntXaxisY;
				antXaxisZ = sateOirbtNodes[prfid].AntXaxisZ;//12 天线坐标系
				antYaxisX = sateOirbtNodes[prfid].AntYaxisX;
				antYaxisY = sateOirbtNodes[prfid].AntYaxisY;
				antYaxisZ = sateOirbtNodes[prfid].AntYaxisZ;//15
				antZaxisX = sateOirbtNodes[prfid].AntZaxisX;
				antZaxisY = sateOirbtNodes[prfid].AntZaxisY;
				antZaxisZ = sateOirbtNodes[prfid].AntZaxisZ;//18
				//CUDATestHelloWorld(1, 20);

				CUDA_RTPC_SiglePRF(
					antpx, antpy, antpz,// 天线坐标
					antXaxisX, antXaxisY, antXaxisZ, // 天线坐标系
					antYaxisX, antYaxisY, antYaxisZ, //
					antZaxisX, antZaxisY, antZaxisZ,
					antdirectx, antdirecty, antdirectz,// 天线指向
					d_dem_x, d_dem_y, d_dem_z,
					d_demcls, // 地面坐标
					d_demsloper_x, d_demsloper_y, d_demsloper_z, d_demsloper_angle,// 地面坡度
					d_TantPattern, TstartTheta, TstartPhi, Tdtheta, Tdphi, Tthetanum, Tphinum,// 天线方向图相关
					d_RantPattern, RstartTheta, RstartPhi, Rdtheta, Rdphi, Rthetanum, Rphinum,// 天线方向图相关
					lamda, Fs, NearRange, Pt, PlusePoint, // 参数
					d_clsSigmaParam, clamapid,// 地表覆盖类型-sigma插值对应函数-ulaby
					d_echoAmp, d_echoAmpFID,
					newblokline, tempDemCols);


















				DeviceToHost(h_echoAmpFID, d_echoAmpFID, sizeof(long) * newblokline * tempDemCols);
				DeviceToHost(h_echoAmp, d_echoAmp, sizeof(long) * newblokline * tempDemCols);

				for (long i = 0; i < pixelcount; i++) {
					echo.get()[tempprfid * PlusePoint + h_echoAmpFID[i]] = 
					echo.get()[tempprfid * PlusePoint + h_echoAmpFID[i]]
						+ std::complex<double>(h_echoAmp[i].x, h_echoAmp[i].y);
				}
				if (tempprfid % 100 == 0) {
					std::cout << "\r[" << QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz").toStdString() << "]  dem:\t" << startline << "\t-\t" << startline + newblokline <<"  count:\t"<< demRow << "\t:\t pluse :\t" << prfid << " / " << pluseCount << std::endl;
				}
			}
		}

		this->EchoSimulationData->saveEchoArr(echo, startecholine, tempecholine);
	}
}

std::cout << std::endl;


// 地面数据释放
FreeCUDAHost(h_dem_x);			  FreeCUDADevice(d_dem_x);
FreeCUDAHost(h_dem_y);			  FreeCUDADevice(d_dem_y);
FreeCUDAHost(h_dem_z);			  FreeCUDADevice(d_dem_z);
FreeCUDAHost(h_demsloper_x);	  FreeCUDADevice(d_demsloper_x);
FreeCUDAHost(h_demsloper_y);	  FreeCUDADevice(d_demsloper_y);
FreeCUDAHost(h_demsloper_z);	  FreeCUDADevice(d_demsloper_z); //6
FreeCUDAHost(h_demsloper_angle);  FreeCUDADevice(d_demsloper_angle); //7
FreeCUDAHost(h_demcls);			  FreeCUDADevice(d_demcls);//7
FreeCUDAHost(h_echoAmp);		  FreeCUDADevice(d_echoAmp);//19
FreeCUDAHost(h_echoAmpFID);		  FreeCUDADevice(d_echoAmpFID);//19

FreeCUDAHost(h_TantPattern);	  FreeCUDADevice(d_TantPattern);
FreeCUDAHost(h_RantPattern);	  FreeCUDADevice(d_RantPattern);
FreeCUDAHost(h_clsSigmaParam);    FreeCUDADevice(d_clsSigmaParam);

#endif

this->EchoSimulationData->saveToXml();
return ErrorCode::SUCCESS;

}

			Eigen::MatrixXd plusetemp = Eigen::MatrixXd::Zero(newblokline, tempDemCols);
			for (long ii = 0; ii < newblokline; ii++) {
				for (long jj = 0; jj < tempDemCols; jj++) {
					//plusetemp(ii, jj) = h_amp[ii * tempDemCols + jj];
					plusetemp(ii, jj) = std::abs(std::complex<double>(h_echoAmp[ii * tempDemCols + jj].x, h_echoAmp[ii * tempDemCols + jj].y));
				}
			}

			std::cout << "max:" << plusetemp.maxCoeff() << std::endl;
			std::cout << "min:" << plusetemp.minCoeff() << std::endl;

			Eigen::MatrixXd plusetempID = Eigen::MatrixXd::Zero(newblokline, tempDemCols);
			for (long ii = 0; ii < newblokline; ii++) {
				for (long jj = 0; jj < tempDemCols; jj++) {
					//plusetemp(ii, jj) = h_amp[ii * tempDemCols + jj];
					plusetempID(ii, jj) = h_FreqID[ii * tempDemCols + jj];
				}
			}

			std::cout << "max ID:" << plusetempID.maxCoeff() << std::endl;
			std::cout << "min ID:" << plusetempID.minCoeff() << std::endl;