#include "stdafx.h" #include "TBPImageAlgCls.h" #include #include #include #include #include #include #include "GPUTool.cuh" #include "GPUTBPImage.cuh" #include "ImageOperatorBase.h" #include "BPBasic0_CUDA.cuh" #include "BaseTool.h" #include void TBPImageAlgCls::setImagePlanePath(QString INimagePlanePath) { this->imagePlanePath = INimagePlanePath; } QString TBPImageAlgCls::getImagePlanePath() { return this->imagePlanePath; } void TBPImageAlgCls::setEchoL0(std::shared_ptr inL0ds) { this->L0ds = inL0ds; } void TBPImageAlgCls::setImageL1(std::shared_ptr inL1ds) { this->L1ds = inL1ds; } std::shared_ptr TBPImageAlgCls::getEchoL1() { return this->L0ds; } std::shared_ptr TBPImageAlgCls::getImageL0() { return this->L1ds; } ErrorCode TBPImageAlgCls::ProcessWithGridNet(long num_thread,QString xyzRasterPath) { this->outRasterXYZPath = xyzRasterPath; //qDebug() << u8"频域回波-> 时域回波"; //this->TimeEchoDataPath = JoinPath(this->L1ds->getoutFolderPath(), this->L0ds->getSimulationTaskName() + "_Timeecho.bin"); //this->EchoFreqToTime(); // 初始化Raster qDebug() << u8"初始化影像"; long imageheight = this->L1ds->getrowCount(); long imagewidth = this->L1ds->getcolCount(); long blokline = Memory1GB / 8 / 4 / imageheight * 32; blokline = blokline < 1000 ? 1000 : blokline; long startline = 0; for (startline = 0; startline < imageheight; startline = startline + blokline) { long templine = blokline; if (startline + templine >= imageheight) { templine = imageheight - startline; } qDebug() << "\r[" << QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz") << "] imgxyz :\t" << startline << "\t-\t" << startline + templine << " / " << imageheight; std::shared_ptr> imageRaster = this->L1ds->getImageRaster(startline, templine); for (long i = 0; i < templine; i++) { for (long j = 0; j < imagewidth; j++) { imageRaster.get()[i * imagewidth + j] = std::complex(0, 0); } } this->L1ds->saveImageRaster(imageRaster, startline, templine); } // 处理成像映射 CopyProjectTransformMatrixFromRasterAToRasterB(this->outRasterXYZPath, this->L1ds->getImageRasterPath()); qDebug() << u8"频域回波-> 时域回波 结束"; if (GPURUN) { return this->ProcessGPU(); } else { QMessageBox::information(nullptr, u8"提示", u8"目前只支持显卡"); return ErrorCode::FAIL; } return ErrorCode::SUCCESS; } ErrorCode TBPImageAlgCls::ProcessGPU() { std::shared_ptr L1ds=this->L1ds; std::shared_ptr < EchoL0Dataset> L0ds=this->L0ds; QString inGPSPath = L0ds->getGPSPointFilePath(); QString echoFilePath = L0ds->getEchoDataFilePath(); QString imgXYZPath = this->outRasterXYZPath; QString outimgDataPath = L1ds->getImageRasterPath(); size_t prfcount = L0ds->getPluseCount(); size_t freqpoints = L0ds->getPlusePoints(); size_t block_pfrcount = Memory1GB / freqpoints / 8 * 2;// 4GB -- 可以分配内存 size_t img_rowCont = L1ds->getrowCount(); size_t img_colCont = L1ds->getcolCount(); size_t block_imgRowCount = Memory1GB / img_colCont / 8 / 3 * 12;// 4GB-- 可以分配内存 gdalImage demgridimg(imgXYZPath); gdalImageComplex im_finalds(outimgDataPath); gdalImageComplex echods(echoFilePath); // 加载 GPS ,it should be same as prfcount; long gpspoints = prfcount; std::shared_ptr antpos = SatelliteAntPosOperator::readAntPosFile(inGPSPath, gpspoints); GPUDATA h_data{};// 任务参数 { //回波 频率参数 h_data.Nfft = freqpoints; qDebug() << u8"3.proces echo params:"; double centerFreq = L0ds->getCenterFreq(); double bandwidth = L0ds->getBandwidth(); size_t freqpoints = L0ds->getPlusePoints(); h_data.Freq = getFreqPoints_mallocHost(centerFreq - bandwidth / 2, centerFreq + bandwidth / 2, freqpoints); h_data.minF = (double*)mallocCUDAHost(sizeof(double) * gpspoints); for (long i = 0; i < gpspoints; i++) { h_data.minF[i] = h_data.Freq[0]; } h_data.deltaF = bandwidth / (freqpoints - 1); h_data.K = h_data.Nfft; h_data.R0 = L0ds->getRefPhaseRange(); double deltaF = h_data.deltaF; // 从输入参数获取 double maxWr = 299792458.0 / (2.0 * deltaF); double* r_vec_host = (double*)mallocCUDAHost(sizeof(double) * h_data.Nfft);// new double[data.Nfft]; const double step = maxWr / h_data.Nfft; const double start = -1 * h_data.Nfft / 2.0 * step; for (int i = 0; i < h_data.Nfft; ++i) { r_vec_host[i] = start + i * step; } h_data.r_vec = r_vec_host; // 处理天线坐标 h_data.AntX = (double*)mallocCUDAHost(sizeof(double) * block_pfrcount); h_data.AntY = (double*)mallocCUDAHost(sizeof(double) * block_pfrcount); h_data.AntZ = (double*)mallocCUDAHost(sizeof(double) * block_pfrcount); qDebug() << "r_vec [0]:\t" << h_data.r_vec[0]; qDebug() << "r_vec [end]:\t" << h_data.r_vec[h_data.Nfft-1]; qDebug() << "Range resolution(m):\t" << step; qDebug() << "range Scence(m):\t" << maxWr; qDebug() << "start Freq:\t" << centerFreq - bandwidth / 2; qDebug() << "end Freq:\t" << centerFreq + bandwidth / 2; qDebug() << "freq points:\t" << freqpoints; qDebug() << "delta freq:\t" << h_data.deltaF; qDebug() << "prf count:\t" << gpspoints; qDebug() << "-----------------------------------------------------------"; } qDebug() << "BP......"; for (long img_start_rid = 0; img_start_rid < img_rowCont; img_start_rid = img_start_rid + block_imgRowCount) { long img_blockRowCount = block_imgRowCount; long img_blockColCount = img_colCont; std::shared_ptr demX = readDataArr(demgridimg, img_start_rid, 0, img_blockRowCount, img_blockColCount, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); std::shared_ptr demY = readDataArr(demgridimg, img_start_rid, 0, img_blockRowCount, img_blockColCount, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); std::shared_ptr demZ = readDataArr(demgridimg, img_start_rid, 0, img_blockRowCount, img_blockColCount, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); std::shared_ptr demR= demgridimg.band_num==3? readDataArr(demgridimg, img_start_rid, 0, img_blockRowCount, img_blockColCount, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD): readDataArr(demgridimg, img_start_rid, 0, img_blockRowCount, img_blockColCount, 4, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); if (nullptr == demR) { for (long i = 0; i < img_blockRowCount; i++) { for (long j = 0; j < img_blockColCount; j++) { demR.get()[i * img_blockColCount + j] = 0; } } } h_data.x_mat = (double*)mallocCUDAHost(sizeof(double) * img_blockRowCount * img_blockColCount); // 成像网格 h_data.y_mat = (double*)mallocCUDAHost(sizeof(double) * img_blockRowCount * img_blockColCount); h_data.z_mat = (double*)mallocCUDAHost(sizeof(double) * img_blockRowCount * img_blockColCount); h_data.R_mat = (double*)mallocCUDAHost(sizeof(double) * img_blockRowCount * img_blockColCount); h_data.nx = img_blockColCount; h_data.ny = img_blockRowCount; for (long i = 0; i < h_data.ny; i++) { for (long j = 0; j < h_data.nx; j++) { h_data.x_mat[i * h_data.nx + j] = demX.get()[i * h_data.nx + j]; h_data.y_mat[i * h_data.nx + j] = demY.get()[i * h_data.nx + j]; h_data.z_mat[i * h_data.nx + j] = demZ.get()[i * h_data.nx + j]; h_data.R_mat[i * h_data.nx + j] = demR.get()[i * h_data.nx + j]; } } std::shared_ptr> imgArr = im_finalds.getDataComplexSharePtr(img_start_rid, 0, img_blockRowCount, img_blockColCount, 1); h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex)* img_blockRowCount* img_blockColCount); for (long echo_start_pid = 0; echo_start_pid < prfcount; echo_start_pid = echo_start_pid + block_pfrcount) { long echo_blockPRFCount = block_pfrcount; long echo_blockFreqCount = freqpoints; // 读取回波 std::shared_ptr> echoData = readDataArrComplex>(echods, echo_start_pid,0, echo_blockPRFCount, echo_blockFreqCount,1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); size_t echosize = sizeof(cuComplex) * echo_blockPRFCount * echo_blockFreqCount; h_data.phdata = (cuComplex*)mallocCUDAHost(echosize); shared_complexPtrToHostCuComplex(echoData.get(), h_data.phdata, size_t(echo_blockPRFCount * echo_blockFreqCount)); // 处理天线坐标 h_data.Np = echo_blockPRFCount; for (long i = 0; i < h_data.Np; i++) { h_data.AntX[i] = antpos.get()[i+echo_start_pid].Px; h_data.AntY[i] = antpos.get()[i+echo_start_pid].Py; h_data.AntZ[i] = antpos.get()[i+echo_start_pid].Pz; } { qDebug() << "GPS points Count:\t" << gpspoints; qDebug() << "PRF 0:\t " << QString("%1,%2,%3").arg(h_data.AntX[0]).arg(h_data.AntY[0]).arg(h_data.AntZ[0]); qDebug() << "PRF " << gpspoints - 1 << ":\t " << QString("%1,%2,%3") .arg(h_data.AntX[gpspoints - 1]) .arg(h_data.AntY[gpspoints - 1]) .arg(h_data.AntZ[gpspoints - 1]); } GPUDATA d_data; initGPUData(h_data, d_data); bpBasic0CUDA(d_data, 0); DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * d_data.nx * d_data.ny); freeGPUData(d_data); FreeCUDAHost(h_data.phdata); qDebug() << QString(u8"\nimg proces [%1 precent ] , echo process [%2 precess]\t:img row [%3,%4),echo pfr [%5,%6]") .arg((img_start_rid+ img_blockRowCount)*100.0/ img_rowCont) .arg((echo_start_pid + echo_blockPRFCount) * 100.0 / prfcount) .arg(img_start_rid) .arg(img_start_rid + img_blockRowCount) .arg(echo_start_pid) .arg(echo_start_pid + echo_blockPRFCount) ; //HostCuComplexToshared_complexPtr(h_data.im_final, imgArr.get(), size_t(h_data.nx)* size_t(h_data.ny)); //testOutComplexDoubleArr(QString("im_final.bin"), imgArr.get(), h_data.ny, h_data.nx); } HostCuComplexToshared_complexPtr(h_data.im_final, imgArr.get(), size_t(h_data.nx)* size_t(h_data.ny)); im_finalds.saveImage(imgArr, img_start_rid, 0,img_blockRowCount, img_blockColCount, 1); } qDebug() << QString("img proces [100 precent ] , echo process [100 precess] "); L1ds->saveToXml(); qDebug() << "bp Image Result write to file :\t" << L1ds->getoutFolderPath(); return ErrorCode::SUCCESS; } ErrorCode TBPImageAlgCls::BPProcessBlockGPU() { return ErrorCode::SUCCESS; } void TBPImageAlgCls::setGPU(bool flag) { this->GPURUN = flag; } bool TBPImageAlgCls::getGPU( ) { return this->GPURUN; } void TBPImageAlgCls::EchoFreqToTime() { // 读取数据 long PRFCount = this->L0ds->getPluseCount(); long inColCount = this->L0ds->getPlusePoints(); long outColCount = inColCount;// nextpow2(inColCount); this->TimeEchoRowCount = PRFCount; this->TimeEchoColCount = outColCount; qDebug() << "IFFT : " << this->TimeEchoDataPath; qDebug() << "PRF Count:\t" << PRFCount; qDebug() << "inColCount:\t" << inColCount; qDebug() << "outColCount:\t" << outColCount; // 创建二进制文件 gdalImageComplex outTimeEchoImg = CreategdalImageComplexNoProj(this->TimeEchoDataPath, this->TimeEchoRowCount, this->TimeEchoColCount, 1); // 分块 long echoBlockline = Memory1GB / 8 / 2 / outColCount *2; //1GB echoBlockline = echoBlockline < 1 ? 1 : echoBlockline; long startechoid = 0; for (long startechoid = 0; startechoid < PRFCount; startechoid = startechoid + echoBlockline) { long tempechoBlockline = echoBlockline; if (startechoid + tempechoBlockline >= PRFCount) { tempechoBlockline = PRFCount - startechoid; } std::shared_ptr> echoArr = this->L0ds->getEchoArr(startechoid, tempechoBlockline); std::shared_ptr> IFFTArr = outTimeEchoImg.getDataComplexSharePtr(startechoid, 0, tempechoBlockline, outColCount, 1); std::shared_ptr host_echoArr((cuComplex*)mallocCUDAHost(sizeof(cuComplex) * tempechoBlockline * outColCount), FreeCUDAHost); std::shared_ptr host_IFFTechoArr((cuComplex*)mallocCUDAHost(sizeof(cuComplex) * tempechoBlockline * outColCount), FreeCUDAHost); memset(host_echoArr.get(), 0, sizeof(cuComplex) * tempechoBlockline * outColCount); #pragma omp parallel for for (long ii = 0; ii < tempechoBlockline; ii++) { for (long jj = 0; jj < inColCount; jj++) { host_echoArr.get()[ii * outColCount + jj] = make_cuComplex(echoArr.get()[ii * inColCount + jj].real(), echoArr.get()[ii * inColCount + jj].imag()); } } #pragma omp parallel for for (long ii = 0; ii < tempechoBlockline * outColCount; ii++) { host_IFFTechoArr.get()[ii] = make_cuComplex(0, 0); } std::shared_ptr device_echoArr((cuComplex*)mallocCUDADevice(sizeof(cuComplex) * tempechoBlockline * inColCount), FreeCUDADevice); std::shared_ptr device_IFFTechoArr((cuComplex*)mallocCUDADevice(sizeof(cuComplex) * tempechoBlockline * outColCount), FreeCUDADevice); HostToDevice(host_echoArr.get(), device_echoArr.get(), sizeof(cuComplex) * tempechoBlockline * inColCount); HostToDevice(host_IFFTechoArr.get(), device_IFFTechoArr.get(), sizeof(cuComplex) * tempechoBlockline * outColCount); CUDAIFFT(device_echoArr.get(), device_IFFTechoArr.get(), tempechoBlockline, outColCount, outColCount); FFTShift1D(device_IFFTechoArr.get(), tempechoBlockline, outColCount); DeviceToHost(host_IFFTechoArr.get(), device_IFFTechoArr.get(), sizeof(cuComplex) * tempechoBlockline * outColCount); #pragma omp parallel for for (long ii = 0; ii < tempechoBlockline; ii++) { for (long jj = 0; jj < outColCount; jj++) { IFFTArr.get()[ii * outColCount + jj] = std::complex(host_IFFTechoArr.get()[ii * outColCount + jj].x, host_IFFTechoArr.get()[ii * outColCount + jj].y); //IFFTArr.get()[ii * outColCount + jj] = std::complex(host_echoArr.get()[ii * outColCount + jj].x, host_echoArr.get()[ii * outColCount + jj].y); } } outTimeEchoImg.saveImage(IFFTArr, startechoid, 0, tempechoBlockline, outColCount, 1); qDebug() << QString(" image block PRF:[%1] \t").arg((startechoid + tempechoBlockline) * 100.0 / PRFCount) << startechoid << "\t-\t" << startechoid + tempechoBlockline; } return; } bool TBPImageAlgCls::checkZeros(double* data, long long len) { bool flag = true; #pragma omp parallel for for (long long i = 0; i < len; i++) { if (abs(data[i]) < 1e-7) { flag= false; break; } } return flag; } // // //void CreatePixelXYZ(std::shared_ptr echoL0ds, QString outPixelXYZPath) //{ // long bandwidth = echoL0ds->getBandwidth(); // double Rnear = echoL0ds->getNearRange(); // double Rfar = echoL0ds->getFarRange(); // double refRange = echoL0ds->getRefPhaseRange(); // double dx = LIGHTSPEED / 2.0 / bandwidth; // c/2b // // // // 创建坐标系统 // long prfcount = echoL0ds->getPluseCount(); // long freqcount = echoL0ds->getPlusePoints(); // Eigen::MatrixXd gt = Eigen::MatrixXd::Zero(2, 3); // gt(0, 0) = 0; // gt(0, 1) = 1; // gt(0, 2) = 0; // gt(1, 0) = 0; // gt(1, 1) = 0; // gt(1, 2) = 1; // gdalImage xyzRaster = CreategdalImage(outPixelXYZPath, prfcount, freqcount, 3, gt, QString(""), false, true,true,GDT_Float64); // std::shared_ptr antpos = echoL0ds->getAntPos(); // dx = (echoL0ds->getFarRange()-echoL0ds->getNearRange())/(echoL0ds->getPlusePoints()-1); // Rnear = echoL0ds->getNearRange(); // double Rref = echoL0ds->getRefPhaseRange(); // double centerInc = echoL0ds->getCenterAngle()*d2r; // long echocol = Memory1GB * 1.0 / 8 / 4 / prfcount * 6; // qDebug() << "echocol:\t " << echocol ; // echocol = echocol < 1 ? 1: echocol; // // // std::shared_ptr Pxs((double*)mallocCUDAHost(sizeof(double)*prfcount), FreeCUDAHost); // std::shared_ptr Pys((double*)mallocCUDAHost(sizeof(double) * prfcount), FreeCUDAHost); // std::shared_ptr Pzs((double*)mallocCUDAHost(sizeof(double) * prfcount), FreeCUDAHost); // std::shared_ptr AntDirectX((double*)mallocCUDAHost(sizeof(double) * prfcount), FreeCUDAHost); // std::shared_ptr AntDirectY((double*)mallocCUDAHost(sizeof(double) * prfcount), FreeCUDAHost); // std::shared_ptr AntDirectZ((double*)mallocCUDAHost(sizeof(double) * prfcount), FreeCUDAHost); // // { // std::shared_ptr antpos = echoL0ds->getAntPos(); // double time = 0; // double Px = 0; // double Py = 0; // double Pz = 0; // for (long i = 0; i < prfcount; i++) { // // Pxs.get()[i] = antpos.get()[i * 19 + 1]; // 卫星坐标 // Pys.get()[i] = antpos.get()[i * 19 + 2]; // Pzs.get()[i] = antpos.get()[i * 19 + 3]; // AntDirectX.get()[i] = antpos.get()[i * 19 + 13];// zero doppler // AntDirectY.get()[i] = antpos.get()[i * 19 + 14]; // AntDirectZ.get()[i] = antpos.get()[i * 19 + 15]; // // double NormAnt = std::sqrt(AntDirectX.get()[i] * AntDirectX.get()[i] + // AntDirectY.get()[i] * AntDirectY.get()[i] + // AntDirectZ.get()[i] * AntDirectZ.get()[i]); // AntDirectX.get()[i] = AntDirectX.get()[i] / NormAnt; // AntDirectY.get()[i] = AntDirectY.get()[i] / NormAnt; // AntDirectZ.get()[i] = AntDirectZ.get()[i] / NormAnt;// 归一化 // } // antpos.reset(); // } // // std::shared_ptr d_Pxs((double*)mallocCUDADevice(sizeof(double) * prfcount), FreeCUDADevice); // std::shared_ptr d_Pys((double*)mallocCUDADevice(sizeof(double) * prfcount), FreeCUDADevice); // std::shared_ptr d_Pzs((double*)mallocCUDADevice(sizeof(double) * prfcount), FreeCUDADevice); // std::shared_ptr d_AntDirectX((double*)mallocCUDADevice(sizeof(double) * prfcount), FreeCUDADevice); // std::shared_ptr d_AntDirectY((double*)mallocCUDADevice(sizeof(double) * prfcount), FreeCUDADevice); // std::shared_ptr d_AntDirectZ((double*)mallocCUDADevice(sizeof(double) * prfcount), FreeCUDADevice); // // HostToDevice(Pxs.get(), d_Pxs.get(), sizeof(double) * prfcount); // HostToDevice(Pys.get(), d_Pys.get(), sizeof(double) * prfcount); // HostToDevice(Pzs.get(), d_Pzs.get(), sizeof(double) * prfcount); // HostToDevice(AntDirectX.get(), d_AntDirectX.get(), sizeof(double) * prfcount); // HostToDevice(AntDirectY.get(), d_AntDirectY.get(), sizeof(double) * prfcount); // HostToDevice(AntDirectZ.get(), d_AntDirectZ.get(), sizeof(double) * prfcount); // // // for (long startcolidx = 0; startcolidx < freqcount; startcolidx = startcolidx + echocol) { // // long tempechocol = echocol; // if (startcolidx + tempechocol >= freqcount) { // tempechocol = freqcount - startcolidx; // } // qDebug() << "\r[" << QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz") << "] imgxyz :\t" << startcolidx << "\t-\t" << startcolidx + tempechocol << " / " << freqcount ; // // std::shared_ptr demx = readDataArr(xyzRaster, 0, startcolidx, prfcount, tempechocol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); // std::shared_ptr demy = readDataArr(xyzRaster, 0, startcolidx, prfcount, tempechocol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); // std::shared_ptr demz = readDataArr(xyzRaster, 0, startcolidx, prfcount, tempechocol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); // // std::shared_ptr h_demx((double*)mallocCUDAHost(sizeof(double) * prfcount*tempechocol), FreeCUDAHost); // std::shared_ptr h_demy((double*)mallocCUDAHost(sizeof(double) * prfcount*tempechocol), FreeCUDAHost); // std::shared_ptr h_demz((double*)mallocCUDAHost(sizeof(double) * prfcount*tempechocol), FreeCUDAHost); // //#pragma omp parallel for // for (long ii = 0; ii < prfcount; ii++) { // for (long jj = 0; jj < tempechocol; jj++) { // h_demx.get()[ii*tempechocol+jj]=demx.get()[ii*tempechocol+jj]; // h_demy.get()[ii*tempechocol+jj]=demy.get()[ii*tempechocol+jj]; // h_demz.get()[ii*tempechocol+jj]=demz.get()[ii*tempechocol+jj]; // } // } // // std::shared_ptr d_demx((double*)mallocCUDADevice(sizeof(double) * prfcount * tempechocol), FreeCUDADevice); // std::shared_ptr d_demy((double*)mallocCUDADevice(sizeof(double) * prfcount * tempechocol), FreeCUDADevice); // std::shared_ptr d_demz((double*)mallocCUDADevice(sizeof(double) * prfcount * tempechocol), FreeCUDADevice); // // HostToDevice(h_demx.get(), d_demx.get(), sizeof(double) * prfcount * tempechocol); // HostToDevice(h_demy.get(), d_demy.get(), sizeof(double) * prfcount * tempechocol); // HostToDevice(h_demz.get(), d_demz.get(), sizeof(double) * prfcount * tempechocol); // // // TIMEBPCreateImageGrid( // d_Pxs.get(), d_Pys.get(), d_Pzs.get(), // d_AntDirectX.get(), d_AntDirectY.get(), d_AntDirectZ.get(), // d_demx.get(), d_demy.get(), d_demz.get(), // prfcount, tempechocol, 0, // Rnear+dx* startcolidx, dx, refRange // 更新最近修读 // ); // // DeviceToHost(h_demx.get(), d_demx.get(), sizeof(double) * prfcount * tempechocol); // DeviceToHost(h_demy.get(), d_demy.get(), sizeof(double) * prfcount * tempechocol); // DeviceToHost(h_demz.get(), d_demz.get(), sizeof(double) * prfcount * tempechocol); // //#pragma omp parallel for // for (long ii = 0; ii < prfcount; ii++) { // for (long jj = 0; jj < tempechocol; jj++) { // demx.get()[ii * tempechocol + jj]=h_demx.get()[ii * tempechocol + jj] ; // demy.get()[ii * tempechocol + jj]=h_demy.get()[ii * tempechocol + jj] ; // demz.get()[ii * tempechocol + jj]=h_demz.get()[ii * tempechocol + jj] ; // } // } // // xyzRaster.saveImage(demx, 0, startcolidx,prfcount,tempechocol, 1); // xyzRaster.saveImage(demy, 0, startcolidx,prfcount,tempechocol, 2); // xyzRaster.saveImage(demz, 0, startcolidx,prfcount,tempechocol, 3); // } // //}