更新TBPImage代码

pull/3/head
陈增辉 2024-12-24 16:18:14 +08:00
parent daac65f763
commit 7af5963ad3
2 changed files with 19 additions and 81 deletions

View File

@ -14,7 +14,7 @@
#define __CUDANVCC___ // 定义CUDA函数
#define __PRFDEBUG__
//#define __PRFDEBUG__

View File

@ -221,7 +221,6 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
// 按照回波分块,图像分块
long echoBlockline = Memory1GB / 8 / 2 / PlusePoints * 6;
echoBlockline = echoBlockline < 1 ? 1 : echoBlockline;
@ -268,11 +267,10 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
tempechoBlockline, PlusePoints );
}
this->L1ds->saveImageRaster(imgArr, startimgrowid, tempimgBlockline);
}
qDebug() << "\r[" << QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz") << "] image writing:\t" << this->L1ds->getxmlFilePath();
this->L1ds->saveToXml();
return ErrorCode::SUCCESS;
}
@ -289,6 +287,7 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
float* h_antPx = (float*)mallocCUDAHost(sizeof(float) * prfcount);
float* h_antPy = (float*)mallocCUDAHost(sizeof(float) * prfcount);
float* h_antPz = (float*)mallocCUDAHost(sizeof(float) * prfcount);
float* d_antPx = (float*)mallocCUDADevice(sizeof(float) * prfcount);
float* d_antPy = (float*)mallocCUDADevice(sizeof(float) * prfcount);
float* d_antPz = (float*)mallocCUDADevice(sizeof(float) * prfcount);
@ -296,13 +295,15 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
float* h_imgx = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* h_imgy = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* h_imgz = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* d_imgx = (float*)mallocCUDADevice(sizeof(float) * rowcount * colcount);
float* d_imgy = (float*)mallocCUDADevice(sizeof(float) * rowcount * colcount);
float* d_imgz = (float*)mallocCUDADevice(sizeof(float) * rowcount * colcount);
cuComplex* h_echoArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * prfcount * freqcount);
cuComplex* h_imgArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * rowcount * colcount);
cuComplex* d_echoArr = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * prfcount * freqcount);
cuComplex* h_imgArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * rowcount * colcount);
cuComplex* d_imgArr = (cuComplex*)mallocCUDADevice( sizeof(cuComplex) * rowcount * colcount);
// 初始化
@ -342,33 +343,26 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
HostToDevice(h_antPx, d_antPx, sizeof(float) * prfcount);
HostToDevice(h_antPy, d_antPy, sizeof(float) * prfcount);
HostToDevice(h_antPz, d_antPz, sizeof(float) * prfcount);
HostToDevice(h_imgx, d_imgx, sizeof(float) * rowcount * colcount);
HostToDevice(h_imgy, d_imgy, sizeof(float) * rowcount * colcount);
HostToDevice(h_imgz, d_imgz, sizeof(float) * rowcount * colcount);
HostToDevice(h_echoArr, d_echoArr, sizeof(cuComplex) * prfcount * freqcount);
HostToDevice(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
for (long prfid = 0; prfid < prfcount; prfid++) {
CUDATBPImage(
d_antPx,
d_antPy,
d_antPz,
d_imgx,
d_imgy,
d_imgz,
d_echoArr,
d_imgArr,
d_antPx,d_antPy,d_antPz,
d_imgx,d_imgy,d_imgz,
d_echoArr,d_imgArr,
freq, fs, Rnear, Rfar,
rowcount, colcount,
prfid, freqcount
);
}
// Device -> Host
DeviceToHost(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
@ -381,25 +375,14 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
}
FreeCUDAHost(h_antPx);
FreeCUDAHost(h_antPy);
FreeCUDAHost(h_antPz);
FreeCUDADevice(d_antPx);
FreeCUDADevice(d_antPy);
FreeCUDADevice(d_antPz);
FreeCUDAHost(h_imgx);
FreeCUDAHost(h_imgy);
FreeCUDAHost(h_imgz);
FreeCUDADevice(d_imgx);
FreeCUDADevice(d_imgy);
FreeCUDADevice(d_imgz);
FreeCUDAHost(h_echoArr);
FreeCUDAHost(h_imgArr);
FreeCUDADevice(d_echoArr);
FreeCUDADevice(d_imgArr);
FreeCUDAHost(h_antPx); FreeCUDADevice(d_antPx);
FreeCUDAHost(h_antPy); FreeCUDADevice(d_antPy);
FreeCUDAHost(h_antPz); FreeCUDADevice(d_antPz);
FreeCUDAHost(h_imgx); FreeCUDADevice(d_imgx);
FreeCUDAHost(h_imgy); FreeCUDADevice(d_imgy);
FreeCUDAHost(h_imgz); FreeCUDADevice(d_imgz);
FreeCUDAHost(h_echoArr); FreeCUDADevice(d_echoArr);
FreeCUDAHost(h_imgArr); FreeCUDADevice(d_imgArr);
}
@ -415,51 +398,6 @@ bool TBPImageAlgCls::getGPU( )
return this->GPURUN;
}
/// <summary>
/// TBP imaging GPU entry point (overload that receives the L1 dataset).
/// Logs the imaging parameters, walks the image in row blocks (the per-block
/// GPU call is currently commented out, so the loop only prints progress),
/// then writes img_arr and the XML metadata through L1ds.
/// </summary>
/// <param name="antPx">Antenna coordinates (together with antPy, antPz).</param>
/// <param name="antVx">Unused in this function (together with antVy, antVz); presumably antenna velocity - TODO confirm.</param>
/// <param name="img_x">Image coordinates (together with img_y, img_z); each must hold at least one element because element 0 is logged.</param>
/// <param name="echoArr">Echo matrix (unused while the block call is disabled).</param>
/// <param name="img_arr">Image matrix that is saved through L1ds.</param>
/// <param name="freq">Used to compute factorj = freq * 4 * PI / LIGHTSPEED.</param>
/// <param name="rowcount">Number of image rows; also the row count passed to saveImageRaster.</param>
/// <param name="colcount">Number of image columns; used to size the row blocks.</param>
/// <param name="L1ds">Dataset that receives the image raster and XML output.</param>
void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy, std::shared_ptr<float> antPz, // antenna coordinates
    std::shared_ptr<float> antVx, std::shared_ptr<float> antVy, std::shared_ptr<float> antVz,
    std::shared_ptr<float> img_x, std::shared_ptr<float> img_y, std::shared_ptr<float> img_z, // image coordinates
    std::shared_ptr<std::complex<double>> echoArr, std::shared_ptr<std::complex<double>> img_arr,
    float freq, float fs, float Rnear, float Rfar,
    long rowcount, long colcount, std::shared_ptr<SARSimulationImageL1Dataset> L1ds) {
    float factorj = freq * 4 * PI / LIGHTSPEED;

    qDebug() << "factorj:\t" << factorj;
    qDebug() << "freq:\t" << freq;
    qDebug() << "fs:\t" << fs;
    qDebug() << "Rnear:\t" << Rnear;
    qDebug() << "Rfar:\t" << Rfar;
    qDebug() << "img_x:\t" << img_x.get()[0];
    qDebug() << "img_y:\t" << img_y.get()[0];
    qDebug() << "img_z:\t" << img_z.get()[0];

    // Rows per block, derived from a budget of Memory1MB * 1000 bytes of float
    // data per block. Guard the division: colcount == 0 would otherwise divide
    // by zero. The block height is never allowed to drop below 10 rows.
    long blockline = colcount > 0 ? Memory1MB * 1000 / sizeof(float) / colcount : 10;
    blockline = blockline < 10 ? 10 : blockline;

    for (long startline = 0; startline < rowcount; startline = startline + blockline) {
        // The last block may be shorter than blockline.
        long stepline = startline + blockline < rowcount ? blockline : rowcount - startline;

        // Progress "startline / rowcount". The separator used to be written as
        // backslash-space, which is not a valid escape sequence.
        std::cout << startline << " / " << rowcount << " " << stepline << " start " << std::endl;

        // NOTE: the per-block GPU call is disabled; kept for reference.
        //TBPImageGPUBlock(antPx.get(), antPy.get(), antPz.get(), img_x.get(), img_y.get(), img_z.get(),
        //	echoArr, rowcount, colcount,
        //	img_arr,
        //	freq, fs, Rnear, Rfar, factorj, startline, stepline,
        //	stepline, colcount);
        //std::cout << startline << " / " << rowcount << " " << stepline << " end " << std::endl;
        //L1ds->saveImageRaster(img_arr, 0, rowcount);
    }

    // Persist the whole image and its metadata once, after all blocks.
    L1ds->saveImageRaster(img_arr, 0, rowcount);
    L1ds->saveToXml();
}
/**
ErrorCode TBPImageAlgCls::ProcessCPU(long num_thread)