提交测试代码

pull/3/head
陈增辉 2024-12-29 12:05:41 +08:00
parent 6c46507ae8
commit 7ba47e2761
5 changed files with 87 additions and 53 deletions

View File

@ -16,7 +16,7 @@
//#define __PRFDEBUG__
//#define __TBPIMAGEDEBUG__
#define __TBPIMAGEDEBUG__
//#include <mkl.h>
#include <complex>

View File

@ -19,8 +19,12 @@
__global__ void CUDA_TBPImage(
float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz, float* RArr,
cuComplex* echoArr, cuComplex* imgArr,
float* imgx, float* imgy, float* imgz,
float* RArr,
long* Cids,
cuComplex* echoArr,
cuComplex* imgArr,
cuComplex* imgEchoArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount
@ -28,17 +32,29 @@ __global__ void CUDA_TBPImage(
int idx = blockIdx.x * blockDim.x + threadIdx.x;
//printf("\nidx:\t %d %d %d\n", idx, linecount, plusepoint);
if (idx < rowcount * colcount) {
float R = sqrtf(powf(antPx[prfid] - imgx[idx], 2) + powf(antPy[prfid] - imgy[idx], 2) + powf(antPz[prfid] - imgz[idx], 2));
float Ridf = 2 * (R - Rnear) / LIGHTSPEED * fs;
long Rid = floorf(Ridf);
float px = antPx[prfid];
float py = antPy[prfid];
float pz = antPz[prfid];
float tx = imgx[idx];
float ty = imgy[idx];
float tz = imgz[idx];
float R = sqrtf((px-tx) * (px - tx) + (py-ty) * (py-ty) + (pz-tz) * (pz-tz));
float Cidf= 2 * (R - Rnear) / LIGHTSPEED * fs;
long Cid = floorf(Cidf);
RArr[idx] = R;
if(Rid <0|| Rid >= freqcount){}
Cids[idx] = Cid;
if(Cid <0|| Cid >= freqcount){}
else {
float factorj = freq * 4 * PI / LIGHTSPEED;
cuComplex Rfactorj = make_cuComplex(0, factorj * R);
cuComplex Rphi =cuCexpf(Rfactorj);// 校正项 (correction term)
cuComplex echotime = echoArr[prfid * freqcount + Rid];
imgArr[idx] = cuCaddf(imgArr[idx], cuCmulf(echotime, Rphi));// 矫正 (correction)
cuComplex echotemp = echoArr[prfid * freqcount + Cid];
imgEchoArr[idx] = echotemp;
imgArr[idx] = cuCaddf(imgArr[idx], cuCmulf(echotemp, Rphi));// 矫正 (correction)
//printf("R=%f;Rid=%d;factorj=%f;Rfactorj=complex(%f,%f);Rphi=complex(%f,%f);\n", R, Rid, factorj,
// Rfactorj.x, Rfactorj.y,
// Rphi.x, Rphi.y);
@ -50,7 +66,10 @@ __global__ void CUDA_TBPImage(
extern "C" void CUDATBPImage(float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz,
float* R,
cuComplex* echoArr, cuComplex* imgArr,
long* Cids,
cuComplex* echoArr,
cuComplex* imgArr,
cuComplex* imgEchoArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount)
@ -63,8 +82,8 @@ extern "C" void CUDATBPImage(float* antPx, float* antPy, float* antPz,
CUDA_TBPImage << <numBlocks, blockSize >> > (
antPx, antPy, antPz,
imgx, imgy, imgz,
R,
echoArr, imgArr,
R, Cids,
echoArr, imgArr, imgEchoArr,
freq, fs, Rnear, Rfar,
rowcount, colcount,
prfid, freqcount

View File

@ -28,8 +28,10 @@ extern "C" void CUDATBPImage(
float* imgy,
float* imgz,
float* R,
long* Cids,
cuComplex* echoArr,
cuComplex* imgArr,
cuComplex* imgEchoArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfid, long freqcount

View File

@ -233,10 +233,10 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
// 按照回波分块,图像分块
long echoBlockline = Memory1GB / 8 / 2 / PlusePoints * 3;
long echoBlockline = Memory1GB / 8 / 2 / PlusePoints * 2;
echoBlockline = echoBlockline < 1 ? 1 : echoBlockline;
long imageBlockline = Memory1GB / 8 / 2 / colCount * 3;
long imageBlockline = Memory1GB / 8 / 2 / colCount * 2;
imageBlockline = imageBlockline < 1 ? 1 : imageBlockline;
gdalImage imageXYZ(this->outRasterXYZPath);
@ -278,7 +278,9 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
freq, fs,
Rnear, Rfar,
tempimgBlockline, colCount,
tempechoBlockline, PlusePoints );
tempechoBlockline, PlusePoints,
startechoid
);
}
this->L1ds->saveImageRaster(imgArr, startimgrowid, tempimgBlockline);
@ -295,8 +297,8 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
std::shared_ptr<std::complex<float>> imgArr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfcount, long freqcount
long prfcount, long freqcount,
long startPRFId
)
{
// 声明GPU变量
@ -322,11 +324,14 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
cuComplex* h_imgArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * rowcount * colcount);
cuComplex* d_imgArr = (cuComplex*)mallocCUDADevice( sizeof(cuComplex) * rowcount * colcount);
cuComplex* h_imgEchoArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * rowcount * colcount);
cuComplex* d_imgEchoArr = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * rowcount * colcount);
float* h_R=(float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* d_R=(float*)mallocCUDADevice(sizeof(float) * rowcount * colcount);
long* h_CIdx = (long*)mallocCUDAHost(sizeof(long) * rowcount * colcount);
long* d_CIdx = (long*)mallocCUDADevice(sizeof(long) * rowcount * colcount);
// 初始化
// 天线位置
@ -370,71 +375,77 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
HostToDevice(h_imgy, d_imgy, sizeof(float) * rowcount * colcount);
HostToDevice(h_imgz, d_imgz, sizeof(float) * rowcount * colcount);
HostToDevice(h_R, d_R, sizeof(float) * rowcount * colcount);
HostToDevice(h_CIdx, d_CIdx, sizeof(long) * rowcount * colcount);
HostToDevice(h_echoArr, d_echoArr, sizeof(cuComplex) * prfcount * freqcount);
HostToDevice(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
HostToDevice(h_imgEchoArr, d_imgEchoArr, sizeof(cuComplex) * rowcount * colcount);
#ifdef __TBPIMAGEDEBUG__
// 定义采样点 (define sample points)
long tc[4] = { 6956,6542,7003,6840};
long tr[4] = { 1100,9324,9415,11137 };
std::shared_ptr<float> Rlist(new float[4*prfcount], delArrPtr);
std::shared_ptr<long> CIdslist(new long[4*prfcount], delArrPtr);
std::shared_ptr<float> imgchoReal (new float[4 * prfcount], delArrPtr);
std::shared_ptr<float> imgchoImag (new float[4 * prfcount], delArrPtr);
std::shared_ptr<float> imgdataReal(new float[4 * prfcount], delArrPtr);
std::shared_ptr<float> imgdataImag(new float[4 * prfcount], delArrPtr);
#endif
for (long prfid = 0; prfid < prfcount; prfid++) {
CUDATBPImage(
d_antPx,d_antPy,d_antPz,
d_imgx,d_imgy,d_imgz,
d_R,
d_CIdx,
d_echoArr,
d_imgArr,
d_imgEchoArr,
freq, fs, Rnear, Rfar,
rowcount, colcount,
prfid, freqcount
);
#ifdef __TBPIMAGEDEBUG__
// 判断当前坐标情况下的 各块的计算情况
qDebug() << "Ant=[" << h_antPx[prfid] << " " << h_antPy[prfid] << " " << h_antPz[prfid] << "];";
DeviceToHost(h_R, d_R, sizeof(float) * rowcount * colcount);
DeviceToHost(h_CIdx, d_CIdx, sizeof(float) * rowcount * colcount);
DeviceToHost(h_imgEchoArr, d_imgEchoArr, sizeof(cuComplex) * rowcount * colcount);
DeviceToHost(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
testOutAmpArr(QString("imge_R_%1").arg(prfid), h_R, rowcount, colcount);
testOutAmpArr(QString("imge_X_%1").arg(prfid), h_imgx, rowcount, colcount);
testOutAmpArr(QString("imge_Y_%1").arg(prfid), h_imgy, rowcount, colcount);
testOutAmpArr(QString("imge_Z_%1").arg(prfid), h_imgz, rowcount, colcount);
long pixelcount = rowcount * colcount;
float* h_echoAmp_real = (float*)mallocCUDAHost(sizeof(float) * prfcount* freqcount);
float* h_echoAmp_imag = (float*)mallocCUDAHost(sizeof(float) * prfcount * freqcount);
float* h_echoAmp_abs = (float*)mallocCUDAHost(sizeof(float) * prfcount * freqcount);
for (long freqi = 0; freqi < prfcount * freqcount; freqi++) {
h_echoAmp_real[freqi] = h_echoArr[freqi].x;
h_echoAmp_imag[freqi] = h_echoArr[freqi].y;
h_echoAmp_abs[freqi] = 20 * std::log10(std::abs(std::complex<double>(h_echoAmp_real[freqi], h_echoAmp_imag[freqi])));
for (long iii = 0; iii < 4; iii++) {
Rlist.get()[prfid * 4 + iii] = h_R[tr[iii] * colcount + tc[iii]];
CIdslist.get()[prfid * 4 + iii] = h_CIdx[tr[iii] * colcount + tc[iii]];
imgchoReal.get()[prfid * 4 + iii] = h_imgEchoArr[tr[iii] * colcount + tc[iii]].x;
imgchoImag.get()[prfid * 4 + iii] = h_imgEchoArr[tr[iii] * colcount + tc[iii]].y;
imgdataReal.get()[prfid * 4 + iii] = h_imgArr[tr[iii] * colcount + tc[iii]].x;
imgdataImag.get()[prfid * 4 + iii] = h_imgArr[tr[iii] * colcount + tc[iii]].y;
}
testOutAmpArr(QString("h_echo_absdB_%1.bin").arg(prfid), h_echoAmp_abs, prfcount, freqcount);
float* h_imgAmp_real = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* h_imgAmp_imag = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* h_imgAmp_abs = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
for (long freqi = 0; freqi < rowcount * colcount; freqi++) {
h_imgAmp_real[freqi] = h_imgArr[freqi].x;
h_imgAmp_imag[freqi] = h_imgArr[freqi].y;
h_imgAmp_abs[freqi] = 20 * std::log10(std::abs(std::complex<double>(h_imgAmp_real[freqi], h_imgAmp_imag[freqi])));
}
testOutAmpArr(QString("h_image_absdB_%1.bin").arg(prfid), h_imgAmp_abs, rowcount, colcount);
exit(-1);
#endif
}
// Device -> Host
DeviceToHost(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
#ifdef __TBPIMAGEDEBUG__
testOutAmpArr(QString("Rlist_%1.bin").arg(startPRFId), Rlist.get(), prfcount, 4);
testOutAmpArr(QString("imgchoReal_%1.bin").arg(startPRFId), imgchoReal.get(), prfcount, 4);
testOutAmpArr(QString("imgchoImag_%1.bin").arg(startPRFId), imgchoImag.get(), prfcount, 4);
testOutAmpArr(QString("imgdataReal_%1.bin").arg(startPRFId), imgdataReal.get(), prfcount, 4);
testOutAmpArr(QString("imgdataImag_%1.bin").arg(startPRFId), imgdataImag.get(), prfcount, 4);
testOutClsArr(QString("CIdslist_%1.bin").arg(startPRFId), CIdslist.get(), prfcount, 4);
#endif

View File

@ -71,4 +71,6 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
std::shared_ptr<std::complex<float>> img_arr,
float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount,
long prfcount,long freqcount );
long prfcount,long freqcount,
long startPRFId
);