提交测试代码

pull/3/head
陈增辉 2024-12-29 12:05:41 +08:00
parent 6c46507ae8
commit 7ba47e2761
5 changed files with 87 additions and 53 deletions

View File

@ -16,7 +16,7 @@
//#define __PRFDEBUG__ //#define __PRFDEBUG__
//#define __TBPIMAGEDEBUG__ #define __TBPIMAGEDEBUG__
//#include <mkl.h> //#include <mkl.h>
#include <complex> #include <complex>

View File

@ -19,8 +19,12 @@
__global__ void CUDA_TBPImage( __global__ void CUDA_TBPImage(
float* antPx, float* antPy, float* antPz, float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz, float* RArr, float* imgx, float* imgy, float* imgz,
cuComplex* echoArr, cuComplex* imgArr, float* RArr,
long* Cids,
cuComplex* echoArr,
cuComplex* imgArr,
cuComplex* imgEchoArr,
float freq, float fs, float Rnear, float Rfar, float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount, long rowcount, long colcount,
long prfid, long freqcount long prfid, long freqcount
@ -28,17 +32,29 @@ __global__ void CUDA_TBPImage(
int idx = blockIdx.x * blockDim.x + threadIdx.x; int idx = blockIdx.x * blockDim.x + threadIdx.x;
//printf("\nidx:\t %d %d %d\n", idx, linecount, plusepoint); //printf("\nidx:\t %d %d %d\n", idx, linecount, plusepoint);
if (idx < rowcount * colcount) { if (idx < rowcount * colcount) {
float R = sqrtf(powf(antPx[prfid] - imgx[idx], 2) + powf(antPy[prfid] - imgy[idx], 2) + powf(antPz[prfid] - imgz[idx], 2));
float Ridf = 2 * (R - Rnear) / LIGHTSPEED * fs;
long Rid = floorf(Ridf); float px = antPx[prfid];
float py = antPy[prfid];
float pz = antPz[prfid];
float tx = imgx[idx];
float ty = imgy[idx];
float tz = imgz[idx];
float R = sqrtf((px-tx) * (px - tx) + (py-ty) * (py-ty) + (pz-tz) * (pz-tz));
float Cidf= 2 * (R - Rnear) / LIGHTSPEED * fs;
long Cid = floorf(Cidf);
RArr[idx] = R; RArr[idx] = R;
if(Rid <0|| Rid >= freqcount){} Cids[idx] = Cid;
if(Cid <0|| Cid >= freqcount){}
else { else {
float factorj = freq * 4 * PI / LIGHTSPEED; float factorj = freq * 4 * PI / LIGHTSPEED;
cuComplex Rfactorj = make_cuComplex(0, factorj * R); cuComplex Rfactorj = make_cuComplex(0, factorj * R);
cuComplex Rphi =cuCexpf(Rfactorj);// 校正项 cuComplex Rphi =cuCexpf(Rfactorj);// 校正项
cuComplex echotime = echoArr[prfid * freqcount + Rid]; cuComplex echotemp = echoArr[prfid * freqcount + Cid];
imgArr[idx] = cuCaddf(imgArr[idx], cuCmulf(echotime, Rphi));// 矫正 imgEchoArr[idx] = echotemp;
imgArr[idx] = cuCaddf(imgArr[idx], cuCmulf(echotemp, Rphi));// 矫正
//printf("R=%f;Rid=%d;factorj=%f;Rfactorj=complex(%f,%f);Rphi=complex(%f,%f);\n", R, Rid, factorj, //printf("R=%f;Rid=%d;factorj=%f;Rfactorj=complex(%f,%f);Rphi=complex(%f,%f);\n", R, Rid, factorj,
// Rfactorj.x, Rfactorj.y, // Rfactorj.x, Rfactorj.y,
// Rphi.x, Rphi.y); // Rphi.x, Rphi.y);
@ -50,7 +66,10 @@ __global__ void CUDA_TBPImage(
extern "C" void CUDATBPImage(float* antPx, float* antPy, float* antPz, extern "C" void CUDATBPImage(float* antPx, float* antPy, float* antPz,
float* imgx, float* imgy, float* imgz, float* imgx, float* imgy, float* imgz,
float* R, float* R,
cuComplex* echoArr, cuComplex* imgArr, long* Cids,
cuComplex* echoArr,
cuComplex* imgArr,
cuComplex* imgEchoArr,
float freq, float fs, float Rnear, float Rfar, float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount, long rowcount, long colcount,
long prfid, long freqcount) long prfid, long freqcount)
@ -63,8 +82,8 @@ extern "C" void CUDATBPImage(float* antPx, float* antPy, float* antPz,
CUDA_TBPImage << <numBlocks, blockSize >> > ( CUDA_TBPImage << <numBlocks, blockSize >> > (
antPx, antPy, antPz, antPx, antPy, antPz,
imgx, imgy, imgz, imgx, imgy, imgz,
R, R, Cids,
echoArr, imgArr, echoArr, imgArr, imgEchoArr,
freq, fs, Rnear, Rfar, freq, fs, Rnear, Rfar,
rowcount, colcount, rowcount, colcount,
prfid, freqcount prfid, freqcount

View File

@ -28,8 +28,10 @@ extern "C" void CUDATBPImage(
float* imgy, float* imgy,
float* imgz, float* imgz,
float* R, float* R,
long* Cids,
cuComplex* echoArr, cuComplex* echoArr,
cuComplex* imgArr, cuComplex* imgArr,
cuComplex* imgEchoArr,
float freq, float fs, float Rnear, float Rfar, float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount, long rowcount, long colcount,
long prfid, long freqcount long prfid, long freqcount

View File

@ -233,10 +233,10 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
// 按照回波分块,图像分块 // 按照回波分块,图像分块
long echoBlockline = Memory1GB / 8 / 2 / PlusePoints * 3; long echoBlockline = Memory1GB / 8 / 2 / PlusePoints * 2;
echoBlockline = echoBlockline < 1 ? 1 : echoBlockline; echoBlockline = echoBlockline < 1 ? 1 : echoBlockline;
long imageBlockline = Memory1GB / 8 / 2 / colCount * 3; long imageBlockline = Memory1GB / 8 / 2 / colCount * 2;
imageBlockline = imageBlockline < 1 ? 1 : imageBlockline; imageBlockline = imageBlockline < 1 ? 1 : imageBlockline;
gdalImage imageXYZ(this->outRasterXYZPath); gdalImage imageXYZ(this->outRasterXYZPath);
@ -278,7 +278,9 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
freq, fs, freq, fs,
Rnear, Rfar, Rnear, Rfar,
tempimgBlockline, colCount, tempimgBlockline, colCount,
tempechoBlockline, PlusePoints ); tempechoBlockline, PlusePoints,
startechoid
);
} }
this->L1ds->saveImageRaster(imgArr, startimgrowid, tempimgBlockline); this->L1ds->saveImageRaster(imgArr, startimgrowid, tempimgBlockline);
@ -295,8 +297,8 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
std::shared_ptr<std::complex<float>> imgArr, std::shared_ptr<std::complex<float>> imgArr,
float freq, float fs, float Rnear, float Rfar, float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount, long rowcount, long colcount,
long prfcount, long freqcount long prfcount, long freqcount,
long startPRFId
) )
{ {
// 声明GPU变量 // 声明GPU变量
@ -322,11 +324,14 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
cuComplex* h_imgArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * rowcount * colcount); cuComplex* h_imgArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * rowcount * colcount);
cuComplex* d_imgArr = (cuComplex*)mallocCUDADevice( sizeof(cuComplex) * rowcount * colcount); cuComplex* d_imgArr = (cuComplex*)mallocCUDADevice( sizeof(cuComplex) * rowcount * colcount);
cuComplex* h_imgEchoArr = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * rowcount * colcount);
cuComplex* d_imgEchoArr = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * rowcount * colcount);
float* h_R=(float*)mallocCUDAHost(sizeof(float) * rowcount * colcount); float* h_R=(float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* d_R=(float*)mallocCUDADevice(sizeof(float) * rowcount * colcount); float* d_R=(float*)mallocCUDADevice(sizeof(float) * rowcount * colcount);
long* h_CIdx = (long*)mallocCUDAHost(sizeof(long) * rowcount * colcount);
long* d_CIdx = (long*)mallocCUDADevice(sizeof(long) * rowcount * colcount);
// 初始化 // 初始化
// 天线位置 // 天线位置
@ -370,71 +375,77 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
HostToDevice(h_imgy, d_imgy, sizeof(float) * rowcount * colcount); HostToDevice(h_imgy, d_imgy, sizeof(float) * rowcount * colcount);
HostToDevice(h_imgz, d_imgz, sizeof(float) * rowcount * colcount); HostToDevice(h_imgz, d_imgz, sizeof(float) * rowcount * colcount);
HostToDevice(h_R, d_R, sizeof(float) * rowcount * colcount); HostToDevice(h_R, d_R, sizeof(float) * rowcount * colcount);
HostToDevice(h_CIdx, d_CIdx, sizeof(long) * rowcount * colcount);
HostToDevice(h_echoArr, d_echoArr, sizeof(cuComplex) * prfcount * freqcount); HostToDevice(h_echoArr, d_echoArr, sizeof(cuComplex) * prfcount * freqcount);
HostToDevice(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount); HostToDevice(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
HostToDevice(h_imgEchoArr, d_imgEchoArr, sizeof(cuComplex) * rowcount * colcount);
#ifdef __TBPIMAGEDEBUG__
// 定义采样点
long tc[4] = { 6956,6542,7003,6840};
long tr[4] = { 1100,9324,9415,11137 };
std::shared_ptr<float> Rlist(new float[4*prfcount], delArrPtr);
std::shared_ptr<long> CIdslist(new long[4*prfcount], delArrPtr);
std::shared_ptr<float> imgchoReal (new float[4 * prfcount], delArrPtr);
std::shared_ptr<float> imgchoImag (new float[4 * prfcount], delArrPtr);
std::shared_ptr<float> imgdataReal(new float[4 * prfcount], delArrPtr);
std::shared_ptr<float> imgdataImag(new float[4 * prfcount], delArrPtr);
#endif
for (long prfid = 0; prfid < prfcount; prfid++) { for (long prfid = 0; prfid < prfcount; prfid++) {
CUDATBPImage( CUDATBPImage(
d_antPx,d_antPy,d_antPz, d_antPx,d_antPy,d_antPz,
d_imgx,d_imgy,d_imgz, d_imgx,d_imgy,d_imgz,
d_R, d_R,
d_CIdx,
d_echoArr, d_echoArr,
d_imgArr, d_imgArr,
d_imgEchoArr,
freq, fs, Rnear, Rfar, freq, fs, Rnear, Rfar,
rowcount, colcount, rowcount, colcount,
prfid, freqcount prfid, freqcount
); );
#ifdef __TBPIMAGEDEBUG__ #ifdef __TBPIMAGEDEBUG__
// 判断当前坐标情况下的 各块的计算情况
qDebug() << "Ant=[" << h_antPx[prfid] << " " << h_antPy[prfid] << " " << h_antPz[prfid] << "];";
DeviceToHost(h_R, d_R, sizeof(float) * rowcount * colcount); DeviceToHost(h_R, d_R, sizeof(float) * rowcount * colcount);
DeviceToHost(h_CIdx, d_CIdx, sizeof(float) * rowcount * colcount);
DeviceToHost(h_imgEchoArr, d_imgEchoArr, sizeof(cuComplex) * rowcount * colcount);
DeviceToHost(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount); DeviceToHost(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
testOutAmpArr(QString("imge_R_%1").arg(prfid), h_R, rowcount, colcount);
testOutAmpArr(QString("imge_X_%1").arg(prfid), h_imgx, rowcount, colcount);
testOutAmpArr(QString("imge_Y_%1").arg(prfid), h_imgy, rowcount, colcount);
testOutAmpArr(QString("imge_Z_%1").arg(prfid), h_imgz, rowcount, colcount);
long pixelcount = rowcount * colcount;
float* h_echoAmp_real = (float*)mallocCUDAHost(sizeof(float) * prfcount* freqcount);
float* h_echoAmp_imag = (float*)mallocCUDAHost(sizeof(float) * prfcount * freqcount);
float* h_echoAmp_abs = (float*)mallocCUDAHost(sizeof(float) * prfcount * freqcount);
for (long freqi = 0; freqi < prfcount * freqcount; freqi++) {
h_echoAmp_real[freqi] = h_echoArr[freqi].x; for (long iii = 0; iii < 4; iii++) {
h_echoAmp_imag[freqi] = h_echoArr[freqi].y; Rlist.get()[prfid * 4 + iii] = h_R[tr[iii] * colcount + tc[iii]];
h_echoAmp_abs[freqi] = 20 * std::log10(std::abs(std::complex<double>(h_echoAmp_real[freqi], h_echoAmp_imag[freqi]))); CIdslist.get()[prfid * 4 + iii] = h_CIdx[tr[iii] * colcount + tc[iii]];
imgchoReal.get()[prfid * 4 + iii] = h_imgEchoArr[tr[iii] * colcount + tc[iii]].x;
imgchoImag.get()[prfid * 4 + iii] = h_imgEchoArr[tr[iii] * colcount + tc[iii]].y;
imgdataReal.get()[prfid * 4 + iii] = h_imgArr[tr[iii] * colcount + tc[iii]].x;
imgdataImag.get()[prfid * 4 + iii] = h_imgArr[tr[iii] * colcount + tc[iii]].y;
} }
testOutAmpArr(QString("h_echo_absdB_%1.bin").arg(prfid), h_echoAmp_abs, prfcount, freqcount);
float* h_imgAmp_real = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* h_imgAmp_imag = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
float* h_imgAmp_abs = (float*)mallocCUDAHost(sizeof(float) * rowcount * colcount);
for (long freqi = 0; freqi < rowcount * colcount; freqi++) {
h_imgAmp_real[freqi] = h_imgArr[freqi].x;
h_imgAmp_imag[freqi] = h_imgArr[freqi].y;
h_imgAmp_abs[freqi] = 20 * std::log10(std::abs(std::complex<double>(h_imgAmp_real[freqi], h_imgAmp_imag[freqi])));
}
testOutAmpArr(QString("h_image_absdB_%1.bin").arg(prfid), h_imgAmp_abs, rowcount, colcount);
exit(-1);
#endif #endif
} }
// Device -> Host // Device -> Host
DeviceToHost(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount); DeviceToHost(h_imgArr, d_imgArr, sizeof(cuComplex) * rowcount * colcount);
#ifdef __TBPIMAGEDEBUG__
testOutAmpArr(QString("Rlist_%1.bin").arg(startPRFId), Rlist.get(), prfcount, 4);
testOutAmpArr(QString("imgchoReal_%1.bin").arg(startPRFId), imgchoReal.get(), prfcount, 4);
testOutAmpArr(QString("imgchoImag_%1.bin").arg(startPRFId), imgchoImag.get(), prfcount, 4);
testOutAmpArr(QString("imgdataReal_%1.bin").arg(startPRFId), imgdataReal.get(), prfcount, 4);
testOutAmpArr(QString("imgdataImag_%1.bin").arg(startPRFId), imgdataImag.get(), prfcount, 4);
testOutClsArr(QString("CIdslist_%1.bin").arg(startPRFId), CIdslist.get(), prfcount, 4);
#endif

View File

@ -71,4 +71,6 @@ void TBPImageGPUAlg(std::shared_ptr<float> antPx, std::shared_ptr<float> antPy,
std::shared_ptr<std::complex<float>> img_arr, std::shared_ptr<std::complex<float>> img_arr,
float freq, float fs, float Rnear, float Rfar, float freq, float fs, float Rnear, float Rfar,
long rowcount, long colcount, long rowcount, long colcount,
long prfcount,long freqcount ); long prfcount,long freqcount,
long startPRFId
);