From 3c1fd3a91a57f57063a6dc9cae590277f284cac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A2=9E=E8=BE=89?= <3045316072@qq.com> Date: Wed, 2 Apr 2025 16:47:01 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86=E6=88=90=E5=83=8F?= =?UTF-8?q?=E6=A3=80=E7=B4=A2=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPUBaseLib/GPUTool/GPUTool.cuh | 2 +- .../SimulationSAR/GPURFPC.cu | 93 +++++++------------ .../SimulationSAR/QImageSARRFPC.ui | 6 +- .../SimulationSAR/RFPCProcessCls.cpp | 13 ++- 4 files changed, 45 insertions(+), 69 deletions(-) diff --git a/GPUBaseLib/GPUTool/GPUTool.cuh b/GPUBaseLib/GPUTool/GPUTool.cuh index b380dc5..c2bd1d4 100644 --- a/GPUBaseLib/GPUTool/GPUTool.cuh +++ b/GPUBaseLib/GPUTool/GPUTool.cuh @@ -21,7 +21,7 @@ #define BLOCK_SIZE 256 #define SHAREMEMORY_BYTE 49152 #define SHAREMEMORY_FLOAT_HALF_STEP 2 -#define SHAREMEMORY_FLOAT_HALF SHAREMEMORY_FLOAT_HALF_STEP*BLOCK_SIZE +#define SHAREMEMORY_FLOAT_HALF 512 #define SHAREMEMORY_DEM_STEP 768 #define SHAREMEMORY_Reflect 612 diff --git a/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu b/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu index bb27ce1..62b3a4d 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu +++ b/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu @@ -490,16 +490,14 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( double maxGain, double GainWeight, float* d_temp_R, float* d_temp_amps// 计算输出 ) { - long long tid = threadIdx.x; - long long bid = blockIdx.x; - long long dmx = blockDim.x; - long long idx = bid*dmx+tid; // 获取当前的线程编码 - long long prfId = idx / SHAREMEMORY_FLOAT_HALF; - long long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点 - - //if (prfId > 20000) { - // printf("prfid %d,PRFCount : %d\n", prfId, PRFCount); - //} + int tid = threadIdx.x; + int bid = blockIdx.x; + int dmx = blockDim.x; + int idx = bid*dmx+tid; // 获取当前的线程编码 + int prfId = idx / SHAREMEMORY_FLOAT_HALF; + int posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点 + //d_temp_R[idx] = pixelcount; + //d_temp_amps[idx] = posId; if (prfId < PRFCount && posId < pixelcount) { SateState antp = antlist[prfId]; @@ -510,7 +508,7 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // 矢量距离 if (RstRFarR) { - d_temp_R[idx] = RstR; + d_temp_R[idx] = 0; d_temp_amps[idx] = 0; return; } @@ -530,8 +528,8 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( float localangle = acosf(dotAB / (slopR)); if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) { - d_temp_R[idx] = RstR; - d_temp_amps[idx] = localangle; + d_temp_R[idx] = 0; + d_temp_amps[idx] = 0; return; } else {} @@ -562,41 +560,24 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( sigma = powf(10.0, sigma / 10.0); float temp_amp = float(ampGain * Pt * sigma); - float temp_R = float(RstR - refPhaseRange); - + double temp_R = RstR - refPhaseRange; if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) { + d_temp_R[idx] = 0; + d_temp_amps[idx] = 0; return; } else { - d_temp_amps[idx] = temp_amp ; - d_temp_R[idx] = temp_R ; - //if (prfId > 200) { - // printf("idx:%lld , prfid:%lld , d_temp_amp=%e,d_temp_R=%e\n", idx, prfId,d_temp_amps[idx], d_temp_R[idx]); - //} - //return; + d_temp_amps[idx] = temp_amp; + d_temp_R[idx] =static_cast(temp_R); + return; } - //if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) { - // printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R); - // d_temp_R[idx] = 0; - // d_temp_amps[idx] = 0; - // return; - //} - //else { - // if (prfId > 200) { - // printf("error prfId:%ld RstR:%e antP=[%e,%e,%e] ,gp=[%e,%e,%e],temp_amp=%e,temp_R=%e\n", prfId, RstR, - // antp.Px, antp.Py, antp.Pz, gp.Tx, gp.Ty, gp.Tz, - // temp_amp, temp_R); - // } - // d_temp_amps[idx] = temp_amp; - // d_temp_R[idx] = temp_R; - // return; - //} + } } } else { d_temp_R[idx] = 0; - d_temp_amps[idx] = 0; + d_temp_amps[idx] =0; return; } @@ -612,24 +593,20 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern( __shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // 注意一个完整的block_size 共享相同内存 __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF]; - long long tid = threadIdx.x; - long long bid = blockIdx.x; - long long idx = bid * blockDim.x + tid; - long long prfId = idx / nextfreqNum; // 脉冲ID - long long fId = idx % nextfreqNum;//频率ID + long tid = threadIdx.x; + long bid = blockIdx.x; + long idx = bid * blockDim.x + tid; + long prfId = idx / nextfreqNum; // 脉冲ID + long fId = idx % nextfreqNum;//频率ID - long long psid = 0; - long long pixelId = 0; + long psid = 0; + long pixelId = 0; for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) { // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE=SHAREMEMORY_FLOAT_HALF psid = ii * BLOCK_SIZE + tid; pixelId = prfId * SHAREMEMORY_FLOAT_HALF + psid; s_R[psid] = d_temp_R[pixelId]; s_amp[psid] = d_temp_amps[pixelId]; - if (prfId > 200) { - printf(" s_R=%e S_amp=%e d_temp_R=%e d_temp_amps=%e idx=%lld prfid=%lld\n", s_R[psid], s_amp[psid], - d_temp_R[pixelId], d_temp_amps[pixelId], idx, prfId - ); - } + } __syncthreads(); // 确定所有待处理数据都已经进入程序中 @@ -656,11 +633,11 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern( //if (dataid > 5000) { // printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]); //} - if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y) - || isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y) - ) { - printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y); - } + //if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y) + // || isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y) + // ) { + // printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y); + //} } @@ -710,9 +687,9 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid, float* h_R, float* h PrintLasterError("CUDA_Kernel_Computer_R_amp"); cudaDeviceSynchronize(); - DeviceToHost(h_R, d_R, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float)); - DeviceToHost(h_amp, d_amps, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float)); - break; + //DeviceToHost(h_R, d_R, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float)); + //DeviceToHost(h_amp, d_amps, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float)); + //break; cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE- 1) / BLOCK_SIZE; CUDA_Kernel_Computer_echo_NoAntPattern << > > ( diff --git a/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui b/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui index 5d7afcf..9ff2ec6 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui +++ b/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui @@ -103,7 +103,7 @@ - D:/FZSimulation/LTDQ/Input/testEcho/LandCover.dat + D:/FZSimulation/LTDQ/Input/DEM30/LandCover.dat @@ -233,7 +233,7 @@ - D:/FZSimulation/LTDQ/Input/testEcho/DEMXYZ.dat + D:/FZSimulation/LTDQ/Input/DEM30/DEM_XYZ.dat @@ -350,7 +350,7 @@ - D:/FZSimulation/LTDQ/Input/testEcho/DEMSloper.dat + D:/FZSimulation/LTDQ/Input/DEM30/DEM_Sloper.dat diff --git a/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp b/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp index fe5dc8a..7001179 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp +++ b/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp @@ -1119,7 +1119,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si h_antlist.get()[0].Vx, h_antlist.get()[0].Vy, h_antlist.get()[0].Vz, h_antlist.get()[0].antDirectX, h_antlist.get()[0].antDirectY, h_antlist.get()[0].antDirectZ ); - testOutAmpArr(QString("antlist_%1.dat").arg(devId), (double*)(h_antlist.get()), prfcount, 9); + //testOutAmpArr(QString("antlist_%1.dat").arg(devId), (double*)(h_antlist.get()), prfcount, 9); } @@ -1136,13 +1136,11 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si task.targetnum = clscount; task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId); HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount); - - - ProcessRFPCTask(task, devId, h_R, h_amp); - testOutDataArr(QString("h_R_%1.bin").arg(devId), h_R, prfcount, SHAREMEMORY_FLOAT_HALF); - exit(-1); + //testOutDataArr(QString("h_R_%1.bin").arg(devId), h_R, prfcount, SHAREMEMORY_FLOAT_HALF); + //testOutDataArr(QString("h_amp_%1.bin").arg(devId), h_amp, prfcount, SHAREMEMORY_FLOAT_HALF); + //exit(-1); FreeCUDADevice(task.goallist); } @@ -1153,7 +1151,8 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si FreeCUDADevice(task.d_echoData); FreeCUDADevice(task.antlist); //FreeCUDADevice(task.goallist); - + FreeCUDAHost(h_R); + FreeCUDAHost(h_amp); } PRINT("dem cover processbar: [100 precent]\n");