diff --git a/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu b/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu index 049b07b..bb27ce1 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu +++ b/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cu @@ -490,7 +490,10 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( double maxGain, double GainWeight, float* d_temp_R, float* d_temp_amps// 计算输出 ) { - long long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码 + long long tid = threadIdx.x; + long long bid = blockIdx.x; + long long dmx = blockDim.x; + long long idx = bid*dmx+tid; // 获取当前的线程编码 long long prfId = idx / SHAREMEMORY_FLOAT_HALF; long long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点 @@ -506,26 +509,13 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( double RstZ = antp.Pz - gp.Tz; double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // 矢量距离 - - if (RstRFarR) { - - if (prfId > 200) { - printf("error prfId:%d ,idx:%d RstR:%e antP=[%e,%e,%e] ,gp=[%e,%e,%e]\n", prfId, idx, RstR, - antp.Px, antp.Py, antp.Pz, gp.Tx, gp.Ty, gp.Tz - ); - } - - d_temp_R[idx] = 0; + d_temp_R[idx] = RstR; d_temp_amps[idx] = 0; return; } else { - if (prfId > 200) { - printf(" prfId:%d ,idx:%d RstR:%e antP=[%e,%e,%e] ,gp=[%e,%e,%e]\n", prfId, idx, RstR, - antp.Px, antp.Py, antp.Pz, gp.Tx, gp.Ty, gp.Tz - ); - } + RstX = RstX / RstR; RstY = RstY / RstR; RstZ = RstZ / RstR; @@ -540,8 +530,8 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( float localangle = acosf(dotAB / (slopR)); if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) { - d_temp_R[idx] = 0; - d_temp_amps[idx] = 0; + d_temp_R[idx] = RstR; + d_temp_amps[idx] = localangle; return; } else {} @@ -572,21 +562,35 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern( sigma = powf(10.0, sigma / 10.0); float temp_amp = float(ampGain * Pt * sigma); - float temp_R = float(double(RstR - refPhaseRange)); + float temp_R = float(RstR - refPhaseRange); if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) { - printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R); - d_temp_R[idx] = 0; - d_temp_amps[idx] = 0; return; } else { - - d_temp_amps[idx] = temp_amp ; - d_temp_R[idx] = temp_R; - - return; + d_temp_amps[idx] = temp_amp ; + d_temp_R[idx] = temp_R ; + //if (prfId > 200) { + // printf("idx:%lld , prfid:%lld , d_temp_amp=%e,d_temp_R=%e\n", idx, prfId,d_temp_amps[idx], d_temp_R[idx]); + //} + //return; } + //if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) { + // printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R); + // d_temp_R[idx] = 0; + // d_temp_amps[idx] = 0; + // return; + //} + //else { + // if (prfId > 200) { + // printf("error prfId:%ld RstR:%e antP=[%e,%e,%e] ,gp=[%e,%e,%e],temp_amp=%e,temp_R=%e\n", prfId, RstR, + // antp.Px, antp.Py, antp.Pz, gp.Tx, gp.Ty, gp.Tz, + // temp_amp, temp_R); + // } + // d_temp_amps[idx] = temp_amp; + // d_temp_R[idx] = temp_R; + // return; + //} } } } @@ -622,7 +626,9 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern( s_R[psid] = d_temp_R[pixelId]; s_amp[psid] = d_temp_amps[pixelId]; if (prfId > 200) { - printf("prfId:%d ,idx:%d s_R:%f S_amp:%f \n", prfId, idx, s_R[psid], s_amp[psid]); + printf(" s_R=%e S_amp=%e d_temp_R=%e d_temp_amps=%e idx=%lld prfid=%lld\n", s_R[psid], s_amp[psid], + d_temp_R[pixelId], d_temp_amps[pixelId], idx, prfId + ); } } @@ -667,7 +673,7 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern( /** 分块处理 ****************************************************************************************************************/ -extern "C" void ProcessRFPCTask(RFPCTask& task, long devid) +extern "C" void ProcessRFPCTask(RFPCTask& task, long devid, float* h_R, float* h_amp) { size_t pixelcount = task.prfNum * task.freqNum; size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE; @@ -686,6 +692,7 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid) long prfcount = task.prfNum; long process = 0; for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) { + cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE; Kernel_Computer_R_amp_NoAntPattern << > > ( task.antlist, @@ -703,6 +710,9 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid) PrintLasterError("CUDA_Kernel_Computer_R_amp"); cudaDeviceSynchronize(); + DeviceToHost(h_R, d_R, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float)); + DeviceToHost(h_amp, d_amps, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float)); + break; cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE- 1) / BLOCK_SIZE; CUDA_Kernel_Computer_echo_NoAntPattern << > > ( @@ -718,6 +728,11 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid) process = sTi * 100.0 / task.targetnum; PRINT("device ID : %d , TargetID [%f]: %d / %d finished %d\n", devid, sTi * 100.0 / task.targetnum, sTi, task.targetnum, devid); } + + + + + } diff --git a/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cuh b/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cuh index 48ec1be..0515694 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cuh +++ b/Toolbox/SimulationSARTool/SimulationSAR/GPURFPC.cuh @@ -16,11 +16,7 @@ extern "C" struct SateState { - double Px, Py, Pz, Vx, Vy, Vz; - //double antXaxisX, antXaxisY, antXaxisZ; - //double antYaxisX, antYaxisY, antYaxisZ; - //double antZaxisX, antZaxisY, antZaxisZ; - double antDirectX, antDirectY, antDirectZ; + double Px, Py, Pz, Vx, Vy, Vz,antDirectX, antDirectY, antDirectZ; }; @@ -152,7 +148,7 @@ extern "C" void CUDA_RFPC_MainProcess( extern "C" double* hostSigmaData_toDevice(int devid); -extern "C" void ProcessRFPCTask(RFPCTask& task,long devid); +extern "C" void ProcessRFPCTask(RFPCTask& task,long devid,float* h_R,float* h_amp); diff --git a/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui b/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui index 3d6693a..5d7afcf 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui +++ b/Toolbox/SimulationSARTool/SimulationSAR/QImageSARRFPC.ui @@ -103,7 +103,7 @@ - D:/FZSimulation/LTDQ/Input/LandCover.dat + D:/FZSimulation/LTDQ/Input/testEcho/LandCover.dat @@ -233,7 +233,7 @@ - D:/FZSimulation/LTDQ/Input/DEM_XYZ.dat + D:/FZSimulation/LTDQ/Input/testEcho/DEMXYZ.dat @@ -350,7 +350,7 @@ - D:/FZSimulation/LTDQ/Input/DEM_Sloper.dat + D:/FZSimulation/LTDQ/Input/testEcho/DEMSloper.dat diff --git a/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp b/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp index e704c0f..fe5dc8a 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp +++ b/Toolbox/SimulationSARTool/SimulationSAR/RFPCProcessCls.cpp @@ -1114,9 +1114,18 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si task.antlist = (SateState*)mallocCUDADevice(prfcount * sizeof(SateState), devId); HostToDevice(h_antlist.get(), task.antlist, sizeof(SateState) * prfcount); - + printf("h_antlist: %e,%e,%e,%e,%e,%e,%e,%e,%e \n", + h_antlist.get()[0].Px, h_antlist.get()[0].Py, h_antlist.get()[0].Pz, + h_antlist.get()[0].Vx, h_antlist.get()[0].Vy, h_antlist.get()[0].Vz, + h_antlist.get()[0].antDirectX, h_antlist.get()[0].antDirectY, h_antlist.get()[0].antDirectZ + ); + testOutAmpArr(QString("antlist_%1.dat").arg(devId), (double*)(h_antlist.get()), prfcount, 9); } + + float* h_R = (float*)mallocCUDAHost(sizeof(float) * prfcount * SHAREMEMORY_FLOAT_HALF); //2GB 璺濈 + float* h_amp = (float*)mallocCUDAHost(sizeof(float) * prfcount * SHAREMEMORY_FLOAT_HALF);//2GB 寮哄害 + // 鍒嗗潡璁$畻 for (const auto& pair : clsGoalStateDict) { long clsid = pair.first; @@ -1128,7 +1137,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId); HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount); - ProcessRFPCTask(task, devId); + + + + ProcessRFPCTask(task, devId, h_R, h_amp); + testOutDataArr(QString("h_R_%1.bin").arg(devId), h_R, prfcount, SHAREMEMORY_FLOAT_HALF); + exit(-1); FreeCUDADevice(task.goallist); }