修复bug

pull/13/head
陈增辉 2025-03-23 18:07:40 +08:00
parent 6a62a17dc2
commit cb9012e750
2 changed files with 14 additions and 13 deletions

View File

@ -485,13 +485,13 @@ __global__ void CUDA_Kernel_RFPC(
cuComplex* echodata cuComplex* echodata
) )
{ {
__shared__ GoalState Ts[SHAREMEMORY_DEM_STEP]; __shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];
long threadid = threadIdx.x; size_t threadid = threadIdx.x;
long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码 size_t idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
long prfid = floorf(idx / Freqcount); size_t prfid = floorf(idx / Freqcount);
long freqid = idx % Freqcount; size_t freqid = idx % Freqcount;
// printf("%d,%d ",prfid,freqid); // printf("%d,%d ",prfid,freqid);
if (prfid < PRFCount && freqid < Freqcount) if (prfid < PRFCount && freqid < Freqcount)
{ {
@ -565,9 +565,9 @@ __global__ void CUDA_Kernel_RFPC(
extern "C" void ProcessRFPCTask(RFPCTask& task) extern "C" void ProcessRFPCTask(RFPCTask& task)
{ {
long pixelcount = task.prfNum * task.freqNum; size_t pixelcount = task.prfNum * task.freqNum;
long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE; size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
printf("start %d ,%d,%d\n", task.targetnum, grid_size, BLOCK_SIZE); printf("start %d,%d ,%d,%d\n", pixelcount, task.targetnum, grid_size, BLOCK_SIZE);
CUDA_Kernel_RFPC << <grid_size, BLOCK_SIZE >> > ( CUDA_Kernel_RFPC << <grid_size, BLOCK_SIZE >> > (
task.antlist, task.antlist,
task.prfNum,task.freqNum, task.prfNum,task.freqNum,

View File

@ -939,7 +939,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
long prfcount = this->EchoSimulationData->getPluseCount(); long prfcount = this->EchoSimulationData->getPluseCount();
size_t prfblockcount = (prfcount + num_devices +2- 1) / num_devices; size_t prfblockcount = (prfcount + num_devices +2- 1) / num_devices;
PRINT("PRF COUNT : %d , child PRF COUNT: %d\n", prfcount, prfblockcount);
double prf_time = 0; double prf_time = 0;
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔 double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
bool antflag = true; // 计算天线方向图 bool antflag = true; // 计算天线方向图
@ -952,10 +952,10 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
#pragma omp parallel for #pragma omp parallel for
for (int devid = 0; devid < num_devices; devid++) { for (int devid = 0; devid < num_devices; devid++) {
cudaSetDevice(devid); // 确保当前线程操作指定的GPU设备 cudaSetDevice(devid); // 确保当前线程操作指定的GPU设备
this->RFPCMainProcess_GPU_NoAntPattern(0, 0, devid);
size_t startTid = devid * prfblockcount; size_t startTid = devid * prfblockcount;
size_t prf_devLen = prfblockcount; size_t prf_devLen = prfblockcount;
prf_devLen = (startTid + prf_devLen) < prfcount ? prf_devLen : (prfcount - startTid); prf_devLen = (startTid + prf_devLen) < prfcount ? prf_devLen : (prfcount - startTid);
PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devid, startTid, prf_devLen);
this->RFPCMainProcess_GPU_NoAntPattern(startTid, prf_devLen, devid); this->RFPCMainProcess_GPU_NoAntPattern(startTid, prf_devLen, devid);
} }
@ -965,6 +965,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId) ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId)
{ {
PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devId,startprfid,prfcount);
/// 显存不限制 /// 显存不限制
cudaSetDevice(devId); // 确保当前线程操作指定的GPU设备 cudaSetDevice(devId); // 确保当前线程操作指定的GPU设备
POLARTYPEENUM polartype = this->TaskSetting->getPolarType(); POLARTYPEENUM polartype = this->TaskSetting->getPolarType();
@ -1094,14 +1095,14 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
} }
// 分块计算 // 分块计算
for (const auto& pair : clsCUDASigmaParamsDict) { for (const auto& pair : clsGoalStateDict) {
long clsid = pair.first; long clsid = pair.first;
size_t clscount = clsCountDict[clsid]; size_t clscount = clsCountDict[clsid];
PRINT("Process Class ID : %d , Count: %d\n", clsid, clscount); PRINT("Process Class ID : %d , Count: %d\n", clsid, clscount);
task.targetnum = clscount; task.targetnum = clscount;
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId); task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount); HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
task.sigma0_cls = pair.second; task.sigma0_cls = clsCUDASigmaParamsDict[clsid];
ProcessRFPCTask(task); ProcessRFPCTask(task);
FreeCUDADevice(task.goallist); FreeCUDADevice(task.goallist);
} }