修复bug
parent
6a62a17dc2
commit
cb9012e750
|
|
@ -485,13 +485,13 @@ __global__ void CUDA_Kernel_RFPC(
|
||||||
cuComplex* echodata
|
cuComplex* echodata
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
|
||||||
__shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];
|
__shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];
|
||||||
long threadid = threadIdx.x;
|
|
||||||
|
size_t threadid = threadIdx.x;
|
||||||
|
|
||||||
long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
|
size_t idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
|
||||||
long prfid = floorf(idx / Freqcount);
|
size_t prfid = floorf(idx / Freqcount);
|
||||||
long freqid = idx % Freqcount;
|
size_t freqid = idx % Freqcount;
|
||||||
// printf("%d,%d ",prfid,freqid);
|
// printf("%d,%d ",prfid,freqid);
|
||||||
if (prfid < PRFCount && freqid < Freqcount)
|
if (prfid < PRFCount && freqid < Freqcount)
|
||||||
{
|
{
|
||||||
|
|
@ -565,9 +565,9 @@ __global__ void CUDA_Kernel_RFPC(
|
||||||
|
|
||||||
extern "C" void ProcessRFPCTask(RFPCTask& task)
|
extern "C" void ProcessRFPCTask(RFPCTask& task)
|
||||||
{
|
{
|
||||||
long pixelcount = task.prfNum * task.freqNum;
|
size_t pixelcount = task.prfNum * task.freqNum;
|
||||||
long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||||
printf("start %d ,%d,%d\n", task.targetnum, grid_size, BLOCK_SIZE);
|
printf("start %d,%d ,%d,%d\n", pixelcount, task.targetnum, grid_size, BLOCK_SIZE);
|
||||||
CUDA_Kernel_RFPC << <grid_size, BLOCK_SIZE >> > (
|
CUDA_Kernel_RFPC << <grid_size, BLOCK_SIZE >> > (
|
||||||
task.antlist,
|
task.antlist,
|
||||||
task.prfNum,task.freqNum,
|
task.prfNum,task.freqNum,
|
||||||
|
|
|
||||||
|
|
@ -939,23 +939,23 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
|
||||||
|
|
||||||
long prfcount = this->EchoSimulationData->getPluseCount();
|
long prfcount = this->EchoSimulationData->getPluseCount();
|
||||||
size_t prfblockcount = (prfcount + num_devices +2- 1) / num_devices;
|
size_t prfblockcount = (prfcount + num_devices +2- 1) / num_devices;
|
||||||
|
PRINT("PRF COUNT : %d , child PRF COUNT: %d\n", prfcount, prfblockcount);
|
||||||
double prf_time = 0;
|
double prf_time = 0;
|
||||||
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
|
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
|
||||||
bool antflag = true; // 计算天线方向图
|
bool antflag = true; // 计算天线方向图
|
||||||
long double imageStarttime = this->TaskSetting->getSARImageStartTime();
|
long double imageStarttime = this->TaskSetting->getSARImageStartTime();
|
||||||
std::shared_ptr<SatelliteOribtNode[]> sateOirbtNodes = this->getSatelliteOribtNodes(prf_time, dt, antflag, imageStarttime);
|
std::shared_ptr<SatelliteOribtNode[]> sateOirbtNodes = this->getSatelliteOribtNodes(prf_time, dt, antflag, imageStarttime);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int devid = 0; devid < num_devices; devid++) {
|
for (int devid = 0; devid < num_devices; devid++) {
|
||||||
cudaSetDevice(devid); // 确保当前线程操作指定的GPU设备
|
cudaSetDevice(devid); // 确保当前线程操作指定的GPU设备
|
||||||
this->RFPCMainProcess_GPU_NoAntPattern(0, 0, devid);
|
|
||||||
size_t startTid = devid * prfblockcount;
|
size_t startTid = devid * prfblockcount;
|
||||||
size_t prf_devLen = prfblockcount;
|
size_t prf_devLen = prfblockcount;
|
||||||
prf_devLen = (startTid + prf_devLen) < prfcount ? prf_devLen : (prfcount - startTid);
|
prf_devLen = (startTid + prf_devLen) < prfcount ? prf_devLen : (prfcount - startTid);
|
||||||
|
PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devid, startTid, prf_devLen);
|
||||||
this->RFPCMainProcess_GPU_NoAntPattern(startTid, prf_devLen, devid);
|
this->RFPCMainProcess_GPU_NoAntPattern(startTid, prf_devLen, devid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -965,6 +965,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
|
||||||
|
|
||||||
ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId)
|
ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId)
|
||||||
{
|
{
|
||||||
|
PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devId,startprfid,prfcount);
|
||||||
/// 显存不限制
|
/// 显存不限制
|
||||||
cudaSetDevice(devId); // 确保当前线程操作指定的GPU设备
|
cudaSetDevice(devId); // 确保当前线程操作指定的GPU设备
|
||||||
POLARTYPEENUM polartype = this->TaskSetting->getPolarType();
|
POLARTYPEENUM polartype = this->TaskSetting->getPolarType();
|
||||||
|
|
@ -1094,14 +1095,14 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
|
||||||
}
|
}
|
||||||
|
|
||||||
// 分块计算
|
// 分块计算
|
||||||
for (const auto& pair : clsCUDASigmaParamsDict) {
|
for (const auto& pair : clsGoalStateDict) {
|
||||||
long clsid = pair.first;
|
long clsid = pair.first;
|
||||||
size_t clscount = clsCountDict[clsid];
|
size_t clscount = clsCountDict[clsid];
|
||||||
PRINT("Process Class ID : %d , Count: %d\n", clsid, clscount);
|
PRINT("Process Class ID : %d , Count: %d\n", clsid, clscount);
|
||||||
task.targetnum = clscount;
|
task.targetnum = clscount;
|
||||||
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
|
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
|
||||||
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
|
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
|
||||||
task.sigma0_cls = pair.second;
|
task.sigma0_cls = clsCUDASigmaParamsDict[clsid];
|
||||||
ProcessRFPCTask(task);
|
ProcessRFPCTask(task);
|
||||||
FreeCUDADevice(task.goallist);
|
FreeCUDADevice(task.goallist);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue