新增多卡支持

pull/13/head
陈增辉 2025-03-23 16:01:28 +08:00
parent 0006890b96
commit 8bd997f924
15 changed files with 660 additions and 14 deletions

View File

@ -339,6 +339,12 @@ inline long nextpow2(long n) {
return pow(2,en); return pow(2,en);
} }
/**
 * Releases a buffer that was obtained from malloc/calloc/realloc.
 *
 * The pointer is received by value, so this function cannot reset the
 * caller's copy: after the call the caller still holds a dangling pointer and
 * must set it to NULL itself if it may be inspected again.
 *
 * @param a  Buffer to release; passing NULL is allowed and does nothing.
 */
inline void releaseVoidArray(void* a)
{
    free(a); // free(NULL) is defined to be a no-op
}
#endif #endif

View File

@ -511,6 +511,19 @@ ErrorCode EchoL0Dataset::loadFromXml() {
return ErrorCode::SUCCESS; return ErrorCode::SUCCESS;
} }
/**
 * Reads the antenna position/velocity records from the GPS point file.
 *
 * PluseCount records are requested from GPSPointFilePath through
 * SatelliteAntPosOperator::readAntPosFile.
 *
 * The read is wrapped in a named OpenMP critical section so that concurrent
 * callers are actually serialised. A lock object created inside the function
 * is private to each call and therefore cannot exclude other threads.
 *
 * @return Shared array produced by readAntPosFile.
 */
std::shared_ptr<SatelliteAntPos> EchoL0Dataset::getAntPosVelc()
{
    std::shared_ptr<SatelliteAntPos> antposlist;
#pragma omp critical (EchoL0Dataset_getAntPosVelc)
    {
        long prfcount = this->PluseCount;
        antposlist = SatelliteAntPosOperator::readAntPosFile(this->GPSPointFilePath, prfcount);
    }
    return antposlist;
}
std::shared_ptr<double> EchoL0Dataset::getAntPos() std::shared_ptr<double> EchoL0Dataset::getAntPos()
{ {
omp_lock_t lock; omp_lock_t lock;

View File

@ -209,6 +209,7 @@ public: //
public: public:
// 读取文件 // 读取文件
// Reads the antenna position/velocity records from the GPS point file (PluseCount records are requested).
std::shared_ptr< SatelliteAntPos> getAntPosVelc();
std::shared_ptr<double> getAntPos(); std::shared_ptr<double> getAntPos();
std::shared_ptr<std::complex<double>> getEchoArr(long startPRF, long& PRFLen); std::shared_ptr<std::complex<double>> getEchoArr(long startPRF, long& PRFLen);
std::shared_ptr<std::complex<double>> getEchoArr(); std::shared_ptr<std::complex<double>> getEchoArr();

View File

@ -32,6 +32,7 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="GPUTool\GPUBaseLibAPI.h" /> <ClInclude Include="GPUTool\GPUBaseLibAPI.h" />
<ClInclude Include="GPUTool\GPUBaseTool.h" />
<ClInclude Include="GPUTool\GPUDouble32.cuh" /> <ClInclude Include="GPUTool\GPUDouble32.cuh" />
<CudaCompile Include="GPUTool\GPUTool.cuh" /> <CudaCompile Include="GPUTool\GPUTool.cuh" />
</ItemGroup> </ItemGroup>
@ -40,6 +41,9 @@
<Project>{872ecd6f-30e3-4a1b-b17c-15e87d373ff6}</Project> <Project>{872ecd6f-30e3-4a1b-b17c-15e87d373ff6}</Project>
</ProjectReference> </ProjectReference>
</ItemGroup> </ItemGroup>
<ItemGroup>
<ClCompile Include="GPUTool\GPUBaseTool.cpp" />
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<VCProjectVersion>17.0</VCProjectVersion> <VCProjectVersion>17.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>
@ -180,7 +184,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;GPUBASELIB_API;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>NDEBUG;_CONSOLE;GPUBASELIB_API;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp14</LanguageStandard> <LanguageStandard>stdcpp14</LanguageStandard>
<LanguageStandard_C>stdc11</LanguageStandard_C> <LanguageStandard_C>stdc11</LanguageStandard_C>

View File

@ -35,5 +35,13 @@
<ClInclude Include="GPUTool\GPUDouble32.cuh"> <ClInclude Include="GPUTool\GPUDouble32.cuh">
<Filter>GPUTool</Filter> <Filter>GPUTool</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="GPUTool\GPUBaseTool.h">
<Filter>GPUTool</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="GPUTool\GPUBaseTool.cpp">
<Filter>GPUTool</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -0,0 +1,56 @@
#include "GPUBaseTool.h"
/**
 * Get the size of an open file in bytes.
 *
 * The stream is always left positioned at its beginning.
 *
 * @param fp  Open stream; NULL is tolerated.
 * @return    File size in bytes, or 0 when fp is NULL or the size cannot be
 *            determined (fseek/ftell failure).
 */
extern "C" size_t getfsize(FILE* fp)
{
    if (fp == NULL)
    {
        return 0;
    }
    if (fseek(fp, 0L, SEEK_END) != 0)
    {
        return 0;
    }
    const long endPos = ftell(fp); // -1L on failure; must not be cast to size_t blindly
    fseek(fp, 0L, SEEK_SET);
    if (endPos < 0)
    {
        return 0;
    }
    return (size_t)endPos;
}
/**
 * Loads a whole file into a freshly malloc'ed byte buffer.
 *
 * @param binPath      Path of the file to read (opened in binary mode).
 * @param binpath_len  Out: number of bytes loaded. Set to 0 on every failure path.
 * @return             Buffer holding the file content, or NULL on failure
 *                     (bad arguments, open failure, allocation failure, short
 *                     read). The caller owns the buffer and releases it with
 *                     free()/releaseVoidArray().
 */
extern "C" unsigned char* loadBinFromPath(char* binPath, size_t* binpath_len)
{
    if (binpath_len != NULL)
    {
        *binpath_len = 0;
    }
    if (binPath == NULL || binpath_len == NULL)
    {
        fprintf(stderr, "loadBinFromPath: invalid argument\n");
        return NULL;
    }

    FILE* fd = fopen(binPath, "rb");
    if (NULL == fd)
    {
        perror("Failed to open file");
        return NULL;
    }

    const size_t f_len = getfsize(fd);

    // malloc(0) may legally return NULL; ask for at least one byte so that an
    // empty file is not misreported as an allocation failure.
    unsigned char* buffer = (unsigned char*)malloc((f_len > 0 ? f_len : 1) * sizeof(unsigned char));
    if (buffer == NULL)
    {
        perror("Failed to allocate memory");
        fclose(fd);
        return NULL;
    }

    const size_t fread_count = fread(buffer, 1, f_len, fd);
    fclose(fd);

    if (fread_count != f_len)
    {
        releaseVoidArray(buffer);
        return NULL;
    }

    *binpath_len = f_len; // published only once the content is complete
    return buffer;
}
extern "C" void writeComplexDataBinFile(char* dataPath, size_t datalen, cuComplex* data)
{
FILE* pd = fopen(dataPath, "w");
double* tempdata = (double*)malloc(datalen * 2 * sizeof(double));
for (long i = 0; i < datalen; i++)
{
tempdata[i * 2 + 0] = data[i].x;
tempdata[i * 2 + 1] = data[i].y;
}
fwrite(tempdata, sizeof(double), datalen * 2, pd);
fclose(pd);
}

View File

@ -0,0 +1,25 @@
#ifndef __GPUBASETOOL_H__
#define __GPUBASETOOL_H__
#include "GPUBaseLibAPI.h"
#include "BaseConstVariable.h"
#include <iostream>
#include <memory>
#include <complex>
#include "GPUTool.cuh"
// Returns the size in bytes of the open stream fp; the stream is rewound to its start afterwards.
extern "C" GPUBASELIBAPI size_t getfsize(FILE* fp);
// Reads the whole file at binPath into a malloc'ed buffer and stores its length in *binpath_len.
// Returns NULL when the file cannot be opened, allocated for, or read completely.
extern "C" GPUBASELIBAPI unsigned char* loadBinFromPath(char* binPath, size_t* binpath_len);
// Writes datalen complex samples to dataPath as interleaved doubles (x, y per sample).
extern "C" GPUBASELIBAPI void writeComplexDataBinFile(char* dataPath, size_t datalen, cuComplex* data);
/**
 * Copies len elements of CPUArr into a buffer obtained from mallocCUDAHost.
 *
 * The returned shared_ptr releases the buffer through FreeCUDAHost.
 * (Presumably mallocCUDAHost returns page-locked host memory; confirm in GPUTool.)
 *
 * @param CPUArr  Source array holding at least len elements.
 * @param len     Number of elements to copy.
 * @return        Newly allocated copy of the first len elements.
 */
template<typename T>
inline std::shared_ptr<T> CPUToHost(std::shared_ptr<T> CPUArr, size_t len) {
    T* const hostBuf = (T*)mallocCUDAHost(len * sizeof(T));
    std::shared_ptr<T> result(hostBuf, FreeCUDAHost);

    const T* const src = CPUArr.get();
    size_t k = 0;
    while (k < len) {
        hostBuf[k] = src[k];
        ++k;
    }
    return result;
}
#endif // !__GPUBASETOOL_H__

View File

@ -16,11 +16,13 @@
#define LAMP_CUDA_PI 3.141592653589793238462643383279 #define LAMP_CUDA_PI 3.141592653589793238462643383279
// SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE = SHAREMEMORY_FLOAT_HALF // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE = SHAREMEMORY_FLOAT_HALF
/** CUDA launch parameters ************************************************************************************/
#define BLOCK_SIZE 256 #define BLOCK_SIZE 256
#define SHAREMEMORY_BYTE 49152 #define SHAREMEMORY_BYTE 49152
#define SHAREMEMORY_FLOAT_HALF 6144 #define SHAREMEMORY_FLOAT_HALF 6144
#define SHAREMEMORY_FLOAT_HALF_STEP 24 #define SHAREMEMORY_FLOAT_HALF_STEP 24
// Number of GoalState entries in the shared-memory tile used by CUDA_Kernel_RFPC (768 = 3 * BLOCK_SIZE).
#define SHAREMEMORY_DEM_STEP 768
// NOTE(review): 612 equals 102 * 6, the element count of the cls_sigma0 table — presumably its size; confirm.
#define SHAREMEMORY_Reflect 612
@ -110,6 +112,13 @@ extern "C" GPUBASELIBAPI void CUDAIFFT(cuComplex* inArr, cuComplex* outArr, long
extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length); extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length);
extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, size_t len); extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, size_t len);
extern "C" GPUBASELIBAPI void HostCuComplexToshared_complexPtr(cuComplex* src, std::complex<double>* dst, size_t len); extern "C" GPUBASELIBAPI void HostCuComplexToshared_complexPtr(cuComplex* src, std::complex<double>* dst, size_t len);
#endif #endif
#endif #endif

View File

@ -0,0 +1,132 @@
#include "BaseConstVariable.h"
#include "GPURFPC.cuh"
const double cls_sigma0[102][6] = {
{0, 0, 0, 0, 0, 0}, // 0
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{28.15, -39.73, 0.0986, 2.863, 4.356, -6.279}, // 10
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{36.13, -48.2, 0.1299, -1.838, 5.404, -4.015},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{183.5, -194.6, 0.0167, 2.952, -4.1, 6.026},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{50.97, -62.9, 0.0968, 1.604, 4.637, 6.108},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{-10.31, 15.96, 37.73, -4.584, 4.997, -3.569},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{-12.45, 0.1561, -3.187, -2.482, 8.244, 0.3632},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{-19.23, 0.3623, -2.209, 9.649, 0.1292, -0.264},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{56.61, -62.29, 0.01388, 2.767, -3.943, 5.995},
{0, 0, 0, 0, 0, 0} };
/** Parameters ***************************************************************/
/**
 * Uploads the cls_sigma0 table to a CUDA device.
 *
 * The table is flattened row by row into a staging buffer obtained from
 * mallocCUDAHost, copied to a buffer obtained from mallocCUDADevice, and the
 * staging buffer is released again.
 *
 * @param devid  Device identifier forwarded to mallocCUDADevice.
 * @return       Device pointer to rows * columns doubles laid out row-major.
 *               The caller owns the device buffer.
 */
extern "C" double* hostSigmaData_toDevice(int devid)
{
    // Derive the dimensions from the table itself so they cannot drift apart.
    const int rowCount = (int)(sizeof(cls_sigma0) / sizeof(cls_sigma0[0]));
    const int colCount = (int)(sizeof(cls_sigma0[0]) / sizeof(cls_sigma0[0][0]));
    const size_t byteCount = sizeof(cls_sigma0);

    double* h_data = (double*)mallocCUDAHost(byteCount);
    double* d_data = (double*)mallocCUDADevice(byteCount, devid);

    printf("copy to ");
    for (int i = 0; i < rowCount; i++)
    {
        printf(" %d ,", i); // i is an int, matching the %d conversion
        for (int j = 0; j < colCount; j++)
        {
            h_data[i * colCount + j] = cls_sigma0[i][j];
        }
    }
    printf("host to device sigma data\n");

    HostToDevice(h_data, d_data, byteCount);
    FreeCUDAHost(h_data);
    return d_data;
}

View File

@ -463,6 +463,128 @@ void CUDA_RFPC_MainProcess(
} }
/* Kernel ****************************************************************************************************************************/
/**
 * Accumulates the echo contribution of a list of targets into echodata.
 *
 * One thread handles one (pulse, frequency) sample: idx = prfid * Freqcount + freqid.
 * The target list is streamed through a shared-memory tile of
 * SHAREMEMORY_DEM_STEP entries that the whole block loads cooperatively.
 *
 * Fixes relative to the first version:
 *  - every thread of the block now takes part in the tile load and reaches both
 *    __syncthreads() barriers, including threads whose idx is past the last
 *    sample (barriers inside a divergent branch are undefined behaviour);
 *  - prfid is computed with integer division instead of a float round trip;
 *  - only targets whose range lies inside [NearR, FarR] contribute (the mask
 *    was inverted before);
 *  - the sum is accumulated in double and the spreading loss is evaluated in
 *    double, then converted once to cuComplex.
 *
 * @param antlist        Antenna state per pulse (PRFCount entries).
 * @param PRFCount       Number of pulses.
 * @param Freqcount      Number of frequency points per pulse.
 * @param goallist       Targets to accumulate (demLen entries).
 * @param demLen         Number of targets.
 * @param StartFreqGHz   Frequency of the first frequency point.
 * @param FreqStep       Frequency increment between points.
 * @param refPhaseRange  Reference range subtracted before the phase is formed.
 * @param NearR, FarR    Range window; targets outside are skipped.
 * @param clsSigma0      Six coefficients forwarded to GPU_getSigma0dB_params.
 * @param echodata       In/out: PRFCount * Freqcount samples; contributions are added.
 */
__global__ void CUDA_Kernel_RFPC(
    SateState* antlist,
    long PRFCount, long Freqcount, // total number of pulses, frequency points per pulse
    GoalState* goallist,
    long demLen,
    double StartFreqGHz, double FreqStep,
    double refPhaseRange,
    double NearR, double FarR,
    CUDASigmaParam clsSigma0,
    cuComplex* echodata
)
{
    __shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];

    const long threadid = threadIdx.x;
    const long idx = blockIdx.x * blockDim.x + threadIdx.x; // global sample index of this thread

    // Integer division is exact for every idx; a float round trip loses precision above 2^24.
    const long prfid = (Freqcount > 0) ? (idx / Freqcount) : PRFCount;
    const long freqid = (Freqcount > 0) ? (idx % Freqcount) : 0;
    const bool active = (prfid < PRFCount) && (freqid < Freqcount);

    SateState antPos = { 0, 0, 0, 0, 0, 0 };
    double factorjTemp = 0;
    if (active)
    {
        antPos = antlist[prfid];
        factorjTemp = RFPCPIDIVLIGHT * (StartFreqGHz + freqid * FreqStep);
    }

    const double fourPi = 4 * LAMP_CUDA_PI;
    const double fourPiSq = fourPi * fourPi;

    double echo_real = 0;
    double echo_imag = 0;

    for (long startid = 0; startid < demLen; startid = startid + SHAREMEMORY_DEM_STEP)
    {
        __syncthreads(); // the previous tile has been fully consumed by every thread

        // Cooperative load: stride by blockDim.x so the whole tile is filled for any block size.
        for (long stid = threadid; stid < SHAREMEMORY_DEM_STEP; stid += blockDim.x)
        {
            const long ttid = startid + stid;
            if (ttid < demLen)
            {
                Ts[stid] = goallist[ttid];
            }
        }

        __syncthreads(); // the tile is complete before anyone reads it

        if (!active)
        {
            continue; // still loops so that the barriers above stay block-uniform
        }

        long tileLen = demLen - startid;
        if (tileLen > SHAREMEMORY_DEM_STEP)
        {
            tileLen = SHAREMEMORY_DEM_STEP;
        }

        for (long tid = 0; tid < tileLen; tid++)
        {
            const GoalState p = Ts[tid];

            // Vector from the target to the antenna (T->P).
            const double dx = antPos.Px - p.Tx;
            const double dy = antPos.Py - p.Ty;
            const double dz = antPos.Pz - p.Tz;
            const double R = sqrt(dx * dx + dy * dy + dz * dz);

            if (R < NearR || R > FarR)
            {
                continue; // outside the range window: no contribution
            }

            // Angle between the target-to-antenna vector and the (TsX, TsY, TsZ) vector.
            const double slopeNorm = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ);
            const double incAngle = acos((dx * p.TsX + dy * p.TsY + dz * p.TsZ) / (R * slopeNorm));

            const double sigma0dB = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3, clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle); // sigma
            double amp = pow(10.0, sigma0dB / 10.0); // dB -> linear

            const double R2 = R * R;
            amp = amp / (fourPiSq * R2 * R2); // (4*pi)^2 * R^4

            const double phase = factorjTemp * (R - refPhaseRange);
            echo_real += amp * cos(phase);
            echo_imag += amp * sin(phase);
        }
    }

    if (active)
    {
        const cuComplex echo = make_cuComplex((float)echo_real, (float)echo_imag);
        echodata[idx] = cuCaddf(echodata[idx], echo);
    }
}
/** Block-wise processing ****************************************************************************************************************/
/**
 * Launches CUDA_Kernel_RFPC for one task and waits for it to finish.
 *
 * One thread is launched per (pulse, frequency) sample in blocks of
 * BLOCK_SIZE. Tasks without samples or without targets are skipped, because a
 * launch with zero blocks is an invalid configuration and a launch with zero
 * targets adds nothing.
 *
 * @param task  Task description; d_echoData is updated in place on the device.
 */
extern "C" void ProcessRFPCTask(RFPCTask& task)
{
    const long pixelcount = task.prfNum * task.freqNum;
    if (pixelcount <= 0 || task.targetnum <= 0)
    {
        printf("skip %ld ,%ld\n", task.targetnum, pixelcount);
        return;
    }

    const long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
    printf("start %ld ,%ld,%d\n", task.targetnum, grid_size, BLOCK_SIZE);

    CUDA_Kernel_RFPC<<<(unsigned int)grid_size, BLOCK_SIZE>>>(
        task.antlist,
        task.prfNum, task.freqNum,
        task.goallist,
        task.targetnum,
        task.startFreq, task.stepFreq,
        task.Rref, task.Rnear, task.Rfar,
        task.sigma0_cls,
        task.d_echoData
    );
    PrintLasterError("ProcessRFPCTask");

    // Errors raised while the kernel runs are only reported by the synchronisation call.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    if (syncStatus != cudaSuccess)
    {
        printf("ProcessRFPCTask: kernel execution failed: %s\n", cudaGetErrorString(syncStatus));
    }

    printf("end %ld \n", task.targetnum);
}
#endif #endif

View File

@ -8,9 +8,26 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuComplex.h> #include <cuComplex.h>
/** CUDA call parameters ************************************************************************************/
#define RFPCPIDIVLIGHT -4*PI/(LIGHTSPEED/1e9) #define RFPCPIDIVLIGHT -4*PI/(LIGHTSPEED/1e9)
// Antenna state for one pulse: position (Px, Py, Pz) and velocity (Vx, Vy, Vz).
// CUDA_Kernel_RFPC reads only the position components.
extern "C" struct SateState {
double Px, Py, Pz, Vx, Vy, Vz;
};
// One ground target: position (Tx, Ty, Tz), the vector (TsX, TsY, TsZ) used by
// CUDA_Kernel_RFPC to compute the incidence angle (filled from the slope raster
// by the host code), and the land-cover class id.
extern "C" struct GoalState {
double Tx, Ty, Tz, TsX, TsY, TsZ;
size_t cls;
};
extern "C" struct CUDASigmaParam { extern "C" struct CUDASigmaParam {
double p1; double p1;
double p2; double p2;
@ -20,6 +37,59 @@ extern "C" struct CUDASigmaParam {
double p6; double p6;
}; };
// Bundle of four parallel double arrays describing slope data.
// NOTE(review): presumably per-DEM-cell slope vector components plus a slope angle — confirm with the code that fills it.
extern "C" struct SloperDs
{
double* SloperX;
double* SloperY;
double* SloperZ;
double* SloperAngle;
};
// Bundle of three parallel double arrays holding DEM coordinates.
// NOTE(review): presumably the X/Y/Z coordinate of each DEM cell — confirm with the code that fills it.
extern "C" struct DEMDs
{
double* demX;
double* demY;
double* demZ;
};
// Wrapper around a byte array of land data.
// NOTE(review): presumably one land-cover class code per DEM cell — confirm with the code that fills it.
extern "C" struct LandDataDs
{
unsigned char* landData;
};
// Bundle of six parallel double arrays: position (Pxs, Pys, Pzs) and velocity (Vxs, Vys, Vzs).
// NOTE(review): presumably one entry per GPS/antenna sample — confirm with the code that fills it.
extern "C" struct GPSPointsDs
{
double* Pxs;
double* Pys;
double* Pzs;
double* Vxs;
double* Vys;
double* Vzs;
};
// Description of one kernel launch handled by ProcessRFPCTask.
// Every member has a default so that a freshly declared task never carries
// indeterminate values into a launch.
extern "C" struct RFPCTask
{
    double startFreq = 0;             // first frequency point (passed as the kernel's StartFreqGHz)
    double stepFreq = 0;              // frequency increment between points
    long freqNum = 0;                 // frequency points per pulse
    long prfNum = 0;                  // number of pulses in this task

    double Rnear = 0;                 // near edge of the range window
    double Rfar = 0;                  // far edge of the range window
    double Rref = 0;                  // reference range subtracted before the phase is formed

    SateState* antlist = nullptr;     // device: antenna state per pulse
    GoalState* goallist = nullptr;    // device: targets of the current class
    cuComplex* d_echoData = nullptr;  // device: echo samples, prfNum * freqNum entries
    CUDASigmaParam sigma0_cls = {};   // sigma0 coefficients of the current class
    long targetnum = 0;               // number of entries in goallist
};
extern __device__ double GPU_getSigma0dB_params( extern __device__ double GPU_getSigma0dB_params(
@ -73,8 +143,8 @@ extern "C" void CUDA_RFPC_MainProcess(
// Copies the cls_sigma0 coefficient table to device memory and returns the device pointer.
extern "C" double* hostSigmaData_toDevice(int devid);
// Launches CUDA_Kernel_RFPC for the given task and waits for the device to finish.
extern "C" void ProcessRFPCTask(RFPCTask& task);

View File

@ -1,6 +1,7 @@
 
#include "stdafx.h" #include "stdafx.h"
#include <complex> #include <complex>
#include "GPUBaseTool.h"
#include "RFPCProcessCls.h" #include "RFPCProcessCls.h"
#include "BaseConstVariable.h" #include "BaseConstVariable.h"
#include "SARSatelliteSimulationAbstractCls.h" #include "SARSatelliteSimulationAbstractCls.h"
@ -306,8 +307,8 @@ ErrorCode RFPCProcessCls::Process(long num_thread)
//return ErrorCode::SUCCESS; //return ErrorCode::SUCCESS;
stateCode = this->RFPCMainProcess_GPU(); //stateCode = this->RFPCMainProcess_GPU();
stateCode = this->RFPCMainProcess_MultiGPU_NoAntPattern();
if (stateCode != ErrorCode::SUCCESS) { if (stateCode != ErrorCode::SUCCESS) {
return stateCode; return stateCode;
} }
@ -542,17 +543,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
long freqnum_temp = freqnum; long freqnum_temp = freqnum;
float f0 = float(freqlist[0] / 1e9); float f0 = float(freqlist[0] / 1e9);
float dfreq = float((freqlist[1] - freqlist[0]) / 1e9); float dfreq = float((freqlist[1] - freqlist[0]) / 1e9);
qDebug() << "freqnum: " << freqnum << " f0: " << f0 << " dfreq: " << dfreq;
long PRFCount = this->EchoSimulationData->getPluseCount(); long PRFCount = this->EchoSimulationData->getPluseCount();
double NearRange = this->EchoSimulationData->getNearRange(); // 近斜距 double NearRange = this->EchoSimulationData->getNearRange(); // 近斜距
double FarRange = this->EchoSimulationData->getFarRange(); double FarRange = this->EchoSimulationData->getFarRange();
double Pt = this->TaskSetting->getPt() * this->TaskSetting->getGri();// 发射电压 1v double Pt = this->TaskSetting->getPt() * this->TaskSetting->getGri();// 发射电压 1v
double lamda = this->TaskSetting->getCenterLamda(); // 波长 double lamda = this->TaskSetting->getCenterLamda(); // 波长
double refphaseRange = this->TaskSetting->getRefphaseRange(); // 参考相位斜距 double refphaseRange = this->TaskSetting->getRefphaseRange(); // 参考相位斜距
double prf_time = 0; double prf_time = 0;
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔 double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
bool antflag = true; // 计算天线方向图 bool antflag = true; // 计算天线方向图
@ -935,6 +931,202 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
return ErrorCode::SUCCESS; return ErrorCode::SUCCESS;
} }
/**
 * Multi-GPU echo simulation without antenna pattern.
 *
 * The pulses are split into contiguous slices, one per CUDA device, and each
 * slice is processed by RFPCMainProcess_GPU_NoAntPattern on its own OpenMP thread.
 *
 * Fixes relative to the first version:
 *  - the device count is initialised and validated (zero devices used to
 *    cause a division by zero);
 *  - slice bounds are clamped, so devices beyond the last pulse are skipped
 *    instead of receiving a wrapped-around unsigned length;
 *  - the zero-length call per device was removed: it reloaded all inputs
 *    without simulating any pulse;
 *  - a failure reported by any device is returned instead of being discarded.
 *
 * @return ErrorCode::SUCCESS when every slice succeeded, otherwise the error
 *         code of one failing slice.
 */
ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
{
    int num_devices = 0;
    const cudaError_t countStatus = cudaGetDeviceCount(&num_devices);
    PRINT("GPU Count : %d \n", num_devices);
    if (countStatus != cudaSuccess || num_devices < 1) {
        // NOTE(review): no dedicated error code is visible from here. Fall back to a
        // single slice on device 0 so that the failure surfaces in the CUDA helpers
        // instead of as a division by zero below.
        PRINT("No usable CUDA device reported: %s \n", cudaGetErrorString(countStatus));
        num_devices = 1;
    }

    const long prfcount = this->EchoSimulationData->getPluseCount();
    if (prfcount <= 0) {
        PRINT("No pulse to process \n");
        return ErrorCode::SUCCESS;
    }
    const size_t totalPrf = size_t(prfcount);
    const size_t prfblockcount = (totalPrf + size_t(num_devices) - 1) / size_t(num_devices); // ceil division

    double prf_time = 0;
    double dt = 1 / this->TaskSetting->getPRF(); // time between two pulses
    bool antflag = true; // compute the antenna pattern
    long double imageStarttime = this->TaskSetting->getSARImageStartTime();
    // NOTE(review): the result is not used below; the call is kept in case
    // getSatelliteOribtNodes has side effects — confirm and remove if it has none.
    std::shared_ptr<SatelliteOribtNode[]> sateOirbtNodes = this->getSatelliteOribtNodes(prf_time, dt, antflag, imageStarttime);

    ErrorCode overallState = ErrorCode::SUCCESS;

#pragma omp parallel for
    for (int devid = 0; devid < num_devices; devid++) {
        const size_t startTid = size_t(devid) * prfblockcount;
        if (startTid >= totalPrf) {
            continue; // more devices than slices
        }
        const size_t remaining = totalPrf - startTid;
        const size_t prf_devLen = remaining < prfblockcount ? remaining : prfblockcount;

        cudaSetDevice(devid); // bind this thread to its device
        const ErrorCode devState = this->RFPCMainProcess_GPU_NoAntPattern(startTid, prf_devLen, devid);
        if (devState != ErrorCode::SUCCESS) {
#pragma omp critical (RFPCProcessCls_MultiGPUState)
            {
                overallState = devState;
            }
        }
    }

    return overallState;
}
/**
 * Simulates the echo of pulses [startprfid, startprfid + prfcount) on one CUDA
 * device, ignoring the antenna pattern, and adds the result to the echo file.
 *
 * Steps:
 *  1. read the DEM coordinates, slope vectors and land-cover classes;
 *  2. group the DEM cells by class into one GoalState list per class
 *     (only classes that have sigma parameters are kept);
 *  3. upload the antenna states of the requested pulses;
 *  4. launch ProcessRFPCTask once per class, accumulating into one device buffer;
 *  5. add the device result to the echo data stored on disk.
 *
 * Fixes relative to the first version:
 *  - targets are written to consecutive slots (they all landed in slot 0);
 *  - cells whose class has no sigma parameters are skipped instead of
 *    dereferencing a null list;
 *  - the antenna upload copies sizeof(SateState) per pulse, not sizeof(double);
 *  - the device echo buffer is zeroed before the kernels accumulate into it;
 *  - the host echo buffer is released;
 *  - the file read-modify-write is protected by a named critical section
 *    (a lock local to the call excluded nobody);
 *  - stepFreq no longer divides by zero for a single frequency point.
 *
 * @param startprfid  Index of the first pulse to simulate.
 * @param prfcount    Number of pulses to simulate; 0 returns immediately.
 * @param devId       CUDA device to run on.
 * @return            ErrorCode::SUCCESS.
 */
ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId)
{
    if (prfcount == 0) {
        return ErrorCode::SUCCESS; // nothing to simulate
    }

    /// Device memory use is not limited here.
    cudaSetDevice(devId); // make sure this thread drives the requested device

    POLARTYPEENUM polartype = this->TaskSetting->getPolarType();
    std::map<long, SigmaParam> clssigmaParamsDict = this->SigmaDatabasePtr->getsigmaParams(polartype);
    std::map<long, CUDASigmaParam> clsCUDASigmaParamsDict;
    for (const auto& pair : clssigmaParamsDict) {
        clsCUDASigmaParamsDict.insert(std::pair<long, CUDASigmaParam>(pair.first,
            CUDASigmaParam{
                pair.second.p1,
                pair.second.p2,
                pair.second.p3,
                pair.second.p4,
                pair.second.p5,
                pair.second.p6
            }));
    }

    // Input rasters
    gdalImage demxyz(this->demxyzPath);        // ground point coordinates
    gdalImage demlandcls(this->LandCoverPath); // land-cover classes
    gdalImage slpxyz(this->demsloperPath);     // ground slope vectors

    const long demRow = demxyz.height;
    const long demCol = demxyz.width;
    const size_t demCount = size_t(demRow) * size_t(demCol);

    std::shared_ptr<double> demX = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> demY = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> demZ = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> slpX = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> slpY = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> slpZ = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<long> clsArr = readDataArr<long>(demlandcls, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);

    // Count the DEM cells of every class that has sigma parameters.
    std::map<long, size_t> clsCountDict;
    for (const auto& pair : clssigmaParamsDict) {
        clsCountDict.insert(std::pair<long, size_t>(pair.first, 0));
    }
    for (size_t i = 0; i < demCount; i++) {
        auto countIt = clsCountDict.find(clsArr.get()[i]);
        if (countIt != clsCountDict.end()) {
            countIt->second = countIt->second + 1;
        }
    }

    // One host list per class that actually occurs in the scene.
    std::map<long, std::shared_ptr<GoalState>> clsGoalStateDict;
    for (const auto& pair : clsCountDict) {
        if (pair.second == 0) {
            continue; // avoid zero-byte allocations
        }
        clsGoalStateDict.insert(
            std::pair<long, std::shared_ptr<GoalState>>(
                pair.first,
                std::shared_ptr<GoalState>((GoalState*)mallocCUDAHost(sizeof(GoalState) * pair.second), FreeCUDAHost)));
    }

    // Fill the lists; clsFillDict holds the next free slot of every class.
    std::map<long, size_t> clsFillDict;
    for (size_t i = 0; i < demCount; i++) {
        const long clsid = clsArr.get()[i];
        auto goalIt = clsGoalStateDict.find(clsid);
        if (goalIt == clsGoalStateDict.end()) {
            continue; // class without sigma parameters: the cell does not scatter
        }
        size_t& slot = clsFillDict[clsid]; // value-initialised to 0 on first use
        GoalState& goal = goalIt->second.get()[slot];
        goal.Tx = demX.get()[i];
        goal.Ty = demY.get()[i];
        goal.Tz = demZ.get()[i];
        goal.TsX = slpX.get()[i];
        goal.TsY = slpY.get()[i];
        goal.TsZ = slpZ.get()[i];
        goal.cls = clsid;
        slot = slot + 1;
    }

    RFPCTask task;
    // Task parameters
    task.freqNum = this->EchoSimulationData->getPlusePoints();
    task.prfNum = long(prfcount);
    task.Rref = this->EchoSimulationData->getRefPhaseRange();
    task.Rnear = this->EchoSimulationData->getNearRange();
    task.Rfar = this->EchoSimulationData->getFarRange();
    task.startFreq = this->EchoSimulationData->getCenterFreq() - this->EchoSimulationData->getBandwidth() / 2;
    task.stepFreq = (task.freqNum > 1) ? this->EchoSimulationData->getBandwidth() / (task.freqNum - 1) : 0;

    const size_t freqCount = task.freqNum > 0 ? size_t(task.freqNum) : 0;
    const size_t echoCount = prfcount * freqCount;
    const size_t echoBytes = echoCount * sizeof(cuComplex);

    task.d_echoData = (cuComplex*)mallocCUDADevice(echoBytes, devId);
    // The kernel adds to this buffer, so it has to start from zero.
    cudaMemset(task.d_echoData, 0, echoBytes);

    // Antenna states of the requested pulses
    {
        std::shared_ptr<SatelliteAntPos> antplise = this->EchoSimulationData->getAntPosVelc();
        std::shared_ptr<SateState> h_antlist((SateState*)mallocCUDAHost(prfcount * sizeof(SateState)), FreeCUDAHost);

        for (size_t i = 0; i < prfcount; i++) {
            h_antlist.get()[i].Px = antplise.get()[i + startprfid].Px;
            h_antlist.get()[i].Py = antplise.get()[i + startprfid].Py;
            h_antlist.get()[i].Pz = antplise.get()[i + startprfid].Pz;
            h_antlist.get()[i].Vx = antplise.get()[i + startprfid].Vx;
            h_antlist.get()[i].Vy = antplise.get()[i + startprfid].Vy;
            h_antlist.get()[i].Vz = antplise.get()[i + startprfid].Vz;
        }

        task.antlist = (SateState*)mallocCUDADevice(prfcount * sizeof(SateState), devId);
        HostToDevice(h_antlist.get(), task.antlist, sizeof(SateState) * prfcount);
    }

    // One launch per class
    for (const auto& pair : clsCUDASigmaParamsDict) {
        const long clsid = pair.first;
        auto goalIt = clsGoalStateDict.find(clsid);
        if (goalIt == clsGoalStateDict.end()) {
            continue; // class does not occur in the scene
        }
        const size_t clscount = clsCountDict[clsid];
        PRINT("Process Class ID : %ld , Count: %llu\n", clsid, (unsigned long long)clscount);

        task.targetnum = long(clscount);
        task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
        HostToDevice(goalIt->second.get(), task.goallist, sizeof(GoalState) * clscount);
        task.sigma0_cls = pair.second;
        ProcessRFPCTask(task);
        FreeCUDADevice(task.goallist);
        task.goallist = nullptr;
    }

    // Fetch the result; the shared_ptr releases the host buffer on every path.
    std::shared_ptr<cuComplex> h_echoData((cuComplex*)mallocCUDAHost(echoBytes), FreeCUDAHost);
    DeviceToHost(h_echoData.get(), task.d_echoData, echoBytes);

    // File read-modify-write: one thread at a time across all devices.
#pragma omp critical (RFPCProcessCls_EchoFileAccess)
    {
        long prfcount_read = long(prfcount);
        std::shared_ptr<std::complex<double>> fileEchoArr = this->EchoSimulationData->getEchoArr(startprfid, prfcount_read);

        // NOTE(review): getEchoArr takes the length by reference; assume it may
        // shrink it, and never touch more rows than were actually returned.
        size_t rows = prfcount_read > 0 ? size_t(prfcount_read) : 0;
        if (rows > prfcount) {
            rows = prfcount;
        }

        for (size_t i = 0; i < rows; i++) {
            for (size_t j = 0; j < freqCount; j++) {
                const size_t k = i * freqCount + j;
                const std::complex<double> temp = fileEchoArr.get()[k];
                fileEchoArr.get()[k] = std::complex<double>(
                    temp.real() + h_echoData.get()[k].x,
                    temp.imag() + h_echoData.get()[k].y
                );
            }
        }

        this->EchoSimulationData->saveEchoArr(fileEchoArr, startprfid, prfcount_read);
    }

    FreeCUDADevice(task.d_echoData);
    FreeCUDADevice(task.antlist);

    return ErrorCode::SUCCESS;
}

View File

@ -79,6 +79,10 @@ private: //
ErrorCode InitEchoMaskArray(); ErrorCode InitEchoMaskArray();
//ErrorCode RFPCMainProcess(long num_thread); //ErrorCode RFPCMainProcess(long num_thread);
ErrorCode RFPCMainProcess_GPU(); ErrorCode RFPCMainProcess_GPU();
// Splits the pulses across all CUDA devices and runs RFPCMainProcess_GPU_NoAntPattern on each slice.
ErrorCode RFPCMainProcess_MultiGPU_NoAntPattern(); // multi-GPU processing, antenna pattern not considered
// Simulates prfcount pulses starting at startprfid on CUDA device devId and adds the result to the echo file.
ErrorCode RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount,int devId=0);
std::shared_ptr<SatelliteOribtNode[]> getSatelliteOribtNodes(double prf_time, double dt, bool antflag, long double imageStarttime); std::shared_ptr<SatelliteOribtNode[]> getSatelliteOribtNodes(double prf_time, double dt, bool antflag, long double imageStarttime);

View File

@ -226,6 +226,7 @@
<ClCompile Include="SimulationSAR\TBPImageAlgCls.cpp" /> <ClCompile Include="SimulationSAR\TBPImageAlgCls.cpp" />
<ClCompile Include="UnitTestMain.cpp" /> <ClCompile Include="UnitTestMain.cpp" />
<CudaCompile Include="GPUBpSimulation.cu" /> <CudaCompile Include="GPUBpSimulation.cu" />
<CudaCompile Include="Sigma0ClsReflect.cu" />
<CudaCompile Include="SimulationSAR\GPURFPC_single.cu" /> <CudaCompile Include="SimulationSAR\GPURFPC_single.cu" />
<CudaCompile Include="SimulationSAR\GPUTBPImage.cu" /> <CudaCompile Include="SimulationSAR\GPUTBPImage.cu" />
<QtMoc Include="PowerSimulationIncoherent\QSimulationSARPolynomialOrbitModel.h" /> <QtMoc Include="PowerSimulationIncoherent\QSimulationSARPolynomialOrbitModel.h" />

View File

@ -217,5 +217,8 @@
<CudaCompile Include="SimulationSAR\GPURFPC_single.cu"> <CudaCompile Include="SimulationSAR\GPURFPC_single.cu">
<Filter>SimulationSAR</Filter> <Filter>SimulationSAR</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="Sigma0ClsReflect.cu">
<Filter>SimulationSAR</Filter>
</CudaCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>