新增多卡支持

pull/13/head
陈增辉 2025-03-23 16:01:28 +08:00
parent 0006890b96
commit 8bd997f924
15 changed files with 660 additions and 14 deletions

View File

@ -339,6 +339,12 @@ inline long nextpow2(long n) {
return pow(2,en); return pow(2,en);
} }
/**
 * Releases a buffer that was obtained from malloc/calloc/realloc.
 *
 * The pointer is received by value, so the caller's own variable cannot be
 * reset from here; callers that keep the pointer around must null it
 * themselves after this call. Passing a null pointer is a no-op.
 *
 * @param a  Buffer to release (may be null).
 */
inline void releaseVoidArray(void* a)
{
    // The former `a = NULL;` only cleared the local copy of the pointer and
    // suggested a safety net that never existed, so it has been dropped.
    free(a);
}
#endif #endif

View File

@ -511,6 +511,19 @@ ErrorCode EchoL0Dataset::loadFromXml() {
return ErrorCode::SUCCESS; return ErrorCode::SUCCESS;
} }
/**
 * Reads the antenna position/velocity records for all pulses of this dataset.
 *
 * The records are loaded from GPSPointFilePath through
 * SatelliteAntPosOperator::readAntPosFile, asking for PluseCount entries.
 *
 * The read is wrapped in a named OpenMP critical section. The previous
 * implementation created a brand-new omp_lock_t on every call, which can
 * never block another thread because each caller locked its own private
 * lock.
 *
 * @return Shared array with the antenna records as returned by readAntPosFile.
 */
std::shared_ptr<SatelliteAntPos> EchoL0Dataset::getAntPosVelc()
{
    std::shared_ptr<SatelliteAntPos> antposlist;
    #pragma omp critical (EchoL0DatasetAntPosVelcRead)
    {
        long prfcount = this->PluseCount;
        antposlist = SatelliteAntPosOperator::readAntPosFile(this->GPSPointFilePath, prfcount);
    }
    return antposlist;
}
std::shared_ptr<double> EchoL0Dataset::getAntPos() std::shared_ptr<double> EchoL0Dataset::getAntPos()
{ {
omp_lock_t lock; omp_lock_t lock;

View File

@ -209,6 +209,7 @@ public: //
public: public:
// 读取文件 // 读取文件
std::shared_ptr< SatelliteAntPos> getAntPosVelc();
std::shared_ptr<double> getAntPos(); std::shared_ptr<double> getAntPos();
std::shared_ptr<std::complex<double>> getEchoArr(long startPRF, long& PRFLen); std::shared_ptr<std::complex<double>> getEchoArr(long startPRF, long& PRFLen);
std::shared_ptr<std::complex<double>> getEchoArr(); std::shared_ptr<std::complex<double>> getEchoArr();

View File

@ -32,6 +32,7 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="GPUTool\GPUBaseLibAPI.h" /> <ClInclude Include="GPUTool\GPUBaseLibAPI.h" />
<ClInclude Include="GPUTool\GPUBaseTool.h" />
<ClInclude Include="GPUTool\GPUDouble32.cuh" /> <ClInclude Include="GPUTool\GPUDouble32.cuh" />
<CudaCompile Include="GPUTool\GPUTool.cuh" /> <CudaCompile Include="GPUTool\GPUTool.cuh" />
</ItemGroup> </ItemGroup>
@ -40,6 +41,9 @@
<Project>{872ecd6f-30e3-4a1b-b17c-15e87d373ff6}</Project> <Project>{872ecd6f-30e3-4a1b-b17c-15e87d373ff6}</Project>
</ProjectReference> </ProjectReference>
</ItemGroup> </ItemGroup>
<ItemGroup>
<ClCompile Include="GPUTool\GPUBaseTool.cpp" />
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<VCProjectVersion>17.0</VCProjectVersion> <VCProjectVersion>17.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>
@ -180,7 +184,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;GPUBASELIB_API;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>NDEBUG;_CONSOLE;GPUBASELIB_API;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp14</LanguageStandard> <LanguageStandard>stdcpp14</LanguageStandard>
<LanguageStandard_C>stdc11</LanguageStandard_C> <LanguageStandard_C>stdc11</LanguageStandard_C>

View File

@ -35,5 +35,13 @@
<ClInclude Include="GPUTool\GPUDouble32.cuh"> <ClInclude Include="GPUTool\GPUDouble32.cuh">
<Filter>GPUTool</Filter> <Filter>GPUTool</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="GPUTool\GPUBaseTool.h">
<Filter>GPUTool</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="GPUTool\GPUBaseTool.cpp">
<Filter>GPUTool</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -0,0 +1,56 @@
#include "GPUBaseTool.h"
/**
 * Returns the size of an open file in bytes.
 *
 * The stream is positioned at the beginning of the file when the function
 * returns (the previous read/write position is NOT restored).
 *
 * @param fp  Open, seekable stream. A null pointer yields 0.
 * @return File length in bytes, or 0 when the length cannot be determined
 *         (null stream, failed seek, or ftell error).
 */
extern "C" size_t getfsize(FILE* fp)
{
    if (fp == NULL)
    {
        return 0;
    }
    long endpos = -1L;
    if (fseek(fp, 0L, SEEK_END) == 0)
    {
        // ftell reports -1 on failure; it must not be converted to size_t unchecked.
        endpos = ftell(fp);
    }
    fseek(fp, 0L, SEEK_SET);
    return endpos < 0 ? (size_t)0 : (size_t)endpos;
}
/**
 * Loads a whole file into a freshly malloc'ed byte buffer.
 *
 * @param binPath      Path of the file to read (opened in binary mode).
 * @param binpath_len  Out: number of bytes loaded. Set to 0 on every failure
 *                     path and only updated once the read has succeeded.
 * @return Buffer holding the file content, or NULL when an argument is null,
 *         the file cannot be opened, memory cannot be allocated, or fewer
 *         bytes than expected were read. The caller owns the buffer and must
 *         release it with free()/releaseVoidArray(). An empty file yields a
 *         valid (non-null) buffer with *binpath_len == 0.
 */
extern "C" unsigned char* loadBinFromPath(char* binPath, size_t* binpath_len)
{
    if (binpath_len != NULL)
    {
        *binpath_len = 0;
    }
    if (binPath == NULL || binpath_len == NULL)
    {
        fprintf(stderr, "loadBinFromPath: invalid argument\n");
        return NULL;
    }

    FILE* fd = fopen(binPath, "rb");
    if (NULL == fd)
    {
        perror("Failed to open file");
        return NULL;
    }

    const size_t f_len = getfsize(fd);
    // malloc(0) may legally return NULL, which would be misreported as an
    // out-of-memory condition for empty files, so always request >= 1 byte.
    unsigned char* buffer = (unsigned char*)malloc((f_len > 0 ? f_len : 1) * sizeof(unsigned char));
    if (buffer == NULL)
    {
        perror("Failed to allocate memory");
        fclose(fd);
        return NULL;
    }

    const size_t fread_count = fread(buffer, 1, f_len, fd);
    fclose(fd);
    if (fread_count != f_len)
    {
        fprintf(stderr, "Failed to read file: short read\n");
        releaseVoidArray(buffer);
        return NULL;
    }

    *binpath_len = f_len;
    return buffer;
}
extern "C" void writeComplexDataBinFile(char* dataPath, size_t datalen, cuComplex* data)
{
FILE* pd = fopen(dataPath, "w");
double* tempdata = (double*)malloc(datalen * 2 * sizeof(double));
for (long i = 0; i < datalen; i++)
{
tempdata[i * 2 + 0] = data[i].x;
tempdata[i * 2 + 1] = data[i].y;
}
fwrite(tempdata, sizeof(double), datalen * 2, pd);
fclose(pd);
}

View File

@ -0,0 +1,25 @@
#ifndef __GPUBASETOOL_H__
#define __GPUBASETOOL_H__
#include "GPUBaseLibAPI.h"
#include "BaseConstVariable.h"
#include <iostream>
#include <memory>
#include <complex>
#include "GPUTool.cuh"
// Returns the length in bytes of the open stream `fp`; the stream is left
// positioned at the start of the file.
extern "C" GPUBASELIBAPI size_t getfsize(FILE* fp);
// Reads the whole file at `binPath` into a malloc'ed buffer and stores its
// byte count in `*binpath_len`. Returns NULL on failure; the caller frees
// the returned buffer.
extern "C" GPUBASELIBAPI unsigned char* loadBinFromPath(char* binPath, size_t* binpath_len);
// Writes `datalen` complex samples to `dataPath` as interleaved doubles
// (x then y for each element).
extern "C" GPUBASELIBAPI void writeComplexDataBinFile(char* dataPath, size_t datalen, cuComplex* data);
/**
 * Copies `len` elements of `CPUArr` into a buffer obtained from
 * mallocCUDAHost and returns it wrapped in a shared_ptr whose deleter is
 * FreeCUDAHost.
 *
 * Elements are copied one by one with T's assignment operator.
 *
 * @param CPUArr  Source array holding at least `len` elements.
 * @param len     Number of elements to copy.
 * @return Shared pointer owning the newly allocated copy.
 */
template<typename T>
inline std::shared_ptr<T> CPUToHost(std::shared_ptr<T> CPUArr, size_t len) {
    T* hostBuffer = (T*)mallocCUDAHost(len * sizeof(T));
    std::shared_ptr<T> result(hostBuffer, FreeCUDAHost);
    const T* source = CPUArr.get();
    for (size_t k = 0; k != len; ++k) {
        hostBuffer[k] = source[k];
    }
    return result;
}
#endif // !__GPUBASETOOL_H__

View File

@ -16,11 +16,13 @@
#define LAMP_CUDA_PI 3.141592653589793238462643383279 #define LAMP_CUDA_PI 3.141592653589793238462643383279
// SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE = SHAREMEMORY_FLOAT_HALF // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE = SHAREMEMORY_FLOAT_HALF
/** CUDA 调用参数 ************************************************************************************/
#define BLOCK_SIZE 256 #define BLOCK_SIZE 256
#define SHAREMEMORY_BYTE 49152 #define SHAREMEMORY_BYTE 49152
#define SHAREMEMORY_FLOAT_HALF 6144 #define SHAREMEMORY_FLOAT_HALF 6144
#define SHAREMEMORY_FLOAT_HALF_STEP 24 #define SHAREMEMORY_FLOAT_HALF_STEP 24
// Number of GoalState entries held in the __shared__ tile of CUDA_Kernel_RFPC.
#define SHAREMEMORY_DEM_STEP 768
// NOTE(review): not referenced in the code visible here; 612 equals 102 * 6,
// the element count of the cls_sigma0 table - presumably related, confirm.
#define SHAREMEMORY_Reflect 612
@ -110,6 +112,13 @@ extern "C" GPUBASELIBAPI void CUDAIFFT(cuComplex* inArr, cuComplex* outArr, long
extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length); extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length);
extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, size_t len); extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, size_t len);
extern "C" GPUBASELIBAPI void HostCuComplexToshared_complexPtr(cuComplex* src, std::complex<double>* dst, size_t len); extern "C" GPUBASELIBAPI void HostCuComplexToshared_complexPtr(cuComplex* src, std::complex<double>* dst, size_t len);
#endif #endif
#endif #endif

View File

@ -0,0 +1,132 @@
#include "BaseConstVariable.h"
#include "GPURFPC.cuh"
const double cls_sigma0[102][6] = {
{0, 0, 0, 0, 0, 0}, // 0
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{28.15, -39.73, 0.0986, 2.863, 4.356, -6.279}, // 10
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{36.13, -48.2, 0.1299, -1.838, 5.404, -4.015},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{183.5, -194.6, 0.0167, 2.952, -4.1, 6.026},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{50.97, -62.9, 0.0968, 1.604, 4.637, 6.108},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{-10.31, 15.96, 37.73, -4.584, 4.997, -3.569},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{-12.45, 0.1561, -3.187, -2.482, 8.244, 0.3632},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{-19.23, 0.3623, -2.209, 9.649, 0.1292, -0.264},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0},
{56.61, -62.29, 0.01388, 2.767, -3.943, 5.995},
{0, 0, 0, 0, 0, 0} };
/** ²ÎÊý ***************************************************************/
extern "C" double* hostSigmaData_toDevice(int devid)
{
double* h_data = (double*)mallocCUDAHost(102 * 6 * sizeof(double));
double* d_data = (double*)mallocCUDADevice(102 * 6 * sizeof(double), devid);
printf("copy to ");
for (long i = 0; i < 102; i++)
{
printf(" %d ,", i);
for (long j = 0; j < 6; j++)
{
h_data[i * 6 + j] = cls_sigma0[i][j];
}
}
printf("host to device sigma data\n");
HostToDevice(h_data, d_data, 102 * 6 * sizeof(double));
FreeCUDAHost(h_data);
return d_data;
}

View File

@ -463,6 +463,128 @@ void CUDA_RFPC_MainProcess(
} }
/* Kernel ****************************************************************************************************************************/
/**
 * Accumulates the echo of a list of ground targets into the echo matrix.
 *
 * One thread handles one (pulse, frequency) sample: idx = prfid * Freqcount + freqid.
 * The target list is streamed through shared memory in tiles of
 * SHAREMEMORY_DEM_STEP entries that are loaded cooperatively by the block.
 *
 * @param antlist        Per-pulse antenna state (only Px/Py/Pz are read), PRFCount entries.
 * @param PRFCount       Number of pulses handled by this launch.
 * @param Freqcount      Number of frequency samples per pulse.
 * @param goallist       Targets (position, slope vector, class), demLen entries.
 * @param demLen         Number of targets.
 * @param StartFreqGHz   Frequency of sample 0 (the name says GHz; the value is passed through by the caller).
 * @param FreqStep       Frequency increment per sample.
 * @param refPhaseRange  Reference range subtracted before the phase is computed.
 * @param NearR, FarR    Range limits used for the contribution gate (see note in the loop).
 * @param clsSigma0      Six coefficients handed to GPU_getSigma0dB_params.
 * @param echodata       In/out: PRFCount * Freqcount samples; the result is ADDED to the existing content.
 *
 * Every thread of a block must reach both __syncthreads() calls and take part
 * in the tile load. Earlier the barriers lived inside the
 * `prfid < PRFCount` branch, so in the last (partially filled) block the
 * out-of-range threads skipped the barrier and never loaded their share of
 * the tile, leaving part of the shared tile stale for the active threads.
 */
__global__ void CUDA_Kernel_RFPC(
    SateState* antlist,
    long PRFCount, long Freqcount, // total number of pulses, frequency samples per pulse
    GoalState* goallist,
    long demLen,
    double StartFreqGHz, double FreqStep,
    double refPhaseRange,
    double NearR, double FarR,
    CUDASigmaParam clsSigma0,
    cuComplex* echodata
)
{
    __shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];

    const long threadid = threadIdx.x;
    const long idx = blockIdx.x * blockDim.x + threadIdx.x; // global sample index of this thread
    // Plain integer division: the former floorf() round-tripped the quotient
    // through a float and lost exactness above 2^24.
    const long prfid = idx / Freqcount;
    const long freqid = idx % Freqcount;
    const bool isActive = (prfid < PRFCount && freqid < Freqcount);

    SateState antPos = { 0, 0, 0, 0, 0, 0 };
    double factorjTemp = 0;
    if (isActive)
    {
        antPos = antlist[prfid];
        factorjTemp = RFPCPIDIVLIGHT * (StartFreqGHz + freqid * FreqStep);
    }

    const double fourPi = 4 * LAMP_CUDA_PI;
    const double fourPiSquared = fourPi * fourPi;
    cuComplex echo = make_cuComplex(0, 0);

    for (long startid = 0; startid < demLen; startid = startid + SHAREMEMORY_DEM_STEP)
    {
        __syncthreads(); // the previous tile has been fully consumed by every thread

        // Cooperative tile load; strided so it stays correct for any block size.
        for (long stid = threadid; stid < SHAREMEMORY_DEM_STEP; stid += blockDim.x)
        {
            const long ttid = startid + stid;
            if (ttid < demLen)
            {
                Ts[stid] = goallist[ttid];
            }
        }

        __syncthreads(); // the tile is completely loaded

        if (!isActive)
        {
            continue; // helper threads only load data; they still hit every barrier
        }

        const long remaining = demLen - startid;
        const long tileLen = remaining < SHAREMEMORY_DEM_STEP ? remaining : SHAREMEMORY_DEM_STEP;
        for (long tid = 0; tid < tileLen; tid++)
        {
            const GoalState p = Ts[tid];

            // Vector from the target to the antenna (T->P).
            const double Tx = antPos.Px - p.Tx;
            const double Ty = antPos.Py - p.Ty;
            const double Tz = antPos.Pz - p.Tz;
            const double R = sqrt(Tx * Tx + Ty * Ty + Tz * Tz);

            // NOTE(review): this flag is true when the target lies OUTSIDE
            // [NearR, FarR], and it is used as a multiplier below, so only
            // out-of-swath targets contribute. That looks inverted; the
            // original behaviour is kept here - please confirm the intent.
            const bool isNearFar = (R < NearR || R > FarR);

            // Angle between the line of sight and the slope vector.
            const double slopeNorm = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ);
            const double incAngle = acos((Tx * p.TsX + Ty * p.TsY + Tz * p.TsZ) / (R * slopeNorm));

            double amp = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3, clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle); // sigma0 in dB
            amp = pow(10.0, amp / 10.0); // dB -> linear
            // (4*pi)^2 * R^4, evaluated in double instead of powf() on float-truncated inputs.
            amp = amp / (fourPiSquared * R * R * R * R);

            const double phase = factorjTemp * (R - refPhaseRange);
            echo.x = echo.x + amp * cos(phase) * isNearFar;
            echo.y = echo.y + amp * sin(phase) * isNearFar;
        }
    }

    if (isActive)
    {
        echodata[idx] = cuCaddf(echodata[idx], echo);
    }
}
/** Block-wise processing ****************************************************************************************************************/
/**
 * Launches CUDA_Kernel_RFPC for one task and waits for it to finish.
 *
 * The grid covers task.prfNum * task.freqNum samples with BLOCK_SIZE threads
 * per block. Nothing is launched when there are no samples or no targets
 * (a zero-sized grid is an invalid launch configuration).
 *
 * @param task  Fully populated task; task.d_echoData is accumulated into.
 */
extern "C" void ProcessRFPCTask(RFPCTask& task)
{
    const long pixelcount = task.prfNum * task.freqNum;
    if (pixelcount <= 0 || task.targetnum <= 0)
    {
        return;
    }
    const long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // targetnum and grid_size are long, hence %ld.
    printf("start %ld ,%ld,%d\n", task.targetnum, grid_size, BLOCK_SIZE);

    CUDA_Kernel_RFPC << <grid_size, BLOCK_SIZE >> > (
        task.antlist,
        task.prfNum, task.freqNum,
        task.goallist,
        task.targetnum,
        task.startFreq, task.stepFreq,
        task.Rref, task.Rnear, task.Rfar,
        task.sigma0_cls,
        task.d_echoData
        );
    PrintLasterError("ProcessRFPCTask");

    // Errors raised while the kernel runs only surface at synchronisation.
    const cudaError_t syncState = cudaDeviceSynchronize();
    if (syncState != cudaSuccess)
    {
        printf("ProcessRFPCTask: kernel execution failed: %s\n", cudaGetErrorString(syncState));
    }
    printf("end %ld \n", task.targetnum);
}
#endif #endif

View File

@ -8,8 +8,25 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuComplex.h> #include <cuComplex.h>
/** CUDA call parameters ************************************************************************************/
#define RFPCPIDIVLIGHT -4*PI/(LIGHTSPEED/1e9) #define RFPCPIDIVLIGHT -4*PI/(LIGHTSPEED/1e9)
// Antenna state for one pulse. CUDA_Kernel_RFPC reads Px/Py/Pz as the
// antenna position; the fields are filled from the SatelliteAntPos records
// on the host. Vx/Vy/Vz are presumably the velocity components - confirm.
extern "C" struct SateState {
double Px, Py, Pz, Vx, Vy, Vz;
};
// One ground target as consumed by CUDA_Kernel_RFPC.
// Tx/Ty/Tz    : target position (host fills them from bands 1-3 of the DEM xyz raster).
// TsX/TsY/TsZ : slope vector of the target (bands 1-3 of the slope raster),
//               used for the incidence-angle computation.
// cls         : land-cover class id of the target.
extern "C" struct GoalState {
double Tx, Ty, Tz, TsX, TsY, TsZ;
size_t cls;
};
extern "C" struct CUDASigmaParam { extern "C" struct CUDASigmaParam {
double p1; double p1;
@ -20,7 +37,60 @@ extern "C" struct CUDASigmaParam {
double p6; double p6;
}; };
// Bundle of four raw double arrays describing terrain slope.
// NOTE(review): not used in the code visible here; ownership and memory
// space (host or device) of the pointers are not established - confirm.
extern "C" struct SloperDs
{
double* SloperX;
double* SloperY;
double* SloperZ;
double* SloperAngle;
};
// Bundle of three raw double arrays with DEM coordinates (one array per axis).
// NOTE(review): not used in the code visible here; ownership and memory
// space of the pointers are not established - confirm.
extern "C" struct DEMDs
{
double* demX;
double* demY;
double* demZ;
};
// Wrapper around a raw byte array of land data.
// NOTE(review): presumably one land-cover class byte per DEM cell; not used
// in the code visible here - confirm.
extern "C" struct LandDataDs
{
unsigned char* landData;
};
// Structure-of-arrays bundle with six raw double arrays (Pxs..Vzs).
// NOTE(review): presumably per-pulse position and velocity components; not
// used in the code visible here - confirm.
extern "C" struct GPSPointsDs
{
double* Pxs;
double* Pys;
double* Pzs;
double* Vxs;
double* Vys;
double* Vzs;
};
// Parameter bundle for one ProcessRFPCTask() call.
// All members now carry a default initialiser; before, only the pointers
// were nulled and the scalar members were indeterminate after
// default-construction.
extern "C" struct RFPCTask
{
    double startFreq = 0;              // frequency of sample 0, forwarded as StartFreqGHz
    double stepFreq = 0;               // frequency increment per sample
    long freqNum = 0;                  // frequency samples per pulse
    long prfNum = 0;                   // pulses handled by this task
    double Rnear = 0;                  // near range limit
    double Rfar = 0;                   // far range limit
    double Rref = 0;                   // reference range for the phase term

    SateState* antlist = nullptr;      // device array, prfNum entries
    GoalState* goallist = nullptr;     // device array, targetnum entries
    cuComplex* d_echoData = nullptr;   // device echo matrix, prfNum * freqNum samples

    CUDASigmaParam sigma0_cls = {};    // sigma0 coefficients of the class being processed
    long targetnum = 0;                // number of entries in goallist
};
extern __device__ double GPU_getSigma0dB_params( extern __device__ double GPU_getSigma0dB_params(
const double p1, const double p2, const double p3, const double p4, const double p5, const double p6, const double p1, const double p2, const double p3, const double p4, const double p5, const double p6,
@ -73,8 +143,8 @@ extern "C" void CUDA_RFPC_MainProcess(
// Copies the cls_sigma0 table to device `devid` and returns the device pointer.
extern "C" double* hostSigmaData_toDevice(int devid);
// Launches CUDA_Kernel_RFPC for `task` and blocks until the device is idle.
extern "C" void ProcessRFPCTask(RFPCTask& task);

View File

@ -1,6 +1,7 @@
 
#include "stdafx.h" #include "stdafx.h"
#include <complex> #include <complex>
#include "GPUBaseTool.h"
#include "RFPCProcessCls.h" #include "RFPCProcessCls.h"
#include "BaseConstVariable.h" #include "BaseConstVariable.h"
#include "SARSatelliteSimulationAbstractCls.h" #include "SARSatelliteSimulationAbstractCls.h"
@ -306,8 +307,8 @@ ErrorCode RFPCProcessCls::Process(long num_thread)
//return ErrorCode::SUCCESS; //return ErrorCode::SUCCESS;
stateCode = this->RFPCMainProcess_GPU(); //stateCode = this->RFPCMainProcess_GPU();
stateCode = this->RFPCMainProcess_MultiGPU_NoAntPattern();
if (stateCode != ErrorCode::SUCCESS) { if (stateCode != ErrorCode::SUCCESS) {
return stateCode; return stateCode;
} }
@ -542,17 +543,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
long freqnum_temp = freqnum; long freqnum_temp = freqnum;
float f0 = float(freqlist[0] / 1e9); float f0 = float(freqlist[0] / 1e9);
float dfreq = float((freqlist[1] - freqlist[0]) / 1e9); float dfreq = float((freqlist[1] - freqlist[0]) / 1e9);
qDebug() << "freqnum: " << freqnum << " f0: " << f0 << " dfreq: " << dfreq;
long PRFCount = this->EchoSimulationData->getPluseCount(); long PRFCount = this->EchoSimulationData->getPluseCount();
double NearRange = this->EchoSimulationData->getNearRange(); // 近斜距 double NearRange = this->EchoSimulationData->getNearRange(); // 近斜距
double FarRange = this->EchoSimulationData->getFarRange(); double FarRange = this->EchoSimulationData->getFarRange();
double Pt = this->TaskSetting->getPt() * this->TaskSetting->getGri();// 发射电压 1v double Pt = this->TaskSetting->getPt() * this->TaskSetting->getGri();// 发射电压 1v
double lamda = this->TaskSetting->getCenterLamda(); // 波长 double lamda = this->TaskSetting->getCenterLamda(); // 波长
double refphaseRange = this->TaskSetting->getRefphaseRange(); // 参考相位斜距 double refphaseRange = this->TaskSetting->getRefphaseRange(); // 参考相位斜距
double prf_time = 0; double prf_time = 0;
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔 double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
bool antflag = true; // 计算天线方向图 bool antflag = true; // 计算天线方向图
@ -935,6 +931,202 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
return ErrorCode::SUCCESS; return ErrorCode::SUCCESS;
} }
/**
 * Multi-GPU echo simulation without antenna pattern.
 *
 * The pulses are split into contiguous ranges, one per CUDA device, and each
 * range is processed by RFPCMainProcess_GPU_NoAntPattern on its own OpenMP
 * thread.
 *
 * @return ErrorCode::SUCCESS when every device finished successfully,
 *         otherwise the code reported by a failing device.
 */
ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
{
    int num_devices = 0;
    cudaGetDeviceCount(&num_devices);
    PRINT("GPU Count : %d \n", num_devices);
    // NOTE(review): with zero devices the division below faults. No failure
    // ErrorCode is visible from this part of the file, so the case is only
    // flagged here - return the project's error code before dividing.

    const long prfcount = this->EchoSimulationData->getPluseCount();
    const size_t totalPrf = prfcount > 0 ? static_cast<size_t>(prfcount) : 0;
    const size_t prfblockcount = (prfcount + num_devices + 2 - 1) / num_devices;

    double prf_time = 0;
    double dt = 1 / this->TaskSetting->getPRF(); // time between two pulses
    bool antflag = true;                         // compute the antenna pattern
    long double imageStarttime = this->TaskSetting->getSARImageStartTime();
    // NOTE(review): the orbit nodes are not used below; the call is kept
    // because it cannot be told from here whether it has side effects.
    std::shared_ptr<SatelliteOribtNode[]> sateOirbtNodes = this->getSatelliteOribtNodes(prf_time, dt, antflag, imageStarttime);

    ErrorCode overallState = ErrorCode::SUCCESS;

#pragma omp parallel for
    for (int devid = 0; devid < num_devices; devid++) {
        cudaSetDevice(devid); // bind this thread to its GPU
        // NOTE(review): zero-length call kept from the original; it looks
        // like a warm-up or a debugging leftover - confirm it is needed.
        this->RFPCMainProcess_GPU_NoAntPattern(0, 0, devid);

        const size_t startTid = devid * prfblockcount;
        if (startTid >= totalPrf) {
            // Surplus device: `prfcount - startTid` used to underflow as
            // size_t here and produced an enormous pulse count.
            continue;
        }
        const size_t remaining = totalPrf - startTid;
        const size_t prf_devLen = remaining < prfblockcount ? remaining : prfblockcount;

        const ErrorCode devState = this->RFPCMainProcess_GPU_NoAntPattern(startTid, prf_devLen, devid);
        if (devState != ErrorCode::SUCCESS) {
#pragma omp critical (RFPCMultiGPUStateUpdate)
            {
                overallState = devState;
            }
        }
    }

    return overallState;
}
/**
 * Simulates the echo of pulses [startprfid, startprfid + prfcount) on one
 * GPU (no antenna pattern) and adds the result to the echo file.
 *
 * Steps: read the DEM, slope and land-cover rasters, group the targets by
 * land-cover class, upload the antenna states of the pulse range, run one
 * kernel pass per class, then merge the device result into the echo file.
 *
 * Fixes relative to the first version:
 *  - targets were all written to slot 0 of their class array (the running
 *    counter was a local copy), and a class id without sigma parameters
 *    dereferenced a null array;
 *  - the antenna upload copied sizeof(double) bytes per pulse instead of
 *    sizeof(SateState);
 *  - the device echo buffer was accumulated into without being zeroed;
 *  - the host echo buffer was never released;
 *  - the file read-modify-write was guarded by a lock created per call,
 *    which cannot exclude other threads.
 *
 * @param startprfid  Index of the first pulse to process.
 * @param prfcount    Number of pulses to process; 0 returns immediately.
 * @param devId       CUDA device to run on.
 * @return ErrorCode::SUCCESS.
 */
ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId)
{
    // Video memory is assumed to be sufficient (no tiling by memory size).
    cudaSetDevice(devId); // make sure this thread drives the requested GPU

    if (prfcount == 0) {
        return ErrorCode::SUCCESS; // nothing to simulate for an empty pulse range
    }

    POLARTYPEENUM polartype = this->TaskSetting->getPolarType();
    std::map<long, SigmaParam> clssigmaParamsDict = this->SigmaDatabasePtr->getsigmaParams(polartype);
    std::map<long, CUDASigmaParam> clsCUDASigmaParamsDict;
    for (const auto& pair : clssigmaParamsDict) {
        clsCUDASigmaParamsDict.insert(std::pair<long, CUDASigmaParam>(pair.first,
            CUDASigmaParam{
                pair.second.p1,
                pair.second.p2,
                pair.second.p3,
                pair.second.p4,
                pair.second.p5,
                pair.second.p6
            }));
    }

    // Input rasters
    gdalImage demxyz(this->demxyzPath);        // ground point coordinates
    gdalImage demlandcls(this->LandCoverPath); // land-cover class
    gdalImage slpxyz(this->demsloperPath);     // ground slope vector

    // Ground coordinates
    long demRow = demxyz.height;
    long demCol = demxyz.width;
    size_t demCount = size_t(demRow) * size_t(demCol);

    std::shared_ptr<double> demX = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> demY = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> demZ = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> slpX = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> slpY = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<double> slpZ = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
    std::shared_ptr<long> clsArr = readDataArr<long>(demlandcls, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);

    // Count the targets of every class that has sigma parameters.
    std::map<long, size_t> clsCountDict;
    for (const auto& pair : clssigmaParamsDict) {
        clsCountDict.insert(std::pair<long, size_t>(pair.first, 0));
    }
    for (size_t i = 0; i < demCount; i++) {
        auto countIt = clsCountDict.find(clsArr.get()[i]);
        if (countIt != clsCountDict.end()) {
            countIt->second = countIt->second + 1;
        }
    }

    // One host array per non-empty class.
    std::map<long, std::shared_ptr<GoalState>> clsGoalStateDict;
    for (const auto& pair : clsCountDict) {
        if (pair.second == 0) {
            continue;
        }
        clsGoalStateDict.insert(
            std::pair<long, std::shared_ptr<GoalState>>(
                pair.first,
                std::shared_ptr<GoalState>((GoalState*)mallocCUDAHost(sizeof(GoalState) * pair.second), FreeCUDAHost)));
    }

    // Scatter the targets into their class arrays; the cursor is the next free slot.
    std::map<long, size_t> clsWriteCursor;
    for (size_t i = 0; i < demCount; i++) {
        const long clsid = clsArr.get()[i];
        auto goalIt = clsGoalStateDict.find(clsid);
        if (goalIt == clsGoalStateDict.end()) {
            continue; // class without sigma parameters: not simulated
        }
        size_t& cursor = clsWriteCursor[clsid];
        GoalState& goal = goalIt->second.get()[cursor];
        goal.Tx = demX.get()[i];
        goal.Ty = demY.get()[i];
        goal.Tz = demZ.get()[i];
        goal.TsX = slpX.get()[i];
        goal.TsY = slpY.get()[i];
        goal.TsZ = slpZ.get()[i];
        goal.cls = static_cast<size_t>(clsid);
        cursor = cursor + 1;
    }

    // Task parameters
    RFPCTask task;
    task.freqNum = this->EchoSimulationData->getPlusePoints();
    task.prfNum = static_cast<long>(prfcount);
    task.Rref = this->EchoSimulationData->getRefPhaseRange();
    task.Rnear = this->EchoSimulationData->getNearRange();
    task.Rfar = this->EchoSimulationData->getFarRange();
    task.startFreq = this->EchoSimulationData->getCenterFreq() - this->EchoSimulationData->getBandwidth() / 2;
    // A single frequency sample has no step; avoid dividing by zero.
    task.stepFreq = task.freqNum > 1 ? this->EchoSimulationData->getBandwidth() / (task.freqNum - 1) : 0.0;

    const size_t echoCount = prfcount * static_cast<size_t>(task.freqNum);
    const size_t echoBytes = echoCount * sizeof(cuComplex);
    task.d_echoData = (cuComplex*)mallocCUDADevice(echoBytes, devId);
    // The kernel ADDS to this buffer, so it has to start from zero.
    cudaMemset(task.d_echoData, 0, echoBytes);

    // Antenna positions
    {
        std::shared_ptr<SatelliteAntPos> antplise = this->EchoSimulationData->getAntPosVelc();
        std::shared_ptr<SateState> h_antlist((SateState*)mallocCUDAHost(prfcount * sizeof(SateState)), FreeCUDAHost);
        // NOTE(review): assumes startprfid + prfcount does not exceed the
        // number of records returned by getAntPosVelc() - confirm at the caller.
        for (size_t i = 0; i < prfcount; i++) {
            const SatelliteAntPos& src = antplise.get()[i + startprfid];
            SateState& dst = h_antlist.get()[i];
            dst.Px = src.Px;
            dst.Py = src.Py;
            dst.Pz = src.Pz;
            dst.Vx = src.Vx;
            dst.Vy = src.Vy;
            dst.Vz = src.Vz;
        }
        task.antlist = (SateState*)mallocCUDADevice(prfcount * sizeof(SateState), devId);
        // Whole SateState records (6 doubles each), not one double per pulse.
        HostToDevice(h_antlist.get(), task.antlist, sizeof(SateState) * prfcount);
    }

    // One kernel pass per class
    for (const auto& pair : clsCUDASigmaParamsDict) {
        const long clsid = pair.first;
        auto countIt = clsCountDict.find(clsid);
        const size_t clscount = (countIt != clsCountDict.end()) ? countIt->second : 0;
        PRINT("Process Class ID : %ld , Count: %zu\n", clsid, clscount);
        auto goalIt = clsGoalStateDict.find(clsid);
        if (clscount == 0 || goalIt == clsGoalStateDict.end()) {
            continue; // no target of this class in the scene
        }

        task.targetnum = static_cast<long>(clscount);
        task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
        HostToDevice(goalIt->second.get(), task.goallist, sizeof(GoalState) * clscount);
        task.sigma0_cls = pair.second;
        ProcessRFPCTask(task);
        FreeCUDADevice(task.goallist);
        task.goallist = nullptr;
    }

    // Fetch the result; the buffer is released automatically by the deleter.
    std::shared_ptr<cuComplex> h_echoData((cuComplex*)mallocCUDAHost(echoBytes), FreeCUDAHost);
    DeviceToHost(h_echoData.get(), task.d_echoData, echoBytes);

    // File read-modify-write: a named critical section is shared by all
    // threads, so only one device merges its result at a time.
    #pragma omp critical (RFPCEchoFileReadModifyWrite)
    {
        long prfcount_read = static_cast<long>(prfcount);
        std::shared_ptr<std::complex<double>> fileEchoArr = this->EchoSimulationData->getEchoArr(startprfid, prfcount_read);
        for (size_t k = 0; k < echoCount; k++) {
            const std::complex<double> temp = fileEchoArr.get()[k];
            fileEchoArr.get()[k] = std::complex<double>(
                temp.real() + h_echoData.get()[k].x,
                temp.imag() + h_echoData.get()[k].y
            );
        }
        this->EchoSimulationData->saveEchoArr(fileEchoArr, startprfid, prfcount_read);
    }

    FreeCUDADevice(task.d_echoData);
    FreeCUDADevice(task.antlist);

    return ErrorCode::SUCCESS;
}

View File

@ -79,6 +79,10 @@ private: //
ErrorCode InitEchoMaskArray(); ErrorCode InitEchoMaskArray();
//ErrorCode RFPCMainProcess(long num_thread); //ErrorCode RFPCMainProcess(long num_thread);
ErrorCode RFPCMainProcess_GPU(); ErrorCode RFPCMainProcess_GPU();
ErrorCode RFPCMainProcess_MultiGPU_NoAntPattern(); // 多GPU处理,不考虑天线方向图
ErrorCode RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount,int devId=0);
std::shared_ptr<SatelliteOribtNode[]> getSatelliteOribtNodes(double prf_time, double dt, bool antflag, long double imageStarttime); std::shared_ptr<SatelliteOribtNode[]> getSatelliteOribtNodes(double prf_time, double dt, bool antflag, long double imageStarttime);

View File

@ -226,6 +226,7 @@
<ClCompile Include="SimulationSAR\TBPImageAlgCls.cpp" /> <ClCompile Include="SimulationSAR\TBPImageAlgCls.cpp" />
<ClCompile Include="UnitTestMain.cpp" /> <ClCompile Include="UnitTestMain.cpp" />
<CudaCompile Include="GPUBpSimulation.cu" /> <CudaCompile Include="GPUBpSimulation.cu" />
<CudaCompile Include="Sigma0ClsReflect.cu" />
<CudaCompile Include="SimulationSAR\GPURFPC_single.cu" /> <CudaCompile Include="SimulationSAR\GPURFPC_single.cu" />
<CudaCompile Include="SimulationSAR\GPUTBPImage.cu" /> <CudaCompile Include="SimulationSAR\GPUTBPImage.cu" />
<QtMoc Include="PowerSimulationIncoherent\QSimulationSARPolynomialOrbitModel.h" /> <QtMoc Include="PowerSimulationIncoherent\QSimulationSARPolynomialOrbitModel.h" />

View File

@ -217,5 +217,8 @@
<CudaCompile Include="SimulationSAR\GPURFPC_single.cu"> <CudaCompile Include="SimulationSAR\GPURFPC_single.cu">
<Filter>SimulationSAR</Filter> <Filter>SimulationSAR</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="Sigma0ClsReflect.cu">
<Filter>SimulationSAR</Filter>
</CudaCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>