调试回波仿真

pull/14/head
陈增辉 2025-03-31 02:14:22 +08:00
parent 1b373e6de1
commit 997471cba1
5 changed files with 233 additions and 293 deletions

View File

@ -1153,7 +1153,7 @@ int ResampleGDAL(const char* pszSrcFile, const char* pszOutFile, double* gt, int
GDALWarpOptions* psWo = GDALCreateWarpOptions();
CPLSetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS"); // 使用所有可用的CPU核心
CPLSetConfigOption("GDAL_CACHEMAX", "16000"); // 设置缓存大小为500MB
CPLSetConfigOption("GDAL_CACHEMAX", "4000"); // 设置缓存大小为500MB
// psWo->papszWarpOptions = CSLDuplicate(NULL);
psWo->eWorkingDataType = dataType;
psWo->eResampleAlg = eResample;

View File

@ -110,7 +110,7 @@ __global__ void processPulseKernel(
im_final[idx].x += phCorr.x;
im_final[idx].y += phCorr.y;
//printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, nR);
if (abs(phCorr.x) > 1e-100 || abs(phCorr.y > 1e-100)) {
//if (abs(phCorr.x) > 1e-100 || abs(phCorr.y > 1e-100)) {
//printf(
// "[DEBUG] prfid=%-4ld | idx=%-8lld\n"
// " Ant: X=%-18.10e Y=%-18.10e Z=%-18.10e\n"
@ -133,7 +133,7 @@ __global__ void processPulseKernel(
// phCorr.x, phCorr.y,
// im_final[idx].x, im_final[idx].y
//);
}
//}
}
void bpBasic0CUDA(GPUDATA& data, int flag,double* h_R) {

View File

@ -519,12 +519,12 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
RstY = RstY / RstR;
RstZ = RstZ / RstR;
double slopeX = gp.TsX;
double slopeY = gp.TsY;
double slopeZ = gp.TsZ;
float slopeX = gp.TsX;
float slopeY = gp.TsY;
float slopeZ = gp.TsZ;
double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
if (abs(slopR - 0) > 1e-3) {
float slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
if (slopR > 1e-3) {
float localangle = acosf((RstX * slopeX + RstY * slopeY + RstZ * slopeZ) / ( slopR));
@ -553,10 +553,11 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
ampGain=2 * maxGain * (1 - (powf(diectAngle,2) / 6)
+ (powf(diectAngle, 4) / 120)
- (powf(diectAngle, 6) / 5040)); //dB
ampGain = powf(10.0, ampGain / 10.0);
ampGain = ampGain / (PI4POW2 * powf(RstR, 4)); // 反射强度
double sigma = GPU_getSigma0dB(sigma0Params, localangle);
float sigma = GPU_getSigma0dB(sigma0Params, localangle);
sigma = powf(10.0, sigma / 10.0);
double temp_amp = double(ampGain * Pt * sigma);
@ -573,76 +574,6 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
}
}
/**
 * Accumulates the contribution of one batch of scatterers into the echo buffer.
 *
 * Thread mapping: each thread derives a (pulse, frequency) pair from its flat
 * global index: prfId = idx / FreqPoints, fId = idx % FreqPoints.
 *
 * Stage 1: every thread copies SHAREMEMORY_FLOAT_HALF_STEP consecutive entries
 *          of row prfId of d_temp_R / d_temp_amps (row stride posNum) into the
 *          block-shared arrays, narrowing double -> float. Slots with no valid
 *          source element are zero-filled, so they add nothing in stage 2.
 * Stage 2: every in-range thread sums amp * exp(j * R * factor) over all
 *          SHAREMEMORY_FLOAT_HALF shared entries and adds the result to
 *          echodata[prfId * maxfreqnum + fId].
 *
 * Launch requirements visible from the code:
 *   - 1D grid and 1D block.
 *   - static shared memory: 2 * SHAREMEMORY_FLOAT_HALF floats per block.
 *   - SHAREMEMORY_FLOAT_HALF_STEP * blockDim.x should equal
 *     SHAREMEMORY_FLOAT_HALF so that the whole shared array is populated.
 *
 * NOTE(review): the shared arrays are filled using each thread's own prfId, so
 * the result is only meaningful when all threads of a block share one prfId
 * (presumably FreqPoints is a multiple of blockDim.x) -- confirm at the caller.
 * NOTE(review): range and phase are held in float; for large R * factor the
 * phase may lose precision -- confirm this is acceptable for the use case.
 *
 * @param d_temp_R        per-pulse rows of ranges, row stride posNum
 * @param d_temp_amps     per-pulse rows of amplitudes, row stride posNum
 * @param posNum          number of valid entries per row
 * @param f0              start frequency
 * @param dfreq           frequency step
 * @param FreqPoints      frequency points per pulse used for the index split
 * @param maxfreqnum      upper bound for fId and row stride of echodata
 * @param echodata        output echo buffer, accumulated in place
 * @param temp_PRF_Count  number of pulses; threads with prfId beyond it are idle
 */
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
    double* d_temp_R, double* d_temp_amps, long posNum,
    double f0, double dfreq,
    long FreqPoints, // frequency points in the current chunk
    long maxfreqnum, // frequency points per pulse in echodata
    cuComplex* echodata,
    long temp_PRF_Count
) {
    // One copy of these arrays is shared by every thread of the block.
    __shared__ float s_R[SHAREMEMORY_FLOAT_HALF];
    __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];

    const long long tid = threadIdx.x;
    const long long bid = blockIdx.x;
    const long long idx = bid * blockDim.x + tid;
    const long long prfId = idx / FreqPoints; // pulse ID
    const long long fId = idx % FreqPoints;   // frequency ID

    // Tail threads of the grid can map to a pulse that does not exist; they
    // must not touch global memory but still have to zero their shared slots.
    const bool prfInRange = prfId < temp_PRF_Count;

    for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) {
        const long long psid = tid * SHAREMEMORY_FLOAT_HALF_STEP + ii;
        if (psid >= SHAREMEMORY_FLOAT_HALF) {
            // Never write past the shared arrays, even if the launch used more
            // threads than SHAREMEMORY_FLOAT_HALF / SHAREMEMORY_FLOAT_HALF_STEP.
            break;
        }
        if (prfInRange && psid < posNum) {
            const long long pixelId = prfId * posNum + psid;
            s_R[psid] = static_cast<float>(d_temp_R[pixelId]);
            s_amp[psid] = static_cast<float>(d_temp_amps[pixelId]);
        }
        else {
            s_R[psid] = 0;
            s_amp[psid] = 0;
        }
    }

    // Reached by every thread of the block: all shared data is loaded before use.
    __syncthreads();

    if (fId < maxfreqnum && prfInRange) {
        // 64-bit index: `long` is only 32 bits wide on LLP64 platforms.
        const long long echo_ID = prfId * maxfreqnum + fId; // position in echodata
        const float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
        cuComplex echo = make_cuComplex(0, 0);

        for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
            const float temp_phi = s_R[dataid] * factorjTemp;
            const float temp_amp = s_amp[dataid];
            echo.x += (temp_amp * cosf(temp_phi));
            echo.y += (temp_amp * sinf(temp_phi));
        }

        echodata[echo_ID] = cuCaddf(echodata[echo_ID], echo);
    }
}
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
double* d_temp_R, double* d_temp_amps, long posNum,
double f0, double dfreq,
@ -652,15 +583,15 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
long temp_PRF_Count
) {
// 使用动态共享内存,根据线程块大小调整
extern __shared__ float s_data[];
float* s_R = s_data;
float* s_amp = s_data + blockDim.x;
extern __shared__ double s_data[];
double* s_R = s_data;
double* s_amp = s_data + blockDim.x;
const int tid = threadIdx.x;
const int prfId = blockIdx.x;
const int fId = tid; // 每个线程处理一个频率点
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
double factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
cuComplex echo = make_cuComplex(0.0f, 0.0f);
// 分块加载数据并计算
@ -670,8 +601,8 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
// 加载当前块到共享内存
if (psid < posNum) {
s_R[tid] = static_cast<float>(d_temp_R[pixelId]);
s_amp[tid] = static_cast<float>(d_temp_amps[pixelId]);
s_R[tid] = static_cast<double>(d_temp_R[pixelId]);
s_amp[tid] = static_cast<double>(d_temp_amps[pixelId]);
}
else {
s_R[tid] = 0.0f;
@ -681,7 +612,7 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
// 计算当前块的贡献
for (int dataid = 0; dataid < blockDim.x; ++dataid) {
float temp_phi = s_R[dataid] * factorjTemp;
float temp_phi =fmod( s_R[dataid] * factorjTemp,2*PI);
float temp_amp = s_amp[dataid];
float sin_phi, cos_phi;
sincosf(temp_phi, &sin_phi, &cos_phi);
@ -743,7 +674,7 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
dim3 blocks(task.prfNum);
dim3 threads(BLOCK_SIZE);
size_t shared_mem_size = 2 * BLOCK_SIZE * sizeof(float);
size_t shared_mem_size = 2 * BLOCK_SIZE * sizeof(double);
CUDA_Kernel_Computer_echo_NoAntPattern_Optimized << <blocks, threads, shared_mem_size >> > (
d_R, d_amps, SHAREMEMORY_FLOAT_HALF,
@ -761,11 +692,11 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
// task.d_echoData,
// task.prfNum
// );
//PrintLasterError("CUDA_Kernel_Computer_echo");
PrintLasterError("CUDA_Kernel_Computer_echo");
cudaDeviceSynchronize();
if ((sTi * 100.0 / task.targetnum) - process >= 1) {
if ((sTi * 100.0 / task.targetnum) - process >= 10) {
process = sTi * 100.0 / task.targetnum;
PRINT("TargetID [%f]: %d / %d finished %d\n", sTi * 100.0 / task.targetnum, sTi, task.targetnum,devid);
PRINT("device ID : %d , TargetID [%f]: %d / %d finished %d\n",devid, sTi * 100.0 / task.targetnum, sTi, task.targetnum,devid);
}
}

View File

@ -91,7 +91,7 @@ extern "C" struct RFPCTask
cuComplex* d_echoData = nullptr; // »Ø²¨
CUDASigmaParam sigma0_cls;
double maxGain=48;
double GainWeight=20; // 2śČˇśÎ§
double GainWeight=10; // 2śČˇśÎ§
size_t targetnum;

View File

@ -223,12 +223,12 @@ RFPCProcessCls::RFPCProcessCls()
this->PlusePoint = 0;
this->TaskSetting = nullptr;
this->EchoSimulationData = nullptr;
this->LandCoverPath = "";
this->OutEchoPath = "";
this->LandCoverPath = "";
this->OutEchoPath = "";
this->LandCoverPath.clear();
this->OutEchoPath.clear();
this->OutEchoPath.clear();
this->SigmaDatabasePtr = std::shared_ptr<SigmaDatabase>(new SigmaDatabase);
}
@ -286,7 +286,7 @@ ErrorCode RFPCProcessCls::Process(long num_thread)
return stateCode;
}
else {}
qDebug() << "RFPCMainProcess";
//return ErrorCode::SUCCESS;
stateCode = this->InitEchoMaskArray();
@ -299,7 +299,7 @@ ErrorCode RFPCProcessCls::Process(long num_thread)
//stateCode = this->RFPCMainProcess(num_thread);
// 初始化回波
this->EchoSimulationData->initEchoArr(std::complex<double>(0, 0));
//return ErrorCode::SUCCESS;
@ -339,7 +339,7 @@ ErrorCode RFPCProcessCls::InitParams()
this->PlusePoint = freqnum;// ceil((this->TaskSetting->getFarRange() - this->TaskSetting->getNearRange()) / LIGHTSPEED * 2 * this->TaskSetting->getBandWidth());
this->TaskSetting->setFarRange(this->TaskSetting->getNearRange() + (this->PlusePoint-1) * drange);
this->TaskSetting->setFarRange(this->TaskSetting->getNearRange() + (this->PlusePoint - 1) * drange);
//ceil(rangeTimeSample * this->TaskSetting->getFs());
@ -363,7 +363,7 @@ ErrorCode RFPCProcessCls::InitParams()
this->tmpfolderPath = tmpfolderPath;
return ErrorCode::SUCCESS;
}
ErrorCode RFPCProcessCls::InitEchoMaskArray()
{
QString name = this->EchoSimulationData->getSimulationTaskName();
@ -454,9 +454,9 @@ std::shared_ptr<SatelliteOribtNode[]> RFPCProcessCls::getSatelliteOribtNodes(dou
void RFPCProcessMain(long num_thread,
QString TansformPatternFilePath, QString ReceivePatternFilePath,
QString simulationtaskName, QString OutEchoPath,
QString GPSXmlPath, QString TaskXmlPath,QString demTiffPath, QString sloperPath, QString LandCoverPath)
QString TansformPatternFilePath, QString ReceivePatternFilePath,
QString simulationtaskName, QString OutEchoPath,
QString GPSXmlPath, QString TaskXmlPath, QString demTiffPath, QString sloperPath, QString LandCoverPath)
{
std::shared_ptr < AbstractSARSatelliteModel> task = ReadSimulationSettingsXML(TaskXmlPath);
@ -563,7 +563,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
double* h_TantPattern = nullptr;
double* d_TantPattern = nullptr;
double maxTransAntPatternValue = 0;
{
// 处理发射天线方向图
double Tminphi = TransformPattern->getMinPhi();
@ -631,7 +631,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
h_RantPattern = (double*)mallocCUDAHost(sizeof(double) * Rthetanum * Rphinum);
d_RantPattern = (double*)mallocCUDADevice(sizeof(double) * Rthetanum * Rphinum);
for (long i = 0; i < Rthetanum; i++) {
for (long j = 0; j < Rphinum; j++) {
//h_RantPattern[i * Rphinum + j] = ReceivePattern->getGainLearThetaPhi(RstartTheta + i * Rdtheta, RstartPhi + j * Rdphi);
@ -666,8 +666,8 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
gdalImage demlandcls(this->LandCoverPath);// 地表覆盖类型
gdalImage demsloperxyz(this->demsloperPath);// 地面坡向
long demRow = demxyz.height;
long demCol = demxyz.width;
long demRow = demxyz.height;
long demCol = demxyz.width;
//处理地表覆盖
@ -676,7 +676,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
long startline = 0;
{
long blokline = getBlockRows(2e4, demCol, sizeof(double),demRow);
long blokline = getBlockRows(2e4, demCol, sizeof(double), demRow);
for (startline = 0; startline < demRow; startline = startline + blokline) {
Eigen::MatrixXd clsland = demlandcls.getData(startline, 0, blokline, demlandcls.width, 1);
long clsrows = clsland.rows();
@ -694,9 +694,9 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
}
}
qDebug() << "class id recoding" ;
qDebug() << "class id recoding";
for (long id : clamap.keys()) {
qDebug() << id << " -> " << clamap[id] ;
qDebug() << id << " -> " << clamap[id];
}
}
@ -716,15 +716,15 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
}
// 打印日志
qDebug() << "sigma params:" ;
qDebug() << "classid:\tp1\tp2\tp3\tp4\tp5\tp6" ;
qDebug() << "sigma params:";
qDebug() << "classid:\tp1\tp2\tp3\tp4\tp5\tp6";
for (long ii = 0; ii < clamapid; ii++) {
qDebug() << ii << ":\t" << h_clsSigmaParam[ii].p1;
qDebug() << "\t" << h_clsSigmaParam[ii].p2;
qDebug() << "\t" << h_clsSigmaParam[ii].p3;
qDebug() << "\t" << h_clsSigmaParam[ii].p4;
qDebug() << "\t" << h_clsSigmaParam[ii].p5;
qDebug() << "\t" << h_clsSigmaParam[ii].p6 ;
qDebug() << "\t" << h_clsSigmaParam[ii].p6;
}
qDebug() << "";
}
@ -733,39 +733,39 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
qDebug() << "CUDA class Proces finished!!!";
// 处理地面坐标
long blockline = getBlockRows(TargetMemoryMB, demCol, sizeof(double), demRow);
double* h_dem_x = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_dem_y = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_dem_z = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_demsloper_x = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_demsloper_y = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_demsloper_z = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
long* h_demcls = (long*)mallocCUDAHost(sizeof(long) * blockline * demCol);
long blockline = getBlockRows(TargetMemoryMB, demCol, sizeof(double), demRow);
double* h_dem_x = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_dem_y = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_dem_z = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_demsloper_x = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_demsloper_y = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
double* h_demsloper_z = (double*)mallocCUDAHost(sizeof(double) * blockline * demCol);
long* h_demcls = (long*)mallocCUDAHost(sizeof(long) * blockline * demCol);
double* d_dem_x = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_dem_y = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_dem_z = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_demsloper_x = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_demsloper_y = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_demsloper_z = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
long* d_demcls = (long*) mallocCUDADevice(sizeof(long) * blockline * demCol);
double* d_dem_x = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_dem_y = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_dem_z = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_demsloper_x = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_demsloper_y = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_demsloper_z = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
long* d_demcls = (long*)mallocCUDADevice(sizeof(long) * blockline * demCol);
/** 处理回波***************************************************/
long echo_block_rows = getBlockRows(1000, freqnum, sizeof(float)*2, PRFCount);
float* h_echo_block_real = (float*)mallocCUDAHost(sizeof(float) * echo_block_rows * freqnum);
long echo_block_rows = getBlockRows(1000, freqnum, sizeof(float) * 2, PRFCount);
float* h_echo_block_real = (float*)mallocCUDAHost(sizeof(float) * echo_block_rows * freqnum);
float* h_echo_block_imag = (float*)mallocCUDAHost(sizeof(float) * echo_block_rows * freqnum);
float* d_echo_block_real = (float*)mallocCUDADevice(sizeof(float) * echo_block_rows * freqnum);
float* d_echo_block_imag = (float*)mallocCUDADevice(sizeof(float) * echo_block_rows * freqnum);
float* d_temp_R = (float*)mallocCUDADevice(sizeof(float) * echo_block_rows * SHAREMEMORY_FLOAT_HALF); //2GB 距离
float* d_temp_R = (float*)mallocCUDADevice(sizeof(float) * echo_block_rows * SHAREMEMORY_FLOAT_HALF); //2GB 距离
float* d_temp_amp = (float*)mallocCUDADevice(sizeof(float) * echo_block_rows * SHAREMEMORY_FLOAT_HALF);//2GB 强度
@ -783,16 +783,16 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
qDebug() << "freqnum: " << freqnum << " f0: " << f0 << " dfreq: " << dfreq << "freqnum_temp: " << freqnum_temp;
qDebug() << "Start PRF: " << sprfid << "\t-\t" << sprfid + PRF_len << "\t:copy echo data list host -> GPU";
std::shared_ptr<std::complex<double>> echo_temp = this->EchoSimulationData->getEchoArr(sprfid, PRF_len);
for (long ii = 0; ii < PRF_len; ii++) {
for (long jj = 0; jj < freqnum; jj++) {
h_echo_block_real[ii * freqnum + jj]=echo_temp.get()[ii * freqnum + jj].real();
h_echo_block_imag[ii * freqnum + jj]=echo_temp.get()[ii * freqnum + jj].imag();
h_echo_block_real[ii * freqnum + jj] = echo_temp.get()[ii * freqnum + jj].real();
h_echo_block_imag[ii * freqnum + jj] = echo_temp.get()[ii * freqnum + jj].imag();
}
}
HostToDevice(h_echo_block_real, d_echo_block_real, sizeof(float) * PRF_len* freqnum);
HostToDevice(h_echo_block_imag, d_echo_block_imag, sizeof(float) * PRF_len* freqnum);
HostToDevice(h_echo_block_real, d_echo_block_real, sizeof(float) * PRF_len * freqnum);
HostToDevice(h_echo_block_imag, d_echo_block_imag, sizeof(float) * PRF_len * freqnum);
for (startline = 0; startline < demRow; startline = startline + blockline) {
@ -804,12 +804,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
Eigen::MatrixXd demsloper_z = demsloperxyz.getData(startline, 0, blockline, demCol, 3);
Eigen::MatrixXd landcover = demlandcls.getData(startline, 0, blockline, demCol, 1);
long temp_dem_row = dem_x.rows();
long temp_dem_row = dem_x.rows();
long temp_dem_col = dem_x.cols();
long temp_dem_count = dem_x.count();
// 更新数据格式
for (long i = 0; i < temp_dem_row; i++) {
for (long j = 0; j < temp_dem_col; j++) {
@ -819,32 +819,32 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
h_demsloper_x[i * temp_dem_col + j] = double(demsloper_x(i, j));
h_demsloper_y[i * temp_dem_col + j] = double(demsloper_y(i, j));
h_demsloper_z[i * temp_dem_col + j] = double(demsloper_z(i, j));
h_demcls[i * temp_dem_col + j] = clamap[long(landcover(i, j))];
}
}
qDebug() << "Start PRF: " << sprfid << "\t-\t" << sprfid + PRF_len << "\t:copy target data ("<< startline<<" - "<< startline + blockline << ") host -> GPU";
HostToDevice(h_dem_x, d_dem_x , sizeof(double) * blockline * demCol);
HostToDevice(h_dem_y, d_dem_y , sizeof(double) * blockline * demCol);
HostToDevice(h_dem_z, d_dem_z , sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_x, d_demsloper_x , sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_y, d_demsloper_y , sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_z, d_demsloper_z , sizeof(double) * blockline * demCol);
HostToDevice(h_demcls, d_demcls ,sizeof(long)* blockline* demCol);
qDebug() << "Start PRF: " << sprfid << "\t-\t" << sprfid + PRF_len << "\t:copy target data (" << startline << " - " << startline + blockline << ") host -> GPU";
HostToDevice(h_dem_x, d_dem_x, sizeof(double) * blockline * demCol);
HostToDevice(h_dem_y, d_dem_y, sizeof(double) * blockline * demCol);
HostToDevice(h_dem_z, d_dem_z, sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_x, d_demsloper_x, sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_y, d_demsloper_y, sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_z, d_demsloper_z, sizeof(double) * blockline * demCol);
HostToDevice(h_demcls, d_demcls, sizeof(long) * blockline * demCol);
// 分块处理
qDebug() << "Start PRF: " << sprfid << "\t-\t" << sprfid + PRF_len << "\t:GPU Computer target data (" << startline << "-" << startline + blockline << ")";
CUDA_RFPC_MainProcess(
antptrlist->d_antpx, antptrlist->d_antpy, antptrlist->d_antpz,
antptrlist->d_antXaxisX, antptrlist->d_antXaxisY, antptrlist->d_antXaxisZ, // 天线坐标系的X轴
antptrlist->d_antYaxisX, antptrlist->d_antYaxisY, antptrlist->d_antYaxisZ,// 天线坐标系的Y轴
antptrlist->d_antZaxisX, antptrlist->d_antZaxisY, antptrlist->d_antZaxisZ,// 天线坐标系的Z轴
antptrlist->d_antdirectx, antptrlist->d_antdirecty, antptrlist->d_antdirectz,// 天线的指向
antptrlist->d_antpx, antptrlist->d_antpy, antptrlist->d_antpz,
antptrlist->d_antXaxisX, antptrlist->d_antXaxisY, antptrlist->d_antXaxisZ, // 天线坐标系的X轴
antptrlist->d_antYaxisX, antptrlist->d_antYaxisY, antptrlist->d_antYaxisZ,// 天线坐标系的Y轴
antptrlist->d_antZaxisX, antptrlist->d_antZaxisY, antptrlist->d_antZaxisZ,// 天线坐标系的Z轴
antptrlist->d_antdirectx, antptrlist->d_antdirecty, antptrlist->d_antdirectz,// 天线的指向
PRF_len, freqnum,
f0,dfreq,
Pt,
f0, dfreq,
Pt,
refphaseRange,
// 天线方向图
d_TantPattern,
@ -861,7 +861,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
d_temp_R, d_temp_amp
);
PRINT("dem : %d ~ %d / %d , echo: %d ~ %d / %d \n", startline, startline+ temp_dem_row, demRow, sprfid, sprfid+ PRF_len, PRFCount);
PRINT("dem : %d ~ %d / %d , echo: %d ~ %d / %d \n", startline, startline + temp_dem_row, demRow, sprfid, sprfid + PRF_len, PRFCount);
}
#if (defined __PRFDEBUG__) && (defined __PRFDEBUG_PRFINF__)
@ -888,7 +888,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
}
this->EchoSimulationData->saveEchoArr(echo_temp, sprfid, PRF_len);
}
@ -920,7 +920,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
FreeCUDADevice(d_demcls);
FreeCUDADevice(d_echo_block_real);
FreeCUDADevice(d_echo_block_imag);
FreeCUDADevice(d_temp_R);
FreeCUDADevice(d_temp_amp);
@ -929,19 +929,19 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
{
int num_devices=0;
int num_devices = 0;
cudaGetDeviceCount(&num_devices);
PRINT("GPU Count : %d \n", num_devices);
long prfcount = this->EchoSimulationData->getPluseCount();
size_t prfblockcount = (prfcount + num_devices +2- 1) / num_devices;
size_t prfblockcount = (prfcount + num_devices + 2 - 1) / num_devices;
PRINT("PRF COUNT : %d , child PRF COUNT: %d\n", prfcount, prfblockcount);
double prf_time = 0;
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
bool antflag = true; // 计算天线方向图
long double imageStarttime = this->TaskSetting->getSARImageStartTime();
std::shared_ptr<SatelliteOribtNode[]> sateOirbtNodes = this->getSatelliteOribtNodes(prf_time, dt, antflag, imageStarttime);
@ -961,22 +961,22 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId)
{
PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devId,startprfid,prfcount);
PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devId, startprfid, prfcount);
/// 显存不限制
cudaSetDevice(devId); // 确保当前线程操作指定的GPU设备
POLARTYPEENUM polartype = this->TaskSetting->getPolarType();
std::map<long, SigmaParam> clssigmaParamsDict = this->SigmaDatabasePtr->getsigmaParams(polartype);;
std::map<long, CUDASigmaParam> clsCUDASigmaParamsDict;
for (const auto& pair : clssigmaParamsDict) {
clsCUDASigmaParamsDict.insert(std::pair<long, CUDASigmaParam>(pair.first,
CUDASigmaParam{
clsCUDASigmaParamsDict.insert(std::pair<long, CUDASigmaParam>(pair.first,
CUDASigmaParam{
float(pair.second.p1),
float(pair.second.p2),
float(pair.second.p3),
float(pair.second.p4),
float(pair.second.p5),
float(pair.second.p6)
float(pair.second.p6)
}));
printf("clsid:%d, params: %e,%e,%e,%e,%e,%e \n", pair.first,
float(pair.second.p1),
@ -993,147 +993,156 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
gdalImage demxyz(this->demxyzPath);// 地面点坐标
gdalImage demlandcls(this->LandCoverPath);// 地表覆盖类型
gdalImage slpxyz(this->demsloperPath);// 地面坡向
// 处理地面坐标
long demRow = demxyz.height;
long demCol = demxyz.width;
size_t demCount = size_t(demRow) * size_t(demCol);
std::shared_ptr<double> demX = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demY = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demZ = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpX = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpY = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpZ = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<long> clsArr = readDataArr<long>(demlandcls, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
long allDemRow = Memory1MB/demxyz.width/8/3*6000;
//allDemRow = allDemRow < demxyz.height ? allDemRow : demxyz.height;
for(long demId=0;demId< demxyz.height;demId=demId+ allDemRow){
PRINT("dem cover processbar: [%f precent]\n", demId * 100.0 / demxyz.height);
long demRow = allDemRow;
demRow = demRow + demId < demxyz.height ? demRow : demxyz.height - demId;
long demCol = demxyz.width;
long long demCount = long long(demRow) * long long(demCol);
// 检索类别数量
std::map<long, size_t> clsCountDict;
for (const auto& pair : clssigmaParamsDict) {
clsCountDict.insert(std::pair<long, size_t>(pair.first, 0));
}
std::shared_ptr<double> demX = readDataArr<double>(demxyz, demId, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demY = readDataArr<double>(demxyz, demId, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demZ = readDataArr<double>(demxyz, demId, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpX = readDataArr<double>(slpxyz, demId, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpY = readDataArr<double>(slpxyz, demId, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpZ = readDataArr<double>(slpxyz, demId, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<long> clsArr = readDataArr<long>(demlandcls, demId, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
PRINT("demRow: %d , demCol:%d \n", demRow, demCol);
for (size_t i = 0; i < demCount; i++) {
long clsid = clsArr.get()[i];
if (clsCountDict.find(clsid) != clsCountDict.end()) {
clsCountDict[clsid] = clsCountDict[clsid] + 1;
}
}
std::map<long, std::shared_ptr<GoalState>> clsGoalStateDict;
for (const auto& pair : clsCountDict) {
if (pair.second > 0) {
clsGoalStateDict.insert(
std::pair<long, std::shared_ptr<GoalState>>(
pair.first,
std::shared_ptr<GoalState>((GoalState*)mallocCUDAHost(sizeof(GoalState) * pair.second), FreeCUDAHost)));
PRINT("clsid : %d ,Count: %d\n", pair.first, pair.second);
}
}
// 分块处理大小
size_t blocksize = 1000;
std::map<long, size_t> clsCountDictTemp;
for (const auto& pair : clsCountDict) {
clsCountDictTemp.insert(std::pair<long, size_t>(pair.first, pair.second));
}
double sumdemx = 0;
for (long i = 0; i < demCount; i++) {
sumdemx= sumdemx+demX.get()[i];
}
for (long i = 0; i < demCount; i++) {
long clsid = clsArr.get()[i];
size_t Currentclscount = clsCountDictTemp[clsid];
size_t allclscount = clsCountDict[clsid];
if (clsGoalStateDict.find(clsid) == clsGoalStateDict.end()) {
continue;
// 检索类别数量
std::map<long, size_t> clsCountDict;
for (const auto& pair : clssigmaParamsDict) {
clsCountDict.insert(std::pair<long, size_t>(pair.first, 0));
}
clsGoalStateDict[clsid].get()[Currentclscount - allclscount];
for (long long i = 0; i < demCount; i++) {
long clsid = clsArr.get()[i];
if (clsCountDict.find(clsid) != clsCountDict.end()) {
clsCountDict[clsid] = clsCountDict[clsid] + 1;
}
}
clsGoalStateDict[clsid].get()[allclscount- Currentclscount].Tx = demX.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Ty = demY.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Tz = demZ.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsX = slpX.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsY = slpY.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsZ = slpZ.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].cls = clsArr.get()[i];
clsCountDictTemp[clsid] = clsCountDictTemp[clsid] - 1;
}
std::map<long, std::shared_ptr<GoalState>> clsGoalStateDict;
for (const auto& pair : clsCountDict) {
if (pair.second > 0) {
clsGoalStateDict.insert(
std::pair<long, std::shared_ptr<GoalState>>(
pair.first,
std::shared_ptr<GoalState>((GoalState*)mallocCUDAHost(sizeof(GoalState) * pair.second), FreeCUDAHost)));
PRINT("clsid : %d ,Count: %d\n", pair.first, pair.second);
}
}
// 分块处理大小
size_t blocksize = 1000;
std::map<long, size_t> clsCountDictTemp;
for (const auto& pair : clsCountDict) {
clsCountDictTemp.insert(std::pair<long, size_t>(pair.first, pair.second));
}
double sumdemx = 0;
for (long i = 0; i < demCount; i++) {
sumdemx = sumdemx + demX.get()[i];
}
RFPCTask task;
// 参数声明
task.freqNum = this->EchoSimulationData->getPlusePoints();
task.prfNum = prfcount;
task.Rref = this->EchoSimulationData->getRefPhaseRange();
task.Rnear = this->EchoSimulationData->getNearRange();
task.Rfar = this->EchoSimulationData->getFarRange();
task.Pt = this->TaskSetting->getPt();
task.startFreq = this->EchoSimulationData->getCenterFreq() - this->EchoSimulationData->getBandwidth() / 2;
task.stepFreq = this->EchoSimulationData->getBandwidth() / (task.freqNum - 1);
task.d_echoData = (cuComplex*)mallocCUDADevice(prfcount * task.freqNum * sizeof(cuComplex), devId);
for (long i = 0; i < demCount; i++) {
long clsid = clsArr.get()[i];
size_t Currentclscount = clsCountDictTemp[clsid];
size_t allclscount = clsCountDict[clsid];
PRINT("Dev:%d ,freqnum%d , prfnum:%d ,Rref: %e ,Rnear : %e ,Rfar: %e , StartFreq: %e ,DeletFreq: %e \n",
devId,task.freqNum,task.prfNum,task.Rref,task.Rnear,task.Rfar,task.startFreq,task.stepFreq);
if (clsGoalStateDict.find(clsid) == clsGoalStateDict.end()) {
continue;
}
// 天线位置
{
std::shared_ptr<SatelliteAntPos> antplise = this->EchoSimulationData->getAntPosVelc();
std::shared_ptr<SateState> h_antlist((SateState*)mallocCUDAHost(prfcount * sizeof(SateState)), FreeCUDAHost);
clsGoalStateDict[clsid].get()[Currentclscount - allclscount];
for (long i = 0; i < prfcount; i++) {
h_antlist.get()[i].Px = antplise.get()[i + startprfid].Px;
h_antlist.get()[i].Py = antplise.get()[i + startprfid].Py;
h_antlist.get()[i].Pz = antplise.get()[i + startprfid].Pz;
h_antlist.get()[i].Vx = antplise.get()[i + startprfid].Vx;
h_antlist.get()[i].Vy = antplise.get()[i + startprfid].Vy;
h_antlist.get()[i].Vz = antplise.get()[i + startprfid].Vz;
h_antlist.get()[i].antDirectX = antplise.get()[i + startprfid].AntDirectX;
h_antlist.get()[i].antDirectY = antplise.get()[i + startprfid].AntDirectY;
h_antlist.get()[i].antDirectZ = antplise.get()[i + startprfid].AntDirectZ;
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Tx = demX.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Ty = demY.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Tz = demZ.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsX = slpX.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsY = slpY.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsZ = slpZ.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].cls = clsArr.get()[i];
clsCountDictTemp[clsid] = clsCountDictTemp[clsid] - 1;
}
RFPCTask task;
// 参数声明
task.freqNum = this->EchoSimulationData->getPlusePoints();
task.prfNum = prfcount;
task.Rref = this->EchoSimulationData->getRefPhaseRange();
task.Rnear = this->EchoSimulationData->getNearRange();
task.Rfar = this->EchoSimulationData->getFarRange();
task.Pt = this->TaskSetting->getPt();
task.startFreq = this->EchoSimulationData->getCenterFreq() - this->EchoSimulationData->getBandwidth() / 2;
task.stepFreq = this->EchoSimulationData->getBandwidth() / (task.freqNum - 1);
task.d_echoData = (cuComplex*)mallocCUDADevice(prfcount * task.freqNum * sizeof(cuComplex), devId);
PRINT("Dev:%d ,freqnum%d , prfnum:%d ,Rref: %e ,Rnear : %e ,Rfar: %e , StartFreq: %e ,DeletFreq: %e \n",
devId, task.freqNum, task.prfNum, task.Rref, task.Rnear, task.Rfar, task.startFreq, task.stepFreq);
// 天线位置
{
std::shared_ptr<SatelliteAntPos> antplise = this->EchoSimulationData->getAntPosVelc();
std::shared_ptr<SateState> h_antlist((SateState*)mallocCUDAHost(prfcount * sizeof(SateState)), FreeCUDAHost);
for (long i = 0; i < prfcount; i++) {
h_antlist.get()[i].Px = antplise.get()[i + startprfid].Px;
h_antlist.get()[i].Py = antplise.get()[i + startprfid].Py;
h_antlist.get()[i].Pz = antplise.get()[i + startprfid].Pz;
h_antlist.get()[i].Vx = antplise.get()[i + startprfid].Vx;
h_antlist.get()[i].Vy = antplise.get()[i + startprfid].Vy;
h_antlist.get()[i].Vz = antplise.get()[i + startprfid].Vz;
h_antlist.get()[i].antDirectX = antplise.get()[i + startprfid].AntDirectX;
h_antlist.get()[i].antDirectY = antplise.get()[i + startprfid].AntDirectY;
h_antlist.get()[i].antDirectZ = antplise.get()[i + startprfid].AntDirectZ;
}
task.antlist = (SateState*)mallocCUDADevice(prfcount * sizeof(SateState), devId);
HostToDevice(h_antlist.get(), task.antlist, sizeof(SateState) * prfcount);
}
task.antlist = (SateState*)mallocCUDADevice(prfcount * sizeof(SateState), devId);
HostToDevice(h_antlist.get(), task.antlist, sizeof(SateState) * prfcount);
// 分块计算
for (const auto& pair : clsGoalStateDict) {
long clsid = pair.first;
size_t clscount = clsCountDict[clsid];
PRINT("Process Class ID : %d , Count: %d Device: %d\n", clsid, clscount,devId);
task.targetnum = clscount;
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
task.sigma0_cls = clsCUDASigmaParamsDict[clsid];
ProcessRFPCTask(task, devId);
FreeCUDADevice(task.goallist);
}
this->SaveBlockSimulationEchoArr(task.d_echoData, prfcount, task.freqNum, startprfid);
FreeCUDADevice(task.d_echoData);
FreeCUDADevice(task.antlist);
//FreeCUDADevice(task.goallist);
}
// 分块计算
for (const auto& pair : clsGoalStateDict) {
long clsid = pair.first;
size_t clscount = clsCountDict[clsid];
PRINT("Process Class ID : %d , Count: %d\n", clsid, clscount);
task.targetnum = clscount;
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
task.sigma0_cls = clsCUDASigmaParamsDict[clsid];
ProcessRFPCTask(task,devId);
FreeCUDADevice(task.goallist);
}
this->SaveBlockSimulationEchoArr(task.d_echoData, prfcount, task.freqNum, startprfid);
FreeCUDADevice(task.d_echoData);
FreeCUDADevice(task.antlist);
//FreeCUDADevice(task.goallist);
PRINT("dem cover processbar: [100 precent]\n");
return ErrorCode::SUCCESS;
}
ErrorCode RFPCProcessCls::SaveBlockSimulationEchoArr(cuComplex* d_echoData,size_t prfcount,size_t freqNum,long startprfid)
ErrorCode RFPCProcessCls::SaveBlockSimulationEchoArr(cuComplex* d_echoData, size_t prfcount, size_t freqNum, long startprfid)
{
// 文件读写
@ -1143,18 +1152,18 @@ ErrorCode RFPCProcessCls::SaveBlockSimulationEchoArr(cuComplex* d_echoData,size_
cuComplex* h_echoData = (cuComplex*)mallocCUDAHost(prfcount * freqNum * sizeof(cuComplex));
DeviceToHost(h_echoData, d_echoData, prfcount * freqNum * sizeof(cuComplex));
DeviceToHost(h_echoData, d_echoData, prfcount * freqNum * sizeof(cuComplex));
long prfcount_read = prfcount;
std::shared_ptr<std::complex<double>> fileEchoArr = this->EchoSimulationData->getEchoArr(startprfid, prfcount_read);
for (size_t i = 0; i < prfcount; i++) {
for (size_t j = 0; j < freqNum; j++) {
for (size_t j = 0; j < freqNum; j++) {
std::complex<double> temp = fileEchoArr.get()[i * freqNum + j];
fileEchoArr.get()[i * freqNum + j] = std::complex<double>(
temp.real() + h_echoData[i * freqNum + j].x,
temp.imag() + h_echoData[i * freqNum + j].y
);
temp.real() + h_echoData[i * freqNum + j].x,
temp.imag() + h_echoData[i * freqNum + j].y
);
}
}