调试回波仿真

pull/14/head
陈增辉 2025-03-31 02:14:22 +08:00
parent 1b373e6de1
commit 997471cba1
5 changed files with 233 additions and 293 deletions

View File

@ -1153,7 +1153,7 @@ int ResampleGDAL(const char* pszSrcFile, const char* pszOutFile, double* gt, int
GDALWarpOptions* psWo = GDALCreateWarpOptions(); GDALWarpOptions* psWo = GDALCreateWarpOptions();
CPLSetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS"); // 使用所有可用的CPU核心 CPLSetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS"); // 使用所有可用的CPU核心
CPLSetConfigOption("GDAL_CACHEMAX", "16000"); // 设置缓存大小为16000MB CPLSetConfigOption("GDAL_CACHEMAX", "4000"); // 设置缓存大小为4000MB
// psWo->papszWarpOptions = CSLDuplicate(NULL); // psWo->papszWarpOptions = CSLDuplicate(NULL);
psWo->eWorkingDataType = dataType; psWo->eWorkingDataType = dataType;
psWo->eResampleAlg = eResample; psWo->eResampleAlg = eResample;

View File

@ -110,7 +110,7 @@ __global__ void processPulseKernel(
im_final[idx].x += phCorr.x; im_final[idx].x += phCorr.x;
im_final[idx].y += phCorr.y; im_final[idx].y += phCorr.y;
//printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, nR); //printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, nR);
if (abs(phCorr.x) > 1e-100 || abs(phCorr.y > 1e-100)) { //if (abs(phCorr.x) > 1e-100 || abs(phCorr.y > 1e-100)) {
//printf( //printf(
// "[DEBUG] prfid=%-4ld | idx=%-8lld\n" // "[DEBUG] prfid=%-4ld | idx=%-8lld\n"
// " Ant: X=%-18.10e Y=%-18.10e Z=%-18.10e\n" // " Ant: X=%-18.10e Y=%-18.10e Z=%-18.10e\n"
@ -133,7 +133,7 @@ __global__ void processPulseKernel(
// phCorr.x, phCorr.y, // phCorr.x, phCorr.y,
// im_final[idx].x, im_final[idx].y // im_final[idx].x, im_final[idx].y
//); //);
} //}
} }
void bpBasic0CUDA(GPUDATA& data, int flag,double* h_R) { void bpBasic0CUDA(GPUDATA& data, int flag,double* h_R) {

View File

@ -519,12 +519,12 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
RstY = RstY / RstR; RstY = RstY / RstR;
RstZ = RstZ / RstR; RstZ = RstZ / RstR;
double slopeX = gp.TsX; float slopeX = gp.TsX;
double slopeY = gp.TsY; float slopeY = gp.TsY;
double slopeZ = gp.TsZ; float slopeZ = gp.TsZ;
double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); // float slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
if (abs(slopR - 0) > 1e-3) { if (slopR > 1e-3) {
float localangle = acosf((RstX * slopeX + RstY * slopeY + RstZ * slopeZ) / ( slopR)); float localangle = acosf((RstX * slopeX + RstY * slopeY + RstZ * slopeZ) / ( slopR));
@ -553,10 +553,11 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
ampGain=2 * maxGain * (1 - (powf(diectAngle,2) / 6) ampGain=2 * maxGain * (1 - (powf(diectAngle,2) / 6)
+ (powf(diectAngle, 4) / 120) + (powf(diectAngle, 4) / 120)
- (powf(diectAngle, 6) / 5040)); //dB - (powf(diectAngle, 6) / 5040)); //dB
ampGain = powf(10.0, ampGain / 10.0); ampGain = powf(10.0, ampGain / 10.0);
ampGain = ampGain / (PI4POW2 * powf(RstR, 4)); // 反射强度 ampGain = ampGain / (PI4POW2 * powf(RstR, 4)); // 反射强度
double sigma = GPU_getSigma0dB(sigma0Params, localangle); float sigma = GPU_getSigma0dB(sigma0Params, localangle);
sigma = powf(10.0, sigma / 10.0); sigma = powf(10.0, sigma / 10.0);
double temp_amp = double(ampGain * Pt * sigma); double temp_amp = double(ampGain * Pt * sigma);
@ -573,76 +574,6 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
} }
} }
// Accumulates a complex echo sample for each (pulse, frequency) pair.
//
// Each thread derives a pulse index (prfId) and a frequency index (fId) from its
// flat global index. The block first stages range and amplitude values into
// shared memory, then every in-range thread sums amp * (cos(phi), sin(phi))
// over all SHAREMEMORY_FLOAT_HALF staged entries, where
// phi = R * RFPCPIDIVLIGHT * (f0 + fId * dfreq), and adds the sum to
// echodata[prfId * maxfreqnum + fId].
//
// Parameters:
//   d_temp_R, d_temp_amps : double arrays, read at index prfId * posNum + psid.
//   posNum                : number of valid entries per pulse row; slots with
//                           psid >= posNum are zero-filled.
//   f0, dfreq             : start value and step used to form the per-thread frequency.
//   FreqPoints            : divisor used to split the flat index into prfId / fId.
//   maxfreqnum            : upper bound for fId and row stride of echodata.
//   echodata              : complex output, updated in place (read-modify-write).
//   temp_PRF_Count        : upper bound for prfId when writing the output.
//
// NOTE(review): the shared arrays are statically sized to SHAREMEMORY_FLOAT_HALF and
// each thread writes slots tid * SHAREMEMORY_FLOAT_HALF_STEP + ii; this presumably
// requires blockDim.x * SHAREMEMORY_FLOAT_HALF_STEP <= SHAREMEMORY_FLOAT_HALF - confirm
// against the launch configuration.
// NOTE(review): prfId is computed per thread, so threads of one block can stage rows
// of different pulses into the same shared arrays when a FreqPoints boundary falls
// inside a block - verify this is intended.
// NOTE(review): the global reads below are not guarded by prfId < temp_PRF_Count;
// confirm the input buffers cover every prfId the grid can produce.
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
double* d_temp_R, double* d_temp_amps, long posNum,
double f0, double dfreq,
long FreqPoints, // chunk of frequency points currently being processed
long maxfreqnum, // maximum pulse value (original wording); used below as the frequency bound and echodata row stride
cuComplex* echodata,
long temp_PRF_Count
) {
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // note: one whole block (block_size threads) shares this memory
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
long long tid = threadIdx.x;
long long bid = blockIdx.x;
long long idx = bid * blockDim.x + tid;
long long prfId = idx / FreqPoints; // pulse ID
long long fId = idx % FreqPoints;// frequency ID
long long psid = 0;
long long pixelId = 0;
// Each thread stages SHAREMEMORY_FLOAT_HALF_STEP consecutive slots; the double
// inputs are narrowed to float when stored into the shared arrays.
for (long ii = 0; ii < SHAREMEMORY_FLOAT_HALF_STEP; ii++) { // SHAREMEMORY_FLOAT_HALF_STEP * BLOCK_SIZE=SHAREMEMORY_FLOAT_HALF
psid = tid * SHAREMEMORY_FLOAT_HALF_STEP + ii;
pixelId = prfId * posNum + psid; //
if (psid < posNum) {
s_R[psid] = d_temp_R[pixelId];
s_amp[psid] = d_temp_amps[pixelId];
}
else {
// Zero amplitude makes the padded slots contribute nothing to the sum below.
s_R[psid] = 0;
s_amp[psid] = 0;
}
}
__syncthreads(); // make sure all data to be processed has been staged before any thread reads it
if (fId < maxfreqnum && prfId < temp_PRF_Count) {
long echo_ID = prfId * maxfreqnum + fId; // compute the corresponding echo position
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
cuComplex echo = make_cuComplex(0, 0);
float temp_phi = 0;
float temp_amp = 0;
// Sum the contribution of every staged entry in single precision.
for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
temp_phi = s_R[dataid] * factorjTemp;
temp_amp = s_amp[dataid];
echo.x += (temp_amp * cosf(temp_phi));
echo.y += (temp_amp * sinf(temp_phi));
//if (dataid > 5000) {
// printf("echo_ID=%d; dataid=%d;ehodata=(%f,%f);R=%f;amp=%f;\n", echo_ID, dataid, temp_real, temp_imag, s_R[0], s_amp[0]);
//}
//if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y)
// || isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y)
// ) {
// printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y);
//}
}
// Add this launch's partial sum onto the value already stored in the output.
echodata[echo_ID] = cuCaddf(echodata[echo_ID], echo);
}
}
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized( __global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
double* d_temp_R, double* d_temp_amps, long posNum, double* d_temp_R, double* d_temp_amps, long posNum,
double f0, double dfreq, double f0, double dfreq,
@ -652,15 +583,15 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
long temp_PRF_Count long temp_PRF_Count
) { ) {
// 使用动态共享内存,根据线程块大小调整 // 使用动态共享内存,根据线程块大小调整
extern __shared__ float s_data[]; extern __shared__ double s_data[];
float* s_R = s_data; double* s_R = s_data;
float* s_amp = s_data + blockDim.x; double* s_amp = s_data + blockDim.x;
const int tid = threadIdx.x; const int tid = threadIdx.x;
const int prfId = blockIdx.x; const int prfId = blockIdx.x;
const int fId = tid; // 每个线程处理一个频率点 const int fId = tid; // 每个线程处理一个频率点
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq); double factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
cuComplex echo = make_cuComplex(0.0f, 0.0f); cuComplex echo = make_cuComplex(0.0f, 0.0f);
// 分块加载数据并计算 // 分块加载数据并计算
@ -670,8 +601,8 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
// 加载当前块到共享内存 // 加载当前块到共享内存
if (psid < posNum) { if (psid < posNum) {
s_R[tid] = static_cast<float>(d_temp_R[pixelId]); s_R[tid] = static_cast<double>(d_temp_R[pixelId]);
s_amp[tid] = static_cast<float>(d_temp_amps[pixelId]); s_amp[tid] = static_cast<double>(d_temp_amps[pixelId]);
} }
else { else {
s_R[tid] = 0.0f; s_R[tid] = 0.0f;
@ -681,7 +612,7 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern_Optimized(
// 计算当前块的贡献 // 计算当前块的贡献
for (int dataid = 0; dataid < blockDim.x; ++dataid) { for (int dataid = 0; dataid < blockDim.x; ++dataid) {
float temp_phi = s_R[dataid] * factorjTemp; float temp_phi =fmod( s_R[dataid] * factorjTemp,2*PI);
float temp_amp = s_amp[dataid]; float temp_amp = s_amp[dataid];
float sin_phi, cos_phi; float sin_phi, cos_phi;
sincosf(temp_phi, &sin_phi, &cos_phi); sincosf(temp_phi, &sin_phi, &cos_phi);
@ -743,7 +674,7 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
dim3 blocks(task.prfNum); dim3 blocks(task.prfNum);
dim3 threads(BLOCK_SIZE); dim3 threads(BLOCK_SIZE);
size_t shared_mem_size = 2 * BLOCK_SIZE * sizeof(float); size_t shared_mem_size = 2 * BLOCK_SIZE * sizeof(double);
CUDA_Kernel_Computer_echo_NoAntPattern_Optimized << <blocks, threads, shared_mem_size >> > ( CUDA_Kernel_Computer_echo_NoAntPattern_Optimized << <blocks, threads, shared_mem_size >> > (
d_R, d_amps, SHAREMEMORY_FLOAT_HALF, d_R, d_amps, SHAREMEMORY_FLOAT_HALF,
@ -761,11 +692,11 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
// task.d_echoData, // task.d_echoData,
// task.prfNum // task.prfNum
// ); // );
//PrintLasterError("CUDA_Kernel_Computer_echo"); PrintLasterError("CUDA_Kernel_Computer_echo");
cudaDeviceSynchronize(); cudaDeviceSynchronize();
if ((sTi * 100.0 / task.targetnum) - process >= 1) { if ((sTi * 100.0 / task.targetnum) - process >= 10) {
process = sTi * 100.0 / task.targetnum; process = sTi * 100.0 / task.targetnum;
PRINT("TargetID [%f]: %d / %d finished %d\n", sTi * 100.0 / task.targetnum, sTi, task.targetnum,devid); PRINT("device ID : %d , TargetID [%f]: %d / %d finished %d\n",devid, sTi * 100.0 / task.targetnum, sTi, task.targetnum,devid);
} }
} }

View File

@ -91,7 +91,7 @@ extern "C" struct RFPCTask
cuComplex* d_echoData = nullptr; // 回波 cuComplex* d_echoData = nullptr; // 回波
CUDASigmaParam sigma0_cls; CUDASigmaParam sigma0_cls;
double maxGain=48; double maxGain=48;
double GainWeight=20; // 2度范围 double GainWeight=10; // 2度范围
size_t targetnum; size_t targetnum;

View File

@ -339,7 +339,7 @@ ErrorCode RFPCProcessCls::InitParams()
this->PlusePoint = freqnum;// ceil((this->TaskSetting->getFarRange() - this->TaskSetting->getNearRange()) / LIGHTSPEED * 2 * this->TaskSetting->getBandWidth()); this->PlusePoint = freqnum;// ceil((this->TaskSetting->getFarRange() - this->TaskSetting->getNearRange()) / LIGHTSPEED * 2 * this->TaskSetting->getBandWidth());
this->TaskSetting->setFarRange(this->TaskSetting->getNearRange() + (this->PlusePoint-1) * drange); this->TaskSetting->setFarRange(this->TaskSetting->getNearRange() + (this->PlusePoint - 1) * drange);
//ceil(rangeTimeSample * this->TaskSetting->getFs()); //ceil(rangeTimeSample * this->TaskSetting->getFs());
@ -456,7 +456,7 @@ std::shared_ptr<SatelliteOribtNode[]> RFPCProcessCls::getSatelliteOribtNodes(dou
void RFPCProcessMain(long num_thread, void RFPCProcessMain(long num_thread,
QString TansformPatternFilePath, QString ReceivePatternFilePath, QString TansformPatternFilePath, QString ReceivePatternFilePath,
QString simulationtaskName, QString OutEchoPath, QString simulationtaskName, QString OutEchoPath,
QString GPSXmlPath, QString TaskXmlPath,QString demTiffPath, QString sloperPath, QString LandCoverPath) QString GPSXmlPath, QString TaskXmlPath, QString demTiffPath, QString sloperPath, QString LandCoverPath)
{ {
std::shared_ptr < AbstractSARSatelliteModel> task = ReadSimulationSettingsXML(TaskXmlPath); std::shared_ptr < AbstractSARSatelliteModel> task = ReadSimulationSettingsXML(TaskXmlPath);
@ -676,7 +676,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
long startline = 0; long startline = 0;
{ {
long blokline = getBlockRows(2e4, demCol, sizeof(double),demRow); long blokline = getBlockRows(2e4, demCol, sizeof(double), demRow);
for (startline = 0; startline < demRow; startline = startline + blokline) { for (startline = 0; startline < demRow; startline = startline + blokline) {
Eigen::MatrixXd clsland = demlandcls.getData(startline, 0, blokline, demlandcls.width, 1); Eigen::MatrixXd clsland = demlandcls.getData(startline, 0, blokline, demlandcls.width, 1);
long clsrows = clsland.rows(); long clsrows = clsland.rows();
@ -694,9 +694,9 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
} }
} }
qDebug() << "class id recoding" ; qDebug() << "class id recoding";
for (long id : clamap.keys()) { for (long id : clamap.keys()) {
qDebug() << id << " -> " << clamap[id] ; qDebug() << id << " -> " << clamap[id];
} }
} }
@ -716,15 +716,15 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
} }
// 打印日志 // 打印日志
qDebug() << "sigma params:" ; qDebug() << "sigma params:";
qDebug() << "classid:\tp1\tp2\tp3\tp4\tp5\tp6" ; qDebug() << "classid:\tp1\tp2\tp3\tp4\tp5\tp6";
for (long ii = 0; ii < clamapid; ii++) { for (long ii = 0; ii < clamapid; ii++) {
qDebug() << ii << ":\t" << h_clsSigmaParam[ii].p1; qDebug() << ii << ":\t" << h_clsSigmaParam[ii].p1;
qDebug() << "\t" << h_clsSigmaParam[ii].p2; qDebug() << "\t" << h_clsSigmaParam[ii].p2;
qDebug() << "\t" << h_clsSigmaParam[ii].p3; qDebug() << "\t" << h_clsSigmaParam[ii].p3;
qDebug() << "\t" << h_clsSigmaParam[ii].p4; qDebug() << "\t" << h_clsSigmaParam[ii].p4;
qDebug() << "\t" << h_clsSigmaParam[ii].p5; qDebug() << "\t" << h_clsSigmaParam[ii].p5;
qDebug() << "\t" << h_clsSigmaParam[ii].p6 ; qDebug() << "\t" << h_clsSigmaParam[ii].p6;
} }
qDebug() << ""; qDebug() << "";
} }
@ -752,11 +752,11 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
double* d_demsloper_y = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol); double* d_demsloper_y = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
double* d_demsloper_z = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol); double* d_demsloper_z = (double*)mallocCUDADevice(sizeof(double) * blockline * demCol);
long* d_demcls = (long*) mallocCUDADevice(sizeof(long) * blockline * demCol); long* d_demcls = (long*)mallocCUDADevice(sizeof(long) * blockline * demCol);
/** 处理回波***************************************************/ /** 处理回波***************************************************/
long echo_block_rows = getBlockRows(1000, freqnum, sizeof(float)*2, PRFCount); long echo_block_rows = getBlockRows(1000, freqnum, sizeof(float) * 2, PRFCount);
float* h_echo_block_real = (float*)mallocCUDAHost(sizeof(float) * echo_block_rows * freqnum); float* h_echo_block_real = (float*)mallocCUDAHost(sizeof(float) * echo_block_rows * freqnum);
float* h_echo_block_imag = (float*)mallocCUDAHost(sizeof(float) * echo_block_rows * freqnum); float* h_echo_block_imag = (float*)mallocCUDAHost(sizeof(float) * echo_block_rows * freqnum);
@ -787,12 +787,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
for (long ii = 0; ii < PRF_len; ii++) { for (long ii = 0; ii < PRF_len; ii++) {
for (long jj = 0; jj < freqnum; jj++) { for (long jj = 0; jj < freqnum; jj++) {
h_echo_block_real[ii * freqnum + jj]=echo_temp.get()[ii * freqnum + jj].real(); h_echo_block_real[ii * freqnum + jj] = echo_temp.get()[ii * freqnum + jj].real();
h_echo_block_imag[ii * freqnum + jj]=echo_temp.get()[ii * freqnum + jj].imag(); h_echo_block_imag[ii * freqnum + jj] = echo_temp.get()[ii * freqnum + jj].imag();
} }
} }
HostToDevice(h_echo_block_real, d_echo_block_real, sizeof(float) * PRF_len* freqnum); HostToDevice(h_echo_block_real, d_echo_block_real, sizeof(float) * PRF_len * freqnum);
HostToDevice(h_echo_block_imag, d_echo_block_imag, sizeof(float) * PRF_len* freqnum); HostToDevice(h_echo_block_imag, d_echo_block_imag, sizeof(float) * PRF_len * freqnum);
for (startline = 0; startline < demRow; startline = startline + blockline) { for (startline = 0; startline < demRow; startline = startline + blockline) {
@ -824,14 +824,14 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
} }
} }
qDebug() << "Start PRF: " << sprfid << "\t-\t" << sprfid + PRF_len << "\t:copy target data ("<< startline<<" - "<< startline + blockline << ") host -> GPU"; qDebug() << "Start PRF: " << sprfid << "\t-\t" << sprfid + PRF_len << "\t:copy target data (" << startline << " - " << startline + blockline << ") host -> GPU";
HostToDevice(h_dem_x, d_dem_x , sizeof(double) * blockline * demCol); HostToDevice(h_dem_x, d_dem_x, sizeof(double) * blockline * demCol);
HostToDevice(h_dem_y, d_dem_y , sizeof(double) * blockline * demCol); HostToDevice(h_dem_y, d_dem_y, sizeof(double) * blockline * demCol);
HostToDevice(h_dem_z, d_dem_z , sizeof(double) * blockline * demCol); HostToDevice(h_dem_z, d_dem_z, sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_x, d_demsloper_x , sizeof(double) * blockline * demCol); HostToDevice(h_demsloper_x, d_demsloper_x, sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_y, d_demsloper_y , sizeof(double) * blockline * demCol); HostToDevice(h_demsloper_y, d_demsloper_y, sizeof(double) * blockline * demCol);
HostToDevice(h_demsloper_z, d_demsloper_z , sizeof(double) * blockline * demCol); HostToDevice(h_demsloper_z, d_demsloper_z, sizeof(double) * blockline * demCol);
HostToDevice(h_demcls, d_demcls ,sizeof(long)* blockline* demCol); HostToDevice(h_demcls, d_demcls, sizeof(long) * blockline * demCol);
// 分块处理 // 分块处理
@ -843,7 +843,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
antptrlist->d_antZaxisX, antptrlist->d_antZaxisY, antptrlist->d_antZaxisZ,// 天线坐标系的Z轴 antptrlist->d_antZaxisX, antptrlist->d_antZaxisY, antptrlist->d_antZaxisZ,// 天线坐标系的Z轴
antptrlist->d_antdirectx, antptrlist->d_antdirecty, antptrlist->d_antdirectz,// 天线的指向 antptrlist->d_antdirectx, antptrlist->d_antdirecty, antptrlist->d_antdirectz,// 天线的指向
PRF_len, freqnum, PRF_len, freqnum,
f0,dfreq, f0, dfreq,
Pt, Pt,
refphaseRange, refphaseRange,
// 天线方向图 // 天线方向图
@ -861,7 +861,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
d_temp_R, d_temp_amp d_temp_R, d_temp_amp
); );
PRINT("dem : %d ~ %d / %d , echo: %d ~ %d / %d \n", startline, startline+ temp_dem_row, demRow, sprfid, sprfid+ PRF_len, PRFCount); PRINT("dem : %d ~ %d / %d , echo: %d ~ %d / %d \n", startline, startline + temp_dem_row, demRow, sprfid, sprfid + PRF_len, PRFCount);
} }
#if (defined __PRFDEBUG__) && (defined __PRFDEBUG_PRFINF__) #if (defined __PRFDEBUG__) && (defined __PRFDEBUG_PRFINF__)
@ -929,12 +929,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU() {
ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern() ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
{ {
int num_devices=0; int num_devices = 0;
cudaGetDeviceCount(&num_devices); cudaGetDeviceCount(&num_devices);
PRINT("GPU Count : %d \n", num_devices); PRINT("GPU Count : %d \n", num_devices);
long prfcount = this->EchoSimulationData->getPluseCount(); long prfcount = this->EchoSimulationData->getPluseCount();
size_t prfblockcount = (prfcount + num_devices +2- 1) / num_devices; size_t prfblockcount = (prfcount + num_devices + 2 - 1) / num_devices;
PRINT("PRF COUNT : %d , child PRF COUNT: %d\n", prfcount, prfblockcount); PRINT("PRF COUNT : %d , child PRF COUNT: %d\n", prfcount, prfblockcount);
double prf_time = 0; double prf_time = 0;
double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔 double dt = 1 / this->TaskSetting->getPRF();// 获取每次脉冲的时间间隔
@ -961,7 +961,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_MultiGPU_NoAntPattern()
ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId) ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, size_t prfcount, int devId)
{ {
PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devId,startprfid,prfcount); PRINT("dev ID:%d,start PRF ID: %d , PRF COUNT: %d \n", devId, startprfid, prfcount);
/// 显存不限制 /// 显存不限制
cudaSetDevice(devId); // 确保当前线程操作指定的GPU设备 cudaSetDevice(devId); // 确保当前线程操作指定的GPU设备
@ -993,19 +993,25 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
gdalImage demxyz(this->demxyzPath);// 地面点坐标 gdalImage demxyz(this->demxyzPath);// 地面点坐标
gdalImage demlandcls(this->LandCoverPath);// 地表覆盖类型 gdalImage demlandcls(this->LandCoverPath);// 地表覆盖类型
gdalImage slpxyz(this->demsloperPath);// 地面坡向 gdalImage slpxyz(this->demsloperPath);// 地面坡向
// 处理地面坐标
long demRow = demxyz.height; long allDemRow = Memory1MB/demxyz.width/8/3*6000;
//allDemRow = allDemRow < demxyz.height ? allDemRow : demxyz.height;
for(long demId=0;demId< demxyz.height;demId=demId+ allDemRow){
PRINT("dem cover processbar: [%f precent]\n", demId * 100.0 / demxyz.height);
long demRow = allDemRow;
demRow = demRow + demId < demxyz.height ? demRow : demxyz.height - demId;
long demCol = demxyz.width; long demCol = demxyz.width;
size_t demCount = size_t(demRow) * size_t(demCol); long long demCount = long long(demRow) * long long(demCol);
std::shared_ptr<double> demX = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demY = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demZ = readDataArr<double>(demxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpX = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpY = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpZ = readDataArr<double>(slpxyz, 0, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<long> clsArr = readDataArr<long>(demlandcls, 0, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demX = readDataArr<double>(demxyz, demId, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demY = readDataArr<double>(demxyz, demId, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> demZ = readDataArr<double>(demxyz, demId, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpX = readDataArr<double>(slpxyz, demId, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpY = readDataArr<double>(slpxyz, demId, 0, demRow, demCol, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<double> slpZ = readDataArr<double>(slpxyz, demId, 0, demRow, demCol, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
std::shared_ptr<long> clsArr = readDataArr<long>(demlandcls, demId, 0, demRow, demCol, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD);
PRINT("demRow: %d , demCol:%d \n", demRow, demCol);
// 检索类别数量 // 检索类别数量
std::map<long, size_t> clsCountDict; std::map<long, size_t> clsCountDict;
@ -1013,7 +1019,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
clsCountDict.insert(std::pair<long, size_t>(pair.first, 0)); clsCountDict.insert(std::pair<long, size_t>(pair.first, 0));
} }
for (size_t i = 0; i < demCount; i++) { for (long long i = 0; i < demCount; i++) {
long clsid = clsArr.get()[i]; long clsid = clsArr.get()[i];
if (clsCountDict.find(clsid) != clsCountDict.end()) { if (clsCountDict.find(clsid) != clsCountDict.end()) {
clsCountDict[clsid] = clsCountDict[clsid] + 1; clsCountDict[clsid] = clsCountDict[clsid] + 1;
@ -1041,7 +1047,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
double sumdemx = 0; double sumdemx = 0;
for (long i = 0; i < demCount; i++) { for (long i = 0; i < demCount; i++) {
sumdemx= sumdemx+demX.get()[i]; sumdemx = sumdemx + demX.get()[i];
} }
@ -1058,7 +1064,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
clsGoalStateDict[clsid].get()[Currentclscount - allclscount]; clsGoalStateDict[clsid].get()[Currentclscount - allclscount];
clsGoalStateDict[clsid].get()[allclscount- Currentclscount].Tx = demX.get()[i]; clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Tx = demX.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Ty = demY.get()[i]; clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Ty = demY.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Tz = demZ.get()[i]; clsGoalStateDict[clsid].get()[allclscount - Currentclscount].Tz = demZ.get()[i];
clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsX = slpX.get()[i]; clsGoalStateDict[clsid].get()[allclscount - Currentclscount].TsX = slpX.get()[i];
@ -1083,7 +1089,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
task.d_echoData = (cuComplex*)mallocCUDADevice(prfcount * task.freqNum * sizeof(cuComplex), devId); task.d_echoData = (cuComplex*)mallocCUDADevice(prfcount * task.freqNum * sizeof(cuComplex), devId);
PRINT("Dev:%d ,freqnum%d , prfnum:%d ,Rref: %e ,Rnear : %e ,Rfar: %e , StartFreq: %e ,DeletFreq: %e \n", PRINT("Dev:%d ,freqnum%d , prfnum:%d ,Rref: %e ,Rnear : %e ,Rfar: %e , StartFreq: %e ,DeletFreq: %e \n",
devId,task.freqNum,task.prfNum,task.Rref,task.Rnear,task.Rfar,task.startFreq,task.stepFreq); devId, task.freqNum, task.prfNum, task.Rref, task.Rnear, task.Rfar, task.startFreq, task.stepFreq);
// 天线位置 // 天线位置
{ {
@ -1113,12 +1119,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
for (const auto& pair : clsGoalStateDict) { for (const auto& pair : clsGoalStateDict) {
long clsid = pair.first; long clsid = pair.first;
size_t clscount = clsCountDict[clsid]; size_t clscount = clsCountDict[clsid];
PRINT("Process Class ID : %d , Count: %d\n", clsid, clscount); PRINT("Process Class ID : %d , Count: %d Device: %d\n", clsid, clscount,devId);
task.targetnum = clscount; task.targetnum = clscount;
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId); task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount); HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
task.sigma0_cls = clsCUDASigmaParamsDict[clsid]; task.sigma0_cls = clsCUDASigmaParamsDict[clsid];
ProcessRFPCTask(task,devId); ProcessRFPCTask(task, devId);
FreeCUDADevice(task.goallist); FreeCUDADevice(task.goallist);
} }
@ -1130,10 +1136,13 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
FreeCUDADevice(task.antlist); FreeCUDADevice(task.antlist);
//FreeCUDADevice(task.goallist); //FreeCUDADevice(task.goallist);
}
PRINT("dem cover processbar: [100 precent]\n");
return ErrorCode::SUCCESS; return ErrorCode::SUCCESS;
} }
ErrorCode RFPCProcessCls::SaveBlockSimulationEchoArr(cuComplex* d_echoData,size_t prfcount,size_t freqNum,long startprfid) ErrorCode RFPCProcessCls::SaveBlockSimulationEchoArr(cuComplex* d_echoData, size_t prfcount, size_t freqNum, long startprfid)
{ {
// 文件读写 // 文件读写