调整软件计算逻辑

pull/3/head
陈增辉 2025-01-03 01:05:04 +08:00
parent 19dbddd6b5
commit 243af414f2
4 changed files with 87 additions and 192 deletions

View File

@ -549,6 +549,9 @@ __global__ void CUDAKernel_RFPC_Caluation_R_Gain(
ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度 ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
outAmp[idx] = ampGain * Pt * sigma0; outAmp[idx] = ampGain * Pt * sigma0;
outR[idx] = RstR; outR[idx] = RstR;
//if (sigma0 > 0) {
// printf("Amp=%e;localangle=%f;R=%f;sigma0=%e;\n", outAmp[idx], localangle, outR[idx], sigma0);
//}
} }
else { else {
outR[idx] = 0; outR[idx] = 0;
@ -604,30 +607,6 @@ __global__ void CUDARFPCKernel_Target_Freq_EchoData(
// Block-level sum reduction over an array of cuComplex values.
//
// Every block loads blockDim.x consecutive elements of d_x (elements at or
// beyond N are replaced by zero), adds them up in shared memory and writes a
// single partial sum to d_y[blockIdx.x]. The caller still has to combine the
// per-block partial sums.
//
// Parameters:
//   d_x  input array, read at indices [0, N)
//   d_y  output array, one element written per block (index blockIdx.x)
//   N    number of valid elements in d_x
//
// Preconditions:
//   - blockDim.x must not exceed 128: the scratch buffer below is a static
//     128-entry array indexed by threadIdx.x. Despite the kernel's name, no
//     dynamically sized shared memory is used here.
//   - 1D launch (only the .x components of the indices are used).
__global__ void CUDACkernel_Complex_SUM_reduce_dynamicshared(cuComplex* d_x, cuComplex* d_y, long N)
{
    const unsigned int tid = threadIdx.x;   // thread index inside the block
    const unsigned int bid = blockIdx.x;    // block index inside the grid
    // Widen before multiplying so the global index cannot wrap in 32 bits.
    const long long n = (long long)bid * blockDim.x + tid;

    __shared__ cuComplex s_y[128];          // per-block scratch buffer
    s_y[tid] = (n < N) ? d_x[n] : make_cuComplex(0.0f, 0.0f);
    __syncthreads();                        // all loads visible before reducing

    // Pairwise reduction. `active` is the number of live partial sums; each
    // pass folds the upper part onto the lower part. Rounding the half up
    // keeps the middle element when `active` is odd, so block sizes that are
    // not a power of two no longer lose elements. For power-of-two sizes the
    // additions are the same as a plain halving loop.
    for (unsigned int active = blockDim.x; active > 1; )
    {
        const unsigned int half = (active + 1) >> 1;
        if (tid < active - half)
        {
            s_y[tid] = cuCaddf(s_y[tid], s_y[tid + half]);
        }
        __syncthreads();                    // reached by every thread: loop bounds depend only on blockDim.x
        active = half;
    }

    if (tid == 0)                           // one write per block
    {
        d_y[bid] = s_y[0];
    }
}
__global__ void CUDAKernel_SumPRF_Temp(cuComplex* d_dem_echo, long plusepoints, long grid_size, cuComplex* d_echo_PRF) { __global__ void CUDAKernel_SumPRF_Temp(cuComplex* d_dem_echo, long plusepoints, long grid_size, cuComplex* d_echo_PRF) {
@ -641,7 +620,28 @@ __global__ void CUDAKernel_SumPRF_Temp(cuComplex* d_dem_echo, long plusepoints,
} }
} }
// Accumulates the echo of one pulse line over all pixels, one thread per
// frequency point.
//
// For frequency index idx (< freqnum) the thread computes
//     sum_i ampArr[i] * exp(j * (4 * PI * freq / LIGHTSPEED) * Rarr[i])
// over i in [0, pixelcount) and ADDS it to the value already stored in
// PRFEcho[prfid * freqnum + idx]; the buffer is therefore expected to hold the
// running total on entry.
//
// Parameters:
//   Rarr        per-pixel range values, pixelcount entries
//   ampArr      per-pixel amplitudes, pixelcount entries
//   pixelcount  number of pixels to integrate over
//   freqpoints  frequency samples, freqnum entries
//   freqnum     number of frequency samples (row length of PRFEcho)
//   PRFEcho     echo buffer, row-major [line][freqnum]; read and written
//   prfid       row (pulse line) of PRFEcho to update
//
// Launch: 1D grid with at least freqnum threads in total.
//
// NOTE(review): the phase is formed in single precision; if ranges and
// frequencies are large the product may exceed what float can resolve to a
// fraction of a cycle -- confirm against the expected value ranges.
__global__ void CUDAKernel_PRF_CalFreqEcho(
    float* Rarr, float* ampArr, long pixelcount,
    float* freqpoints, long freqnum,
    cuComplex* PRFEcho, long prfid) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic, and `long` is only 32 bits on some targets.
    const long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= freqnum) {
        return;                                   // tail threads of the last block
    }

    const float freq = freqpoints[idx];
    const float factoj = PI * 4 * freq / LIGHTSPEED;      // phase per unit of range
    const long long echoOffset = (long long)prfid * freqnum + idx;

    cuComplex tempfreqEcho = PRFEcho[echoOffset];          // start from the stored total
    for (long i = 0; i < pixelcount; i++) {                // integrate over the pixel area
        const float phi = factoj * Rarr[i];                // phase
        const float amptemp = ampArr[i];
        // Euler's formula: A*exp(j*phi) = A*cos(phi) + j*A*sin(phi)
        tempfreqEcho.x = tempfreqEcho.x + amptemp * cosf(phi);   // real part
        tempfreqEcho.y = tempfreqEcho.y + amptemp * sinf(phi);   // imaginary part
    }
    PRFEcho[echoOffset] = tempfreqEcho;
}
@ -848,6 +848,7 @@ extern "C" void CUDARFPC_Caluation_R_Gain(
ReceiveAntpattern, ReceiveAntpattern,
Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints, Receivestarttheta, Receivestartphi, Receivedtheta, Receivedphi, Receivethetapoints, Receivephipoints,
NearR, FarR, NearR, FarR,
sigma0Paramslist, sigmaparamslistlen,
outR, outR,
outAmp outAmp
); );
@ -861,77 +862,25 @@ extern "C" void CUDARFPC_Caluation_R_Gain(
} }
#endif // __CUDADEBUG__ #endif // __CUDADEBUG__
cudaDeviceSynchronize(); cudaDeviceSynchronize();
} }
extern "C" void CUDARFPC_Target_Freq_EchoData(float* InR, extern "C" void CUDA_PRF_CalFreqEcho(
float* InlocalAngle, float* Rarr, float* ampArr, long pixelcount,
float* InampGain, float* freqpoints, long freqnum,
long* Indemcls, cuComplex* PRFEcho, long prfid)
long len,
float Pt, float freq,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
cuComplex* OutechoArr
)
{ {
int blockSize = 256; // 每个块的线程数 int blockSize = 256; // 每个块的线程数
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小 int numBlocks = (freqnum + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDARFPCKernel_Target_Freq_EchoData << <numBlocks, blockSize >> > ( CUDAKernel_PRF_CalFreqEcho << <numBlocks, blockSize >> > (
InR, Rarr, ampArr, pixelcount,
InlocalAngle, freqpoints, freqnum,
InampGain, PRFEcho, prfid
Indemcls,
len,
Pt, freq,
sigma0Paramslist, sigmaparamslistlen,
OutechoArr
); );
#ifdef __CUDADEBUG__ #ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) { if (err != cudaSuccess) {
printf("CUDARFPC_Target_Freq_EchoData CUDA Error: %s\n", cudaGetErrorString(err)); printf("CUDA_PRF_CalFreqEcho CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue....
}
#endif // __CUDADEBUG__
cudaDeviceSynchronize();
}
// Host wrapper: launches the complex block-sum reduction kernel on buffers the
// caller has already allocated, then waits for the device to finish.
//
// Parameters:
//   d_dem_echo      device input array with N elements
//   N               number of elements to sum
//   d_echosum_temp  device output array; the kernel writes one partial sum per
//                   block, so it must provide grid_size elements
//   grid_size       number of blocks to launch (BLOCK_SIZE threads each)
//
// In __CUDADEBUG__ builds a launch error is printed; it is not propagated.
extern "C" void CUDA_DemEchoSUM_NoMalloc(cuComplex* d_dem_echo, long N,
    cuComplex* d_echosum_temp, int grid_size) {
    // Dynamic shared-memory request, sized for the element type being reduced
    // (cuComplex, not float).
    // NOTE(review): the kernel as visible in this file reduces through a
    // static shared array, so this request may currently be unused -- confirm.
    const size_t smem = sizeof(cuComplex) * BLOCK_SIZE;

    CUDACkernel_Complex_SUM_reduce_dynamicshared << <grid_size, BLOCK_SIZE, smem >> > (d_dem_echo, d_echosum_temp, N); // block-wise reduction
#ifdef __CUDADEBUG__
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("CUDA_DemEchoSUM_NoMalloc CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
#endif // __CUDADEBUG__
    cudaDeviceSynchronize();
}
extern "C" void CUDA_SumPRF_Temp(cuComplex* d_dem_echo, long plusepoints, long grid_size, cuComplex* d_echo_PRF)
{
int blockSize = 256; // 每个块的线程数
int numBlocks = (plusepoints + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
// 调用 CUDA 核函数
CUDAKernel_SumPRF_Temp << <numBlocks, blockSize >> > (
d_dem_echo, plusepoints, grid_size, d_echo_PRF
);
#ifdef __CUDADEBUG__
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDARFPC_Target_Freq_EchoData CUDA Error: %s\n", cudaGetErrorString(err));
// Possibly: exit(-1) if program cannot continue.... // Possibly: exit(-1) if program cannot continue....
} }
#endif // __CUDADEBUG__ #endif // __CUDADEBUG__
@ -939,7 +888,6 @@ extern "C" void CUDA_SumPRF_Temp(cuComplex* d_dem_echo, long plusepoints, long g
} }
#endif #endif

View File

@ -83,27 +83,14 @@ extern "C" void CUDARFPC_Caluation_R_Gain(
); );
// 创建回波 extern "C" void CUDA_PRF_CalFreqEcho(
extern "C" void CUDARFPC_Target_Freq_EchoData( float* Rarr,float* amp,long pixelcount,//
float* InR, float* freqpoints,long freqnum,
float* InlocalAngle, cuComplex* PRFEcho,long prfid
float* InampGain,
long* Indemcls, long TargetPixelNumber,
float Pt, float freq,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,// 插值图
cuComplex* OutechoArr
); );
extern "C" void CUDA_DemEchoSUM_NoMalloc(cuComplex* d_dem_echo, long N,
cuComplex* d_echosum_temp, int grid_size
);
extern "C" void CUDA_SumPRF_Temp(
cuComplex* d_dem_echo, long plusepoints, long grid_size,
cuComplex* d_echo_PRF
);
#endif #endif

View File

@ -17,10 +17,10 @@
#define REDUCE_SCALE 4 #define REDUCE_SCALE 4
// 定义参数 // 定义参数
__device__ cuComplex cuCexpf(cuComplex x) __device__ cuComplex cuCexpf(cuComplex d)
{ {
float factor = exp(x.x); float factor = exp(d.x);
return make_cuComplex(factor * cos(x.y), factor * sin(x.y)); return make_cuComplex(factor * cos(d.y), factor * sin(d.y));
} }
__device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B) { __device__ CUDAVector GPU_VectorAB(CUDAVector A, CUDAVector B) {

View File

@ -351,11 +351,6 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
double dem_row = 0, dem_col = 0, dem_alt = 0; double dem_row = 0, dem_col = 0, dem_alt = 0;
QVector<double> freqlist = this->TaskSetting->getFreqList(); QVector<double> freqlist = this->TaskSetting->getFreqList();
float* freqpoints=(float*)mallocCUDAHost(sizeof(float)*freqlist.size());
for (long ii = 0; ii < freqlist.size(); ii++) {
freqpoints[ii] = freqlist[ii];
}
long double imageStarttime = 0; long double imageStarttime = 0;
imageStarttime = this->TaskSetting->getSARImageStartTime(); imageStarttime = this->TaskSetting->getSARImageStartTime();
//std::vector<SatelliteOribtNode> sateOirbtNodes(this->PluseCount); //std::vector<SatelliteOribtNode> sateOirbtNodes(this->PluseCount);
@ -506,6 +501,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
CUDASigmaParam* h_clsSigmaParam = (CUDASigmaParam*)mallocCUDAHost(sizeof(CUDASigmaParam) * clamapid); CUDASigmaParam* h_clsSigmaParam = (CUDASigmaParam*)mallocCUDAHost(sizeof(CUDASigmaParam) * clamapid);
CUDASigmaParam* d_clsSigmaParam = (CUDASigmaParam*)mallocCUDADevice(sizeof(CUDASigmaParam) * clamapid); CUDASigmaParam* d_clsSigmaParam = (CUDASigmaParam*)mallocCUDADevice(sizeof(CUDASigmaParam) * clamapid);
{ {
std::map<long, SigmaParam> tempSigmaParam = this->SigmaDatabasePtr->getsigmaParams(polartype); std::map<long, SigmaParam> tempSigmaParam = this->SigmaDatabasePtr->getsigmaParams(polartype);
for (long id : clamap.keys()) { for (long id : clamap.keys()) {
@ -577,26 +573,27 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
d_demsloper_z=(float* )mallocCUDADevice( sizeof(float) * blokline * tempDemCols); d_demsloper_z=(float* )mallocCUDADevice( sizeof(float) * blokline * tempDemCols);
// 提前声明参数变量 // 提前声明参数变量
float* h_R;// 辐射方向 float* h_R=(float* )mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
float* h_localangle;//入射角 float* d_R= (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
float* d_R;// 辐射方向
float* d_localangle;//入射角
h_R=(float* )mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
h_localangle= (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols); // 11
d_R= (float*)mallocCUDADevice(sizeof(float) * blokline * tempDemCols);
d_localangle= (float*)mallocCUDADevice( sizeof(float) * blokline * tempDemCols);
float* h_amp = (float*)mallocCUDAHost( sizeof(float)* blokline* tempDemCols); float* h_amp = (float*)mallocCUDAHost( sizeof(float)* blokline* tempDemCols);
float* d_amp = (float*)mallocCUDADevice( sizeof(float) * blokline * tempDemCols); float* d_amp = (float*)mallocCUDADevice( sizeof(float) * blokline * tempDemCols);
// 回波 // 地面回波
cuComplex* h_echo; cuComplex* h_echo=(cuComplex*)mallocCUDAHost(sizeof(cuComplex) * blokline * tempDemCols);
cuComplex* d_echo; cuComplex* d_echo=(cuComplex*)mallocCUDADevice( sizeof(cuComplex) * blokline * tempDemCols); //19
h_echo=(cuComplex*)mallocCUDAHost(sizeof(cuComplex) * blokline * tempDemCols);
d_echo=(cuComplex*)mallocCUDADevice( sizeof(cuComplex) * blokline * tempDemCols); //19 long echoblockline = Memory1GB / 8 / 2 / PlusePoint*2;
// 每一行的脉冲
cuComplex* h_PRFEcho = (cuComplex*)mallocCUDAHost(sizeof(cuComplex)* echoblockline * PlusePoint);
cuComplex* d_PRFEcho = (cuComplex*)mallocCUDADevice(sizeof(cuComplex)* echoblockline * PlusePoint);
float* h_freqpoints = (float*)mallocCUDAHost(sizeof(float) * freqlist.size());
float* d_freqpoints = (float*)mallocCUDADevice(sizeof(float) * freqlist.size());
for (long ii = 0; ii < freqlist.size(); ii++) {
h_freqpoints[ii] = freqlist[ii];
}
HostToDevice(h_freqpoints, d_freqpoints, sizeof(float) * freqlist.size());
// 地表覆盖类型 // 地表覆盖类型
Eigen::MatrixXd landcover = Eigen::MatrixXd::Zero(blokline, tempDemCols);// 地面覆盖类型 Eigen::MatrixXd landcover = Eigen::MatrixXd::Zero(blokline, tempDemCols);// 地面覆盖类型
@ -615,7 +612,6 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
demsloper_x = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 1); demsloper_x = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 1);
demsloper_y = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 2); demsloper_y = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 2);
demsloper_z = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 3); demsloper_z = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 3);
sloperAngle = demsloperxyz.getData(startline, 0, newblokline, demsloperxyz.width, 4);
landcover = demlandcls.getData(startline, 0, newblokline, demlandcls.width, 1); landcover = demlandcls.getData(startline, 0, newblokline, demlandcls.width, 1);
@ -637,7 +633,6 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
h_demsloper_x = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols); h_demsloper_x = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
h_demsloper_y = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols); h_demsloper_y = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
h_demsloper_z = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols); h_demsloper_z = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
h_demsloper_angle = (float*)mallocCUDAHost(sizeof(float) * blokline * tempDemCols);
h_R = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols); h_R = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
h_amp = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols); h_amp = (float*)mallocCUDAHost(sizeof(float) * newblokline * tempDemCols);
h_echo = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * newblokline * tempDemCols); h_echo = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * newblokline * tempDemCols);
@ -649,7 +644,6 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
d_demsloper_x=(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols); d_demsloper_x=(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
d_demsloper_y=(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols); d_demsloper_y=(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
d_demsloper_z=(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);//6 d_demsloper_z=(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);//6
d_demsloper_angle=(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);//7
d_amp =(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols); d_amp =(float*)mallocCUDADevice(sizeof(float) * newblokline * tempDemCols);
d_echo=(cuComplex*)mallocCUDADevice(sizeof(cuComplex) * newblokline * tempDemCols); d_echo=(cuComplex*)mallocCUDADevice(sizeof(cuComplex) * newblokline * tempDemCols);
d_demcls = (long*)mallocCUDADevice(sizeof(long) * newblokline * tempDemCols); d_demcls = (long*)mallocCUDADevice(sizeof(long) * newblokline * tempDemCols);
@ -664,7 +658,6 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
h_demsloper_x[i * demxyz.width + j] = float(demsloper_x(i, j)); h_demsloper_x[i * demxyz.width + j] = float(demsloper_x(i, j));
h_demsloper_y[i * demxyz.width + j] = float(demsloper_y(i, j)); h_demsloper_y[i * demxyz.width + j] = float(demsloper_y(i, j));
h_demsloper_z[i * demxyz.width + j] = float(demsloper_z(i, j)); h_demsloper_z[i * demxyz.width + j] = float(demsloper_z(i, j));
h_demsloper_angle[i * demxyz.width + j] = float(sloperAngle(i, j));
h_demcls[i * demxyz.width + j] = clamap[long(landcover(i, j))]; h_demcls[i * demxyz.width + j] = clamap[long(landcover(i, j))];
} }
} }
@ -687,32 +680,18 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
#endif // __PRFDEBUG__ #endif // __PRFDEBUG__
long pixelcount = newblokline * tempDemCols; long pixelcount = newblokline * tempDemCols;
long echoblockline = Memory1GB / 8 / 2 / PlusePoint*2;
long startprfid = 0; long startprfid = 0;
int grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
if ((sizeof(cuComplex) * grid_size * PlusePoint/4+1) > Memory1GB ) {
grid_size =Memory1GB / sizeof(cuComplex) / PlusePoint*4;
}
cuComplex* d_echosum_temp = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * grid_size* PlusePoint);
cuComplex* h_echosum_temp = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * grid_size* PlusePoint);
cuComplex* d_echo_PRF = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * PlusePoint);
cuComplex* h_echo_PRF = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * PlusePoint);
for (long ii = 0; ii < grid_size; ii++) {
h_echosum_temp[ii] = make_cuComplex(0, 0);
}
cuComplex initSumTemp = make_cuComplex(0, 0);
for (startprfid = 0; startprfid < pluseCount; startprfid = startprfid + echoblockline) { for (startprfid = 0; startprfid < pluseCount; startprfid = startprfid + echoblockline) {
long templine = startprfid + echoblockline < PluseCount ? echoblockline : PluseCount - startprfid; long templine = startprfid + echoblockline < PluseCount ? echoblockline : PluseCount - startprfid;
Eigen::MatrixXd echoMasktemp = echoMaskImg.getData(startprfid,0, templine,echoMaskImg.width,1);
// 创建内存
std::shared_ptr<std::complex<float>> echotemp = this->EchoSimulationData->getEchoArr(startprfid, templine); std::shared_ptr<std::complex<float>> echotemp = this->EchoSimulationData->getEchoArr(startprfid, templine);
for (long tempprfid = 0; tempprfid < templine; tempprfid++) {
for (long freqid = 0; freqid < PlusePoint; freqid++) {
h_PRFEcho[tempprfid * PlusePoint + freqid].x = echotemp.get()[tempprfid * PlusePoint + freqid].real();
h_PRFEcho[tempprfid * PlusePoint + freqid].y = echotemp.get()[tempprfid * PlusePoint + freqid].imag();
}
}
HostToDevice(h_PRFEcho, d_PRFEcho, sizeof(cuComplex) * echoblockline * PlusePoint);
for (long tempprfid = 0; tempprfid < templine; tempprfid++) { for (long tempprfid = 0; tempprfid < templine; tempprfid++) {
{// 计算 {// 计算
@ -736,9 +715,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
float antZaxisX = sateOirbtNodes[prfid].AntZaxisX; float antZaxisX = sateOirbtNodes[prfid].AntZaxisX;
float antZaxisY = sateOirbtNodes[prfid].AntZaxisY; float antZaxisY = sateOirbtNodes[prfid].AntZaxisY;
float antZaxisZ = sateOirbtNodes[prfid].AntZaxisZ;//18 float antZaxisZ = sateOirbtNodes[prfid].AntZaxisZ;//18
#ifdef __PRFDEBUG__
std::cout << "ant Position=[" << antpx << "," << antpy << "," << antpz << "]" << std::endl;
#endif // __PRFDEBUG__
// 计算距离、局地入射角、增益 // 计算距离、局地入射角、增益
CUDARFPC_Caluation_R_Gain( CUDARFPC_Caluation_R_Gain(
antpx, antpy, antpz, // 天线的坐标 antpx, antpy, antpz, // 天线的坐标
@ -755,49 +732,32 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
NearRange, FarRange, NearRange, FarRange,
d_clsSigmaParam, clamapid, d_clsSigmaParam, clamapid,
d_R, // 输出距离 d_R, // 输出距离
d_amp // 输出增益 d_amp // 输出振幅
); );
// 计算某个具体回波
////DeviceToHost(h_echosum_temp, d_echosum_temp, sizeof(cuComplex)* templine* PlusePoint); CUDA_PRF_CalFreqEcho(
//for (long freqid = 0; freqid < freqlist.size(); freqid++) { d_R, d_amp, pixelcount,
// float freqpoint = freqlist[freqid]; d_freqpoints, PlusePoint,
// CUDARFPC_Target_Freq_EchoData(d_R, d_PRFEcho, tempprfid);
// d_localangle,
// d_demcls,
// pixelcount,
// Pt, freqpoint,
// d_clsSigmaParam, clamapid,
// d_echo);
// // 数据求和
// CUDA_MemsetBlock(d_echosum_temp, initSumTemp, grid_size);
// long tempechoid = tempprfid * PlusePoint + freqid;
// CUDA_DemEchoSUM_NoMalloc(d_echo, pixelcount,
// d_echosum_temp + grid_size * freqid, grid_size
// );
//}
//CUDA_SumPRF_Temp(d_echosum_temp, PlusePoint, grid_size, d_echo_PRF); if (prfid % 1000 == 0) {
//DeviceToHost(h_echo_PRF, d_echo_PRF, sizeof(cuComplex) * PlusePoint);
//for (long freqid = 0; freqid < PlusePoint; freqid++) {
// echotemp.get()[tempprfid * PlusePoint + freqid] =
// echotemp.get()[tempprfid * PlusePoint + freqid]
// + std::complex<float>(
// h_echo_PRF[freqid].x,
// h_echo_PRF[freqid].y);
//}
if (prfid % 100 == 0) {
std::cout << "[" << QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz").toStdString() << "] dem:\t" << startline << "\t-\t" << startline + newblokline << "\t:\t pluse :\t" << prfid << " / " << pluseCount << std::endl; std::cout << "[" << QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz").toStdString() << "] dem:\t" << startline << "\t-\t" << startline + newblokline << "\t:\t pluse :\t" << prfid << " / " << pluseCount << std::endl;
} }
} }
} }
echoMaskImg.saveImage(echoMasktemp, startprfid, 0, 1); DeviceToHost(h_PRFEcho, d_PRFEcho, sizeof(cuComplex) * echoblockline * PlusePoint);
for (long tempprfid = 0; tempprfid < templine; tempprfid++) {
for (long freqid = 0; freqid < PlusePoint; freqid++) {
echotemp.get()[tempprfid * PlusePoint + freqid].real(h_PRFEcho[tempprfid * PlusePoint + freqid].x);
echotemp.get()[tempprfid * PlusePoint + freqid].imag(h_PRFEcho[tempprfid * PlusePoint + freqid].y);
}
}
this->EchoSimulationData->saveEchoArr(echotemp, startprfid, templine); this->EchoSimulationData->saveEchoArr(echotemp, startprfid, templine);
} }
FreeCUDADevice(d_echosum_temp);
} }
@ -812,14 +772,14 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
FreeCUDAHost(h_demsloper_x); FreeCUDADevice(d_demsloper_x); FreeCUDAHost(h_demsloper_x); FreeCUDADevice(d_demsloper_x);
FreeCUDAHost(h_demsloper_y); FreeCUDADevice(d_demsloper_y); FreeCUDAHost(h_demsloper_y); FreeCUDADevice(d_demsloper_y);
FreeCUDAHost(h_demsloper_z); FreeCUDADevice(d_demsloper_z); //6 FreeCUDAHost(h_demsloper_z); FreeCUDADevice(d_demsloper_z); //6
FreeCUDAHost(h_demsloper_angle); FreeCUDADevice(d_demsloper_angle); //7
// 临时变量释放 // 临时变量释放
FreeCUDAHost(h_R); FreeCUDADevice(d_R); FreeCUDAHost(h_R); FreeCUDADevice(d_R);
FreeCUDAHost(h_amp); FreeCUDADevice(d_amp); FreeCUDAHost(h_amp); FreeCUDADevice(d_amp);
FreeCUDAHost(h_demcls); FreeCUDADevice(d_demcls); FreeCUDAHost(h_demcls); FreeCUDADevice(d_demcls);
FreeCUDAHost(freqpoints); FreeCUDAHost(h_freqpoints); FreeCUDADevice(d_freqpoints);
FreeCUDAHost(h_PRFEcho); FreeCUDADevice(d_PRFEcho);
#endif #endif