修复bug
parent
6615e35332
commit
e3a6585adb
|
@ -604,7 +604,16 @@ __global__ void CUDACkernel_Complex_SUM_reduce_dynamicshared(cuComplex* d_x, cu
|
|||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Sums grid_size consecutive complex values for each output element.
 *
 * Each thread with global index idx < plusepoints reads
 * d_dem_echo[idx * grid_size + i] for i in [0, grid_size) and stores the
 * complex sum in d_echo_PRF[idx]. Threads with idx >= plusepoints do nothing.
 *
 * Launch layout: 1-D grid, 1-D blocks; blocks * threads must cover
 * plusepoints. No shared memory is used.
 *
 * @param d_dem_echo   input buffer holding plusepoints * grid_size elements,
 *                     indexed as [idx * grid_size + i]
 * @param plusepoints  number of output elements
 * @param grid_size    number of input elements accumulated per output element
 * @param d_echo_PRF   output buffer with at least plusepoints elements
 */
__global__ void CUDAKernel_SumPRF_Temp(cuComplex* d_dem_echo, long plusepoints, long grid_size, cuComplex* d_echo_PRF) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic, and `long` is only 32 bits on LLP64
    // platforms, so the flat offset below could wrap for large buffers.
    const long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= plusepoints) {
        return; // tail threads of the last block have no element to produce
    }

    // NOTE(review): neighbouring threads read addresses grid_size elements
    // apart, so these loads are strided; the caller defines the buffer layout.
    cuComplex echo_PRF = make_cuComplex(0.0f, 0.0f);
    for (long i = 0; i < grid_size; i++) {
        echo_PRF = cuCaddf(echo_PRF, d_dem_echo[idx * grid_size + i]);
    }
    d_echo_PRF[idx] = echo_PRF;
}
|
||||
|
||||
|
||||
|
||||
|
@ -863,15 +872,11 @@ extern "C" void CUDARFPC_Target_Freq_EchoData(float* InR,
|
|||
|
||||
|
||||
extern "C" void CUDA_DemEchoSUM_NoMalloc(cuComplex* d_dem_echo,long N,
|
||||
cuComplex* d_echosum_temp, int grid_size,
|
||||
cuComplex* d_echo,long ehcoid
|
||||
) {
|
||||
cuComplex* d_echosum_temp, int grid_size) {
|
||||
|
||||
long NUM_REPEATS = 100;
|
||||
const int smem = sizeof(float) * BLOCK_SIZE;
|
||||
for (long i = 0; i < grid_size; i++) { // initialize
|
||||
d_echosum_temp[i] = make_cuComplex(0,0);
|
||||
}
|
||||
|
||||
|
||||
CUDACkernel_Complex_SUM_reduce_dynamicshared << <grid_size, BLOCK_SIZE, smem >> > (d_dem_echo, d_echosum_temp,N); // reduction sum
|
||||
#ifdef __CUDADEBUG__
|
||||
cudaError_t err = cudaGetLastError();
|
||||
|
@ -881,13 +886,25 @@ extern "C" void CUDA_DemEchoSUM_NoMalloc(cuComplex* d_dem_echo,long N,
|
|||
}
|
||||
#endif // __CUDADEBUG__
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
|
||||
for (int n = 0; n < grid_size; ++n)
|
||||
{
|
||||
d_echo[ehcoid] =cuCaddf(d_echo[ehcoid],d_echosum_temp[n]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Host-side launcher for CUDAKernel_SumPRF_Temp.
 *
 * Launches one thread per output element (256 threads per block) and then
 * blocks until the device has finished, so d_echo_PRF can be read by the
 * caller on return.
 *
 * @param d_dem_echo   input buffer passed through to the kernel
 *                     (plusepoints * grid_size elements)
 * @param plusepoints  number of output elements; when <= 0 no kernel is launched
 * @param grid_size    number of input elements accumulated per output element
 * @param d_echo_PRF   output buffer passed through to the kernel
 */
extern "C" void CUDA_SumPRF_Temp(cuComplex* d_dem_echo, long plusepoints, long grid_size, cuComplex* d_echo_PRF)
{
    const int blockSize = 256; // threads per block

    // A launch with a zero (or negative) block count is rejected as an invalid
    // configuration and leaves a pending error behind, so only launch when
    // there is at least one element to produce.
    if (plusepoints > 0) {
        // Ceil-divide plusepoints so a partially filled last block is launched.
        const int numBlocks = (int)((plusepoints + blockSize - 1) / blockSize);
        CUDAKernel_SumPRF_Temp<<<numBlocks, blockSize>>>(
            d_dem_echo, plusepoints, grid_size, d_echo_PRF
        );
    }

#ifdef __CUDADEBUG__
    // Launch-configuration errors are only visible through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("CUDA_SumPRF_Temp CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
    // Faults raised while the kernel runs surface at the synchronizing call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        printf("CUDA_SumPRF_Temp CUDA Error: %s\n", cudaGetErrorString(err));
    }
#else
    cudaDeviceSynchronize();
#endif // __CUDADEBUG__
}
|
||||
|
||||
|
||||
|
|
|
@ -68,15 +68,12 @@ extern "C" void CUDARFPC_Caluation_R_Gain(
|
|||
float antX,float antY,float antZ, // 天线的坐标
|
||||
float* targetX,float* targetY, float* targetZ, long TargetPixelNumber, // 地面坐标
|
||||
float* demSlopeX, float* demSlopeY, float* demSlopeZ, // 地表坡度矢量
|
||||
|
||||
float antXaxisX, float antXaxisY, float antXaxisZ, // 天线坐标系的X轴
|
||||
float antYaxisX, float antYaxisY, float antYaxisZ,// 天线坐标系的Y轴
|
||||
float antZaxisX, float antZaxisY, float antZaxisZ,// 天线坐标系的Z轴
|
||||
float antDirectX, float antDirectY, float antDirectZ,// 天线的指向
|
||||
|
||||
float* TransAntpattern, float Transtarttheta, float Transstartphi, float Transdtheta, float Transdphi, int Transthetapoints, int Transphipoints, // 发射天线方向图
|
||||
float* ReceiveAntpattern, float Receivestarttheta, float Receivestartphi, float Receivedtheta, float Receivedphi, int Receivethetapoints, int Receivephipoints,//接收天线方向图
|
||||
|
||||
float NearR,float FarR, // 距离范围
|
||||
|
||||
float* outR, // 输出距离
|
||||
|
@ -99,13 +96,13 @@ extern "C" void CUDARFPC_Target_Freq_EchoData(
|
|||
|
||||
|
||||
extern "C" void CUDA_DemEchoSUM_NoMalloc(cuComplex* d_dem_echo, long N,
|
||||
cuComplex* d_echosum_temp, int grid_size,
|
||||
cuComplex* d_echo, long ehcoid
|
||||
cuComplex* d_echosum_temp, int grid_size
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
extern "C" void CUDA_SumPRF_Temp(
|
||||
cuComplex* d_dem_echo, long plusepoints, long grid_size,
|
||||
cuComplex* d_echo_PRF
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -45,7 +45,12 @@ __device__ float GPU_CosAngle_VectorA_VectorB(CUDAVector A, CUDAVector B) {
|
|||
|
||||
|
||||
|
||||
|
||||
/**
 * Fills data[0 .. len) with the complex value init0.
 *
 * Each thread with global index idx < len writes one element; threads with
 * idx >= len do nothing.
 *
 * Launch layout: 1-D grid, 1-D blocks; blocks * threads must cover len.
 * No shared memory is used.
 *
 * @param data   buffer to fill, at least len elements
 * @param init0  value stored into every element
 * @param len    number of elements to write
 */
__global__ void CUDAKernel_MemsetBlock(cuComplex* data, cuComplex init0, long len) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
    // in 32-bit unsigned arithmetic and can wrap before being stored.
    const long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < len) {
        data[idx] = init0;
    }
}
|
||||
|
||||
|
||||
|
||||
|
@ -160,18 +165,21 @@ __global__ void CUDA_GridPoint_Linear_Interp1(float* v, float* q, float* qv, lon
|
|||
|
||||
|
||||
|
||||
/**
 * Host-side launcher for CUDAKernel_MemsetBlock.
 *
 * Launches one thread per element (256 threads per block) to store init0 into
 * data[0 .. len), then blocks until the device has finished.
 *
 * @param data   buffer passed through to the kernel, at least len elements
 * @param init0  value stored into every element
 * @param len    number of elements to fill; when <= 0 no kernel is launched
 */
extern "C" void CUDA_MemsetBlock(cuComplex* data, cuComplex init0, long len) {
    const int blockSize = 256; // threads per block

    // A launch with a zero (or negative) block count is rejected as an invalid
    // configuration and leaves a pending error behind, so only launch when
    // there is at least one element to fill.
    if (len > 0) {
        // Ceil-divide len so a partially filled last block is launched.
        const int numBlocks = (int)((len + blockSize - 1) / blockSize);
        CUDAKernel_MemsetBlock<<<numBlocks, blockSize>>>(data, init0, len);
    }

#ifdef __CUDADEBUG__
    // Launch-configuration errors are only visible through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("CUDA_MemsetBlock CUDA Error: %s\n", cudaGetErrorString(err));
        // Possibly: exit(-1) if program cannot continue....
    }
    // Faults raised while the kernel runs surface at the synchronizing call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        printf("CUDA_MemsetBlock CUDA Error: %s\n", cudaGetErrorString(err));
    }
#else
    cudaDeviceSynchronize();
#endif // __CUDADEBUG__
}
|
||||
|
||||
|
||||
//错误提示
|
||||
|
@ -303,7 +311,6 @@ extern "C" void CUDABdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float
|
|||
}
|
||||
|
||||
extern "C" void CUDAmake_VectorA_B(float sX, float sY, float sZ, float* tX, float* tY, float* tZ, float* RstX, float* RstY, float* RstZ, long len) {
|
||||
// 设置 CUDA 核函数的网格和块的尺寸
|
||||
int blockSize = 256; // 每个块的线程数
|
||||
int numBlocks = (len + blockSize - 1) / blockSize; // 根据 pixelcount 计算网格大小
|
||||
// 调用 CUDA 核函数
|
||||
|
|
|
@ -63,7 +63,7 @@ extern "C" void FreeCUDADevice(void* ptr);
|
|||
extern "C" void HostToDevice(void* hostptr, void* deviceptr, long memsize);//GPU 内存数据转移 设备 -> GPU
|
||||
extern "C" void DeviceToHost(void* hostptr, void* deviceptr, long memsize);//GPU 内存数据转移 GPU -> 设备
|
||||
|
||||
|
||||
extern "C" void CUDA_MemsetBlock(cuComplex* data, cuComplex init0, long len);
|
||||
// 矢量基础运算函数
|
||||
extern "C" void CUDAdistanceAB(float* Ax, float* Ay, float* Az, float* Bx, float* By, float* Bz, float* R, long member);
|
||||
extern "C" void CUDABdistanceAs(float* Ax, float* Ay, float* Az, float Bx, float By, float Bz, float* R, long member);
|
||||
|
@ -76,7 +76,5 @@ extern "C" void CUDAcosAngle_VA_AB(float* Ax, float* Ay, float* Az, float* Bx, f
|
|||
extern "C" void CUDAGridPointLinearInterp1(float* v, float* q, float* qv,long xlen, long qlen);
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -476,8 +476,6 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
|
|||
}
|
||||
HostToDevice(h_RantPattern, d_RantPattern, sizeof(float) * Rthetanum * Rphinum);
|
||||
|
||||
|
||||
|
||||
//处理地表覆盖
|
||||
QMap<long, long> clamap;
|
||||
long clamapid = 0;
|
||||
|
@ -697,30 +695,33 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
|
|||
testOutClsArr( "h_demcls.bin" , h_demcls, newblokline, tempDemCols);
|
||||
#endif // __PRFDEBUG__
|
||||
|
||||
|
||||
|
||||
long pixelcount = newblokline * tempDemCols;
|
||||
|
||||
long echoblockline = Memory1MB / 8 / 2 / PlusePoint*2;
|
||||
long echoblockline = Memory1GB / 8 / 2 / PlusePoint*2;
|
||||
|
||||
long startprfid = 0;
|
||||
int grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
|
||||
if ((sizeof(cuComplex) * grid_size * PlusePoint/4+1) > Memory1GB ) {
|
||||
grid_size =Memory1GB / sizeof(cuComplex) / PlusePoint*4;
|
||||
}
|
||||
|
||||
cuComplex* d_echosum_temp = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * grid_size);
|
||||
cuComplex* d_echosum_temp = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * grid_size* PlusePoint);
|
||||
cuComplex* h_echosum_temp = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * grid_size* PlusePoint);
|
||||
|
||||
cuComplex* d_echo_PRF = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * PlusePoint);
|
||||
cuComplex* h_echo_PRF = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * PlusePoint);
|
||||
|
||||
for (long ii = 0; ii < grid_size; ii++) {
|
||||
h_echosum_temp[ii] = make_cuComplex(0, 0);
|
||||
}
|
||||
cuComplex initSumTemp = make_cuComplex(0, 0);
|
||||
|
||||
for (startprfid = 0; startprfid < pluseCount; startprfid = startprfid + echoblockline) {
|
||||
long templine = startprfid + echoblockline < PluseCount ? echoblockline : PluseCount - startprfid;
|
||||
Eigen::MatrixXd echoMasktemp = echoMaskImg.getData(startprfid,0, templine,echoMaskImg.width,1);
|
||||
// 创建内存
|
||||
cuComplex* h_echoData_tempblock = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * templine * PlusePoint);
|
||||
cuComplex* d_echoData_tempblock = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * templine * PlusePoint);
|
||||
|
||||
for (long tempprfid = 0; tempprfid < templine; tempprfid++) {
|
||||
for (long j = 0; j < PlusePoint; j++) {
|
||||
h_echoData_tempblock[tempprfid * PlusePoint + j] = make_cuComplex(0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
HostToDevice(h_echoData_tempblock, d_echoData_tempblock, sizeof(cuComplex) * templine * PlusePoint); // copy echo data
|
||||
std::shared_ptr<std::complex<float>> echotemp = this->EchoSimulationData->getEchoArr(startprfid, templine);
|
||||
|
||||
for (long tempprfid = 0; tempprfid < templine; tempprfid++) {
|
||||
{// 计算
|
||||
|
@ -767,6 +768,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
|
|||
);
|
||||
// 计算某个具体回波
|
||||
|
||||
//DeviceToHost(h_echosum_temp, d_echosum_temp, sizeof(cuComplex)* templine* PlusePoint);
|
||||
for (long freqid = 0; freqid < freqlist.size(); freqid++) {
|
||||
float freqpoint = freqlist[freqid];
|
||||
CUDARFPC_Target_Freq_EchoData(d_R,
|
||||
|
@ -778,39 +780,28 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU( )
|
|||
d_clsSigmaParam, clamapid,
|
||||
d_echo);
|
||||
// 数据求和
|
||||
CUDA_MemsetBlock(d_echosum_temp, initSumTemp, grid_size);
|
||||
long tempechoid = tempprfid * PlusePoint + freqid;
|
||||
CUDA_DemEchoSUM_NoMalloc(d_echo, pixelcount,
|
||||
d_echosum_temp, grid_size,
|
||||
d_echoData_tempblock, tempechoid
|
||||
d_echosum_temp + grid_size * freqid, grid_size
|
||||
);
|
||||
}
|
||||
if (prfid % 1000 == 0) {
|
||||
|
||||
CUDA_SumPRF_Temp(d_echosum_temp, PlusePoint, grid_size, d_echo_PRF);
|
||||
DeviceToHost(h_echo_PRF, d_echo_PRF, sizeof(cuComplex) * PlusePoint);
|
||||
for (long freqid = 0; freqid < PlusePoint; freqid++) {
|
||||
echotemp.get()[tempprfid * PlusePoint + freqid] = echotemp.get()[tempprfid * PlusePoint + freqid]
|
||||
+ std::complex<float>(
|
||||
h_echo_PRF[freqid].x,
|
||||
h_echo_PRF[freqid].y);
|
||||
}
|
||||
if (prfid % 100 == 0) {
|
||||
std::cout << "[" << QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz").toStdString() << "] dem:\t" << startline << "\t-\t" << startline + newblokline << "\t:\t pluse :\t" << prfid << " / " << pluseCount << std::endl;
|
||||
}
|
||||
#ifdef __PRFDEBUG__
|
||||
|
||||
|
||||
//this->EchoSimulationData->saveEchoArr(echotemp, startprfid, templine);
|
||||
|
||||
//exit(0);
|
||||
#endif // __PRFDEBUG__
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
DeviceToHost(h_echoData_tempblock, d_echoData_tempblock, sizeof(cuComplex) * templine * PlusePoint); // copy echo data
|
||||
std::shared_ptr<std::complex<float>> echotemp = this->EchoSimulationData->getEchoArr(startprfid, templine);
|
||||
for (long tempprfid = 0; tempprfid < templine; tempprfid++) {
|
||||
for (long j = 0; j < PlusePoint; j++) {
|
||||
echotemp.get()[tempprfid * PlusePoint + j] = echotemp.get()[tempprfid * PlusePoint + j]
|
||||
+ std::complex<float>(h_echoData_tempblock[tempprfid * PlusePoint + j].x,
|
||||
h_echoData_tempblock[tempprfid * PlusePoint + j].y);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
echoMaskImg.saveImage(echoMasktemp, startprfid, 0, 1);
|
||||
this->EchoSimulationData->saveEchoArr(echotemp, startprfid, templine);
|
||||
}
|
||||
|
|
|
@ -207,7 +207,7 @@ QVector<double> AbstractSARSatelliteModel::getFreqList()
|
|||
double farR = this->getFarRange();
|
||||
// 计算分辨率
|
||||
double Resolution = LIGHTSPEED / 2.0 / bandwidth; // 计算分辨率
|
||||
double freqpoints = (farR - nearR) / Resolution + 1;
|
||||
long freqpoints = (farR - nearR) / Resolution + 1;
|
||||
|
||||
double minFreq = centerFreq - bandwidth / 2.0;// 最小频率
|
||||
double maxFreq = minFreq + bandwidth;
|
||||
|
@ -220,7 +220,6 @@ QVector<double> AbstractSARSatelliteModel::getFreqList()
|
|||
freqlist[i] = minFreq + i * deltaFreq;
|
||||
}
|
||||
return freqlist;
|
||||
|
||||
}
|
||||
|
||||
POLARTYPEENUM AbstractSARSatelliteModel::getPolarType()
|
||||
|
|
Loading…
Reference in New Issue