修改多卡无天线辐射图代码

pull/13/head
陈增辉 2025-03-24 10:36:46 +08:00
parent 96eb60bbec
commit 4cf63eee36
3 changed files with 256 additions and 74 deletions

View File

@ -21,8 +21,8 @@ extern __device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta) {/
__device__ double GPU_getSigma0dB_params( __device__ double GPU_getSigma0dB_params(
const double p1, const double p2, const double p3, const double p4, const double p5, const double p6, const double p1, const double p2, const double p3, const double p4, const double p5, const double p6,
double theta) {//线性值 double theta) {//线性值
return p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6); return p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6);
} }
@ -71,8 +71,8 @@ extern __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
// 计算theta 与 phi // 计算theta 与 phi
double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
double Zn = Zant / Norm; double Zn = Zant / Norm;
double ThetaAnt = ( - 1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));// acosf(Zant / Norm); // theta 与 Z轴的夹角 double ThetaAnt = (-1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));// acosf(Zant / Norm); // theta 与 Z轴的夹角
double PhiAnt = abs(Xant)<PRECISIONTOLERANCE ?0: atanf(Yant / Xant); // -pi/2 ~pi/2 double PhiAnt = abs(Xant) < PRECISIONTOLERANCE ? 0 : atanf(Yant / Xant); // -pi/2 ~pi/2
if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上 if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
PhiAnt = 0; PhiAnt = 0;
@ -161,9 +161,9 @@ extern __device__ double GPU_BillerInterpAntPattern(double* antpattern,
return GainValue; return GainValue;
} }
} }
/* 核函数 ****************************************************************************************************************************/ /* 核函数 ****************************************************************************************************************************/
// 计算每块 // 计算每块
__global__ void CUDA_Kernel_Computer_R_amp( __global__ void CUDA_Kernel_Computer_R_amp(
@ -173,8 +173,8 @@ __global__ void CUDA_Kernel_Computer_R_amp(
double* antZaxisX, double* antZaxisY, double* antZaxisZ, double* antZaxisX, double* antZaxisY, double* antZaxisZ,
double* antDirectX, double* antDirectY, double* antDirectZ, double* antDirectX, double* antDirectY, double* antDirectZ,
long PRFCount, // 整体的脉冲数, long PRFCount, // 整体的脉冲数,
double* targetX, double* targetY, double* targetZ, long* demCls, double* targetX, double* targetY, double* targetZ, long* demCls,
double* demSlopeX, double* demSlopeY, double* demSlopeZ , double* demSlopeX, double* demSlopeY, double* demSlopeZ,
long startPosId, long pixelcount, long startPosId, long pixelcount,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen, CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
double Pt, double Pt,
@ -189,8 +189,8 @@ __global__ void CUDA_Kernel_Computer_R_amp(
) { ) {
long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码 long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
long prfId = idx / SHAREMEMORY_FLOAT_HALF; long prfId = idx / SHAREMEMORY_FLOAT_HALF;
long posId = idx % SHAREMEMORY_FLOAT_HALF+ startPosId; // 当前线程对应的影像点 long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点
if (prfId < PRFCount && posId < pixelcount) { if (prfId < PRFCount && posId < pixelcount) {
double RstX = antX[prfId] - targetX[posId]; // 计算坐标矢量 double RstX = antX[prfId] - targetX[posId]; // 计算坐标矢量
double RstY = antY[prfId] - targetY[posId]; double RstY = antY[prfId] - targetY[posId];
@ -206,13 +206,13 @@ __global__ void CUDA_Kernel_Computer_R_amp(
double slopeX = demSlopeX[posId]; double slopeX = demSlopeX[posId];
double slopeY = demSlopeY[posId]; double slopeY = demSlopeY[posId];
double slopeZ = demSlopeZ[posId]; double slopeZ = demSlopeZ[posId];
double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); // double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
if (abs(slopR - 0) > 1e-3) { if (abs(slopR - 0) > 1e-3) {
double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ; double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
double localangle = acos(dotAB / (RstR * slopR)); double localangle = acos(dotAB / (RstR * slopR));
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2|| isnan(localangle)) { if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
d_temp_R[idx] = 0; d_temp_R[idx] = 0;
d_temp_amps[idx] = 0; d_temp_amps[idx] = 0;
return; return;
@ -248,7 +248,7 @@ __global__ void CUDA_Kernel_Computer_R_amp(
//printf("clsid=%d\n", clsid); //printf("clsid=%d\n", clsid);
CUDASigmaParam tempsigma = sigma0Paramslist[clsid]; CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
if (abs(tempsigma.p1) < PRECISIONTOLERANCE && if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
abs(tempsigma.p2) < PRECISIONTOLERANCE && abs(tempsigma.p2) < PRECISIONTOLERANCE &&
abs(tempsigma.p3) < PRECISIONTOLERANCE && abs(tempsigma.p3) < PRECISIONTOLERANCE &&
@ -276,19 +276,19 @@ __global__ void CUDA_Kernel_Computer_R_amp(
ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度 ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
float temp_amp = float(ampGain * Pt * sigma0); float temp_amp = float(ampGain * Pt * sigma0);
float temp_R = float(RstR - refPhaseRange); float temp_R = float(RstR - refPhaseRange);
if (isnan(temp_amp) || isnan(temp_R)|| isinf(temp_amp) || isinf(temp_R)) { if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R); printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
d_temp_R[idx] = 0; d_temp_R[idx] = 0;
d_temp_amps[idx] = 0; d_temp_amps[idx] = 0;
return; return;
} }
else {} else {}
d_temp_amps[idx] = temp_amp; d_temp_amps[idx] = temp_amp;
d_temp_R[idx] = temp_R; d_temp_R[idx] = temp_R;
return; return;
@ -313,12 +313,12 @@ __global__ void CUDA_Kernel_Computer_R_amp(
__global__ void CUDA_Kernel_Computer_echo( __global__ void CUDA_Kernel_Computer_echo(
float* d_temp_R, float* d_temp_amps, long posNum, float* d_temp_R, float* d_temp_amps, long posNum,
float f0, float dfreq, float f0, float dfreq,
long FreqPoints, // 当前频率的分块 long FreqPoints, // 当前频率的分块
long maxfreqnum, // 最大脉冲值 long maxfreqnum, // 最大脉冲值
float* d_temp_echo_real, float* d_temp_echo_imag, float* d_temp_echo_real, float* d_temp_echo_imag,
long temp_PRF_Count long temp_PRF_Count
) { ) {
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // 注意一个完整的block_size 共享相同内存 __shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // 注意一个完整的block_size 共享相同内存
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF]; __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
@ -349,7 +349,7 @@ __global__ void CUDA_Kernel_Computer_echo(
if (fId < maxfreqnum && prfId < temp_PRF_Count) { if (fId < maxfreqnum && prfId < temp_PRF_Count) {
long echo_ID = prfId * maxfreqnum + fId; // 计算对应的回波位置 long echo_ID = prfId * maxfreqnum + fId; // 计算对应的回波位置
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq); float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
float temp_real = 0; float temp_real = 0;
@ -368,9 +368,9 @@ __global__ void CUDA_Kernel_Computer_echo(
if (isnan(temp_phi) || isnan(temp_amp) || isnan(temp_real) || isnan(temp_imag) if (isnan(temp_phi) || isnan(temp_amp) || isnan(temp_real) || isnan(temp_imag)
|| isinf(temp_phi) || isinf(temp_amp) || isinf(temp_real) || isinf(temp_imag) || isinf(temp_phi) || isinf(temp_amp) || isinf(temp_real) || isinf(temp_imag)
) { ) {
printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n",temp_amp,temp_phi,temp_real,temp_imag); printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, temp_real, temp_imag);
} }
} }
//printf("echo_ID=%d; ehodata=(%f,%f)\n", echo_ID, temp_real, temp_imag); //printf("echo_ID=%d; ehodata=(%f,%f)\n", echo_ID, temp_real, temp_imag);
//printf("(%f %f %f) ", factorjTemp, s_amp[0], s_R[0]); //printf("(%f %f %f) ", factorjTemp, s_amp[0], s_R[0]);
@ -390,7 +390,7 @@ void CUDA_RFPC_MainProcess(
double* antYaxisX, double* antYaxisY, double* antYaxisZ, double* antYaxisX, double* antYaxisY, double* antYaxisZ,
double* antZaxisX, double* antZaxisY, double* antZaxisZ, double* antZaxisX, double* antZaxisY, double* antZaxisZ,
double* antDirectX, double* antDirectY, double* antDirectZ, double* antDirectX, double* antDirectY, double* antDirectZ,
long PRFCount, long FreqNum, long PRFCount, long FreqNum,
float f0, float dfreq, float f0, float dfreq,
double Pt, double Pt,
double refPhaseRange, double refPhaseRange,
@ -401,7 +401,7 @@ void CUDA_RFPC_MainProcess(
double maxTransAntPatternValue, double maxReceiveAntPatternValue, double maxTransAntPatternValue, double maxReceiveAntPatternValue,
double NearR, double FarR, double NearR, double FarR,
double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber, double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber,
double* demSlopeX, double* demSlopeY, double* demSlopeZ, double* demSlopeX, double* demSlopeY, double* demSlopeZ,
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen, CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
float* out_echoReal, float* out_echoImag, float* out_echoReal, float* out_echoImag,
float* d_temp_R, float* d_temp_amp float* d_temp_R, float* d_temp_amp
@ -420,9 +420,9 @@ void CUDA_RFPC_MainProcess(
antYaxisX, antYaxisY, antYaxisZ, antYaxisX, antYaxisY, antYaxisZ,
antZaxisX, antZaxisY, antZaxisZ, antZaxisX, antZaxisY, antZaxisZ,
antDirectX, antDirectY, antDirectZ, antDirectX, antDirectY, antDirectZ,
PRFCount, PRFCount,
targetX, targetY, targetZ, demCls, targetX, targetY, targetZ, demCls,
demSlopeX, demSlopeY, demSlopeZ, demSlopeX, demSlopeY, demSlopeZ,
sTi, TargetNumber, sTi, TargetNumber,
sigma0Paramslist, sigmaparamslistlen, sigma0Paramslist, sigmaparamslistlen,
Pt, Pt,
@ -437,23 +437,23 @@ void CUDA_RFPC_MainProcess(
); );
PrintLasterError("CUDA_Kernel_Computer_R_amp"); PrintLasterError("CUDA_Kernel_Computer_R_amp");
cudaBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE; cudaBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDA_Kernel_Computer_echo << <cudaBlocknum, BLOCK_SIZE >> > ( CUDA_Kernel_Computer_echo << <cudaBlocknum, BLOCK_SIZE >> > (
d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF, d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF,
f0, dfreq, f0, dfreq,
freqpoints, FreqNum, freqpoints, FreqNum,
out_echoReal, out_echoImag, out_echoReal, out_echoImag,
PRFCount PRFCount
); );
PrintLasterError("CUDA_Kernel_Computer_echo"); PrintLasterError("CUDA_Kernel_Computer_echo");
if ((sTi * 100.0 / TargetNumber ) - process >= 1) { if ((sTi * 100.0 / TargetNumber) - process >= 1) {
process = sTi * 100.0 / TargetNumber; process = sTi * 100.0 / TargetNumber;
PRINT("TargetID [%f]: %d / %d finished\n", sTi*100.0/ TargetNumber,sTi, TargetNumber); PRINT("TargetID [%f]: %d / %d finished\n", sTi * 100.0 / TargetNumber, sTi, TargetNumber);
} }
} }
@ -473,6 +473,150 @@ void CUDA_RFPC_MainProcess(
/* 核函数 ****************************************************************************************************************************/ /* 核函数 ****************************************************************************************************************************/
/**
 * Computes, for every (pulse, target) pair of the current target chunk, the
 * range offset and the echo amplitude without applying any antenna pattern.
 *
 * Thread layout: one thread per output element, flat index
 *   idx = prfId * SHAREMEMORY_FLOAT_HALF + (posId - startPosId).
 *
 * Parameters
 *   antlist        per-pulse antenna states; Px/Py/Pz are read.
 *   PRFCount       number of pulses (rows of the output buffers).
 *   goallist       target states; Tx/Ty/Tz (position) and TsX/TsY/TsZ (slope vector) are read.
 *   demLen         unused in this kernel (kept for interface compatibility).
 *   startPosId     index of the first target of the current chunk.
 *   pixelcount     exclusive upper bound on the target index.
 *   sigma0Params   parameters forwarded to GPU_getSigma0dB.
 *   Pt             multiplicative power factor applied to the amplitude.
 *   refPhaseRange  reference range subtracted from the geometric range.
 *   NearR, FarR    accepted range window; targets outside contribute zero.
 *   d_temp_R       output: range minus refPhaseRange, or 0 for rejected targets.
 *   d_temp_amps    output: amplitude, or 0 for rejected targets.
 *
 * NOTE(review): every idx with prfId < PRFCount is written, so both output
 * buffers are assumed to hold PRFCount * SHAREMEMORY_FLOAT_HALF elements
 * (this matches the allocation in ProcessRFPCTask) — confirm for other callers.
 */
__global__ void Kernel_Computer_R_amp_NoAntPattern(
    SateState* antlist,
    long PRFCount,
    GoalState* goallist,
    long demLen,
    long startPosId, long pixelcount,
    CUDASigmaParam sigma0Params,
    double Pt,
    double refPhaseRange,
    double NearR, double FarR,
    double* d_temp_R, double* d_temp_amps // computed outputs
) {
    long idx = blockIdx.x * blockDim.x + threadIdx.x;       // flat thread index
    long prfId = idx / SHAREMEMORY_FLOAT_HALF;              // pulse handled by this thread
    long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // target handled by this thread

    if (prfId >= PRFCount) {
        return; // padding threads beyond the last pulse own no output slot
    }

    // Default to "no contribution". Every slot of the row is written on every
    // launch so that slots past the last target, or targets with a degenerate
    // slope, never expose values left over from a previous chunk (the echo
    // kernel sums the whole row).
    double outR = 0.0;
    double outAmp = 0.0;

    if (posId < pixelcount) {
        // Vector from target to antenna and its length.
        double RstX = antlist[prfId].Px - goallist[posId].Tx;
        double RstY = antlist[prfId].Py - goallist[posId].Ty;
        double RstZ = antlist[prfId].Pz - goallist[posId].Tz;
        double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ);

        if (!(RstR < NearR || RstR > FarR)) {
            double slopeX = goallist[posId].TsX;
            double slopeY = goallist[posId].TsY;
            double slopeZ = goallist[posId].TsZ;
            // Double-precision sqrt: the operands are double, sqrtf would truncate them.
            double slopR = sqrt(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ);

            if (fabs(slopR) > 1e-3) {
                // Angle between the target->antenna vector and the slope vector.
                double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
                double localangle = acos(dotAB / (RstR * slopR));

                bool angleRejected = localangle < 0
                    || localangle >= LAMP_CUDA_PI / 2
                    || isnan(localangle);
                if (!angleRejected) {
                    // 1 / ((4*pi)^2 * R^4), evaluated with plain double multiplies
                    // instead of single-precision powf.
                    double fourPi = 4 * LAMP_CUDA_PI;
                    double rangeSq = RstR * RstR;
                    double ampGain = 1.0 / (fourPi * fourPi * rangeSq * rangeSq);

                    // GPU_getSigma0dB result is converted from dB to linear scale.
                    double sigma = GPU_getSigma0dB(sigma0Params, localangle);
                    sigma = pow(10.0, sigma / 10.0);

                    double temp_amp = ampGain * Pt * sigma;
                    double temp_R = RstR - refPhaseRange;

                    // Select instead of multiplying by a 0/1 mask: NaN * 0 and
                    // Inf * 0 are both NaN, so a mask cannot clear bad values.
                    bool isValid = !(isnan(temp_amp) || isnan(temp_R)
                        || isinf(temp_amp) || isinf(temp_R));
                    if (isValid) {
                        outAmp = temp_amp;
                        outR = temp_R;
                    }
                }
            }
        }
    }

    d_temp_amps[idx] = outAmp;
    d_temp_R[idx] = outR;
}
/**
 * Accumulates the echo of one target chunk into echodata for every
 * (pulse, frequency) pair, using the range/amplitude rows produced for
 * that chunk.
 *
 * Thread layout: flat index idx; prfId = idx / FreqPoints, fId = idx % FreqPoints.
 * Each block stages one row of d_temp_R / d_temp_amps into shared memory and
 * then every thread sums all SHAREMEMORY_FLOAT_HALF staged entries.
 *
 * Parameters
 *   d_temp_R, d_temp_amps  per-pulse rows of length posNum (range offset, amplitude).
 *   posNum                 number of valid entries per row; entries at or past
 *                          posNum are treated as zero.
 *   f0, dfreq              frequency of sample fId is f0 + fId * dfreq.
 *   FreqPoints             number of thread slots reserved per pulse.
 *   maxfreqnum             number of frequency samples actually stored per pulse.
 *   echodata               accumulated in place at [prfId * maxfreqnum + fId].
 *   temp_PRF_Count         number of pulses.
 *
 * Shared memory: 2 * SHAREMEMORY_FLOAT_HALF floats (static).
 *
 * NOTE(review): the shared row is loaded using this thread's own prfId, so all
 * threads of a block must map to the same pulse; this presumably requires
 * FreqPoints to be a multiple of blockDim.x (the visible caller pads it with
 * NextBlockPad(..., BLOCK_SIZE)) — confirm.
 * NOTE(review): the double inputs are narrowed to float when staged and the
 * phase is evaluated in float, as before this change; confirm that precision
 * is acceptable for the expected range offsets.
 */
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
    double* d_temp_R, double* d_temp_amps, long posNum,
    double f0, double dfreq,
    long FreqPoints, // thread slots per pulse for the current frequency block
    long maxfreqnum, // number of stored frequency samples per pulse
    cuComplex* echodata,
    long temp_PRF_Count
) {
    __shared__ float s_R[SHAREMEMORY_FLOAT_HALF];   // one row, shared by the whole block
    __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];

    long tid = threadIdx.x;
    long bid = blockIdx.x;
    long idx = bid * blockDim.x + tid;
    long prfId = idx / FreqPoints; // pulse id
    long fId = idx % FreqPoints;   // frequency id
    bool prfValid = prfId < temp_PRF_Count;

    // Block-stride staging: consecutive threads read consecutive elements
    // (coalesced), and the loop bound is the shared array size itself, so the
    // whole array is initialised and never overrun regardless of blockDim.x.
    for (long psid = tid; psid < SHAREMEMORY_FLOAT_HALF; psid += blockDim.x) {
        if (prfValid && psid < posNum) {
            long pixelId = prfId * posNum + psid;
            s_R[psid] = (float)d_temp_R[pixelId];
            s_amp[psid] = (float)d_temp_amps[pixelId];
        }
        else {
            // Padding entries (or padding pulses) contribute nothing.
            s_R[psid] = 0;
            s_amp[psid] = 0;
        }
    }

    __syncthreads(); // reached by every thread: the staged row must be complete before use

    if (fId < maxfreqnum && prfValid) {
        long echo_ID = prfId * maxfreqnum + fId; // destination echo sample
        float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq); // range-to-phase factor
        cuComplex echo = make_cuComplex(0, 0);
        float temp_phi = 0;
        float temp_amp = 0;

        for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
            temp_phi = s_R[dataid] * factorjTemp;
            temp_amp = s_amp[dataid];
            echo.x += (temp_amp * cosf(temp_phi));
            echo.y += (temp_amp * sinf(temp_phi));

            // Diagnostic only: report the first signs of a non-finite accumulation.
            if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y)
                || isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y)
                ) {
                printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y);
            }
        }

        // Each (prfId, fId) pair is owned by exactly one thread, so this
        // read-modify-write needs no atomics.
        echodata[echo_ID] = cuCaddf(echodata[echo_ID], echo);
    }
}
__global__ void CUDA_Kernel_RFPC( __global__ void CUDA_Kernel_RFPC(
SateState* antlist, SateState* antlist,
long PRFCount, long Freqcount, // 整体的脉冲数, long PRFCount, long Freqcount, // 整体的脉冲数,
@ -483,12 +627,12 @@ __global__ void CUDA_Kernel_RFPC(
double NearR, double FarR, double NearR, double FarR,
CUDASigmaParam clsSigma0, CUDASigmaParam clsSigma0,
cuComplex* echodata cuComplex* echodata
) )
{ {
__shared__ GoalState Ts[SHAREMEMORY_DEM_STEP]; __shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];
size_t threadid = threadIdx.x; size_t threadid = threadIdx.x;
size_t idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码 size_t idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
size_t prfid = floorf(idx / Freqcount); size_t prfid = floorf(idx / Freqcount);
size_t freqid = idx % Freqcount; size_t freqid = idx % Freqcount;
@ -512,37 +656,37 @@ __global__ void CUDA_Kernel_RFPC(
for (long tid = 0;tid < demLen;tid++) { for (long tid = 0; tid < demLen; tid++) {
GoalState p = goallist[tid]; GoalState p = goallist[tid];
Tx = p.Tx; Tx = p.Tx;
Ty = p.Ty; Ty = p.Ty;
Tz = p.Tz; Tz = p.Tz;
Tx = antPos.Px - Tx; // T->P Tx = antPos.Px - Tx; // T->P
Ty = antPos.Py - Ty; Ty = antPos.Py - Ty;
Tz = antPos.Pz - Tz; Tz = antPos.Pz - Tz;
R = sqrt(Tx * Tx + Ty * Ty + Tz * Tz); R = sqrt(Tx * Tx + Ty * Ty + Tz * Tz);
bool isNearFar = (R < NearR || R > FarR) && ((abs(p.TsX) > 1000) || (abs(p.TsY) > 1000) || (abs(p.TsZ) > 1000)); bool isNearFar = (R < NearR || R > FarR) && ((abs(p.TsX) > 1000) || (abs(p.TsY) > 1000) || (abs(p.TsZ) > 1000));
incAngle = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ); incAngle = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ);
incAngle = acos((Tx * p.TsX + Ty * p.TsY + Tz * p.TsZ) / (R * incAngle)); incAngle = acos((Tx * p.TsX + Ty * p.TsY + Tz * p.TsZ) / (R * incAngle));
incAngle = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3, clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle); // sigma incAngle = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3, clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle); // sigma
incAngle = pow(10.0, incAngle / 10.0); // amp incAngle = pow(10.0, incAngle / 10.0); // amp
incAngle = incAngle / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); // incAngle = incAngle / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); //
R = (R - refPhaseRange); R = (R - refPhaseRange);
R = factorjTemp * R; R = factorjTemp * R;
echo_real = incAngle * cos(R) * isNearFar; echo_real = incAngle * cos(R) * isNearFar;
echo_imag = incAngle * sin(R) * isNearFar; echo_imag = incAngle * sin(R) * isNearFar;
echo.x = echo.x + echo_real; echo.x = echo.x + echo_real;
echo.y = echo.y + echo_imag; echo.y = echo.y + echo_imag;
if (idx == 0 && tid % (10 * SHAREMEMORY_DEM_STEP) == 0) { if (idx == 0 && tid % (10 * SHAREMEMORY_DEM_STEP) == 0) {
printf("Idx:%d , TsID: %d, TSCOUNT: %d \n", idx, tid, demLen); printf("Idx:%d , TsID: %d, TSCOUNT: %d \n", idx, tid, demLen);
} }
} }
echodata[idx] = cuCaddf(echodata[idx], echo); echodata[idx] = cuCaddf(echodata[idx], echo);
@ -552,24 +696,62 @@ __global__ void CUDA_Kernel_RFPC(
/** 分块处理 ****************************************************************************************************************/ /** 分块处理 ****************************************************************************************************************/
extern "C" void ProcessRFPCTask(RFPCTask& task) extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
{ {
size_t pixelcount = task.prfNum * task.freqNum; size_t pixelcount = task.prfNum * task.freqNum;
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE; size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
printf("start %d,%d ,%d,%d\n", pixelcount, task.targetnum, grid_size, BLOCK_SIZE); printf("start %d,%d ,%d,%d\n", pixelcount, task.targetnum, grid_size, BLOCK_SIZE);
CUDA_Kernel_RFPC << <grid_size, BLOCK_SIZE >> > (
task.antlist, double* d_R = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
task.prfNum,task.freqNum, double* d_amps = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
task.goallist,
task.targetnum,
task.startFreq,task.stepFreq, long BLOCK_FREQNUM = NextBlockPad(task.freqNum, BLOCK_SIZE); // 256*freqBlockID
task.Rref,task.Rnear,task.Rfar, long cudaBlocknum = 0;
task.sigma0_cls, long freqpoints = BLOCK_FREQNUM;
task.d_echoData printf("freqpoints:%d\n", freqpoints);
long process = 0;
for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
Kernel_Computer_R_amp_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> >(
task.antlist,
task.prfNum,
task.goallist,
task.targetnum,
sTi, task.targetnum,
task.sigma0_cls,
1,
task.Rref,
task.Rnear, task.Rfar,
d_R, d_amps// 计算输出
); );
PrintLasterError("ProcessRFPCTask"); PrintLasterError("CUDA_Kernel_Computer_R_amp");
cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDA_Kernel_Computer_echo_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
d_R, d_amps, SHAREMEMORY_FLOAT_HALF,
task.startFreq, task.stepFreq,
freqpoints, task.freqNum,
task.d_echoData,
task.prfNum
);
PrintLasterError("CUDA_Kernel_Computer_echo");
if ((sTi * 100.0 / task.targetnum) - process >= 1) {
process = sTi * 100.0 / task.targetnum;
PRINT("TargetID [%f]: %d / %d finished\n", sTi * 100.0 / task.targetnum, sTi, task.targetnum);
}
}
cudaDeviceSynchronize(); cudaDeviceSynchronize();
printf("start %d \n", task.targetnum);
FreeCUDADevice(d_R);
FreeCUDADevice(d_amps);
} }

View File

@ -144,7 +144,7 @@ extern "C" void CUDA_RFPC_MainProcess(
extern "C" double* hostSigmaData_toDevice(int devid); extern "C" double* hostSigmaData_toDevice(int devid);
extern "C" void ProcessRFPCTask(RFPCTask& task); extern "C" void ProcessRFPCTask(RFPCTask& task,long devid);

View File

@ -1103,7 +1103,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId); task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount); HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
task.sigma0_cls = clsCUDASigmaParamsDict[clsid]; task.sigma0_cls = clsCUDASigmaParamsDict[clsid];
ProcessRFPCTask(task); ProcessRFPCTask(task,devId);
FreeCUDADevice(task.goallist); FreeCUDADevice(task.goallist);
} }