修改多卡无天线辐射图代码
parent
96eb60bbec
commit
4cf63eee36
|
@ -21,8 +21,8 @@ extern __device__ double GPU_getSigma0dB(CUDASigmaParam param, double theta) {/
|
|||
|
||||
|
||||
/**
 * Evaluates the six-parameter backscatter model
 *
 *     p1 + p2 * exp(-p3 * theta) + p4 * cos(p5 * theta + p6)
 *
 * The original inline note on this function reads "linear value"; callers
 * visible in this file nevertheless pass the result through 10^(x / 10)
 * before using it as an amplitude factor.
 *
 * @param p1..p6  model coefficients
 * @param theta   angle fed to the model (NOTE(review): presumably radians,
 *                since callers pass the result of acos() - confirm)
 * @return        the model value at theta
 */
__device__ double GPU_getSigma0dB_params(
    const double p1, const double p2, const double p3,
    const double p4, const double p5, const double p6,
    double theta) {
    const double decayTerm = p2 * exp(-p3 * theta);
    const double cosineTerm = p4 * cos(p5 * theta + p6);
    return p1 + decayTerm + cosineTerm;
}
|
||||
|
||||
|
@ -71,8 +71,8 @@ extern __device__ CUDAVectorEllipsoidal GPU_SatelliteAntDirectNormal(
|
|||
// 计算theta 与 phi
|
||||
double Norm = sqrtf(Xant * Xant + Yant * Yant + Zant * Zant); // 计算 pho
|
||||
double Zn = Zant / Norm;
|
||||
double ThetaAnt = ( - 1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));// acosf(Zant / Norm); // theta 与 Z轴的夹角
|
||||
double PhiAnt = abs(Xant)<PRECISIONTOLERANCE ?0: atanf(Yant / Xant); // -pi/2 ~pi/2
|
||||
double ThetaAnt = (-1 > Zn) ? PI : (Zn > 1 ? 0 : acos(Zn));// acosf(Zant / Norm); // theta 与 Z轴的夹角
|
||||
double PhiAnt = abs(Xant) < PRECISIONTOLERANCE ? 0 : atanf(Yant / Xant); // -pi/2 ~pi/2
|
||||
|
||||
if (abs(Yant) < PRECISIONTOLERANCE) { // X轴上
|
||||
PhiAnt = 0;
|
||||
|
@ -161,9 +161,9 @@ extern __device__ double GPU_BillerInterpAntPattern(double* antpattern,
|
|||
return GainValue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* 核函数 ****************************************************************************************************************************/
|
||||
// 计算每块
|
||||
__global__ void CUDA_Kernel_Computer_R_amp(
|
||||
|
@ -173,8 +173,8 @@ __global__ void CUDA_Kernel_Computer_R_amp(
|
|||
double* antZaxisX, double* antZaxisY, double* antZaxisZ,
|
||||
double* antDirectX, double* antDirectY, double* antDirectZ,
|
||||
long PRFCount, // 整体的脉冲数,
|
||||
double* targetX, double* targetY, double* targetZ, long* demCls,
|
||||
double* demSlopeX, double* demSlopeY, double* demSlopeZ ,
|
||||
double* targetX, double* targetY, double* targetZ, long* demCls,
|
||||
double* demSlopeX, double* demSlopeY, double* demSlopeZ,
|
||||
long startPosId, long pixelcount,
|
||||
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
|
||||
double Pt,
|
||||
|
@ -189,8 +189,8 @@ __global__ void CUDA_Kernel_Computer_R_amp(
|
|||
) {
|
||||
long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
|
||||
long prfId = idx / SHAREMEMORY_FLOAT_HALF;
|
||||
long posId = idx % SHAREMEMORY_FLOAT_HALF+ startPosId; // 当前线程对应的影像点
|
||||
|
||||
long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点
|
||||
|
||||
if (prfId < PRFCount && posId < pixelcount) {
|
||||
double RstX = antX[prfId] - targetX[posId]; // 计算坐标矢量
|
||||
double RstY = antY[prfId] - targetY[posId];
|
||||
|
@ -206,13 +206,13 @@ __global__ void CUDA_Kernel_Computer_R_amp(
|
|||
double slopeX = demSlopeX[posId];
|
||||
double slopeY = demSlopeY[posId];
|
||||
double slopeZ = demSlopeZ[posId];
|
||||
|
||||
|
||||
double slopR = sqrtf(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ); //
|
||||
if (abs(slopR - 0) > 1e-3) {
|
||||
double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
|
||||
double localangle = acos(dotAB / (RstR * slopR));
|
||||
double localangle = acos(dotAB / (RstR * slopR));
|
||||
|
||||
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2|| isnan(localangle)) {
|
||||
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
|
||||
d_temp_R[idx] = 0;
|
||||
d_temp_amps[idx] = 0;
|
||||
return;
|
||||
|
@ -248,7 +248,7 @@ __global__ void CUDA_Kernel_Computer_R_amp(
|
|||
//printf("clsid=%d\n", clsid);
|
||||
CUDASigmaParam tempsigma = sigma0Paramslist[clsid];
|
||||
|
||||
|
||||
|
||||
if (abs(tempsigma.p1) < PRECISIONTOLERANCE &&
|
||||
abs(tempsigma.p2) < PRECISIONTOLERANCE &&
|
||||
abs(tempsigma.p3) < PRECISIONTOLERANCE &&
|
||||
|
@ -276,19 +276,19 @@ __global__ void CUDA_Kernel_Computer_R_amp(
|
|||
|
||||
|
||||
ampGain = ampGain / (powf(4 * LAMP_CUDA_PI, 2) * powf(RstR, 4)); // 反射强度
|
||||
|
||||
|
||||
float temp_amp = float(ampGain * Pt * sigma0);
|
||||
float temp_R = float(RstR - refPhaseRange);
|
||||
|
||||
if (isnan(temp_amp) || isnan(temp_R)|| isinf(temp_amp) || isinf(temp_R)) {
|
||||
if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
|
||||
printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
|
||||
d_temp_R[idx] = 0;
|
||||
d_temp_amps[idx] = 0;
|
||||
return;
|
||||
}
|
||||
else {}
|
||||
|
||||
|
||||
|
||||
|
||||
d_temp_amps[idx] = temp_amp;
|
||||
d_temp_R[idx] = temp_R;
|
||||
return;
|
||||
|
@ -313,12 +313,12 @@ __global__ void CUDA_Kernel_Computer_R_amp(
|
|||
|
||||
__global__ void CUDA_Kernel_Computer_echo(
|
||||
float* d_temp_R, float* d_temp_amps, long posNum,
|
||||
float f0, float dfreq,
|
||||
float f0, float dfreq,
|
||||
long FreqPoints, // 当前频率的分块
|
||||
long maxfreqnum, // 最大脉冲值
|
||||
float* d_temp_echo_real, float* d_temp_echo_imag,
|
||||
long temp_PRF_Count
|
||||
) {
|
||||
) {
|
||||
__shared__ float s_R[SHAREMEMORY_FLOAT_HALF]; // 注意一个完整的block_size 共享相同内存
|
||||
__shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];
|
||||
|
||||
|
@ -349,7 +349,7 @@ __global__ void CUDA_Kernel_Computer_echo(
|
|||
|
||||
|
||||
if (fId < maxfreqnum && prfId < temp_PRF_Count) {
|
||||
|
||||
|
||||
long echo_ID = prfId * maxfreqnum + fId; // 计算对应的回波位置
|
||||
float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
|
||||
float temp_real = 0;
|
||||
|
@ -368,9 +368,9 @@ __global__ void CUDA_Kernel_Computer_echo(
|
|||
if (isnan(temp_phi) || isnan(temp_amp) || isnan(temp_real) || isnan(temp_imag)
|
||||
|| isinf(temp_phi) || isinf(temp_amp) || isinf(temp_real) || isinf(temp_imag)
|
||||
) {
|
||||
printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n",temp_amp,temp_phi,temp_real,temp_imag);
|
||||
printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, temp_real, temp_imag);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
//printf("echo_ID=%d; ehodata=(%f,%f)\n", echo_ID, temp_real, temp_imag);
|
||||
//printf("(%f %f %f) ", factorjTemp, s_amp[0], s_R[0]);
|
||||
|
@ -390,7 +390,7 @@ void CUDA_RFPC_MainProcess(
|
|||
double* antYaxisX, double* antYaxisY, double* antYaxisZ,
|
||||
double* antZaxisX, double* antZaxisY, double* antZaxisZ,
|
||||
double* antDirectX, double* antDirectY, double* antDirectZ,
|
||||
long PRFCount, long FreqNum,
|
||||
long PRFCount, long FreqNum,
|
||||
float f0, float dfreq,
|
||||
double Pt,
|
||||
double refPhaseRange,
|
||||
|
@ -401,7 +401,7 @@ void CUDA_RFPC_MainProcess(
|
|||
double maxTransAntPatternValue, double maxReceiveAntPatternValue,
|
||||
double NearR, double FarR,
|
||||
double* targetX, double* targetY, double* targetZ, long* demCls, long TargetNumber,
|
||||
double* demSlopeX, double* demSlopeY, double* demSlopeZ,
|
||||
double* demSlopeX, double* demSlopeY, double* demSlopeZ,
|
||||
CUDASigmaParam* sigma0Paramslist, long sigmaparamslistlen,
|
||||
float* out_echoReal, float* out_echoImag,
|
||||
float* d_temp_R, float* d_temp_amp
|
||||
|
@ -420,9 +420,9 @@ void CUDA_RFPC_MainProcess(
|
|||
antYaxisX, antYaxisY, antYaxisZ,
|
||||
antZaxisX, antZaxisY, antZaxisZ,
|
||||
antDirectX, antDirectY, antDirectZ,
|
||||
PRFCount,
|
||||
targetX, targetY, targetZ, demCls,
|
||||
demSlopeX, demSlopeY, demSlopeZ,
|
||||
PRFCount,
|
||||
targetX, targetY, targetZ, demCls,
|
||||
demSlopeX, demSlopeY, demSlopeZ,
|
||||
sTi, TargetNumber,
|
||||
sigma0Paramslist, sigmaparamslistlen,
|
||||
Pt,
|
||||
|
@ -437,23 +437,23 @@ void CUDA_RFPC_MainProcess(
|
|||
);
|
||||
|
||||
PrintLasterError("CUDA_Kernel_Computer_R_amp");
|
||||
|
||||
|
||||
|
||||
|
||||
cudaBlocknum = (PRFCount * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
CUDA_Kernel_Computer_echo << <cudaBlocknum, BLOCK_SIZE >> > (
|
||||
d_temp_R, d_temp_amp, SHAREMEMORY_FLOAT_HALF,
|
||||
f0, dfreq,
|
||||
f0, dfreq,
|
||||
freqpoints, FreqNum,
|
||||
out_echoReal, out_echoImag,
|
||||
PRFCount
|
||||
);
|
||||
PrintLasterError("CUDA_Kernel_Computer_echo");
|
||||
|
||||
if ((sTi * 100.0 / TargetNumber ) - process >= 1) {
|
||||
if ((sTi * 100.0 / TargetNumber) - process >= 1) {
|
||||
process = sTi * 100.0 / TargetNumber;
|
||||
PRINT("TargetID [%f]: %d / %d finished\n", sTi*100.0/ TargetNumber,sTi, TargetNumber);
|
||||
PRINT("TargetID [%f]: %d / %d finished\n", sTi * 100.0 / TargetNumber, sTi, TargetNumber);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -473,6 +473,150 @@ void CUDA_RFPC_MainProcess(
|
|||
|
||||
|
||||
/* 核函数 ****************************************************************************************************************************/
|
||||
|
||||
/**
 * Computes, for each (pulse, target) pair of the current target chunk, the
 * range offset and the echo amplitude, without antenna-pattern weighting.
 *
 * Thread mapping (1-D launch), with idx the global thread index:
 *     prfId = idx / SHAREMEMORY_FLOAT_HALF                 -> pulse
 *     posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId    -> target
 * Results are stored at d_temp_R[idx] and d_temp_amps[idx], so both buffers
 * must hold at least PRFCount * SHAREMEMORY_FLOAT_HALF doubles.
 *
 * Every slot with prfId < PRFCount is written on every launch. A slot is set
 * to zero when the target index is past pixelcount, the range is outside
 * [NearR, FarR], the slope vector is (near) zero, the local angle is not in
 * [0, pi/2), or the result is not finite. Writing zeros unconditionally
 * matters because the buffers are reused chunk after chunk: a slot left
 * untouched would still carry the previous chunk's value.
 *
 * @param antlist        per-pulse state; Px/Py/Pz are read
 * @param PRFCount       number of pulses
 * @param goallist       targets; Tx/Ty/Tz and TsX/TsY/TsZ are read
 * @param demLen         not read by this kernel
 * @param startPosId     index of the first target of the current chunk
 * @param pixelcount     exclusive upper bound for the target index
 * @param sigma0Params   parameters forwarded to GPU_getSigma0dB
 * @param Pt             factor multiplied into the amplitude
 * @param refPhaseRange  value subtracted from the range before it is stored
 * @param NearR, FarR    accepted range interval (inclusive)
 * @param d_temp_R       output: range minus refPhaseRange, or 0
 * @param d_temp_amps    output: amplitude, or 0
 */
__global__ void Kernel_Computer_R_amp_NoAntPattern(
    SateState* antlist,
    long PRFCount,
    GoalState* goallist,
    long demLen,
    long startPosId, long pixelcount,
    CUDASigmaParam sigma0Params,
    double Pt,
    double refPhaseRange,
    double NearR, double FarR,
    double* d_temp_R, double* d_temp_amps
) {
    // Widen before multiplying so the product is not computed in 32-bit unsigned.
    const long idx = (long)blockIdx.x * blockDim.x + threadIdx.x;
    const long prfId = idx / SHAREMEMORY_FLOAT_HALF;
    if (prfId >= PRFCount) {
        return; // no output slot belongs to this thread
    }
    const long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId;

    // Defaults for every rejected case; overwritten only by a valid result.
    double outR = 0.0;
    double outAmp = 0.0;

    if (posId < pixelcount) {
        // Vector from the target to the antenna position.
        const double RstX = antlist[prfId].Px - goallist[posId].Tx;
        const double RstY = antlist[prfId].Py - goallist[posId].Ty;
        const double RstZ = antlist[prfId].Pz - goallist[posId].Tz;
        const double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ);

        if (RstR >= NearR && RstR <= FarR) {
            const double slopeX = goallist[posId].TsX;
            const double slopeY = goallist[posId].TsY;
            const double slopeZ = goallist[posId].TsZ;
            const double slopR = sqrt(slopeX * slopeX + slopeY * slopeY + slopeZ * slopeZ);

            if (fabs(slopR) > 1e-3) {
                const double dotAB = RstX * slopeX + RstY * slopeY + RstZ * slopeZ;
                const double localangle = acos(dotAB / (RstR * slopR));

                // Written as a positive range test so that NaN is rejected too.
                if (localangle >= 0 && localangle < LAMP_CUDA_PI / 2) {
                    // 1 / ((4*pi)^2 * R^4), kept in double precision.
                    const double fourPi = 4 * LAMP_CUDA_PI;
                    const double rangeSq = RstR * RstR;
                    const double ampGain = 1.0 / (fourPi * fourPi * rangeSq * rangeSq);

                    // Model output is treated as dB and converted to linear.
                    double sigma = GPU_getSigma0dB(sigma0Params, localangle);
                    sigma = pow(10.0, sigma / 10.0);

                    const double temp_amp = ampGain * Pt * sigma;
                    const double temp_R = RstR - refPhaseRange;

                    // Select instead of multiplying by a 0/1 flag:
                    // NaN * 0 and Inf * 0 are NaN, so a product cannot mask them.
                    const bool valid = !(isnan(temp_amp) || isnan(temp_R) ||
                                         isinf(temp_amp) || isinf(temp_R));
                    if (valid) {
                        outAmp = temp_amp;
                        outR = temp_R;
                    }
                }
            }
        }
    }

    d_temp_amps[idx] = outAmp;
    d_temp_R[idx] = outR;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Accumulates the echo of one chunk of targets into echodata.
 *
 * Thread mapping (1-D launch), with idx the global thread index:
 *     prfId = idx / FreqPoints   -> pulse
 *     fId   = idx % FreqPoints   -> frequency sample
 * Each block first stages the range/amplitude entries of its pulse in shared
 * memory, then every thread with fId < maxfreqnum and prfId < temp_PRF_Count
 * sums amp * (cos(phi), sin(phi)) over all SHAREMEMORY_FLOAT_HALF staged
 * entries, where phi = R * RFPCPIDIVLIGHT * (f0 + fId * dfreq), and adds the
 * sum to echodata[prfId * maxfreqnum + fId].
 *
 * Preconditions:
 *  - All threads of a block must map to the same prfId, because the staged
 *    data is shared by the whole block. The caller visible in this file pads
 *    FreqPoints with NextBlockPad(freqNum, BLOCK_SIZE).
 *    NOTE(review): presumably that yields a multiple of the block size - confirm.
 *  - posNum must not exceed SHAREMEMORY_FLOAT_HALF.
 *  - d_temp_R / d_temp_amps hold temp_PRF_Count * posNum doubles.
 *
 * The staged values are narrowed from double to float, as the shared arrays
 * are float.
 *
 * @param d_temp_R        per-(pulse, target) range values
 * @param d_temp_amps     per-(pulse, target) amplitudes
 * @param posNum          number of targets stored per pulse in the inputs
 * @param f0, dfreq       start frequency and frequency step
 * @param FreqPoints      frequency samples per pulse in the thread layout
 * @param maxfreqnum      frequency samples per pulse in echodata
 * @param echodata        in/out accumulator, indexed prfId * maxfreqnum + fId
 * @param temp_PRF_Count  number of pulses
 */
__global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
    double* d_temp_R, double* d_temp_amps, long posNum,
    double f0, double dfreq,
    long FreqPoints,
    long maxfreqnum,
    cuComplex* echodata,
    long temp_PRF_Count
) {
    // One copy per block, shared by all of its threads.
    __shared__ float s_R[SHAREMEMORY_FLOAT_HALF];
    __shared__ float s_amp[SHAREMEMORY_FLOAT_HALF];

    const long tid = threadIdx.x;
    const long idx = (long)blockIdx.x * blockDim.x + tid;
    const long prfId = idx / FreqPoints; // pulse index
    const long fId = idx % FreqPoints;   // frequency index

    // Block-stride staging: covers every shared entry exactly once for any
    // block size, never indexes past the arrays, and lets neighbouring
    // threads read neighbouring global addresses.
    const bool pulseInRange = prfId < temp_PRF_Count;
    for (long psid = tid; psid < SHAREMEMORY_FLOAT_HALF; psid += blockDim.x) {
        if (pulseInRange && psid < posNum) {
            const long pixelId = prfId * posNum + psid;
            s_R[psid] = (float)d_temp_R[pixelId];
            s_amp[psid] = (float)d_temp_amps[pixelId];
        }
        else {
            // Padding entries contribute nothing to the sum below.
            s_R[psid] = 0;
            s_amp[psid] = 0;
        }
    }

    // Reached by every thread of the block: staging must be complete before
    // any thread starts reading the shared arrays.
    __syncthreads();

    if (fId < maxfreqnum && prfId < temp_PRF_Count) {
        const long echo_ID = prfId * maxfreqnum + fId; // output slot
        const float factorjTemp = RFPCPIDIVLIGHT * (f0 + fId * dfreq);
        cuComplex echo = make_cuComplex(0, 0);
        float temp_phi = 0;
        float temp_amp = 0;

        for (long dataid = 0; dataid < SHAREMEMORY_FLOAT_HALF; dataid++) {
            temp_phi = s_R[dataid] * factorjTemp;
            temp_amp = s_amp[dataid];
            echo.x += (temp_amp * cosf(temp_phi));
            echo.y += (temp_amp * sinf(temp_phi));

            // Diagnostic only: report non-finite intermediate values.
            if (isnan(temp_phi) || isnan(temp_amp) || isnan(echo.x) || isnan(echo.y)
                || isinf(temp_phi) || isinf(temp_amp) || isinf(echo.x) || isinf(echo.y)
                ) {
                printf("[amp,phi,real,imag]=[%f,%f,%f,%f];\n", temp_amp, temp_phi, echo.x, echo.y);
            }
        }

        // Each thread owns its echo_ID, so a plain read-modify-write suffices.
        echodata[echo_ID] = cuCaddf(echodata[echo_ID], echo);
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__global__ void CUDA_Kernel_RFPC(
|
||||
SateState* antlist,
|
||||
long PRFCount, long Freqcount, // 整体的脉冲数,
|
||||
|
@ -483,12 +627,12 @@ __global__ void CUDA_Kernel_RFPC(
|
|||
double NearR, double FarR,
|
||||
CUDASigmaParam clsSigma0,
|
||||
cuComplex* echodata
|
||||
)
|
||||
)
|
||||
{
|
||||
__shared__ GoalState Ts[SHAREMEMORY_DEM_STEP];
|
||||
|
||||
size_t threadid = threadIdx.x;
|
||||
|
||||
|
||||
size_t idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
|
||||
size_t prfid = floorf(idx / Freqcount);
|
||||
size_t freqid = idx % Freqcount;
|
||||
|
@ -512,37 +656,37 @@ __global__ void CUDA_Kernel_RFPC(
|
|||
|
||||
|
||||
|
||||
for (long tid = 0;tid < demLen;tid++) {
|
||||
GoalState p = goallist[tid];
|
||||
Tx = p.Tx;
|
||||
Ty = p.Ty;
|
||||
Tz = p.Tz;
|
||||
for (long tid = 0; tid < demLen; tid++) {
|
||||
GoalState p = goallist[tid];
|
||||
Tx = p.Tx;
|
||||
Ty = p.Ty;
|
||||
Tz = p.Tz;
|
||||
|
||||
Tx = antPos.Px - Tx; // T->P
|
||||
Ty = antPos.Py - Ty;
|
||||
Tz = antPos.Pz - Tz;
|
||||
Tx = antPos.Px - Tx; // T->P
|
||||
Ty = antPos.Py - Ty;
|
||||
Tz = antPos.Pz - Tz;
|
||||
|
||||
R = sqrt(Tx * Tx + Ty * Ty + Tz * Tz);
|
||||
bool isNearFar = (R < NearR || R > FarR) && ((abs(p.TsX) > 1000) || (abs(p.TsY) > 1000) || (abs(p.TsZ) > 1000));
|
||||
R = sqrt(Tx * Tx + Ty * Ty + Tz * Tz);
|
||||
bool isNearFar = (R < NearR || R > FarR) && ((abs(p.TsX) > 1000) || (abs(p.TsY) > 1000) || (abs(p.TsZ) > 1000));
|
||||
|
||||
|
||||
incAngle = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ);
|
||||
incAngle = acos((Tx * p.TsX + Ty * p.TsY + Tz * p.TsZ) / (R * incAngle));
|
||||
incAngle = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3, clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle); // sigma
|
||||
incAngle = pow(10.0, incAngle / 10.0); // amp
|
||||
incAngle = incAngle / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); //
|
||||
incAngle = sqrt(p.TsX * p.TsX + p.TsY * p.TsY + p.TsZ * p.TsZ);
|
||||
incAngle = acos((Tx * p.TsX + Ty * p.TsY + Tz * p.TsZ) / (R * incAngle));
|
||||
incAngle = GPU_getSigma0dB_params(clsSigma0.p1, clsSigma0.p2, clsSigma0.p3, clsSigma0.p4, clsSigma0.p5, clsSigma0.p6, incAngle); // sigma
|
||||
incAngle = pow(10.0, incAngle / 10.0); // amp
|
||||
incAngle = incAngle / (powf(4 * LAMP_CUDA_PI, 2) * powf(R, 4)); //
|
||||
|
||||
R = (R - refPhaseRange);
|
||||
R = factorjTemp * R;
|
||||
R = (R - refPhaseRange);
|
||||
R = factorjTemp * R;
|
||||
|
||||
echo_real = incAngle * cos(R) * isNearFar;
|
||||
echo_imag = incAngle * sin(R) * isNearFar;
|
||||
echo.x = echo.x + echo_real;
|
||||
echo.y = echo.y + echo_imag;
|
||||
echo_real = incAngle * cos(R) * isNearFar;
|
||||
echo_imag = incAngle * sin(R) * isNearFar;
|
||||
echo.x = echo.x + echo_real;
|
||||
echo.y = echo.y + echo_imag;
|
||||
|
||||
if (idx == 0 && tid % (10 * SHAREMEMORY_DEM_STEP) == 0) {
|
||||
printf("Idx:%d , TsID: %d, TSCOUNT: %d \n", idx, tid, demLen);
|
||||
}
|
||||
if (idx == 0 && tid % (10 * SHAREMEMORY_DEM_STEP) == 0) {
|
||||
printf("Idx:%d , TsID: %d, TSCOUNT: %d \n", idx, tid, demLen);
|
||||
}
|
||||
}
|
||||
|
||||
echodata[idx] = cuCaddf(echodata[idx], echo);
|
||||
|
@ -552,24 +696,62 @@ __global__ void CUDA_Kernel_RFPC(
|
|||
|
||||
/** 分块处理 ****************************************************************************************************************/
|
||||
|
||||
extern "C" void ProcessRFPCTask(RFPCTask& task)
|
||||
extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
|
||||
{
|
||||
size_t pixelcount = task.prfNum * task.freqNum;
|
||||
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
printf("start %d,%d ,%d,%d\n", pixelcount, task.targetnum, grid_size, BLOCK_SIZE);
|
||||
CUDA_Kernel_RFPC << <grid_size, BLOCK_SIZE >> > (
|
||||
task.antlist,
|
||||
task.prfNum,task.freqNum,
|
||||
task.goallist,
|
||||
task.targetnum,
|
||||
task.startFreq,task.stepFreq,
|
||||
task.Rref,task.Rnear,task.Rfar,
|
||||
task.sigma0_cls,
|
||||
task.d_echoData
|
||||
|
||||
double* d_R = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
|
||||
double* d_amps = (double*)mallocCUDADevice(task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(double), devid);
|
||||
|
||||
|
||||
long BLOCK_FREQNUM = NextBlockPad(task.freqNum, BLOCK_SIZE); // 256*freqBlockID
|
||||
long cudaBlocknum = 0;
|
||||
long freqpoints = BLOCK_FREQNUM;
|
||||
printf("freqpoints:%d\n", freqpoints);
|
||||
long process = 0;
|
||||
for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
|
||||
cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
Kernel_Computer_R_amp_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> >(
|
||||
task.antlist,
|
||||
task.prfNum,
|
||||
task.goallist,
|
||||
task.targetnum,
|
||||
sTi, task.targetnum,
|
||||
task.sigma0_cls,
|
||||
1,
|
||||
task.Rref,
|
||||
task.Rnear, task.Rfar,
|
||||
d_R, d_amps// 计算输出
|
||||
);
|
||||
PrintLasterError("ProcessRFPCTask");
|
||||
PrintLasterError("CUDA_Kernel_Computer_R_amp");
|
||||
|
||||
|
||||
cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
CUDA_Kernel_Computer_echo_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
|
||||
d_R, d_amps, SHAREMEMORY_FLOAT_HALF,
|
||||
task.startFreq, task.stepFreq,
|
||||
freqpoints, task.freqNum,
|
||||
task.d_echoData,
|
||||
task.prfNum
|
||||
);
|
||||
PrintLasterError("CUDA_Kernel_Computer_echo");
|
||||
|
||||
if ((sTi * 100.0 / task.targetnum) - process >= 1) {
|
||||
process = sTi * 100.0 / task.targetnum;
|
||||
PRINT("TargetID [%f]: %d / %d finished\n", sTi * 100.0 / task.targetnum, sTi, task.targetnum);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
printf("start %d \n", task.targetnum);
|
||||
|
||||
FreeCUDADevice(d_R);
|
||||
FreeCUDADevice(d_amps);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -144,7 +144,7 @@ extern "C" void CUDA_RFPC_MainProcess(
|
|||
|
||||
|
||||
extern "C" double* hostSigmaData_toDevice(int devid);
|
||||
extern "C" void ProcessRFPCTask(RFPCTask& task);
|
||||
extern "C" void ProcessRFPCTask(RFPCTask& task,long devid);
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1103,7 +1103,7 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
|
|||
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
|
||||
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
|
||||
task.sigma0_cls = clsCUDASigmaParamsDict[clsid];
|
||||
ProcessRFPCTask(task);
|
||||
ProcessRFPCTask(task,devId);
|
||||
FreeCUDADevice(task.goallist);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue