修改默认参数,提交代码用于检查
parent
bdaa4f22f7
commit
934a39cbff
|
@ -490,7 +490,10 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
|
||||||
double maxGain, double GainWeight,
|
double maxGain, double GainWeight,
|
||||||
float* d_temp_R, float* d_temp_amps// 计算输出
|
float* d_temp_R, float* d_temp_amps// 计算输出
|
||||||
) {
|
) {
|
||||||
long long idx = blockIdx.x * blockDim.x + threadIdx.x; // 获取当前的线程编码
|
long long tid = threadIdx.x;
|
||||||
|
long long bid = blockIdx.x;
|
||||||
|
long long dmx = blockDim.x;
|
||||||
|
long long idx = bid*dmx+tid; // 获取当前的线程编码
|
||||||
long long prfId = idx / SHAREMEMORY_FLOAT_HALF;
|
long long prfId = idx / SHAREMEMORY_FLOAT_HALF;
|
||||||
long long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点
|
long long posId = idx % SHAREMEMORY_FLOAT_HALF + startPosId; // 当前线程对应的影像点
|
||||||
|
|
||||||
|
@ -506,26 +509,13 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
|
||||||
double RstZ = antp.Pz - gp.Tz;
|
double RstZ = antp.Pz - gp.Tz;
|
||||||
double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // 矢量距离
|
double RstR = sqrt(RstX * RstX + RstY * RstY + RstZ * RstZ); // 矢量距离
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (RstR<NearR || RstR>FarR) {
|
if (RstR<NearR || RstR>FarR) {
|
||||||
|
d_temp_R[idx] = RstR;
|
||||||
if (prfId > 200) {
|
|
||||||
printf("error prfId:%d ,idx:%d RstR:%e antP=[%e,%e,%e] ,gp=[%e,%e,%e]\n", prfId, idx, RstR,
|
|
||||||
antp.Px, antp.Py, antp.Pz, gp.Tx, gp.Ty, gp.Tz
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
d_temp_R[idx] = 0;
|
|
||||||
d_temp_amps[idx] = 0;
|
d_temp_amps[idx] = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (prfId > 200) {
|
|
||||||
printf(" prfId:%d ,idx:%d RstR:%e antP=[%e,%e,%e] ,gp=[%e,%e,%e]\n", prfId, idx, RstR,
|
|
||||||
antp.Px, antp.Py, antp.Pz, gp.Tx, gp.Ty, gp.Tz
|
|
||||||
);
|
|
||||||
}
|
|
||||||
RstX = RstX / RstR;
|
RstX = RstX / RstR;
|
||||||
RstY = RstY / RstR;
|
RstY = RstY / RstR;
|
||||||
RstZ = RstZ / RstR;
|
RstZ = RstZ / RstR;
|
||||||
|
@ -540,8 +530,8 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
|
||||||
float localangle = acosf(dotAB / (slopR));
|
float localangle = acosf(dotAB / (slopR));
|
||||||
|
|
||||||
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
|
if (localangle < 0 || localangle >= LAMP_CUDA_PI / 2 || isnan(localangle)) {
|
||||||
d_temp_R[idx] = 0;
|
d_temp_R[idx] = RstR;
|
||||||
d_temp_amps[idx] = 0;
|
d_temp_amps[idx] = localangle;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else {}
|
else {}
|
||||||
|
@ -572,21 +562,35 @@ __global__ void Kernel_Computer_R_amp_NoAntPattern(
|
||||||
sigma = powf(10.0, sigma / 10.0);
|
sigma = powf(10.0, sigma / 10.0);
|
||||||
|
|
||||||
float temp_amp = float(ampGain * Pt * sigma);
|
float temp_amp = float(ampGain * Pt * sigma);
|
||||||
float temp_R = float(double(RstR - refPhaseRange));
|
float temp_R = float(RstR - refPhaseRange);
|
||||||
|
|
||||||
if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
|
if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
|
||||||
printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
|
|
||||||
d_temp_R[idx] = 0;
|
|
||||||
d_temp_amps[idx] = 0;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
d_temp_amps[idx] = temp_amp ;
|
||||||
d_temp_amps[idx] = temp_amp ;
|
d_temp_R[idx] = temp_R ;
|
||||||
d_temp_R[idx] = temp_R;
|
//if (prfId > 200) {
|
||||||
|
// printf("idx:%lld , prfid:%lld , d_temp_amp=%e,d_temp_R=%e\n", idx, prfId,d_temp_amps[idx], d_temp_R[idx]);
|
||||||
return;
|
//}
|
||||||
|
//return;
|
||||||
}
|
}
|
||||||
|
//if (isnan(temp_amp) || isnan(temp_R) || isinf(temp_amp) || isinf(temp_R)) {
|
||||||
|
// printf("amp is nan or R is nan,amp=%f;R=%f; \n", temp_amp, temp_R);
|
||||||
|
// d_temp_R[idx] = 0;
|
||||||
|
// d_temp_amps[idx] = 0;
|
||||||
|
// return;
|
||||||
|
//}
|
||||||
|
//else {
|
||||||
|
// if (prfId > 200) {
|
||||||
|
// printf("error prfId:%ld RstR:%e antP=[%e,%e,%e] ,gp=[%e,%e,%e],temp_amp=%e,temp_R=%e\n", prfId, RstR,
|
||||||
|
// antp.Px, antp.Py, antp.Pz, gp.Tx, gp.Ty, gp.Tz,
|
||||||
|
// temp_amp, temp_R);
|
||||||
|
// }
|
||||||
|
// d_temp_amps[idx] = temp_amp;
|
||||||
|
// d_temp_R[idx] = temp_R;
|
||||||
|
// return;
|
||||||
|
//}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -622,7 +626,9 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
|
||||||
s_R[psid] = d_temp_R[pixelId];
|
s_R[psid] = d_temp_R[pixelId];
|
||||||
s_amp[psid] = d_temp_amps[pixelId];
|
s_amp[psid] = d_temp_amps[pixelId];
|
||||||
if (prfId > 200) {
|
if (prfId > 200) {
|
||||||
printf("prfId:%d ,idx:%d s_R:%f S_amp:%f \n", prfId, idx, s_R[psid], s_amp[psid]);
|
printf(" s_R=%e S_amp=%e d_temp_R=%e d_temp_amps=%e idx=%lld prfid=%lld\n", s_R[psid], s_amp[psid],
|
||||||
|
d_temp_R[pixelId], d_temp_amps[pixelId], idx, prfId
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -667,7 +673,7 @@ __global__ void CUDA_Kernel_Computer_echo_NoAntPattern(
|
||||||
|
|
||||||
|
|
||||||
/** 分块处理 ****************************************************************************************************************/
|
/** 分块处理 ****************************************************************************************************************/
|
||||||
extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
|
extern "C" void ProcessRFPCTask(RFPCTask& task, long devid, float* h_R, float* h_amp)
|
||||||
{
|
{
|
||||||
size_t pixelcount = task.prfNum * task.freqNum;
|
size_t pixelcount = task.prfNum * task.freqNum;
|
||||||
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||||
|
@ -686,6 +692,7 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
|
||||||
long prfcount = task.prfNum;
|
long prfcount = task.prfNum;
|
||||||
long process = 0;
|
long process = 0;
|
||||||
for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
|
for (long sTi = 0; sTi < task.targetnum; sTi = sTi + SHAREMEMORY_FLOAT_HALF) {
|
||||||
|
|
||||||
cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
cudaBlocknum = (task.prfNum * SHAREMEMORY_FLOAT_HALF + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||||
Kernel_Computer_R_amp_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
|
Kernel_Computer_R_amp_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
|
||||||
task.antlist,
|
task.antlist,
|
||||||
|
@ -703,6 +710,9 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
|
||||||
PrintLasterError("CUDA_Kernel_Computer_R_amp");
|
PrintLasterError("CUDA_Kernel_Computer_R_amp");
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
|
DeviceToHost(h_R, d_R, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float));
|
||||||
|
DeviceToHost(h_amp, d_amps, task.prfNum * SHAREMEMORY_FLOAT_HALF * sizeof(float));
|
||||||
|
break;
|
||||||
|
|
||||||
cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE- 1) / BLOCK_SIZE;
|
cudaBlocknum = (task.prfNum * BLOCK_FREQNUM + BLOCK_SIZE- 1) / BLOCK_SIZE;
|
||||||
CUDA_Kernel_Computer_echo_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
|
CUDA_Kernel_Computer_echo_NoAntPattern << <cudaBlocknum, BLOCK_SIZE >> > (
|
||||||
|
@ -718,6 +728,11 @@ extern "C" void ProcessRFPCTask(RFPCTask& task, long devid)
|
||||||
process = sTi * 100.0 / task.targetnum;
|
process = sTi * 100.0 / task.targetnum;
|
||||||
PRINT("device ID : %d , TargetID [%f]: %d / %d finished %d\n", devid, sTi * 100.0 / task.targetnum, sTi, task.targetnum, devid);
|
PRINT("device ID : %d , TargetID [%f]: %d / %d finished %d\n", devid, sTi * 100.0 / task.targetnum, sTi, task.targetnum, devid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -16,11 +16,7 @@
|
||||||
|
|
||||||
|
|
||||||
extern "C" struct SateState {
|
extern "C" struct SateState {
|
||||||
double Px, Py, Pz, Vx, Vy, Vz;
|
double Px, Py, Pz, Vx, Vy, Vz,antDirectX, antDirectY, antDirectZ;
|
||||||
//double antXaxisX, antXaxisY, antXaxisZ;
|
|
||||||
//double antYaxisX, antYaxisY, antYaxisZ;
|
|
||||||
//double antZaxisX, antZaxisY, antZaxisZ;
|
|
||||||
double antDirectX, antDirectY, antDirectZ;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -152,7 +148,7 @@ extern "C" void CUDA_RFPC_MainProcess(
|
||||||
|
|
||||||
|
|
||||||
extern "C" double* hostSigmaData_toDevice(int devid);
|
extern "C" double* hostSigmaData_toDevice(int devid);
|
||||||
extern "C" void ProcessRFPCTask(RFPCTask& task,long devid);
|
extern "C" void ProcessRFPCTask(RFPCTask& task,long devid,float* h_R,float* h_amp);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -103,7 +103,7 @@
|
||||||
</size>
|
</size>
|
||||||
</property>
|
</property>
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>D:/FZSimulation/LTDQ/Input/LandCover.dat</string>
|
<string>D:/FZSimulation/LTDQ/Input/testEcho/LandCover.dat</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
@ -233,7 +233,7 @@
|
||||||
</size>
|
</size>
|
||||||
</property>
|
</property>
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>D:/FZSimulation/LTDQ/Input/DEM_XYZ.dat</string>
|
<string>D:/FZSimulation/LTDQ/Input/testEcho/DEMXYZ.dat</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
@ -350,7 +350,7 @@
|
||||||
</size>
|
</size>
|
||||||
</property>
|
</property>
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>D:/FZSimulation/LTDQ/Input/DEM_Sloper.dat</string>
|
<string>D:/FZSimulation/LTDQ/Input/testEcho/DEMSloper.dat</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
|
|
@ -1114,9 +1114,18 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
|
||||||
|
|
||||||
task.antlist = (SateState*)mallocCUDADevice(prfcount * sizeof(SateState), devId);
|
task.antlist = (SateState*)mallocCUDADevice(prfcount * sizeof(SateState), devId);
|
||||||
HostToDevice(h_antlist.get(), task.antlist, sizeof(SateState) * prfcount);
|
HostToDevice(h_antlist.get(), task.antlist, sizeof(SateState) * prfcount);
|
||||||
|
printf("h_antlist: %e,%e,%e,%e,%e,%e,%e,%e,%e \n",
|
||||||
|
h_antlist.get()[0].Px, h_antlist.get()[0].Py, h_antlist.get()[0].Pz,
|
||||||
|
h_antlist.get()[0].Vx, h_antlist.get()[0].Vy, h_antlist.get()[0].Vz,
|
||||||
|
h_antlist.get()[0].antDirectX, h_antlist.get()[0].antDirectY, h_antlist.get()[0].antDirectZ
|
||||||
|
);
|
||||||
|
testOutAmpArr(QString("antlist_%1.dat").arg(devId), (double*)(h_antlist.get()), prfcount, 9);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
float* h_R = (float*)mallocCUDAHost(sizeof(float) * prfcount * SHAREMEMORY_FLOAT_HALF); //2GB 距离
|
||||||
|
float* h_amp = (float*)mallocCUDAHost(sizeof(float) * prfcount * SHAREMEMORY_FLOAT_HALF);//2GB 强度
|
||||||
|
|
||||||
// 分块计算
|
// 分块计算
|
||||||
for (const auto& pair : clsGoalStateDict) {
|
for (const auto& pair : clsGoalStateDict) {
|
||||||
long clsid = pair.first;
|
long clsid = pair.first;
|
||||||
|
@ -1128,7 +1137,12 @@ ErrorCode RFPCProcessCls::RFPCMainProcess_GPU_NoAntPattern(size_t startprfid, si
|
||||||
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
|
task.goallist = (GoalState*)mallocCUDADevice(clscount * sizeof(GoalState), devId);
|
||||||
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
|
HostToDevice(clsGoalStateDict[clsid].get(), task.goallist, sizeof(GoalState) * clscount);
|
||||||
|
|
||||||
ProcessRFPCTask(task, devId);
|
|
||||||
|
|
||||||
|
|
||||||
|
ProcessRFPCTask(task, devId, h_R, h_amp);
|
||||||
|
testOutDataArr(QString("h_R_%1.bin").arg(devId), h_R, prfcount, SHAREMEMORY_FLOAT_HALF);
|
||||||
|
exit(-1);
|
||||||
FreeCUDADevice(task.goallist);
|
FreeCUDADevice(task.goallist);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue